线性回归
导入库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
人工数据集
'''
n = 100
true_theta = np.array([[1], [1]])
X = np.insert(np.random.normal(5, 1, size=(n, 1)), 0, 1, 1)
y = X @ true_theta + np.random.normal(0, 0.04, size=(n, 1))
X,y
plt.scatter(X[:,1],y)
'''
'\nn = 100\ntrue_theta = np.array([[1], [1]])\nX = np.insert(np.random.normal(5, 1, size=(n, 1)), 0, 1, 1)\ny = X @ true_theta + np.random.normal(0, 0.04, size=(n, 1))\nX,y\nplt.scatter(X[:,1],y)\n'
导入ML-ex1data1数据
# Load exercise dataset ex1data1.txt (presumably the Andrew Ng ML course
# single-feature set — confirm) with columns x (feature) and y (target).
data = pd.read_csv("ex1data1.txt", names=['x','y'])
# Prepend a column of ones so theta[0] acts as the intercept term.
data.insert(0, "Ones", 1)
n = data.shape[0]  # number of training examples
m = data.shape[1]-1  # number of parameters (bias column + feature columns)
# Design matrix: every column except the target.
X = np.array(data.iloc[:,:-1])
# Target as an (n, 1) column vector.
y = np.array(data.y).reshape(-1,1)
data
# Visualize the raw feature (last column of X) against the target.
plt.scatter(X[:,-1],y)
<matplotlib.collections.PathCollection at 0x7f3d028c97e0>
导入ML-ex1data2数据
'''
data = pd.read_csv("ex1data2.txt", names=['x1', 'x2', 'y'])
data.insert(0, "Ones", 1)
n = data.shape[0]
m = data.shape[1]-1
X = np.array(data.iloc[:,:-1])
y = np.array(data.y).reshape(-1,1)
n,m,X,y
plt.scatter(X[:,-2],y)
'''
损失函数
def compterCost(X, theta, y):
    """Sum-of-squared-errors cost for linear regression.

    Args:
        X: (n, m) design matrix whose first column is all ones.
        theta: (m, 1) parameter column vector.
        y: (n, 1) target column vector.

    Returns:
        The scalar cost sum((X @ theta - y)**2) as a Python float.

    Note: the cost carries no 1/(2n) factor, which is consistent with
    getGradient below (both are derived from the unnormalized objective).
    """
    delta = X @ theta - y
    # delta.T @ delta is a (1, 1) array; .item() extracts the scalar so
    # callers can store it in `cost[i]` without relying on NumPy's
    # deprecated implicit size-1-array-to-scalar conversion.
    return (delta.T @ delta).item()
# Sanity check: cost at theta = 0 (the recorded output for ex1data1 follows).
compterCost(X, np.zeros([m,1]), y)
array([[6222.11037223]])
梯度
def getGradient(X, theta, y):
    """Gradient of the unnormalized squared-error cost w.r.t. theta.

    Args:
        X: (n, m) design matrix.
        theta: (m, 1) parameter column vector.
        y: (n, 1) target column vector.

    Returns:
        (m, 1) gradient vector X.T @ (X @ theta - y).
    """
    residual = X @ theta - y
    return X.T @ residual
# Sanity check: gradient at theta = 0 (recorded output follows).
getGradient(X, np.zeros([m,1]), y)
array([[ -566.3961 ],
[-6336.89842532]])
梯度下降
def gradientDescent(X, theta, y, alpha, iters):
    """Batch gradient descent for linear regression.

    Args:
        X: (n, m) design matrix (first column all ones).
        theta: (m, 1) initial parameter vector (not mutated in place;
            each step rebinds a fresh array).
        y: (n, 1) target column vector.
        alpha: learning rate.
        iters: number of update steps.

    Returns:
        The (m, 1) parameter vector after `iters` updates.

    Side effects: prints the cost at every step and plots the cost curve
    with matplotlib.
    """
    cost = np.zeros(iters + 1)
    # np.asarray(...).item() yields a plain scalar whether compterCost
    # returns a float or a (1, 1) array, avoiding NumPy's deprecated
    # implicit size-1-array-to-scalar conversion on `cost[i] = ...`.
    cost[0] = np.asarray(compterCost(X, theta, y)).item()
    print(f"loop {0}'s cost is {cost[0]}")
    for i in range(iters):
        theta = theta - getGradient(X, theta, y) * alpha
        cost[i + 1] = np.asarray(compterCost(X, theta, y)).item()
        print(f"loop {i+1}'s cost is {cost[i+1]}")
    # Cost-vs-iteration curve for convergence inspection.
    plt.plot(range(iters + 1), cost)
    return theta
# Fit: start from theta = 0 and run 2000 steps. The gradient is
# unnormalized (no 1/n factor), hence the very small learning rate.
theta = np.zeros([m, 1])
theta = gradientDescent(X, theta, y, 0.000003, 2000)
预测函数
def getMy(x, coef=None):
    """Predict targets for raw feature rows using fitted parameters.

    Args:
        x: (k, m-1) array of raw feature rows (without the bias column).
        coef: optional (m, 1) parameter column vector. Defaults to the
            module-level `theta` fitted by gradientDescent, preserving
            the original behavior.

    Returns:
        (k, 1) array of predictions [1 | x] @ coef.
    """
    if coef is None:
        coef = theta  # fall back to the globally fitted parameters
    # Prepend the bias column of ones, then apply the linear model.
    return np.insert(x, 0, 1, 1) @ coef
#getMy(np.array([[1],[2]]))
#theta
拟合图像
# Plot the training points together with the fitted regression line.
plt.scatter(X[:,-1], y)
#x = np.arange(5,23,0.5)
#plt.plot(x, getMy(x.reshape(-1,1)))
# Draw the fitted line through two predicted points at x = 1 and x = 2.
plt.axline([1,getMy(np.array([[1]]))[0][0]], xy2 = [2,getMy(np.array([[2]]))[0][0]])
#plt.plot(x, np.insert(x.reshape(-1,1), 0, 1, 1)@true_theta)
<matplotlib.lines._AxLine at 0x7f3d006718d0>
标签:plt,回归,np,cost,线性,theta,array,data
From: https://www.cnblogs.com/RanX2018/p/16742881.html