Logistic Regression via Gradient Ascent
The gradient-ascent update rule is $w := w + \alpha\,\nabla_w f(w)$, where $\alpha$ is the learning rate (the code below uses $\alpha = 0.001$).
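Here $f(w)$ is the log-likelihood of the logistic model. A standard derivation (assumed here; the original post does not spell it out) gives the closed-form gradient that the code below implements:

$$\ell(w)=\sum_{i=1}^{m}\Big[y_i\log\sigma(x_i^\top w)+(1-y_i)\log\big(1-\sigma(x_i^\top w)\big)\Big],\qquad \nabla_w\,\ell(w)=X^\top\big(y-\sigma(Xw)\big)$$

where $\sigma(z)=1/(1+e^{-z})$ is the sigmoid, $X$ is the $m\times n$ design matrix, and $y$ is the label vector. The line `weights = weights + alpha * (datamat.T @ error)` in `grad_ascent` is exactly this gradient step scaled by the learning rate.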
The corresponding code is as follows:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_classification

# Build a 2-feature, 2-class dataset and prepend a column of ones so that
# weights[0] serves as the bias (intercept) term.
data_1, labels = make_classification(n_samples=400, n_features=2, n_informative=2,
                                     n_redundant=0, n_repeated=0, n_classes=2,
                                     random_state=42)
data_0 = np.ones((400, 1))
data = np.hstack((data_0, data_1))

def sigmoid(inX):
    return 1 / (1 + np.exp(-inX))

def grad_ascent():
    datamat = np.asarray(data)                     # (m, n) design matrix
    labelsmat = np.asarray(labels).reshape(-1, 1)  # (m, 1) label column
    m, n = datamat.shape
    weights = np.ones((n, 1))  # initialize the weights
    alpha = 0.001              # learning rate
    iters = 100                # number of iterations
    for i in range(iters):
        h = sigmoid(datamat @ weights)                   # predicted probabilities
        error = labelsmat - h                            # y - sigmoid(Xw)
        weights = weights + alpha * (datamat.T @ error)  # ascend the gradient
    return weights
def plotBestFit(weights):
    dataArr = np.array(data)  # convert to a NumPy array
    n = np.shape(dataArr)[0]  # number of samples
    xcord1 = []
    ycord1 = []               # positive samples
    xcord2 = []
    ycord2 = []               # negative samples
    for i in range(n):        # split the points by label
        if int(labels[i]) == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])  # label 1: positive
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])  # label 0: negative
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')  # plot positive samples
    ax.scatter(xcord2, ycord2, s=30, c='green')            # plot negative samples
    x = np.arange(-2.0, 2.0, 0.01)  # x range for the boundary line
    y = (-weights[0] - weights[1] * x) / weights[2]  # decision boundary: w0 + w1*x1 + w2*x2 = 0
    plt.axis([-4, 4, -4, 4])
    ax.plot(x, np.ravel(y))
    plt.title('BestFit')  # title
    plt.xlabel('X1')
    plt.ylabel('X2')      # axis labels
    plt.show()

if __name__ == '__main__':
    plotBestFit(grad_ascent())
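As a quick sanity check (not part of the original post), the learned weights can also be used to classify the training set by thresholding the sigmoid output at 0.5. The helper below is a minimal sketch under that assumption; the name `train_accuracy` is hypothetical:

def train_accuracy(weights):
    # Hypothetical helper: predict 1 when sigmoid(Xw) >= 0.5, else 0,
    # then compare the predictions against the true labels.
    probs = sigmoid(np.asarray(data) @ weights).ravel()
    preds = (probs >= 0.5).astype(int)
    return np.mean(preds == labels)

# Example: print(train_accuracy(grad_ascent())) prints the training accuracy.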
Source: https://blog.51cto.com/u_16248600/7378095