1. Stochastic gradient descent
When computing the gradient, draw a single sample at random.
import numpy as np

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]   # prepend a column of ones for the bias term

n_epochs = 10000
learn_rate = 0.001
m = 100
theta = np.random.randn(2, 1)       # random initialization

for epoch in range(n_epochs):
    random_index = np.random.randint(m)
    Xi = X_b[random_index: random_index + 1]   # slice out a single sample
    yi = y[random_index: random_index + 1]     # slice out the matching target
    gradient = Xi.T.dot(Xi.dot(theta) - yi)    # gradient Xi.T * (Xi*theta - yi); Xi, yi can be all samples or any subset
    # (the constant 2/m factor of the MSE gradient is left out and absorbed into the learning rate)
    theta = theta - learn_rate * gradient

print(theta)
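To sanity-check the result: theta should land near the true parameters [4, 3] used to generate the data. A minimal check (not part of the original post) compares it with the closed-form normal-equation solution:

# closed-form least-squares solution on the same X_b, y for comparison
theta_exact = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
print(theta_exact)   # close to [[4], [3]] up to the noise in y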
2. Mini-batch gradient descent
When computing the gradient, draw a subset of the samples.
import numpy as np

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]

n_epochs = 10000
learn_rate = 0.001
batch_size = 10
m = 100
num_batch = m // batch_size   # number of mini-batches per epoch
theta = np.random.randn(2, 1)

for epoch in range(n_epochs):
    for i in range(num_batch):
        random_index = np.random.randint(m)
        Xi = X_b[random_index: random_index + batch_size]   # slice out batch_size samples
        yi = y[random_index: random_index + batch_size]     # slice out the matching targets
        gradient = Xi.T.dot(Xi.dot(theta) - yi)              # same gradient formula; Xi, yi can be all samples or any subset
        theta = theta - learn_rate * gradient

print(theta)
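One quirk of slicing from a random start index (unchanged from the original code): when random_index falls within batch_size of the end of the array, NumPy simply returns a shorter batch rather than raising an error, e.g.:

# with m = 100 and batch_size = 10, a start index of 95 yields only 5 rows
Xi = X_b[95: 95 + batch_size]
print(Xi.shape)   # (5, 2)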
3. Optimization 1
Shuffle the samples at the start of each epoch, between the outer and inner for loops:
arr = np.arange(len(X_b))   # build an index array
np.random.shuffle(arr)      # shuffle the indices in place
# index X_b and y with the same shuffled array so each sample stays paired with its target
X_b = X_b[arr]
y = y[arr]
import numpy as np

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]

n_epochs = 10000
learn_rate = 0.001
batch_size = 10
m = 100
num_batch = m // batch_size
theta = np.random.randn(2, 1)

for epoch in range(n_epochs):
    arr = np.arange(len(X_b))   # build an index array
    np.random.shuffle(arr)      # shuffle the indices in place
    # index X_b and y with the same shuffled array so samples stay paired with their targets
    X_b = X_b[arr]
    y = y[arr]
    for i in range(num_batch):
        random_index = np.random.randint(m)
        Xi = X_b[random_index: random_index + batch_size]   # slice out batch_size samples
        yi = y[random_index: random_index + batch_size]
        gradient = Xi.T.dot(Xi.dot(theta) - yi)
        theta = theta - learn_rate * gradient

print(theta)
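Because the data is reshuffled every epoch, a common variant (not what the code above does) walks through the shuffled array in order instead of picking a random start index, so every sample is used exactly once per epoch:

# hypothetical inner loop: sequential mini-batches over the shuffled data
for i in range(num_batch):
    Xi = X_b[i * batch_size: (i + 1) * batch_size]
    yi = y[i * batch_size: (i + 1) * batch_size]
    gradient = Xi.T.dot(Xi.dot(theta) - yi)
    theta = theta - learn_rate * gradient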
4. Optimization 2
Decrease the learning rate as the number of iterations grows:
def schedule_learn_rate(t):
    return t0 / (t + t1)
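With t0 = 5 and t1 = 500 (the values used below), the schedule starts at 0.01 and decays smoothly toward zero; a quick check of a few values:

t0, t1 = 5, 500
for t in (0, 500, 5000, 50000):
    print(t, t0 / (t + t1))   # 0.01, 0.005, ~0.00091, ~0.000099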
import numpy as np

X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
X_b = np.c_[np.ones((100, 1)), X]

n_epochs = 10000
learn_rate = 0.001
batch_size = 10
m = 100
num_batch = m // batch_size
theta = np.random.randn(2, 1)

t0, t1 = 5, 500
def schedule_learn_rate(t):
    return t0 / (t + t1)

for epoch in range(n_epochs):
    arr = np.arange(len(X_b))   # build an index array
    np.random.shuffle(arr)      # shuffle the indices in place
    # index X_b and y with the same shuffled array so samples stay paired with their targets
    X_b = X_b[arr]
    y = y[arr]
    for i in range(num_batch):
        random_index = np.random.randint(m)
        Xi = X_b[random_index: random_index + batch_size]   # slice out batch_size samples
        yi = y[random_index: random_index + batch_size]
        gradient = Xi.T.dot(Xi.dot(theta) - yi)
        learn_rate = schedule_learn_rate(epoch * num_batch + i)   # t counts the parameter updates done so far
        theta = theta - learn_rate * gradient

print(theta)
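For reference, scikit-learn ships an SGD-based linear regressor that applies the same idea (single-sample updates with a decaying learning rate); a minimal sketch on the same data, assuming X and y from above:

from sklearn.linear_model import SGDRegressor

sgd_reg = SGDRegressor(max_iter=1000, eta0=0.01)
sgd_reg.fit(X, y.ravel())                   # takes the raw X, not X_b; the intercept is handled internally
print(sgd_reg.intercept_, sgd_reg.coef_)    # should come out close to 4 and 3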
From: https://www.cnblogs.com/cavalier-chen/p/17895519.html