The book 《深度学习的数学》 (*Mathematics of Deep Learning*) by Yoshiyuki Wakui and Sadami Wakui is clear and easy to follow, and it even demonstrates the neural-network calculations in Excel, which is a nice touch. Excel alone feels a little limited, though; a code demonstration would round it out.
After searching online for quite a while without finding one, I wrote my own (Python + PyTorch), offered here for fellow beginners studying neural networks.
(Note: this reproduces section 4-4 of the book, "Experiencing the error backpropagation method with a neural network". The training data are 64 patterns of 0s and 1s on a 3x4 grid, the cost function is the sum of squared errors, and the activation function is the Sigmoid function.)
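For orientation, the two formulas the example is built on can be stated directly in code. This is a minimal sketch; the names sigmoid, cost, a, and t are illustrative, not from the book's listing:

import torch

def sigmoid(z):
    # the Sigmoid activation: 1 / (1 + e^(-z))
    return 1.0 / (1.0 + torch.exp(-z))

def cost(a, t):
    # squared-error cost: C = (1/2) * sum of (output - label)^2 over all units and samples
    return ((a - t) ** 2).sum() / 2

The main program below lets PyTorch handle both (nn.Sigmoid plus a hand-written loss), so the intermediate values can be printed and compared against the book.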
The main program, demo44.py:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch import cosine_similarity
import demo44data as demo
# Define a simple neural network: 12 inputs -> 3 hidden units -> 2 outputs
class SimpleNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        # self.activation = nn.ReLU()
        self.activation = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_size, output_size)
        init.normal_(self.fc1.weight, mean=0.0, std=0.5)  # normal-distribution weight init (overwritten below)
        init.normal_(self.fc2.weight, mean=0.0, std=0.5)  # normal-distribution weight init (overwritten below)
        init.normal_(self.fc1.bias, mean=0.0, std=0.5)    # normal-distribution bias init (overwritten below)
        init.normal_(self.fc2.bias, mean=0.0, std=0.5)    # normal-distribution bias init (overwritten below)
        self.fc1.weight.data = demo.get_param1()  # comment out these four lines to keep the normal-distribution init
        self.fc1.bias.data = demo.get_param2()
        self.fc2.weight.data = demo.get_param3()
        self.fc2.bias.data = demo.get_param4()
    def forward(self, x):
        x = self.fc1(x)
        print_x("z21=", x)  # weighted input of the hidden layer (the book's z^2 values)
        x = self.activation(x)
        print_x("a21=", x)  # activation of the hidden layer (the book's a^2 values)
        x = self.fc2(x)
        print_x("z31=", x)  # weighted input of the output layer (the book's z^3 values)
        x = self.activation(x)
        print_x("a31=", x)  # activation of the output layer (the book's a^3 values)
        return x
# Print one layer's intermediate values, one batch sample per tab-separated column
# (skipped for single, non-batched samples)
def print_x(name, x):
    if x.dim() > 1:
        print(name, end='')
        print("", x[0, :].data.numpy(), end='')
        for i in range(x.size()[0]):
            if i > 0:
                print("\t\t", x[i, :].data.numpy(), end='')
        print()
# Print the current parameters: a 2-D weight matrix row by row, a 1-D bias vector on one line
def print_params(params):
    for param in params:
        if param.dim() > 1:
            for i in range(param.size()[0]):
                print('\t[', end='')
                print(param[i, 0].data.numpy(), end='')
                for j in range(param.size()[1]):
                    if j > 0:
                        print('\t', param[i, j].data.numpy(), end='')
                print('] ', end='')
            print()
        else:
            print('\t[', end='')
            print(param[0].data.numpy(), end='')
            for i in range(param.size()[0]):
                if i > 0:
                    print('\t', param[i].data.numpy(), end='')
            print('] ')
            print()
# The book's cost function is the sum of squared errors divided by 2.
# torch's built-in nn.MSELoss(reduction='sum') returns twice that, so its result would need halving.
# To match the book's numbers exactly, the cost is implemented by hand here
# (a quick equivalence check appears after the listing).
def mse_loss(x, y):
    z = x - y
    # print the per-sample cost c, then return the total cost C
    print(" c= ", end='')
    print(((z[0, 0] ** 2 + z[0, 1] ** 2) / 2).data.numpy(), end='')
    for i in range(z.size()[0]):
        if i > 0:
            print("\t", ((z[i, 0] ** 2 + z[i, 1] ** 2) / 2).data.numpy(), end='')
    print()
    return (z[:, 0] ** 2 + z[:, 1] ** 2).sum() / 2
# Hyperparameters
input_size = 12
hidden_size = 3
output_size = 2
learning_rate = 0.2
epochs = 1000
# Create the model
model = SimpleNet(input_size, hidden_size, output_size)
# print_params(model.parameters())
# Define the loss function and the optimizer
# criterion = nn.MSELoss(reduction='sum')  # torch's built-in loss; criterion(outputs, labels) would need dividing by 2
# criterion = nn.CrossEntropyLoss()  # usually the better choice for classification, skipped here to match the book's numbers
criterion = mse_loss
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # the Adam optimizer, a common alternative
optimizer = optim.SGD(model.parameters(), lr=learning_rate)  # plain gradient descent, as in the book
# Prepare the data set and the labels:
# the data set is a tensor of input patterns, the labels a tensor of expected outputs
data = demo.get_data()
labels = demo.get_result()
# Train the model
for epoch in range(epochs):
    print("\nepoch=", epoch + 1)
    print_params(model.parameters())
    # Forward pass
    outputs = model(data)
    loss = criterion(outputs, labels)
    print(" C=", loss.data.numpy())
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()   # backpropagation computes the gradients
    optimizer.step()  # the optimizer updates the network parameters
    # Report and stop once the cost is small enough (or the epoch budget runs out)
    # if (epoch + 1) % 10 == 0:
    if loss.item() < 0.25 or epoch + 1 == epochs:
        print("\n===========================")
        print("loss=", loss)
        print_params(model.parameters())
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item()}")
        break
print("\n===========================")
# 将模型设置为评估模式
model.eval()
output = model(demo.get_test0()).data
# output = torch.sigmoid(model(demo.get_test0())).data
print(torch.round(output, decimals=0), output)
output = model(demo.get_test1()).data
# output = torch.sigmoid(model(demo.get_test1())).data
print(torch.round(output, decimals=0), output)
print("\n======= 比对全部结果 ======")
result = demo.get_result()
data = demo.get_data()
i = 0
for d in data:
output = model(d).data
# output = torch.sigmoid(model(d)).data
print(i + 1, "=", torch.round(output, decimals=0).numpy(), "\t(", output.numpy(), ")")
result[i, :] = torch.round(output, decimals=0)
i += 1
print("准确度:", (cosine_similarity(result, demo.get_result()) * 100).mean().data.numpy(), "%")
The data module, demo44data.py:
import torch
def get_data():
    # 64 training patterns: 3x4 grids of 0/1 pixels flattened to 12 inputs;
    # the first 32 draw the digit "0", the last 32 the digit "1"
    return torch.tensor([[
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 0.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
0.0, 1.0, 1.0], [
0.0, 0.0, 0.0,
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 0.0, 0.0,
0.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 0.0, 0.0,
1.0, 1.0, 0.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 0.0, 0.0,
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 0.0], [
0.0, 0.0, 0.0,
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
0.0, 1.0, 1.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0,
0.0, 0.0, 0.0], [
0.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0,
0.0, 0.0, 0.0], [
1.0, 1.0, 0.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0,
0.0, 0.0, 0.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 0.0,
0.0, 0.0, 0.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
0.0, 1.0, 1.0,
0.0, 0.0, 0.0], [
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 0.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 0.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0], [
1.0, 1.0, 1.0,
1.0, 0.0, 1.0,
0.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 1.0,
0.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 1.0,
1.0, 0.0, 0.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 0.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0], [
0.0, 1.0, 1.0,
1.0, 0.0, 1.0,
0.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 1.0,
0.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
1.0, 0.0, 0.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
1.0, 0.0, 1.0,
1.0, 0.0, 0.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 0.0, 1.0], [
1.0, 1.0, 0.0,
1.0, 0.0, 1.0,
0.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
0.0, 0.0, 1.0,
1.0, 0.0, 1.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 1.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 1.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 1.0,
0.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 1.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 1.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 1.0,
0.0, 1.0, 0.0,
1.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 1.0,
1.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 1.0,
0.0, 1.0, 1.0,
0.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 1.0,
0.0, 1.0, 1.0,
0.0, 1.0, 1.0,
0.0, 1.0, 1.0], [
1.0, 1.0, 0.0,
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
1.0, 1.0, 0.0,
1.0, 1.0, 0.0,
1.0, 1.0, 0.0], [
1.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 0.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 0.0, 0.0], [
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 1.0], [
0.0, 1.0, 0.0,
0.0, 0.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 0.0,
1.0, 1.0, 0.0], [
0.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
1.0, 1.0, 0.0], [
0.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 0.0], [
0.0, 1.0, 0.0,
0.0, 0.0, 1.0,
0.0, 0.0, 1.0,
0.0, 1.0, 0.0], [
0.0, 1.0, 0.0,
1.0, 0.0, 0.0,
1.0, 0.0, 0.0,
0.0, 1.0, 0.0]])
def get_result():
    # One-hot labels matching get_data(): the first 32 patterns are the digit "0" -> [1, 0],
    # the last 32 are the digit "1" -> [0, 1]
    return torch.tensor([[1.0, 0.0]] * 32 + [[0.0, 1.0]] * 32)
def get_result2():
    # The same labels as class indices (suitable for, e.g., nn.CrossEntropyLoss); unused by demo44.py
    return torch.tensor([0] * 32 + [1] * 32)
def get_test0():
    # a "0" test pattern
    return torch.tensor([
        1.0, 1.0, 1.0,
        1.0, 0.0, 1.0,
        1.0, 0.0, 1.0,
        1.0, 1.0, 1.0])
def get_test1():
    # a "1" test pattern
    return torch.tensor([
        0.0, 1.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 1.0, 0.0])
def get_param1():
    # fixed initial weights of fc1 (hidden layer, 3x12), so the run reproduces the book's numbers
    return torch.tensor([
        [0.490, 0.348, 0.073, 0.837, -0.071, -3.617, -0.536, -0.023, -1.717, -1.456, -0.556, 0.852],
        [0.442, -0.537, 1.008, 1.072, -0.733, 0.823, -0.453, -0.014, -0.027, -0.427, 1.876, -2.305],
        [0.654, -1.389, 1.246, 0.057, -0.183, -0.743, -0.461, 0.331, 0.449, -1.296, 1.569, -0.471]])
def get_param2():
    # fixed initial biases of fc1 (3,)
    return torch.tensor([-0.185, 0.526, -1.169])
def get_param3():
    # fixed initial weights of fc2 (output layer, 2x3)
    return torch.tensor([
        [0.388, 0.803, 0.029],
        [0.025, -0.790, 1.553]])
def get_param4():
    # fixed initial biases of fc2 (2,)
    return torch.tensor([-1.438, -1.379])
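With both files in the same directory, python demo44.py runs the whole experiment. As a final sketch, a few shape assertions (hypothetical, not part of the original listing) summarize what the data module provides:

import demo44data as demo

assert demo.get_data().shape == (64, 12)   # 64 patterns, 12 pixels each
assert demo.get_result().shape == (64, 2)  # 64 one-hot labels
assert demo.get_param1().shape == (3, 12)  # hidden-layer weights
assert demo.get_param3().shape == (2, 3)   # output-layer weights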