Linear Models
When our input contains $d$ features, we express the prediction as:

$$\hat{y} = w_1 x_1 + \dots + w_d x_d + b$$

Vectorized:

$$\hat{y} = \mathbf{w}^\top \mathbf{x} + b$$

In matrix form, over the whole design matrix $\mathbf{X} \in \mathbb{R}^{n \times d}$:

$$\hat{\mathbf{y}} = \mathbf{X}\mathbf{w} + b$$

Loss function

Squared error loss for sample $i$:

$$l^{(i)}(\mathbf{w}, b) = \frac{1}{2}\left(\hat{y}^{(i)} - y^{(i)}\right)^2$$

Measured over $n$ samples:

$$L(\mathbf{w}, b) = \frac{1}{n}\sum_{i=1}^{n} l^{(i)}(\mathbf{w}, b) = \frac{1}{n}\sum_{i=1}^{n}\frac{1}{2}\left(\mathbf{w}^\top \mathbf{x}^{(i)} + b - y^{(i)}\right)^2$$

Optimization

Analytic solution (with $b$ absorbed into $\mathbf{w}$ via an all-ones column of $\mathbf{X}$):

$$\mathbf{w}^* = (\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{X}^\top \mathbf{y}$$

Minibatch stochastic gradient descent, with learning rate $\eta$ and minibatch $\mathcal{B}$:

$$(\mathbf{w}, b) \leftarrow (\mathbf{w}, b) - \frac{\eta}{|\mathcal{B}|}\sum_{i \in \mathcal{B}} \partial_{(\mathbf{w}, b)}\, l^{(i)}(\mathbf{w}, b)$$
Vectorization for Speed
import math
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
import random
from IPython import display
n = 10000
a = tf.ones([n])
b = tf.ones([n])
# Timer for benchmarking running time
class Timer:
    def __init__(self):
        self.times = []
        self.start()

    def start(self):
        self.tik = time.time()

    def stop(self):
        # Record the elapsed time and return it
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        return sum(self.times) / len(self.times)

    def sum(self):
        return sum(self.times)

    def cumsum(self):
        return np.array(self.times).cumsum().tolist()
c = tf.Variable(tf.zeros(n))
timer = Timer()
for i in range(n):
    c[i].assign(a[i] + b[i])
f'{timer.stop():.5f} sec'
"""
'2.87643 sec'
"""
timer.start()
d = a + b
f'{timer.stop():.5f} sec'
"""
'0.00000 sec'
"""
The Normal Distribution and Squared Loss
The probability density function of the normal distribution:

$$p(x) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(x - \mu)^2}{2\sigma^2}\right)$$
def normal(x, mu, sigma):
    p = 1 / math.sqrt(2 * math.pi * sigma ** 2)
    return p * np.exp(-0.5 / sigma**2 * (x - mu) ** 2)
def use_svg_display():
    # Note: this version renders figures as PNG despite the function name
    backend_inline.set_matplotlib_formats('png')

def set_figsize(figsize=(3.5, 2.5)):
    use_svg_display()
    plt.rcParams['figure.figsize'] = figsize
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Set the axes for matplotlib."""
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()
def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot data points."""
    if legend is None:
        legend = []
    set_figsize(figsize)
    axes = axes if axes else plt.gca()

    # Return True if X has one axis
    def has_one_axis(X):
        return (hasattr(X, "ndim") and X.ndim == 1 or isinstance(X, list)
                and not hasattr(X[0], "__len__"))

    if has_one_axis(X):
        X = [X]
    if Y is None:
        X, Y = [[]] * len(X), X
    elif has_one_axis(Y):
        Y = [Y]
    if len(X) != len(Y):
        X = X * len(Y)
    axes.cla()
    for x, y, fmt in zip(X, Y, fmts):
        if len(x):
            axes.plot(x, y, fmt)
        else:
            axes.plot(y, fmt)
    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
x = np.arange(-7, 7, 0.01)
# Mean and standard deviation pairs
params = [(0, 1), (0, 2), (3, 1)]
plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',
     ylabel='p(x)', figsize=(4.5, 2.5),
     legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])
plt.show()
One reason the squared loss can be used for linear regression is that we assume the observations contain noise, and that the noise follows a normal distribution:

$$y = \mathbf{w}^\top \mathbf{x} + b + \epsilon, \quad \epsilon \sim \mathcal{N}(0, \sigma^2)$$

The likelihood of observing a particular $y$ for a given $\mathbf{x}$ is therefore

$$P(y \mid \mathbf{x}) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(y - \mathbf{w}^\top \mathbf{x} - b)^2}{2\sigma^2}\right)$$

By maximum likelihood estimation, the optimal parameters minimize the negative log-likelihood:

$$-\log P(\mathbf{y} \mid \mathbf{X}) = \sum_{i=1}^{n} \frac{1}{2}\log(2\pi\sigma^2) + \frac{1}{2\sigma^2}\left(y^{(i)} - \mathbf{w}^\top \mathbf{x}^{(i)} - b\right)^2$$

The solution of this expression does not depend on $\sigma$; hence, under the Gaussian-noise assumption, minimizing the squared error is equivalent to maximum likelihood estimation of the linear model.
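As a quick numerical check of this equivalence (a sketch added here for illustration, not part of the original notes): with $\sigma = 1$, the negative log-likelihood differs from half the sum of squared residuals only by a constant, so both are minimized by the same parameters.

# Sketch: NLL under Gaussian noise vs. squared error, with sigma = 1 assumed
residuals = np.array([0.5, -1.2, 0.3])
sigma = 1.0
nll = np.sum(0.5 * np.log(2 * np.pi * sigma ** 2) + residuals ** 2 / (2 * sigma ** 2))
# The NLL equals a constant plus half the sum of squared residuals
print(np.isclose(nll, 3 * 0.5 * np.log(2 * np.pi) + np.sum(residuals ** 2) / 2))  # True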
Generating the Dataset
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + noise."""
    X = tf.zeros((num_examples, w.shape[0]))
    X += tf.random.normal(shape=X.shape)
    y = tf.matmul(X, tf.reshape(w, (-1, 1))) + b
    y += tf.random.normal(shape=y.shape, stddev=0.01)
    y = tf.reshape(y, (-1, 1))
    return X, y
true_w = tf.constant([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
print('features:', features[0], '\nlabel:', labels[0])
"""
features: tf.Tensor([ 0.6943889 -0.36124966], shape=(2,), dtype=float32)
label: tf.Tensor([6.813045], shape=(1,), dtype=float32)
"""
set_figsize()
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)
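Since linear regression has an analytic solution, the synthetic data can be sanity-checked with the normal equation before any iterative training. This check is an addition for illustration, not part of the original notes:

# Absorb the bias into the weights by appending a column of ones
X1 = tf.concat([features, tf.ones((features.shape[0], 1))], axis=1)
# Normal equation: w* = (X^T X)^{-1} X^T y
w_star = tf.linalg.inv(tf.transpose(X1) @ X1) @ tf.transpose(X1) @ labels
print(tf.reshape(w_star, (-1,)))  # close to [2, -3.4, 4.2]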
Reading the Dataset
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    # The examples are read in random order, with no particular sequence
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        j = tf.constant(indices[i: min(i + batch_size, num_examples)])
        yield tf.gather(features, j), tf.gather(labels, j)
batch_size = 10
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
"""
tf.Tensor(
[[-0.31621101 0.54274404]
[-0.5347021 0.2458507 ]
[ 1.1816338 -1.166924 ]
[ 0.57692814 -0.8892977 ]
[-0.4833463 -0.26053128]
[-0.40469778 -0.53955495]
[ 0.459481 1.0673087 ]
[-1.0149183 0.08969461]
[-0.02690575 0.62452453]
[ 1.8112805 -1.7324816 ]], shape=(10, 2), dtype=float32)
tf.Tensor(
[[ 1.7132785]
[ 2.2795804]
[10.5091 ]
[ 8.380901 ]
[ 4.1135254]
[ 5.2237015]
[ 1.4995631]
[ 1.8668351]
[ 2.0210123]
[13.720167 ]], shape=(10, 1), dtype=float32)
"""
Initializing Model Parameters
w = tf.Variable(tf.random.normal(shape=(2, 1), mean=0, stddev=0.01), trainable=True)
b = tf.Variable(tf.zeros(1), trainable=True)
Defining the Model
def linreg(X, w, b):
    return tf.matmul(X, w) + b
Defining the Loss Function
def squared_loss(y_hat, y):
    return (y_hat - tf.reshape(y, y_hat.shape)) ** 2 / 2
Defining the Optimization Algorithm
def sgd(params, grads, lr, batch_size):
    for param, grad in zip(params, grads):
        param.assign_sub(lr * grad / batch_size)
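A minimal sketch of how this sgd function interacts with GradientTape, using an illustrative scalar objective (not from the original notes):

# Single step minimizing (w - 3)^2 for a scalar variable
w_demo = tf.Variable(0.0)
with tf.GradientTape() as tape:
    l_demo = (w_demo - 3.0) ** 2
grads = tape.gradient(l_demo, [w_demo])
sgd([w_demo], grads, lr=0.1, batch_size=1)
print(w_demo.numpy())  # 0.6, i.e. 0 - 0.1 * (-6) / 1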
Training
lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        with tf.GradientTape() as g:
            l = loss(net(X, w, b), y)
        # Compute the gradient of l with respect to [w, b]
        dw, db = g.gradient(l, [w, b])
        # Update the parameters using their gradients
        sgd([w, b], [dw, db], lr, batch_size)
    train_l = loss(net(features, w, b), labels)
    print(f'epoch {epoch + 1}, loss {float(tf.reduce_mean(train_l)):f}')
"""
epoch 1, loss 0.046574
epoch 2, loss 0.000190
epoch 3, loss 0.000051
"""
print(f'error in estimating w: {true_w - tf.reshape(w, true_w.shape)}')
print(f'error in estimating b: {true_b - b}')
"""
error in estimating w: [-0.00029325 -0.00076914]
error in estimating b: [0.00124931]
"""
Concise Implementation of Linear Regression
# Generate the dataset
true_w = tf.constant([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
# Read the dataset
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a TensorFlow data iterator."""
    dataset = tf.data.Dataset.from_tensor_slices(data_arrays)
    if is_train:
        dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(batch_size)
    return dataset
batch_size = 10
data_iter = load_array((features, labels), batch_size)
next(iter(data_iter))
"""
(<tf.Tensor: shape=(10, 2), dtype=float32, numpy=
array([[ 1.2998679 , -1.8226012 ],
[ 1.2075672 , -1.4134687 ],
[-0.2921682 , -0.15664841],
[-0.8171001 , 0.50474656],
[ 0.49231467, -0.51207936],
[-0.66198266, -0.26582628],
[-1.556911 , -1.0870522 ],
[ 0.47099817, -0.18538263],
[ 1.9214609 , 0.41514587],
[-1.4364696 , 1.447287 ]], dtype=float32)>,
<tf.Tensor: shape=(10, 1), dtype=float32, numpy=
array([[13.005127 ],
[11.413691 ],
[ 4.137837 ],
[ 0.84807134],
[ 6.9113398 ],
[ 3.7745607 ],
[ 4.777771 ],
[ 5.780456 ],
[ 6.636665 ],
[-3.5739417 ]], dtype=float32)>)
"""
# Define the model
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(1))
# Initialize model parameters: recreate the net with a normal initializer
initializer = tf.initializers.RandomNormal(stddev=0.01)
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))
# Define the loss function
loss = tf.keras.losses.MeanSquaredError()
# Define the optimization algorithm
trainer = tf.keras.optimizers.SGD(learning_rate=0.03)
# Training
num_epochs = 3
net.compile(optimizer=trainer, loss=loss)
net.fit(data_iter, epochs=num_epochs)
"""
Epoch 1/3
100/100 [==============================] - 0s 1ms/step - loss: 1.3062e-04
Epoch 2/3
100/100 [==============================] - 0s 1ms/step - loss: 1.0071e-04
Epoch 3/3
100/100 [==============================] - 0s 1ms/step - loss: 1.0147e-04
<keras.callbacks.History at 0x20be800bfd0>
"""
w = net.get_weights()[0]
print('error in estimating w:', true_w - tf.reshape(w, true_w.shape))
b = net.get_weights()[1]
print('error in estimating b:', true_b - b)
"""
error in estimating w: tf.Tensor([8.7976456e-05 9.8037720e-04], shape=(2,), dtype=float32)
error in estimating b: [0.00094318]
"""
The Softmax Operation
The softmax function transforms the unnormalized predictions into nonnegative values that sum to 1, while keeping the model differentiable. The softmax operation does not change the ordering among the unnormalized predictions $\mathbf{o}$; it only determines the probability assigned to each class.
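A quick sketch of these two properties using TensorFlow's built-in tf.nn.softmax (an illustrative addition):

o = tf.constant([1.0, 5.0, 2.0])
p = tf.nn.softmax(o)
print(p, tf.reduce_sum(p))         # nonnegative entries that sum to 1
print(tf.argmax(o), tf.argmax(p))  # the same index wins before and after
print(tf.nn.softmax(o + 100.0))    # shifting every logit leaves the probabilities unchanged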
Cross-Entropy Loss

softmax:

$$\hat{\mathbf{y}} = \mathrm{softmax}(\mathbf{o}), \quad \hat{y}_j = \frac{\exp(o_j)}{\sum_k \exp(o_k)}$$

Cross-entropy loss:

$$l(\mathbf{y}, \hat{\mathbf{y}}) = -\sum_{j=1}^{q} y_j \log \hat{y}_j$$
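For example, with three classes, one-hot label $\mathbf{y} = (0, 0, 1)$, and prediction $\hat{\mathbf{y}} = (0.1, 0.3, 0.6)$, the loss is $l = -\log 0.6 \approx 0.51$; the more confidently the model predicts the correct class, the closer the loss is to 0.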
Softmax Regression: The Image Classification Dataset
# Read the dataset
mnist_train, mnist_test = tf.keras.datasets.fashion_mnist.load_data()
len(mnist_train[0]), len(mnist_test[0])
"""
(60000, 10000)
"""
mnist_train[0][0].shape
"""
(28, 28)
"""
def get_fashion_mnist_labels(labels):
    """Return the text labels of the Fashion-MNIST dataset."""
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a grid of images."""
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        ax.imshow(img.numpy())
        # Hide the axis ticks
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    plt.show()
    return axes
X = tf.constant(mnist_train[0][:18])
y = tf.constant(mnist_train[1][:18])
show_images(X, 2, 9, titles=get_fashion_mnist_labels(y))
# Read minibatches
batch_size = 256
train_iter = tf.data.Dataset.from_tensor_slices(mnist_train).batch(batch_size).shuffle(len(mnist_train[0]))
timer = Timer()
for X, y in train_iter:
    continue
f'{timer.stop():.2f} sec'
"""
'0.06 sec'
"""
# Putting all components together
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    mnist_train, mnist_test = tf.keras.datasets.fashion_mnist.load_data()
    # Divide all pixel values by 255 so they lie between 0 and 1,
    # add a channel dimension at the end, and cast labels to int32
    process = lambda X, y: (tf.expand_dims(X, axis=3) / 255,
                            tf.cast(y, dtype='int32'))
    resize_fn = lambda X, y: (tf.image.resize_with_pad(X, resize, resize) if resize else X, y)
    return (
        tf.data.Dataset.from_tensor_slices(process(*mnist_train)).batch(batch_size).shuffle(len(mnist_train[0])).map(resize_fn),
        tf.data.Dataset.from_tensor_slices(process(*mnist_test)).batch(batch_size).map(resize_fn)
    )
train_iter, test_iter = load_data_fashion_mnist(32, resize=64)
for X, y in train_iter:
    print(X.shape, X.dtype, y.shape, y.dtype)
    break
"""
(32, 64, 64, 1) <dtype: 'float32'> (32,) <dtype: 'int32'>
"""
# Initialize model parameters
num_inputs = 784
num_outputs = 10
W = tf.Variable(tf.random.normal(shape=(num_inputs, num_outputs), mean=0, stddev=0.01))
b = tf.Variable(tf.zeros(num_outputs))
# Define the softmax operation
X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
tf.reduce_sum(X, 0, keepdims=True), tf.reduce_sum(X, 1, keepdims=True)
"""
(<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[5., 7., 9.]], dtype=float32)>,
<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 6.],
[15.]], dtype=float32)>)
"""
def softmax(X):
    X_exp = tf.exp(X)
    partition = tf.reduce_sum(X_exp, 1, keepdims=True)
    return X_exp / partition  # broadcasting is applied here
X = tf.random.normal((2, 5), 0, 1)
X_prob = softmax(X)
X_prob, tf.reduce_sum(X_prob, 1)
"""
(<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[0.17010075, 0.26666152, 0.23135945, 0.06085074, 0.27102757],
[0.06654868, 0.05887584, 0.03383256, 0.41056633, 0.4301766 ]],
dtype=float32)>,
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>)
"""
# Define the model
def net(X):
    return softmax(tf.matmul(tf.reshape(X, (-1, W.shape[0])), W) + b)
# Define the loss function
y_hat = tf.constant([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = tf.constant([0, 2])
tf.boolean_mask(y_hat, tf.one_hot(y, depth=y_hat.shape[-1]))
"""
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.1, 0.5], dtype=float32)>
"""
def cross_entropy(y_hat, y):
    return -tf.math.log(tf.boolean_mask(y_hat, tf.one_hot(y, depth=y_hat.shape[-1])))
cross_entropy(y_hat, y)
"""
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.3025851, 0.6931472], dtype=float32)>
"""
# Classification accuracy
def accuracy(y_hat, y):
    """Compute the number of correct predictions."""
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = tf.argmax(y_hat, axis=1)
    cmp = tf.cast(y_hat, y.dtype) == y
    return float(tf.reduce_sum(tf.cast(cmp, y.dtype)))
accuracy(y_hat, y) / len(y)
"""
0.5
"""
class Accumulator:
    """Accumulate sums over n variables."""
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
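A brief usage sketch with illustrative values:

acc = Accumulator(2)
acc.add(3, 10)  # e.g. 3 correct predictions out of 10 samples
acc.add(5, 10)
print(acc[0] / acc[1])  # 0.4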
def evaluate_accuracy(net, data_iter):
    """Compute the accuracy of a model on a dataset."""
    metric = Accumulator(2)  # number of correct predictions, number of predictions
    for X, y in data_iter:
        metric.add(accuracy(net(X), y), tf.size(y))
    return metric[0] / metric[1]
train_iter, test_iter = load_data_fashion_mnist(32)
evaluate_accuracy(net, test_iter)
"""
0.1283
"""
# Training
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train the model for one epoch."""
    # Sum of training loss, sum of training accuracy, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        with tf.GradientTape() as tape:
            y_hat = net(X)
            # Keras's built-in losses take (y_true, y_pred) in that order
            if isinstance(loss, tf.keras.losses.Loss):
                l = loss(y, y_hat)
            else:
                l = loss(y_hat, y)
        if isinstance(updater, tf.keras.optimizers.Optimizer):
            params = net.trainable_variables
            grads = tape.gradient(l, params)
            updater.apply_gradients(zip(grads, params))
        else:
            updater(X.shape[0], tape.gradient(l, updater.params))
        # Keras losses return the average loss over a batch by default
        l_sum = l * float(tf.size(y)) if isinstance(loss, tf.keras.losses.Loss) else tf.reduce_sum(l)
        metric.add(l_sum, accuracy(y_hat, y), tf.size(y))
    # Return training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]
class Animator:
    """Plot data in animation."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Incrementally plot multiple lines
        if legend is None:
            legend = []
        use_svg_display()
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # Use a lambda to capture the axis-configuration arguments
        self.config_axes = lambda: set_axes(self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # Add multiple data points to the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train a model."""
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch + 1, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    assert train_loss < 0.5, train_loss
    assert train_acc <= 1 and train_acc > 0.7, train_acc
    assert test_acc <= 1 and test_acc > 0.7, test_acc
class Updater:
    """Update parameters using minibatch stochastic gradient descent."""
    def __init__(self, params, lr):
        self.params = params
        self.lr = lr

    def __call__(self, batch_size, grads):
        sgd(self.params, grads, self.lr, batch_size)
updater = Updater([W, b], lr=0.1)
num_epochs = 10
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
# Prediction
def predict_ch3(net, test_iter, n=6):
    """Predict labels and display them against the true labels."""
    for X, y in test_iter:
        break
    trues = get_fashion_mnist_labels(y)
    preds = get_fashion_mnist_labels(tf.argmax(net(X), axis=1))
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    show_images(tf.reshape(X[0:n], (n, 28, 28)), 1, n, titles=titles[0:n])
predict_ch3(net, test_iter)
# The high-level Keras API
# Initialize model parameters
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)
net = tf.keras.models.Sequential()
net.add(tf.keras.layers.Flatten(input_shape=[28, 28]))
weight_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01)
net.add(tf.keras.layers.Dense(10, kernel_initializer=weight_initializer))
# Revisiting the implementation of softmax
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
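Passing raw logits with from_logits=True lets Keras compute the cross-entropy through a numerically stable log-softmax instead of exponentiating the logits first. A sketch checking this against the manual form (an illustrative addition):

logits = tf.constant([[50.0, 0.0, -50.0]])
labels_demo = tf.constant([0])
print(loss(labels_demo, logits).numpy())         # ~0.0, computed without overflow
print(-tf.nn.log_softmax(logits)[0, 0].numpy())  # the equivalent manual computation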
# Optimization algorithm
trainer = tf.keras.optimizers.SGD(learning_rate=.1)
# Training
num_epochs = 10
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)