A BN layer only alleviates the vanishing-gradient problem to a degree; it does not solve it completely. If the input values differ too wildly in scale, the loss can still fail to move at all even after a BN layer is added. Code:
import torch
from torch import nn
import torch.optim as optim

# A 1-d toy model: Linear -> BatchNorm1d -> Sigmoid.
network = nn.Linear(1, 1)
network1 = nn.BatchNorm1d(1)
w = nn.Sigmoid()

# Two training samples whose magnitudes differ by ~13 orders of magnitude.
tr = torch.Tensor([[1], [20000000000000]])
# tr = torch.Tensor([[1], [2]])  # small-range inputs for comparison
test = torch.Tensor([[150000], [300000]])  # defined but unused in this experiment

optimizer = optim.Adam(network.parameters(), lr=0.04)
optimizer1 = optim.Adam(network1.parameters(), lr=0.04)

for i in range(5000):
    network.train()
    network1.train()
    # network1.eval()
    optimizer.zero_grad()
    optimizer1.zero_grad()
    l = w(network1(network(tr)))
    # print(network1(network(tr)))
    # Targets: first sample -> 0, second sample -> 1.
    l = (l[0] - 0) ** 2 + (l[1] - 1) ** 2
    l.backward()
    optimizer.step()
    optimizer1.step()
    print(l)
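For comparison, here is a minimal sketch (not from the original post) that standardizes the raw inputs by hand before they reach the linear layer, instead of relying on BN inside the network. The pre-scaling step and variable names are assumptions added for illustration; everything else mirrors the script above:

import torch
from torch import nn, optim

network = nn.Linear(1, 1)
w = nn.Sigmoid()

raw = torch.Tensor([[1], [20000000000000]])
# Assumption: standardize the batch to zero mean and unit variance
# before it ever enters the model.
tr = (raw - raw.mean()) / raw.std()

optimizer = optim.Adam(network.parameters(), lr=0.04)
for i in range(5000):
    optimizer.zero_grad()
    out = w(network(tr))
    loss = (out[0] - 0) ** 2 + (out[1] - 1) ** 2
    loss.backward()
    optimizer.step()
print(loss.item())

With the inputs pre-scaled to roughly ±0.7, the sigmoid should no longer saturate and the loss should fall toward 0, which is consistent with the post's point: a BN layer inside the network is not a full substitute for sensibly scaled inputs.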