No, it doesn't help. The optimizer's step is the learning rate multiplied by the gradient, and once the gradient has vanished to 0, the step is 0 no matter how large the learning rate is, so raising the learning rate changes nothing. (The demo below happens to use Adam, whose update is a normalized gradient rather than lr times the raw gradient, but with a gradient of exactly 0 the Adam update is also 0.)
import torch
from torch import nn
import torch.optim as optim

network = nn.Linear(1, 1)
#network1 = nn.BatchNorm1d(1)
w = nn.Sigmoid()
tr = torch.Tensor([[100000], [200000]])    # huge raw inputs push the sigmoid deep into saturation
#tr = torch.Tensor([[1], [2]])
test = torch.Tensor([[150000], [300000]])
optimizer = optim.Adam(network.parameters(), lr=4000000000000000)   # deliberately absurd learning rate
#optimizer1 = optim.Adam(network1.parameters(), lr=0.04)

while True:    # runs until interrupted; the printed gradients stay 0 on every iteration
    network.train()
    #network1.train()
    #network1.eval()
    optimizer.zero_grad()
    #optimizer1.zero_grad()
    l = w(network(tr))
    #l = w(network1(network(tr)))
    l = (l[0] - 0) ** 2 + (l[1] - 1) ** 2    # squared error against targets 0 and 1
    l.backward()
    for name, parms in network.named_parameters():
        print('-->name:', name)
        print('-->para:', parms)
        print('-->requires_grad:', parms.requires_grad)
        print('-->grad_value:', parms.grad)
        print("===")
    optimizer.step()    # with grad == 0 the parameter update is 0 regardless of lr

The printed gradients are 0.
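The zero gradient comes from the sigmoid itself: with pre-activations this large, sigmoid saturates at 1.0 and its local derivative sigmoid(x)·(1−sigmoid(x)) underflows to exactly 0 in float32, so nothing propagates back to the linear layer. A minimal sketch of that check, written for this note rather than taken from the original code:

import torch

# a pre-activation on the same scale as the raw inputs above
x = torch.tensor(100000.0, requires_grad=True)
y = torch.sigmoid(x)
y.backward()
print(y)       # prints tensor(1., ...) -- fully saturated
print(x.grad)  # prints tensor(0.) -- the local derivative y*(1-y) is exactly 0, so lr * grad is 0 for any lr

So the fix is to bring the inputs into a sane range before the sigmoid, e.g. the commented-out tr = torch.Tensor([[1], [2]]) or the commented-out BatchNorm1d layer, not to raise the learning rate.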