points, directions = generate_sequences(n=256, seed=13)  # corner sequences and their drawing directions
And then let’s visualize the first five squares:
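The plotting code isn't reproduced here, so below is a minimal matplotlib sketch; it assumes points has shape (n, 4, 2) — four 2D corners per square — and that directions holds one 0/1 label per square, neither of which is shown in this excerpt.

import matplotlib.pyplot as plt

# Sketch: plot the first five squares, closing each polygon
# by repeating its first corner (assumed shapes, see above)
fig, axs = plt.subplots(1, 5, figsize=(15, 3))
for i, ax in enumerate(axs):
    corners = points[i]
    xs = list(corners[:, 0]) + [corners[0, 0]]
    ys = list(corners[:, 1]) + [corners[0, 1]]
    ax.plot(xs, ys, marker='o')
    ax.set_title(f'Direction: {directions[i]}')
    ax.set_xlim(-2, 2)
    ax.set_ylim(-2, 2)
plt.tight_layout()
plt.show()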
Next comes the encoder, which processes the source sequence and returns the corresponding sequence of hidden states:

class Encoder(nn.Module):
    def __init__(self, n_features, hidden_dim):
        super().__init__()
        self.n_features = n_features
        self.hidden_dim = hidden_dim
        self.hidden = None
        self.basic_rnn = nn.GRU(self.n_features, self.hidden_dim, batch_first=True)

    def forward(self, x):
        rnn_out, self.hidden = self.basic_rnn(x)
        return rnn_out  # N, L, H
Next, let's take the coordinates of a "perfect" square and split it into source and target sequences:
full_seq = torch.tensor([[-1, -1], [-1, 1], [1, 1], [1, -1]]).float().view(1, 4, 2)
source_seq = full_seq[:, :2]  # first two corners
target_seq = full_seq[:, 2:]  # last two corners
Now, let’s encode the source sequence and take the final hidden state:
torch.manual_seed(21)
encoder = Encoder(n_features=2, hidden_dim=2)
hidden_seq = encoder(source_seq)   # output is N, L, H
hidden_final = hidden_seq[:, -1:]  # takes last hidden state
hidden_final

tensor([[[ 0.3105, -0.5263]]], grad_fn=<SliceBackward0>)
The decoder model is actually quite similar to the models we developed in Chapter 8:
class Decoder(nn.Module):
    def __init__(self, n_features, hidden_dim):
        super().__init__()
        self.n_features = n_features
        self.hidden_dim = hidden_dim
        self.hidden = None
        self.basic_rnn = nn.GRU(self.n_features, self.hidden_dim, batch_first=True)
        self.regression = nn.Linear(self.hidden_dim, self.n_features)

    def init_hidden(self, hidden_seq):
        # We only need the final hidden state
        hidden_final = hidden_seq[:, -1:]  # N, 1, H
        # Initialize the decoder's hidden state with the encoder's final
        # hidden state, but we need to make it sequence-first
        self.hidden = hidden_final.permute(1, 0, 2)  # 1, N, H

    def forward(self, x):
        # x is N, 1, F
        # The recurrent layer both uses and updates the hidden state
        batch_first_output, self.hidden = self.basic_rnn(x, self.hidden)
        last_output = batch_first_output[:, -1:]
        out = self.regression(last_output)
        # The output has the same shape as the input (N, 1, F)
        return out.view(-1, 1, self.n_features)
torch.manual_seed(21)
decoder = Decoder(n_features=2, hidden_dim=2)

# Initial hidden state will be encoder's final hidden state
decoder.init_hidden(hidden_seq)
# Initial data point is the last element of the source sequence
inputs = source_seq[:, -1:]

target_len = 2
for i in range(target_len):
    print(f'Hidden: {decoder.hidden}')
    out = decoder(inputs)  # Predicts coordinates
    print(f'Output: {out}\n')
    # Predicted coordinates are next step's inputs
    inputs = out
Hidden: tensor([[[ 0.3105, -0.5263]]], grad_fn=<PermuteBackward0>)
Output: tensor([[[-0.2339, 0.4702]]], grad_fn=<ViewBackward0>)

Hidden: tensor([[[ 0.3913, -0.6853]]], grad_fn=<StackBackward0>)
Output: tensor([[[-0.0226, 0.4628]]], grad_fn=<ViewBackward0>)
During training, though, the actual target sequence is available, so we can feed the decoder the real elements instead of its own predictions. This is called teacher forcing:

# Initial hidden state will be encoder's final hidden state
decoder.init_hidden(hidden_seq)
# Initial data point is the last element of the source sequence
inputs = source_seq[:, -1:]

target_len = 2
for i in range(target_len):
    print(f'Hidden: {decoder.hidden}')
    out = decoder(inputs)  # Predicts coordinates
    print(f'Output: {out}\n')
    # But completely ignores the predictions and uses real data instead
    inputs = target_seq[:, i:i+1]
Hidden: tensor([[[ 0.3105, -0.5263]]], grad_fn=<PermuteBackward0>)
Output: tensor([[[-0.2339, 0.4702]]], grad_fn=<ViewBackward0>)

Hidden: tensor([[[ 0.3913, -0.6853]]], grad_fn=<StackBackward0>)
Output: tensor([[[0.2265, 0.4529]]], grad_fn=<ViewBackward0>)
Now, a bad prediction can only be traced to the model itself, and any bad predictions in previous steps have no effect whatsoever.
We can also mix both approaches, drawing a random number at each step to decide whether to use teacher forcing or the model's own prediction:

# Initial hidden state is encoder's final hidden state
decoder.init_hidden(hidden_seq)
# Initial data point is the last element of the source sequence
inputs = source_seq[:, -1:]

teacher_forcing_prob = 0.5
target_len = 2
for i in range(target_len):
    print(f'Hidden: {decoder.hidden}')
    out = decoder(inputs)
    print(f'Output: {out}\n')
    # If it is teacher forcing
    if torch.rand(1) <= teacher_forcing_prob:
        # Takes the actual element
        inputs = target_seq[:, i:i+1]
    else:
        # Otherwise uses the last predicted output
        inputs = out
Hidden: tensor([[[ 0.3105, -0.5263]]], grad_fn=<PermuteBackward0>)
Output: tensor([[[-0.2339, 0.4702]]], grad_fn=<ViewBackward0>)

Hidden: tensor([[[ 0.3913, -0.6853]]], grad_fn=<StackBackward0>)
Output: tensor([[[-0.0226, 0.4628]]], grad_fn=<ViewBackward0>)
Finally, we can put the encoder and the decoder together into a single model that splits the input, handles teacher forcing, and loops over the target length:

class EncoderDecoder(nn.Module):
    def __init__(self, encoder, decoder, input_len, target_len,
                 teacher_forcing_prob=0.5):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.input_len = input_len
        self.target_len = target_len
        self.teacher_forcing_prob = teacher_forcing_prob
        self.outputs = None

    def init_outputs(self, batch_size):
        device = next(self.parameters()).device
        # N, L (target), F
        self.outputs = torch.zeros(batch_size,
                                   self.target_len,
                                   self.encoder.n_features).to(device)

    def store_output(self, i, out):
        # Stores the output
        self.outputs[:, i:i+1, :] = out

    def forward(self, x):
        # Splits the data into source and target sequences;
        # the target seq will be empty in testing mode
        # N, L, F
        source_seq = x[:, :self.input_len, :]
        target_seq = x[:, self.input_len:, :]
        self.init_outputs(x.shape[0])

        # Encoder expects N, L, F
        hidden_seq = self.encoder(source_seq)
        # Output is N, L, H
        self.decoder.init_hidden(hidden_seq)

        # The last input of the encoder is also
        # the first input of the decoder
        dec_inputs = source_seq[:, -1:, :]

        # Generates as many outputs as the target length
        for i in range(self.target_len):
            # Output of decoder is N, 1, F
            out = self.decoder(dec_inputs)
            self.store_output(i, out)

            prob = self.teacher_forcing_prob
            # In evaluation/test the target sequence is
            # unknown, so we cannot use teacher forcing
            if not self.training:
                prob = 0

            # If it is teacher forcing
            if torch.rand(1) <= prob:
                # Takes the actual element
                dec_inputs = target_seq[:, i:i+1, :]
            else:
                # Otherwise uses the last predicted output
                dec_inputs = out

        return self.outputs
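As a quick sanity check, here is a sketch that assembles the pieces defined above (the argument values simply mirror the toy square example) and runs the composed model on the full sequence in both training and evaluation modes:

encdec = EncoderDecoder(encoder, decoder,
                        input_len=2, target_len=2,
                        teacher_forcing_prob=0.5)

# In training mode, teacher forcing may kick in at each step
encdec.train()
print(encdec(full_seq))

# In evaluation mode, the decoder can only consume its own predictions
encdec.eval()
print(encdec(full_seq))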