Trying to compute the loss of an encoder/decoder model
I am attempting to train an encoder/decoder model with mini-batches. I keep encountering an error stating:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [32, 6]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
The traceback points to the line y = self.linear(out), but I am unsure what exactly is wrong. Any help would be greatly appreciated. The model is below. Thank you.
```python
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable
from sliding_window import sliding_window
from training_datasets import get_training_datasets_batch

torch.autograd.set_detect_anomaly(True)

class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1):
        super(Encoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

    def forward(self, x):
        flat = x.view(x.shape[0], x.shape[1], self.input_size)
        out, h = self.gru(flat)
        return out, h

class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, output_size=6, num_layers=1):
        super(Decoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)
        self.ReLU = nn.ReLU()

    def forward(self, x, h):
        # Decode one time step at a time: add and remove the seq dimension.
        x = x.unsqueeze(1)
        out, h = self.gru(x, h)
        out = out.squeeze(1)
        print(out.shape)
        y = self.linear(out)
        print(y.shape)
        y = self.ReLU(y)
        return y, h

class EncoderDecoder(nn.Module):
    def __init__(self, hidden_size, input_size=6, output_size=6):
        super(EncoderDecoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.encoder = Encoder(input_size=input_size, hidden_size=hidden_size)
        self.decoder = Decoder(input_size=input_size, hidden_size=hidden_size,
                               output_size=output_size)

    def train_model(self, ts, epochs, target_len, features, batch_size=64,
                    test_len=288, method='teacher_forcing', tfr=0.5, lr=0.01,
                    dynamic_tf=False):
        X, Y = sliding_window(ts, features=288, target_len=target_len)
        x_train, x_val, x_test, y_train, y_val, y_test = get_training_datasets_batch(
            X, Y, features, test_len=test_len, batch_size=batch_size)
        losses = np.full(epochs, np.nan)
        optimizer = torch.optim.SGD(
            filter(lambda x: x.requires_grad, self.parameters()), lr=lr)
        criterion = nn.MSELoss()
        for e in range(epochs):
            print('Starting epoch {}'.format(e))
            x_train_data = iter(x_train)
            y_train_data = iter(y_train)
            x_val_data = iter(x_val)
            y_val_data = iter(y_val)
            x_train_shape = list(x_train)[0].shape
            # predicted = torch.zeros(target_len, batch_size, x_train_shape[2])
            # print(predicted.shape)
            loss = 0
            for x_train_in in x_train_data:
                optimizer.zero_grad()
                x_train_in = Variable(x_train_in)
                y_train_in = Variable(next(y_train_data).transpose(0, 1))
                # Encode the input window, then seed the decoder with the
                # last input step and the encoder's final hidden state.
                _, enc_h = self.encoder(x_train_in)
                dec_in = x_train_in[:, -1, :]
                dec_h = enc_h
                if method == 'recursive':
                    for t in range(target_len):
                        dec_out, dec_h = self.decoder(dec_in, dec_h)
                        predicted = dec_out
                        dec_in = dec_out
                        loss += criterion(predicted, y_train_in[t])
                loss.backward(retain_graph=True)
                optimizer.step()
```
Solution 1
The problem in this case was loss.backward(retain_graph=True) combined with a loss that is never reset between mini-batches. Because loss is only zeroed once per epoch, it keeps accumulating, and each backward pass retains graphs from earlier batches whose parameters optimizer.step() has already modified in place, which is exactly what the version-counter error reports. The code started working after resetting loss to 0 after each optimizer step, which also makes retain_graph=True unnecessary:
```python
loss.backward()
optimizer.step()
loss = 0
```
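As a fuller illustration, here is a minimal sketch of the corrected inner batch loop for the 'recursive' branch, reusing the names from the question's code and assuming the surrounding setup, dataset helpers, and model classes are unchanged:

```python
# Sketch: corrected batch loop, 'recursive' decoding (names from the question).
for x_train_in in x_train_data:
    optimizer.zero_grad()
    y_train_in = next(y_train_data).transpose(0, 1)

    # Encode the input window once per batch.
    _, enc_h = self.encoder(x_train_in)
    dec_in = x_train_in[:, -1, :]
    dec_h = enc_h

    loss = 0  # fresh accumulator: the graph is rebuilt for every mini-batch
    for t in range(target_len):
        dec_out, dec_h = self.decoder(dec_in, dec_h)
        dec_in = dec_out  # feed the prediction back in (recursive decoding)
        loss += criterion(dec_out, y_train_in[t])

    loss.backward()   # no retain_graph=True needed any more
    optimizer.step()
```

Because every iteration now builds and frees its own graph, backward() never revisits parameters that a previous optimizer.step() has modified in place, and the AsStridedBackward0 version error goes away.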
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Ninnamon12 |
