How can I send data of each individual timestep through a fully connected layer prior to sending it through an RNN?
I have data of 100 timesteps with 1000 values per timestep.
Now I want to train an RNN with this data but first I want to have the data of each timestep through a fully connected layer.
from torch import nn
import torch
from tqdm import tqdm
class Model(nn.Module):
    """Project each timestep through a linear layer, then run an RNN over the sequence.

    Args:
        values: number of features per timestep (linear input size and RNN output size).
        hidden_dims: size of the linear projection fed into the RNN.
        n_layers: number of stacked RNN layers.
    """
    def __init__(self, values, hidden_dims, n_layers):
        super(Model, self).__init__()
        self.hidden_dims = hidden_dims
        self.fc = nn.Linear(values, hidden_dims)
        # The RNN consumes the projected features and emits `values` features per step.
        self.rnn = nn.RNN(hidden_dims, values, n_layers, batch_first=True)

    def forward(self, x):
        """Map x of shape (timesteps, values) to (out, h) where out is (1, timesteps, values).

        Returns:
            out: RNN output for every timestep, shape (1, timesteps, values).
            h: final hidden state, shape (n_layers, 1, values).
        """
        # nn.Linear operates on the LAST dimension, so the whole sequence can be
        # projected in a single call — the original per-timestep loop with
        # repeated torch.cat was equivalent but much slower.
        hidden = self.fc(x)
        # Add a leading batch dimension (single batch) for the batch_first RNN.
        hidden = hidden[None, :]
        out, h = self.rnn(hidden)
        return out, h
# As criterion I'm using the mean absolute percentage error:
def MAPELoss(output, target):
    """Return the mean absolute percentage error of `output` w.r.t. `target`."""
    relative_error = (target - output) / target
    return torch.mean(torch.abs(relative_error))
# And this is how I'd predict the values of the next timestep:
def predict(model, prev_states):
    """Return the model's prediction for the timestep following `prev_states`.

    `prev_states` is a (timesteps, values) tensor of known history; the output
    for its last timestep is the forecast for the next one.
    """
    # Tensor.to() is NOT in-place: the original code discarded its result, so
    # the input never actually moved to `device`. Assign it back.
    prev_states = prev_states.to(device)
    # Inference needs no gradient tracking.
    with torch.no_grad():
        out, h = model(prev_states)
    # out is (1, timesteps, values); the last timestep is the prediction.
    return out[0][-1]
# Now I want to train the NN with the data:
data = torch.rand(100, 1000)
# Inputs are timesteps 0..98; targets are the following timesteps 1..99.
input_data = data[:-1]
target_data = data[1:]

model = Model(values=1000, hidden_dims=100, n_layers=10)
device = torch.device("cpu")
model.to(device)

n_epochs = 100
lr = 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Tensor.to() returns a new tensor — the original per-epoch call discarded its
# result, so the data never actually moved. Convert and move both tensors ONCE,
# outside the loop; this also hoists the per-epoch .float() conversion.
input_data = input_data.float().to(device)
target_data = target_data.to(device)

for epoch in tqdm(range(1, n_epochs + 1), desc='Training'):
    optimizer.zero_grad()
    output, h = model(input_data)
    loss = MAPELoss(output, target_data)
    loss.backward()
    optimizer.step()
# And lastly I try to let the trained NN predict the fourth timestep given the first three timesteps:
# Feed the first three timesteps as context and compare against the true fourth.
context = data[:3]
prediction = predict(model, context)
print(target_data[3])
print(prediction)
I'm really unsure if this is a good approach especially because of the loop over the fully connected layer.
The values I'm getting from the NN are also not even close to the target data but almost exclusively +/-1.
How would I have to set up the NN in order to make it work correctly?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
