NLP with PyTorch - RuntimeError: shape '[32, 128, 1]' is invalid for input of size 61440
I'm trying to run this code for an attention model in NLP:
class DecoderAttn(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, attn_dim):
        super().__init__()
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, batch_first=True)
        # Implement this
        # BEGIN
        # self.fc_out =
        # add attention layer and linear transform layers
        # attention layer's elements defined (key, query, value)
        self.key = nn.Linear(hid_dim, attn_dim)
        self.query = nn.Linear(hid_dim, attn_dim)
        self.value = nn.Linear(hid_dim, attn_dim)
        self.attention = nn.MultiheadAttention(attn_dim, 128)
        self.fc_out = nn.Linear(hid_dim, output_dim)
        # END

    def forward(self, input, hidden, encoder_outputs):
        # input: [batch size]
        # hidden: [batch size, hid_dim]
        # encoder_outputs: [batch size, src_len, hid_dim]
        input = input.unsqueeze(1)
        # input: [batch size, 1]
        embedded = self.embedding(input)
        # embedded: [batch size, 1, emb dim]
        output, hidden = self.rnn(embedded, hidden)
        # implement this
        # BEGIN
        # compute v* (attention output)
        # compute prediction, using a fully connected layer that takes as input
        # both attention output and output from GRU
        attention_out, attention_out_w = self.attention(self.query(output), self.key(encoder_outputs), self.value(encoder_outputs))
        concat_out = torch.cat((output, attention_out), 2)
        prediction = self.fc_out(output.squeeze(1))
        # END
        # prediction: [batch size, output dim]
        return prediction, hidden
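For context, nn.MultiheadAttention defaults to batch_first=False, i.e. it reads its inputs as (seq_len, batch, embed_dim), while the GRU above is built with batch_first=True and produces (batch, seq_len, hid_dim). A minimal sketch of an attention call with consistent batch-first layouts, assuming PyTorch >= 1.9 (which added batch_first to nn.MultiheadAttention); the head count of 8 and the source length of 15 are illustrative assumptions, not values from the post:

import torch
import torch.nn as nn

attn_dim, n_heads = 128, 8              # embed_dim must be divisible by num_heads
attn = nn.MultiheadAttention(attn_dim, n_heads, batch_first=True)

query = torch.randn(32, 1, attn_dim)    # (batch, tgt_len=1, attn_dim)
keys = torch.randn(32, 15, attn_dim)    # (batch, src_len=15, attn_dim)

out, weights = attn(query, keys, keys)
print(out.shape)      # torch.Size([32, 1, 128])
print(weights.shape)  # torch.Size([32, 1, 15]) -> (batch, tgt_len, src_len)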
After defining the decoder, I try to run this:
INPUT_DIM = len(CHARS.vocab)
OUTPUT_DIM = len(PHONEMES.vocab)
ENC_EMB_DIM = 500
DEC_EMB_DIM = 50
HID_DIM = 256
ATTN_DIM = 128
N_LAYERS = 4
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1
enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS)
dec = DecoderAttn(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, ATTN_DIM)
model_attn = Seq2Seq(enc, dec, device).to(device)
N_EPOCHS = 10
CLIP = 1
train(model_attn, N_EPOCHS, CLIP)
but it gives the following error:
RuntimeError: shape '[32, 128, 1]' is invalid for input of size 61440
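For what it's worth, the numbers line up with the shapes above: 61440 = 32 × 15 × 128, which matches the projected key self.key(encoder_outputs) for a batch of 32, a source length of 15, and attn_dim = 128 (the source length of 15 is inferred from the error, not stated in the post). Because the attention is sequence-first by default, it takes the batch size from the second dimension of the query [32, 1, 128], infers batch = 1, and with num_heads = 128 (head dimension 1) tries to view the key as [32, 1 × 128, 1], which cannot hold 61440 elements. A minimal sketch that reproduces the same error on a recent PyTorch:

import torch
import torch.nn as nn

# Batch-first tensors fed to a sequence-first (default) attention
# with 128 heads of dimension 1, as in the question's decoder.
attn = nn.MultiheadAttention(embed_dim=128, num_heads=128)

q = torch.randn(32, 1, 128)   # intended as (batch, tgt_len, attn_dim)
k = torch.randn(32, 15, 128)  # intended as (batch, src_len, attn_dim)

attn(q, k, k)
# RuntimeError: shape '[32, 128, 1]' is invalid for input of size 61440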
I am also adding the Google Colab link; it might be easier to investigate that way:
https://colab.research.google.com/drive/1MEemocW8nvebjq17CNnRzvUHdd_wQumP?usp=sharing
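A minimal sketch of one plausible fix, assuming PyTorch >= 1.9: make the attention batch-first to match the GRU, pick a head count that leaves a non-trivial head dimension (8 is an illustrative choice), and size fc_out to take the concatenation of the GRU output and the attention output, as the post's own comment describes. The class name DecoderAttnFixed is hypothetical:

import torch
import torch.nn as nn

class DecoderAttnFixed(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, attn_dim, n_heads=8):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, n_layers, batch_first=True)
        self.key = nn.Linear(hid_dim, attn_dim)
        self.query = nn.Linear(hid_dim, attn_dim)
        self.value = nn.Linear(hid_dim, attn_dim)
        # batch_first=True so the attention reads (batch, seq, attn_dim),
        # matching the GRU; attn_dim must be divisible by n_heads.
        self.attention = nn.MultiheadAttention(attn_dim, n_heads, batch_first=True)
        # fc_out consumes [GRU output ; attention output] concatenated.
        self.fc_out = nn.Linear(hid_dim + attn_dim, output_dim)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input.unsqueeze(1))       # [batch, 1, emb_dim]
        output, hidden = self.rnn(embedded, hidden)         # [batch, 1, hid_dim]
        attn_out, _ = self.attention(self.query(output),
                                     self.key(encoder_outputs),
                                     self.value(encoder_outputs))  # [batch, 1, attn_dim]
        concat_out = torch.cat((output, attn_out), dim=2)   # [batch, 1, hid_dim + attn_dim]
        prediction = self.fc_out(concat_out.squeeze(1))     # [batch, output_dim]
        return prediction, hidden

With attn_dim = 128 and 8 heads, each head attends over 16-dimensional slices, and fc_out now actually consumes concat_out rather than the raw GRU output.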
Source: Stack Overflow, licensed under CC BY-SA 3.0.