RuntimeError: Function MmBackward returned an invalid gradient at index 0 - got [4096, 32] but expected shape compatible with [4096, 512]
Error when replacing nn.Linear with Mlp.

I am replacing an nn.Linear module in my model with Mlp from the timm library. With nn.Linear the model starts to learn; after swapping in Mlp, training fails with the error above. I also tried changing the dimensions of the data fed to the model, but that only caused more problems. How should I rewrite Mlp, or convert the data?
```python
import collections.abc
from itertools import repeat

import torch
import torch.nn as nn


def _ntuple(n):
    def parse(x):
        # Pass iterables through unchanged; repeat scalars n times.
        if isinstance(x, collections.abc.Iterable):
            return x
        return tuple(repeat(x, n))
    return parse


to_1tuple = _ntuple(1)
to_2tuple = _ntuple(2)
to_3tuple = _ntuple(3)
to_4tuple = _ntuple(4)
to_ntuple = _ntuple
```
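For reference, these helpers just broadcast a scalar into an n-tuple, which is how `Mlp` below turns a single `drop` value into per-layer dropout probabilities. A quick check:

```python
# to_2tuple repeats scalars and passes iterables through unchanged.
print(to_2tuple(0.1))         # (0.1, 0.1)
print(to_2tuple((0.1, 0.2)))  # (0.1, 0.2)
```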
```python
def make_divisible(v, divisor=8, min_value=None, round_limit=.9):
    min_value = min_value or divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not go down by more than 10%.
    if new_v < round_limit * v:
        new_v += divisor
    return new_v
```
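`make_divisible` is not actually used by the model below, but for completeness: it rounds a value to the nearest multiple of `divisor`, bumping the result up whenever rounding down would lose more than roughly 10%. With illustrative values:

```python
# Illustrative values only, using the defaults (divisor=8, round_limit=.9):
print(make_divisible(30))  # 32 - rounds to the nearest multiple of 8
print(make_divisible(8))   # 8  - already divisible
print(make_divisible(10))  # 16 - 8 would undershoot 10 by more than 10%, so bump up
```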
```python
class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None,
                 act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        drop_probs = to_2tuple(drop)

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop_probs[0])
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop2 = nn.Dropout(drop_probs[1])

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop1(x)
        x = self.fc2(x)
        x = self.drop2(x)
        return x
```
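Note the signature: the second positional argument of `Mlp` is `hidden_features`, not `out_features`, and `out_features` falls back to `in_features` when omitted. A quick shape check (with hypothetical sizes matching the model below) makes this visible:

```python
# Mlp(512, 32) bottlenecks to 32 hidden units but expands back to 512 outputs,
# because out_features defaults to in_features when not given.
mlp = Mlp(in_features=512, hidden_features=32)
x = torch.randn(4, 32, 512)
print(mlp(x).shape)  # torch.Size([4, 32, 512]) - still 512 features, not 32
```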
```python
class DenseNetAtt(nn.Module):
    def __init__(
        self, number_class_symbols, time_feature_count=32, lstm_hidden=256,
        lstm_len=2,
    ):
        super().__init__()
        self.feature_extractor = get_densenet201_backbone(pretrained=True)
        self.avg_pool = nn.AdaptiveAvgPool2d(
            (time_feature_count, time_feature_count))
        self.bilstm = BiLSTM(time_feature_count, lstm_hidden, lstm_len)
        self.classifier = nn.Sequential(
            Mlp(lstm_hidden * 2, time_feature_count),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(time_feature_count, number_class_symbols)
        )

    def forward(self, x):
        x = self.feature_extractor(x)
        b, c, h, w = x.size()
        x = x.view(b, c * h, w)
        x = self.avg_pool(x)
        x = x.transpose(1, 2)
        x = self.bilstm(x)
        x = self.classifier(x)
        x = nn.functional.log_softmax(x, dim=2).permute(1, 0, 2)
        return x
```
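Given the `Mlp` signature above, `Mlp(lstm_hidden * 2, time_feature_count)` sets `hidden_features=32` but outputs `lstm_hidden * 2 = 512` features, while the `nn.Linear` that follows expects `time_feature_count = 32` inputs; those are exactly the 32-vs-512 dimensions in the error message. A minimal sketch of a head that keeps the shapes consistent, assuming the intent was to mirror the original `nn.Linear(lstm_hidden * 2, time_feature_count)`, is to pass `out_features` explicitly inside `__init__`:

```python
# Sketch of a shape-consistent classifier head; the hidden width here is a
# hypothetical choice, tune it as needed.
self.classifier = nn.Sequential(
    Mlp(
        in_features=lstm_hidden * 2,      # 512 coming out of the BiLSTM
        hidden_features=lstm_hidden,      # hypothetical hidden width
        out_features=time_feature_count,  # 32, matching the Linear below
    ),
    nn.GELU(),
    nn.Dropout(0.1),
    nn.Linear(time_feature_count, number_class_symbols),
)
```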
Code where the error occurs:
```python
def train_loop(data_loader, model, criterion, optimizer, epoch):
    loss_avg = AverageMeter()
    model.train()
    print("train loop")
    for images, texts, enc_pad_texts, text_lens in tqdm.tqdm(data_loader):
        model.zero_grad()
        images = images.to(DEVICE)
        batch_size = len(texts)
        output = model(images)
        output_lenghts = torch.full(
            size=(output.size(1),),
            fill_value=output.size(0),
            dtype=torch.long
        )
        loss = criterion(output, enc_pad_texts, output_lenghts, text_lens)
        loss.backward(retain_graph=True)

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()

        loss_avg.update(loss.item(), batch_size)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        optimizer.step()
        optimizer.sync_lookahead()
```
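Independently of the shape issue, this loop calls backward twice over the same graph: once via `loss.backward(retain_graph=True)` and again via `scaled_loss.backward()`. With NVIDIA Apex, the documented pattern is a single backward pass through `amp.scale_loss`; a sketch of just the backward section under that assumption:

```python
# Single backward pass through Apex amp's loss scaling, replacing the
# paired loss.backward(retain_graph=True) + scaled_loss.backward() above.
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 2)
optimizer.step()
```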
The full traceback:

```text
/tmp/ipykernel_6070/4094404913.py in <module>
----> 1 train(config_json)
/tmp/ipykernel_6070/872149847.py in train(config)
    141     timer = time.time()
    142     print("\nEpoch", epoch, "Previous took", epoch_time_m, "minutes")
--> 143     loss_avg = train_loop(train_loader, model, criterion, optimizer, epoch)
    144     acc_avg, cer_avg = val_loop(val_loader, model, tokenizer, DEVICE)
    145     print(f'acc: {acc_avg}; cer: {cer_avg};')
/tmp/ipykernel_6070/872149847.py in train_loop(data_loader, model, criterion, optimizer, epoch)
     56     )
     57     loss = criterion(output, enc_pad_texts, output_lenghts, text_lens)
---> 58     loss.backward(retain_graph=True)
     59
     60     with amp.scale_loss(loss, optimizer) as scaled_loss:
/opt/conda/lib/python3.7/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    253                 create_graph=create_graph,
    254                 inputs=inputs)
--> 255         torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
    256
    257     def register_hook(self, hook):
/opt/conda/lib/python3.7/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    147     Variable._execution_engine.run_backward(
    148         tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 149         allow_unreachable=True, accumulate_grad=True)  # allow_unreachable flag
    150
    151
RuntimeError: Function MmBackward returned an invalid gradient at index 0 - got [2048, 32] but expected shape compatible with [2048, 512]
```
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow