'nn.NLLLoss PyTorch IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

I’m trying to implement a Bert Classifier to discriminate between for 1 text class with label 0 and 1 (BINARY CLASSIFICATION ONE HOT ENCODING WITH 1 CLASS). This is all my code implemented anticipated by a sample of my datasets ( I have 3 csv, train-test-val). My problem is the loss function. I think that the error is from the way she wants the label/target because in my case I have only 1 class like yes-no, but I tried almost everything and I have no more ideas. Thank you very much !

df_train=pd.read_csv('CLASSIFIER_train',sep=',',header=None)
df_train
                                               0        1
    M A T T D R P T P D G T D A I D L T T R V R R...    1
    M K K L F Q T E P L L E L F N C N E L R I I G...    0
    M L V A A A V C P H P P L L I P E L A A G A A...    1
    M I V A W G N S G S G L L I L I L S L A V S A...    0
    M V E E G R R L A A L H P N I V V K L P T T E...    1
    M G S K V S K N A L V F N V L Q A L R E G L T...    1
    M P S K E T S P A E R M A R D E Y Y M R L A M...    1
    M V K E Y A L E W I D G Y R E R L V K V S D A...    1
    M G T A A S Q D R A A M A E A A Q R V G D S F...    0



class SequenceDataset(Dataset):

  def __init__(self, sequences, targets, tokenizer, max_len):
    self.sequences = sequences
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len
  
  def __len__(self):
    return len(self.sequences)
  
  def __getitem__(self, item):
    sequences = str(self.sequences[item])
    target = self.targets[item]

    encoding = self.tokenizer.encode_plus(
      sequences,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      pad_to_max_length=True,
      return_attention_mask=True,
      return_tensors='pt',
    )

    return {
      'sequences_text': sequences,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'targets': torch.tensor(target, dtype=torch.long)
    }
    
class SequenceDataset(Dataset):

  def __init__(self, sequences, targets, tokenizer, max_len):
    self.sequences = sequences
    self.targets = targets
    self.tokenizer = tokenizer
    self.max_len = max_len
  
  def __len__(self):
    return len(self.sequences)
  
  def __getitem__(self, item):
    sequences = str(self.sequences[item])
    target = self.targets[item]

    encoding = self.tokenizer.encode_plus(
      sequences,
      add_special_tokens=True,
      max_length=self.max_len,
      return_token_type_ids=False,
      pad_to_max_length=True,
      return_attention_mask=True,
      return_tensors='pt',
    )

    return {
      'sequences_text': sequences,
      'input_ids': encoding['input_ids'].flatten(),
      'attention_mask': encoding['attention_mask'].flatten(),
      'targets': torch.tensor(target, dtype=torch.long)
    }

def create_data_loader(df, tokenizer, max_len, batch_size):
  ds = SequenceDataset(
    sequences=df[0].to_numpy(),
    targets=df[1].to_numpy(),
    tokenizer=tokenizer,
    max_len=max_len
  )

  return DataLoader(
    ds,
    batch_size=batch_size,
    num_workers=2,
    shuffle=True
  )

class CategoricalCrossEntropyLoss(nn.Module):
    def __init__(self):
        super(CategoricalCrossEntropyLoss, self).__init__()

    def forward(self, y_hat, y):
        return nn.NLLLoss()(torch.log(y_hat), torch.argmax(y, dim=1))

def net_train(net, train_data_loader, parameters, dtype, device):
  net.to(dtype=dtype, device=device)

  # Define loss and optimizer
  #criterion = nn.CrossEntropyLoss()
  #criterion = nn.BCEWithLogitsLoss()
  #criterion = nn.NLLLoss()
  criterion = CategoricalCrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), # or any optimizer you prefer 
                        lr=parameters.get("lr", 0.001), # 0.001 is used if no lr is specified
                        momentum=parameters.get("momentum", 0.9)
  )

  scheduler = optim.lr_scheduler.StepLR(
      optimizer,
      step_size=int(parameters.get("step_size", 30)),
      gamma=parameters.get("gamma", 1.0),  # default is no learning rate decay
  )

  num_epochs = parameters.get("num_epochs", 3) # Play around with epoch number
  # Train Network
# Train Network
  for _ in range(num_epochs):
      # Your dataloader returns a dictionary
      # so access it as such
      for batch in train_data_loader:
        #print (batch.size)
          #batch = tuple(t.to(device) for t in batch)
          # move data to proper dtype and device
        input_ids = batch['input_ids']
        attention_mask =batch['attention_mask'] #.type(torch.LongTensor)
        labels = batch['targets'] #.type(torch.LongTensor)
        labels = labels.to(device=device)
          #labels = labels.argmax(dim=-1)
        #labels = labels.view(-1,32)
          
        attention_mask = attention_mask.to(device=device)
        #input_ids=torch.tensor(input_ids) #,dtype=float)
        input_ids = input_ids.to(device=device)
          #labels = labels.type(torch.FloatTensor)
                  #.reshape((labels.shape[0], 1))

          
          #labels = labels.long()
          # zero the parameter gradients
        optimizer.zero_grad()

          # forward + backward + optimize
        outputs,x= net(input_ids, attention_mask,return_dict=False)
          #outputs,x= net(input_ids,atten_mask)
        print(outputs)
        print(outputs.shape) #torch.Size([32, 450, 1024])

          
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()
  return net

#from transformers.models.bert.modeling_bert import BertForSequenceClassification,AutoModel
def init_net(parameterization):

    model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME,return_dict=True) #pretrained ResNet50

    # The depth of unfreezing is also a hyperparameter
    for param in model.parameters():
        param.requires_grad = False # Freeze feature extractor
        
    Hs = 512 # Hidden layer size; you can optimize this as well
                                  
    model.fc = nn.Sequential(nn.Linear(1024, 512), # attach trainable classifier
                                 nn.ReLU(),
                                 nn.Dropout(0.2),
                                 nn.Linear(512, 1),
                                 nn.LogSoftmax(dim=1))
                                 #nn.Sigmoid())
                                 #nn.Sigmoid()
                                
    return model 
 
def train_evaluate(parameterization):

    # constructing a new training data loader allows us to tune the batch size


    train_data_loader=create_data_loader(df_train, tokenizer, MAX_LEN, batch_size=parameterization.get("batchsize", 32))
    
    
    # Get neural net
    untrained_net = init_net(parameterization) 
    
    # train
    trained_net = net_train(net=untrained_net, train_data_loader=train_data_loader, 
                            parameters=parameterization, dtype=dtype, device=device)
    
    # return the accuracy of the model as it was trained in this run
    return evaluate(
        net=trained_net,
        data_loader=test_data_loader,
        dtype=dtype,
        device=device,
    )

dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

best_parameters, values, experiment, model = optimize(
    parameters=[
        {"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
        #{"name": "batchsize", "type": "range", "bounds": [16, 128]},
        {"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
        #{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
        #{"name": "stepsize", "type": "range", "bounds": [20, 40]},        
    ], 
  
    evaluation_function=train_evaluate,
    objective_name='accuracy',
)

print(best_parameters)
means, covariances = values
print(means)
print(covariances)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source