How to set a threshold in PyTorch
I am working with a very imbalanced dataset (15% of the samples are labeled 1 and the rest 0), using BERT. The code I wrote takes the argmax of the outputs, which gives me predictions of 0 for everything. How do I include a threshold in my code to maximise my predictions of 1?
# Fine-tuning loop: trains for `nepoch` epochs, reports training loss,
# accuracy and F1 per epoch, validates after every epoch and keeps a copy of
# the weights that gave the best validation score so far.
nsteps = 215  # number of steps shown on the progress bar and used to average the loss
nepoch = 3
# Probability cut-off above which a sample is predicted as class 1.
# 0.5 gives the same result as argmax over two classes; lowering it makes the
# model predict the rare positive class more often.
threshold = 0.5
best_val_acc = 0
for epoch in range(nepoch):
    model.train()
    print(f"epoch n°{epoch+1}:")
    av_epoch_loss = 0.0
    progress_bar = tqdm(range(nsteps))
    for batch in trainloader:
        # Move every tensor of the batch to the GPU.
        batch = {k: v.cuda() for k, v in batch.items()}
        outputs = model(**batch)
        # NOTE(review): `*batch` unpacks the dict's keys (strings), not its
        # tensors -- confirm this is what `criterion` expects.
        loss = criterion(outputs, *batch)
        # Accumulate a plain float: adding the tensor itself would keep the
        # autograd history of every step alive for the whole epoch.
        av_epoch_loss += loss.item()
        loss.backward()
        optim.step()
        optim.zero_grad()
        # Threshold the class-1 probability instead of taking the argmax.
        # Assumes the last dimension of `outputs.logits` holds the scores for
        # classes 0 and 1, in that order -- TODO confirm.
        with torch.no_grad():
            positive_prob = torch.softmax(outputs.logits, dim=-1)[..., 1]
        predictions = (positive_prob > threshold).long()
        f1.add_batch(predictions=predictions, references=batch["labels"])
        acc.add_batch(predictions=predictions, references=batch["labels"])
        progress_bar.update(1)
    av_epoch_loss /= nsteps
    print(f"Training Loss: {av_epoch_loss: .2f}")
    acc_res = acc.compute()["accuracy"]
    print(f"Training Accuracy: {acc_res:.2f}")
    f_res = f1.compute()["f1"]
    print(f"Training F1-score: {f_res:.2f}")
    model.eval()
    # The value returned by `validate` is treated as the validation accuracy.
    val_acc = validate(model)
    if val_acc > best_val_acc:
        print("Achieved best validation accuracy so far. Saving model.")
        best_val_acc = val_acc
        # Deep copy so that later training steps do not alter the saved weights.
        best_model_state = deepcopy(model.state_dict())
    print("\n\n")
I looked in the PyTorch documentation, but I couldn't figure it out.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
