Why does my trained PyTorch CNN model not perform better than random?

When I train my PyTorch-based CNN model, I can get the classification (BCELoss) error to go from ~0.6 to 0.1 or better on the training data. But when I rerun the training or test data through the trained model, the results are essentially random. This isn't even an issue of how good the model is -- I'm not even looking at testing data. I can crank the learning rate up high and get complete overfitting on training, but when I check the predictions on the same training data, it's just junk.

I must have a bug somewhere in my code, but I can't figure out where.

If anyone can take a look at this code and spot the error, I would be very appreciative!

## data loader
def create_balanced_DataLoader(X, Y, batch_size, train=True, num_workers=0):
    """Build a DataLoader whose sampler draws every class with equal probability.

    Each sample is weighted by ``len(Y) / count(its class)`` and a
    ``WeightedRandomSampler`` draws ``len(Y)`` indices per pass using those
    weights. The sampler is created with its default settings, so which
    samples appear (and how often) differs from one pass to the next.

    Args:
        X: NumPy array of inputs. Axis 2 is moved to position 1 before the
            array is converted to a float tensor.
        Y: NumPy array with one class label per sample. Labels may be any
            values ``np.unique`` can sort (ints, floats, non-contiguous ids);
            they do not have to be ``0..k-1``.
        batch_size: Number of samples per batch.
        train: Currently unused; the same weighted sampler is applied
            regardless of its value.
        num_workers: Forwarded to ``DataLoader``.

    Returns:
        A ``DataLoader`` yielding ``(inputs, labels)`` batches of float tensors.
    """
    # `return_inverse` gives, for every sample, the position of its label in
    # the sorted unique labels. Indexing by that position (instead of by the
    # raw label value) keeps the lookup valid for float or non-zero-based labels.
    _, class_index, counts = np.unique(Y, return_inverse=True, return_counts=True)
    class_weights = counts.sum() / counts
    sample_weights = torch.from_numpy(class_weights[class_index.reshape(-1)]).float()
    wrs = utils.WeightedRandomSampler(sample_weights, len(sample_weights))

    td = utils.TensorDataset(
        torch.from_numpy(np.moveaxis(X, 2, 1)).float(),
        torch.from_numpy(Y).float(),
    )
    return utils.DataLoader(
        td, batch_size=batch_size, num_workers=num_workers, sampler=wrs
    )

# One batch size shared by the training and test loaders.
batch_size = 128

# Both splits are wrapped by the same class-balanced loader factory.
train_full_dl = create_balanced_DataLoader(
    X=X_train,
    Y=Y_train,
    batch_size=batch_size,
    train=True,
    num_workers=4,
)
test_full_dl = create_balanced_DataLoader(
    X=X_test,
    Y=Y_test,
    batch_size=batch_size,
    train=False,
    num_workers=4,
)



## training code
def train(epochs, optimizer, scheduler, model, loss_fn, train_loader, test_loader,
          weights_and_baises=None):
    """Train ``model`` for ``epochs`` epochs, evaluating on ``test_loader`` after each.

    Args:
        epochs: Number of passes over ``train_loader``.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        scheduler: Accepted for interface compatibility; not used here.
        model: Module being trained. Its output is flattened before the loss,
            so it must flatten to the same length as the label batch.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar tensor.
        train_loader: Iterable of ``(images, labels)`` training batches.
        test_loader: Iterable of ``(images, labels)`` batches passed to ``test``.
        weights_and_baises: Optional logger exposing ``log(dict)`` (for example
            the ``wandb`` module). When given, the per-epoch losses are sent to
            it. The parameter name keeps the spelling used by existing callers.

    Returns:
        A list with one ``(train_loss, test_loss)`` tuple per epoch, each value
        rounded to 4 decimals. ``train_loss`` is the mean of the per-batch losses.
    """
    history = []
    for epoch in range(1, epochs + 1):
        train_losses = []
        # test() leaves the model in eval mode, so switch back every epoch.
        model.train()

        for images, labels in train_loader:
            optimizer.zero_grad()

            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)

            loss = loss_fn(torch.flatten(outputs), labels)
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        test_loss = float(np.round(test(model, test_loader, loss_fn), 4))
        train_loss = float(np.round(np.mean(np.asarray(train_losses)), 4))
        history.append((train_loss, test_loss))

        if weights_and_baises is not None:
            weights_and_baises.log(
                {"epoch": epoch, "train_loss": train_loss, "test_loss": test_loss}
            )

    return history

def test(model, test_loader, loss_fn):
    """Return the mean per-batch loss of ``model`` over ``test_loader``.

    The model is switched to eval mode and left in that mode; gradients are
    disabled while the batches are evaluated. Every batch contributes equally
    to the mean, whatever its size.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            # Flatten so the prediction shape matches the 1-D label batch.
            predictions = torch.flatten(model(batch_images))
            batch_losses.append(loss_fn(predictions, batch_labels).item())

    return np.mean(np.asarray(batch_losses))

## run training
# Binary cross-entropy on probabilities; presumably the model ends in a
# sigmoid -- TODO confirm.
loss_fn = torch.nn.BCELoss()
epoch_n = 100

# AdamW hyper-parameters.
lr = 1e-4
betas = (0.9, 0.999)
weight_decay = 0.5e-4
eps = 1e-8
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=lr,
    betas=betas,
    weight_decay=weight_decay,
    eps=eps,
)

# NOTE(review): `test_short_dl` is not created in this snippet -- presumably a
# smaller test loader built elsewhere; confirm it exists before running.
# NOTE(review): the `weights_and_baises` keyword must match a parameter of
# train() exactly (including its spelling) -- confirm against the definition.
train(
    epochs=epoch_n,
    optimizer=optimizer,
    scheduler=None,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_full_dl,
    test_loader=test_short_dl,
    weights_and_baises=wandb,
)


## check predictions
def _collect_predictions(model, loader):
    """Run ``model`` over ``loader`` and return ``(predictions, actual)`` NumPy arrays.

    Predictions and labels are gathered in the same pass, so row ``k`` of both
    arrays refers to the same drawn sample.
    """
    predicted_batches = []
    actual_batches = []
    with torch.no_grad():
        for images, labels in loader:
            # Keep a deep copy rather than the loader's own tensor; the
            # original code marks this as a workaround ("fix pty issue").
            actual_batches.append(copy.deepcopy(labels))
            outputs = model(images.to(device))
            predicted_batches.append(outputs.cpu())

    # Concatenating whole batches gives the same arrays as stacking the
    # individual rows, without creating one tensor object per sample.
    predictions = torch.cat(predicted_batches).cpu().detach().numpy()
    actual = torch.cat(actual_batches).cpu().detach().numpy()
    return predictions, actual


model.eval()

# NOTE(review): predictions keep the model's raw output shape while labels are
# 1-D. If the model emits (batch, 1) -- TODO confirm -- comparing the two
# arrays directly broadcasts to (N, N); flatten the predictions first.
test_predictions, test_actual = _collect_predictions(model, test_full_dl)
train_predictions, train_actual = _collect_predictions(model, train_full_dl)

## now compare predictions and actual data...
## ...



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source