Pre-trained network: CUDA out of memory

    import torch
    from torch import nn
    import torch.nn.functional as F
    import torch.optim as optim
    import matplotlib.pyplot as plt
    %matplotlib inline
    import numpy as np
    import torchvision
    from torchvision import transforms
    import os
    # Mini-batch size shared by the training and evaluation loaders.
    BATCH_SIZE = 4

    # Dataset layout: <base_dir>/train and <base_dir>/test, each read by ImageFolder.
    base_dir = './data/4weather'
    train_dir = os.path.join(base_dir, 'train')
    test_dir = os.path.join(base_dir, 'test')

    # Training pipeline: resize, random 192x192 crop, then light augmentation.
    Train_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.RandomCrop(192),
        transforms.RandomHorizontalFlip(),
        # NOTE(review): a scalar here is a range in degrees, so this rotates
        # by at most 0.2 degrees -- confirm that this is intended.
        transforms.RandomRotation(0.2),
        # Randomly perturb brightness, then contrast, as two separate jitter steps.
        transforms.ColorJitter(brightness=0.5),
        transforms.ColorJitter(contrast=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
    ])

    # Evaluation pipeline: deterministic resize to 192x192, same normalization.
    Test_transform = transforms.Compose([
        transforms.Resize((192, 192)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
    ])

    train_ds = torchvision.datasets.ImageFolder(train_dir, transform=Train_transform)
    test_ds = torchvision.datasets.ImageFolder(test_dir, transform=Test_transform)

    # Only the training loader shuffles; the test loader keeps dataset order.
    train_dl = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    test_dl = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE)
    # Load VGG16 with its pre-trained weights and freeze the convolutional
    # feature extractor so only the classifier head is trained.
    model = torchvision.models.vgg16(pretrained=True)
    for p in model.features.parameters():
        p.requires_grad = False
    # Replace the final classifier layer with a fresh 4-class head. Assigning
    # to `out_features` on the existing layer only changes the attribute; the
    # weight matrix keeps its original shape, so the layer must be rebuilt.
    model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, 4)
    model.to('cuda')
    # NOTE(review): the optimizer still covers the whole (large) VGG16
    # classifier; if GPU memory is tight, consider freezing more layers.
    optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
    loss_fn = nn.CrossEntropyLoss()
    epochs=10
    def train(dl,model,loss_fn,optimizer):
        """Run one training epoch over ``dl``.

        Args:
            dl: DataLoader yielding ``(inputs, targets)`` batches.
            model: Module to optimize; put into training mode here.
            loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar loss.
            optimizer: Optimizer stepped once per batch.

        Returns:
            ``(train_loss, train_accuracy)``: the loss averaged over batches
            and the fraction of correctly classified samples in ``dl.dataset``.
        """
        size = len(dl.dataset)
        num_batches = len(dl)
        # Move batches to wherever the model lives instead of assuming CUDA;
        # fall back to 'cuda' (the previous behavior) for a parameterless model.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cuda')
        train_loss , train_accuracy = 0 , 0
        model.train()
        for x , y in dl:
            x, y  = x.to(device) , y.to(device)
            pred = model(x)
            loss = loss_fn(pred,y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Metrics use the predictions made before this batch's update and
            # must not extend the autograd graph.
            with torch.no_grad():
                train_accuracy += (pred.detach().argmax(1)==y).type(torch.float32).sum().item()
                train_loss +=loss.item()
        train_accuracy /=size
        train_loss /= num_batches
        return train_loss,train_accuracy
    def test(dl,model,loss_fn):
        size = len(dl.dataset)
        num_batches = len(dl)
        test_loss , test_accuracy = 0 , 0
        model.eval()
        with torch.no_grad():
            for x , y in dl:
                x, y  = x.to('cuda') , y.to('cuda')
                pred = model(x)
                loss = loss_fn(pred,y)
                test_accuracy += (pred.argmax(1)==y).type(torch.float32).sum().item()
                test_loss +=loss.item()
            test_accuracy /=size
            test_loss /= num_batches
            return test_loss,test_accuracy
    def fit(epochs,train_dl,test_dl,model,loss_fn,optimizer):
        """Train for ``epochs`` epochs, evaluating after each one.

        Each epoch calls ``train`` on ``train_dl`` and then ``test`` on
        ``test_dl``, prints a one-line summary, and records the metrics.

        Args:
            epochs: Number of epochs to run.
            train_dl: DataLoader passed to ``train``.
            test_dl: DataLoader passed to ``test``.
            model: Module being trained and evaluated.
            loss_fn: Loss callable forwarded to ``train`` and ``test``.
            optimizer: Optimizer forwarded to ``train``.

        Returns:
            Four lists with one entry per epoch, in this order:
            training accuracy, training loss, test accuracy, test loss.
        """
        epoch_train_accuracy = []
        epoch_train_loss = []
        epoch_test_accuracy = []
        epoch_test_loss = []
        # Kept as a single-line literal: a plain quoted string cannot span lines.
        template = ('Epoch:{} train_loss: {:.5f},train_accuracy: {:.2f},'
                    'test_loss: {:.5f},test_accuracy: {:.2f}.')
        for epoch in range(epochs):
            train_loss,train_acc = train(train_dl,model,loss_fn,optimizer)
            test_loss,test_acc = test(test_dl,model,loss_fn)
            epoch_train_loss.append(train_loss)
            epoch_train_accuracy.append(train_acc)
            epoch_test_loss.append(test_loss)
            epoch_test_accuracy.append(test_acc)
            # Accuracies are fractions; report them as percentages.
            print(template.format(epoch+1,train_loss,train_acc*100,test_loss,test_acc*100))
        print('Done')
        return epoch_train_accuracy,epoch_train_loss,epoch_test_accuracy,epoch_test_loss
    # Run the full training loop and keep the per-epoch metric histories.
    train_accuracy_result, train_loss_result, test_accuracy_result, test_loss_result = fit(
        epochs, train_dl, test_dl, model, loss_fn, optimizer
    )

When I use the pre-trained VGG16 network with PyTorch, I get a CUDA out-of-memory error. No matter how I adjust BATCH_SIZE or the memory settings, the result is the same. The problem does not occur when I do not use the pre-trained network:

CUDA out of memory. Tried to allocate 392.00 MiB (GPU 0; 4.00 GiB total capacity; 2.28 GiB already allocated; 93.08 MiB free; 2.30 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF.

pytorch

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

Pre-trained network: CUDA out of memory

Sources

Related Questions