Transfer learning with PyTorch in Python where the output is two classes

I am doing transfer learning using torch, and I want to classify 2 classes.

Here is the code:

from torchvision import models
net = models.resnet18(pretrained=True)


# this is the ResNet-18 summary
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)

When I use net.fc = nn.Linear(512, 2) and run the code, it throws this error:

RuntimeError: The size of tensor a (2) must match the size of tensor b (64) at non-singleton dimension 1
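The mismatch can be reproduced in isolation. Here is a minimal sketch of the clash (the shapes are assumptions taken from the batch size of 64 used in the training loop below):

import torch
import torch.nn as nn

loss_fct = nn.MSELoss()
y_pred = torch.randn(64, 2)   # what net(X_batch) returns with fc = nn.Linear(512, 2)
y_batch = torch.randn(64)     # one scalar label per example
# squeeze() is a no-op on (64, 2), so MSELoss tries to broadcast
# (64, 2) against (64,) and raises the RuntimeError above:
loss = loss_fct(y_pred.squeeze(), y_batch)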

#training code

import numpy as np
import torch
import torch.nn as nn

net = net.to('cuda')

optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
loss_fct = nn.MSELoss()

train_losses = []
val_losses = []
val_accuracy = []
train_acc=[]
for t in range(15):

  net.train()

  X_train = np.array([data_augment(img) for img in X_train])

  epoch_train_loss = 0.
  for i in range(len(X_train) // 64):
    
    X_batch = X_train[i * 64: (i+1) * 64]
    X_batch = torch.tensor(X_batch, dtype=torch.float32).to('cuda')
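    # (N, H, W, C) -> (N, C, W, H): put channels first for the conv layers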
    X_batch = torch.transpose(X_batch, 1, -1)

    print(X_batch.shape)

    y_batch = y_train[i * 64: (i+1) * 64]
    y_batch = torch.tensor(y_batch, dtype=torch.float32).to('cuda')

    y_pred = net(X_batch)

    loss = loss_fct(y_pred.squeeze(), y_batch)
    epoch_train_loss += loss.item()  # .item() avoids keeping the autograd graph alive

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  train_losses.append(epoch_train_loss / len(X_train))


  net.eval()
  with torch.no_grad():
    y_pred_val = net(X_val)
    loss_val = loss_fct(y_pred_val.squeeze(), y_val)
    val_losses.append(loss_val / len(X_val))
    acc_val = torch.mean(((y_pred_val.squeeze() > 0).int() == y_val).float())

    val_accuracy.append(acc_val)
  print('Train loss: {} Val loss: {} Val Accuracy: {}'.format(epoch_train_loss, loss_val, acc_val))

I want to solve this, but I don't know how.

When I use net.fc = nn.Linear(512, 1), it works, but if I understand correctly that is not the right approach.

The shapes and types of my X and y are as follows:


print(type(X_train))
print(type(y_train))
print(type(X_val))
print(type(y_val))


<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>


print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)

(2600, 315, 270, 3)
(2600,)
torch.Size([500, 3, 270, 315])
torch.Size([500])



Solution 1:[1]

So you created your final fully connected layer with dim (512, 2), which means that for every input X you are expecting an output y with dim (2,). That is not what your labels look like: y_train.shape evaluates to (2600,) and y_val.shape to (500,), which means each label is a single number.

I think your confusion stems from the fact that you are trying to classify examples into 2 classes. However, every example has exactly one label, which can be represented by a single number (e.g. 1 for cat, 0 for dog).

Therefore, the final fully connected layer should have a dim of (512, 1).
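A minimal sketch of that single-logit setup, using a dummy batch with the question's input shapes. Note that nn.BCEWithLogitsLoss is a suggested pairing here, not something the question used (it kept nn.MSELoss, which also runs once the shapes match):

import torch
import torch.nn as nn
from torchvision import models

net = models.resnet18(pretrained=True)
net.fc = nn.Linear(512, 1)              # one logit per example

# With a single logit, nn.BCEWithLogitsLoss is the conventional choice:
loss_fct = nn.BCEWithLogitsLoss()

x = torch.randn(64, 3, 270, 315)        # dummy batch, shapes from the question
y = torch.randint(0, 2, (64,)).float()  # 0/1 labels as floats
loss = loss_fct(net(x).squeeze(1), y)   # (64,) vs (64,): shapes now match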

In the error you're seeing (reproduced below), 64 is your batch size. Since you're using an FC layer of dim (512, 2), each batch prediction actually has shape (64, 2) instead of (64,), which is the shape of the labels the loss is comparing it against.

RuntimeError: The size of tensor a (2) must match the size of tensor b (64) at non-singleton dimension 1
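For completeness, a (512, 2) head can also be made to work, but then the loss has to change as well: nn.CrossEntropyLoss consumes raw (N, 2) logits together with integer class labels of shape (N,). This is a sketch of that alternative, not part of the answer above:

import torch
import torch.nn as nn
from torchvision import models

net = models.resnet18(pretrained=True)
net.fc = nn.Linear(512, 2)          # two logits per example

loss_fct = nn.CrossEntropyLoss()    # expects (N, 2) logits and (N,) long labels

x = torch.randn(64, 3, 270, 315)
y = torch.randint(0, 2, (64,))      # class indices 0/1, dtype int64
loss = loss_fct(net(x), y)          # no squeeze needed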

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution 1: kwsp