Transfer learning with PyTorch in Python where the output is two classes
I am doing transfer learning with torch and I want to classify 2 classes. Here is the code:
from torchvision import models
net = models.resnet18(pretrained=True)
# this is the ResNet-18 summary (print(net))
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer2): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer3): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (layer4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=512, out_features=1000, bias=True)
)
When I use net.fc = nn.Linear(512, 2) and run the training code below, it throws the following error:
RuntimeError: The size of tensor a (2) must match the size of tensor b (64) at non-singleton dimension 1
# training code
import numpy as np
import torch
import torch.nn as nn

net = net.to('cuda')
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
loss_fct = nn.MSELoss()

train_losses = []
val_losses = []
val_accuracy = []
train_acc = []

for t in range(15):
    net.train()
    X_train = np.array([data_augment(img) for img in X_train])
    epoch_train_loss = 0.
    for i in range(len(X_train) // 64):
        X_batch = X_train[i * 64: (i+1) * 64]
        X_batch = torch.tensor(X_batch, dtype=torch.float32).to('cuda')
        X_batch = torch.transpose(X_batch, 1, -1)  # channels-last batch -> channels-first
        print(X_batch.shape)
        y_batch = y_train[i * 64: (i+1) * 64]
        y_batch = torch.tensor(y_batch, dtype=torch.float32).to('cuda')

        y_pred = net(X_batch)
        loss = loss_fct(y_pred.squeeze(), y_batch)
        epoch_train_loss += loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_losses.append(epoch_train_loss / len(X_train))

    net.eval()
    with torch.no_grad():
        y_pred_val = net(X_val)
        loss_val = loss_fct(y_pred_val.squeeze(), y_val)
        val_losses.append(loss_val / len(X_val))
        acc_val = torch.mean(((y_pred_val.squeeze() > 0).int() == y_val).float())
        val_accuracy.append(acc_val)

    print('Train loss: {} Val loss: {} Val Accuracy: {}'.format(epoch_train_loss, loss_val, acc_val))
I want to solve this but I don't know how.
When I use net.fc = nn.Linear(512, 1) it runs, but if I understand correctly that does not seem right for a two-class problem.
The shapes and types of my X and y are as follows:
print(type(X_train))
print(type(y_train))
print(type(X_val))
print(type(y_val))
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
(2600, 315, 270, 3)
(2600,)
torch.Size([500, 3, 270, 315])
torch.Size([500])
Solution 1:[1]
So you created your final fully connected layer with dim (512, 2), which means that for every input X you are expecting an output y with dim (2,). That is not the case: y_train.shape evaluates to (2600,) and y_val.shape to (500,), which means each label is a single number.
I think your confusion stems from the fact that you are trying to classify examples into 2 classes. However, each example only belongs to one class, which can be represented by a single number (e.g. 1 for cat, 0 for dog).
Therefore, the final fully connected layer should have a dim of (512, 1).
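A minimal sketch of that setup (the dummy tensors and the 224x224 input size are assumptions for illustration; BCEWithLogitsLoss is shown as one common pairing for a single-logit binary head, while the MSELoss used in the question also matches shapes once the head outputs one value per example):

```python
import torch
import torch.nn as nn
from torchvision import models

net = models.resnet18(pretrained=True)
net.fc = nn.Linear(512, 1)              # one output per example, matching y of shape (N,)

x = torch.randn(64, 3, 224, 224)        # dummy batch; 224x224 is an assumed input size
y = torch.randint(0, 2, (64,)).float()  # binary labels, one number per example

logits = net(x).squeeze(1)              # (64, 1) -> (64,), same shape as y
loss = nn.BCEWithLogitsLoss()(logits, y)
loss.backward()
```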
In the error you're seeing (reproduced below), 64 is your batch size. Since the FC layer has dim (512, 2), the prediction for each batch has shape (64, 2) rather than (64,), which is what the loss function expects in order to match y_batch.
RuntimeError: The size of tensor a (2) must match the size of tensor b (64) at non-singleton dimension 1
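The shape mismatch can be reproduced with dummy tensors of the same sizes (a sketch, not the original data):

```python
import torch
import torch.nn as nn

y_pred = torch.randn(64, 2)   # what a (512, 2) head produces for a batch of 64
y_batch = torch.randn(64)     # one target value per example

# squeeze() does nothing here (there is no singleton dimension), so MSELoss
# tries to broadcast (64, 2) against (64,) and fails with the error above.
# With a (512, 1) head the prediction is (64, 1), squeeze() gives (64,),
# and the shapes line up.
nn.MSELoss()(y_pred.squeeze(), y_batch)
```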
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | kwsp |
