RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same | another case
When I run my script, I run into:
RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same
even though my model and my input both seem to be on CUDA.
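From what I understand, this message appears when the weights and the input live on different devices. A toy sketch like the following (not my actual model, just an illustration) reproduces the same class of error:

# Toy sketch (not my actual model): weights left on the CPU, input on the GPU.
import torch
from torch import nn

conv = nn.Conv2d(2, 4, 3)                # weights stay on the CPU
x = torch.randn(1, 2, 32, 32).cuda()     # input lives on the GPU
conv(x)                                  # -> "Input type ... and weight type ... should be the same"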
Check for the model:
In [5]: [i.device for i in net.parameters()]
Out[5]:
[device(type='cuda', index=0),
device(type='cuda', index=0),
device(type='cuda', index=0),
device(type='cuda', index=0),
device(type='cuda', index=0),
device(type='cuda', index=0),
device(type='cuda', index=0),
device(type='cuda', index=0)]
Check for the input:
In [14]: [i for i in iter(train_loader)][0][0].device
context has already been set
[W CudaIPCTypes.cpp:15] Producer process has been terminated before all shared CUDA tensors released. See Note [Sharing CUDA tensors]
Out[14]: device(type='cuda', index=0)
The script (training.py):
import os
os.environ['MKL_THREADING_LAYER'] = 'GNU'

import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
# from torch.multiprocessing import Pool, Process, set_start_method
from torch.multiprocessing import set_start_method
try:
    set_start_method('spawn')
except RuntimeError as rte:
    print(rte)

import matplotlib.pyplot as plt

import unet
from datenaufbereitung import *
from datetime import datetime
from softcatch import CatchSIGINT

# net = NotImplemented
net = unet.FlexibleUNet(2, 4, bilinear = False, volume = 32, depth = 4)
lr = 1e-1
# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
# optimizer = optim.Adam(net.parameters(), lr = lr)
optimizer = optim.AdamW(net.parameters(), lr = lr)
# optimizer = optim.SGD(net.parameters(), lr = lr, momentum = 0.7, dampening = 0.3)

checkpoint_path = 'checkpoints'
runs_path = 'runs'

def save_checkpoint(unique=True):
    if unique:
        isotime = datetime.now().isoformat()
        fname = f'unet-{isotime}.pt'
    else:
        fname = 'unet-cpt.pt'
    torch.save(net, f'{checkpoint_path}/{fname}')

def load_newest():
    global net
    import os
    from glob import glob
    flist = glob(f'{checkpoint_path}/*')
    if flist:
        newest = max(flist, key=os.path.getctime)
        net = torch.load(newest)

def train(trainloader, n_epochs = 50, stride = 1, testset = None):
    # if not testset is None:
    #     fig = plt.figure()
    with CatchSIGINT() as sigint:
        for epoch in range(n_epochs):  # loop over the dataset multiple times
            # config net for training
            net.train()

            running_loss = 0.0
            for i, data in enumerate(trainloader, 0):
                # get the inputs
                inputs, targets = data

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                if i % stride == stride-1:  # print every <stride> mini-batches
                    print('[%d, %5d] loss: %.3f' %
                          (epoch + 1, i + 1, running_loss / stride))
                    running_loss = 0.0

                if sigint.caught:
                    break
            if sigint.caught:
                break

            # save checkpoint
            save_checkpoint(unique = False)

            # # config net for evaluation
            # if not testset is None:
            #     net.eval()
            #     ip, tg = testset[0]
            #     output = net(ip)
            #     imshow(output[0])

    save_checkpoint()

if __name__ == '__main__':
    # __spec__ = None
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")

    wdh = ' '
    while not wdh in 'jn':
        wdh = str.lower(input('Letzten Stand wiederherstellen (j/n)? '))

    make_data()
    # trainset, testset = make_datasets(train_fname = 'train_short.lx', test_fname = 'train_short.lx', in_transform = in_transform)
    # trainset, testset = make_datasets(train_fname = '2px_train.lx', test_fname = '2px_test.lx', maxlen = 10, in_transform = in_transform)
    trainset, testset = make_datasets(maxlen = 10)
    trainset.to(device)
    testset.to(device)
    net.to(device)
    criterion.to(device)
    train_loader, test_loader = make_dataloaders(trainset, testset)

    if wdh == 'j':
        load_newest()

    print('## Trainiere Netz')
    train(train_loader, 500)
In another script, "datenaufbereitung.py", I gave my datasets a "to" method so that they move their output to CUDA:
class DisplacementDataset(Dataset):
    def __init__(self, base_path, lexicon, transform=None, target_transform=None, maxlen=None, device=None):
        self.base_path = base_path
        self.lexicon = lexicon
        self.transform = transform
        self.target_transform = target_transform
        if not maxlen is None:
            self.length = min(len(lexicon), maxlen)
        else:
            self.length = len(lexicon)
        self.device = device

    def to(self, device):
        self.device = device

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        src_fn, warp = self.lexicon[idx]
        warp = deserialize_warp(warp)
        src = np.load(os.path.join(self.base_path, src_fn))
        src = np.squeeze(src)
        dst = warp.warp(src)
        images = warp.valid_roi(np.stack((src, dst)))
        shifts = warp.valid_roi(np.concatenate((warp.shiftmesh(), warp.inverse_shiftmesh())))
        if self.transform:
            images = self.transform(images)
        if self.target_transform:
            shifts = self.target_transform(shifts)
        if not self.device is None:
            return images.to(self.device), shifts.to(self.device)
        return images, shifts
So, since I seem to have sent my model, my optimizer, and my input/target data to CUDA, I would not expect this behavior.
What did I miss?
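For completeness, this is the kind of device check I could drop in right before the forward pass to see which tensor ends up on the wrong device (a sketch reusing net and train_loader from training.py above):

# Diagnostic sketch, reusing the names from training.py above: print the
# devices of one batch and of the model's parameters and buffers.
inputs, targets = next(iter(train_loader))
print('input device:  ', inputs.device)
print('target device: ', targets.device)
print('param devices: ', {p.device for p in net.parameters()})
print('buffer devices:', {b.device for b in net.buffers()})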
EDIT (2022-05-20):
Today I tried to reproduce the error to get a stack trace, but the errors have changed; it looks like my installation is somehow unstable. The error I now get reproducibly is this one:
/home/burkhard/anaconda3/envs/ml-lab/lib/python3.7/site-packages/torch/cuda/__init__.py:82: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:109.)
  return torch._C._cuda_getDeviceCount() > 0
This happens at "import unet" or at "net = FlexibleUNet", i.e. at one of the first points where I actually use torch. The warning comes without a stack trace, so I had to pin it down by inserting prints before and after the suspect lines.
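A cleaner alternative to the prints might be promoting the warning to an exception so it comes with a traceback; a sketch (I have not verified that this catches warnings raised from the C++ side):

# Sketch: turn the CUDA-initialization UserWarning into an exception so it
# comes with a traceback. Must run before anything touches torch.cuda.
import warnings
warnings.filterwarnings('error', message='CUDA initialization')

import torch
torch.cuda.is_available()   # raises instead of warning if CUDA init fails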
Might this depend on the versions in my installation? After I installed the newest driver that can handle the RTX 3080 on my Ubuntu system, I had to resort to nightly builds and pre-release PyPI packages.
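The quickest cross-check of the wheel against the driver that I can think of is something like this (a sketch; output omitted):

# Sketch: cross-check the installed wheel against the driver-side CUDA version.
import torch
print(torch.__version__)         # wheel version, e.g. a +cu116 nightly
print(torch.version.cuda)        # CUDA toolkit the wheel was built against
print(torch.cuda.is_available())
print(torch.cuda.device_count())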
(ml-lab) burkhard@pc:~$ nvidia-smi
Fri May 20 11:20:48 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.73.05 Driver Version: 510.73.05 CUDA Version: 11.6 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:01:00.0 On | N/A |
| 0% 57C P8 28W / 350W | 532MiB / 12288MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 1036 G /usr/lib/xorg/Xorg 102MiB |
| 0 N/A N/A 1664 G /usr/lib/xorg/Xorg 326MiB |
| 0 N/A N/A 1794 G /usr/bin/gnome-shell 77MiB |
| 0 N/A N/A 2024 G ...bexec/gnome-initial-setup 3MiB |
| 0 N/A N/A 26082 G ...a3/envs/ml-lab/bin/python 3MiB |
+-----------------------------------------------------------------------------+
(ml-lab) burkhard@pc:~$ dkms status
nvidia, 510.73.05, 5.4.0-110-generic, x86_64: installed
(ml-lab) burkhard@pc:~$ cat /proc/driver/nvidia/version
NVRM version: NVIDIA UNIX x86_64 Kernel Module 510.73.05 Sat May 7 05:30:26 UTC 2022
GCC version: gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~20.04.1)
(ml-lab) burkhard@pc:~$ conda list | grep torch
_pytorch_select 0.2 gpu_0
torch 1.12.0.dev20220518+cu116 pypi_0 pypi
torchaudio 0.12.0.dev20220518+cu116 pypi_0 pypi
torchvision 0.13.0.dev20220518+cu116 pypi_0 pypi
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow