'Problem with loading image data using Pytorch dataset and dataloader

I have a problem with loading image data.

train_dir = 'images'
train_mask_dir = 'masks'

class TissueDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transforms=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transforms = transforms
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.mask_dir, self.images[idx])

        image = np.array(Image.open(img_path).convert('RGB'))
        mask = np.array(Image.open(mask_path).convert('L'), dtype=np.float32)
        mask = np.round(mask / 255).astype(np.float32)
        if self.transforms:
            aug = self.transforms(image=image, mask=mask)
            image = aug['image']
            mask = aug['mask']
            
            return image, mask

train_dataset = TissueDataset(
    image_dir = train_dir,
    mask_dir = train_mask_dir,
    transforms=None
)

train_loader = DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        num_workers=2,
        pin_memory=PIN_MEMORY,
        shuffle=True
    )

x, y = next(iter(train_loader))

print(f'x = shape: {x.shape}; type: {x.dtype}')
print(f'x = min: {x.min()}; max: {x.max()}')
print(f'y = shape: {y.shape}; class: {y.unique()}; type: {y.dtype}')

The error I have is following:

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-36-869de9fa31b7> in <module>()
----> 1 x, y = next(iter(train_loader))
      2 
      3 print(f'x = shape: {x.shape}; type: {x.dtype}')
      4 print(f'x = min: {x.min()}; max: {x.max()}')
      5 print(f'y = shape: {y.shape}; class: {y.unique()}; type: {y.dtype}')

3 frames
/usr/local/lib/python3.7/dist-packages/torch/_utils.py in reraise(self)
    432             # instantiate since we don't know how to
    433             raise RuntimeError(msg) from None
--> 434         raise exception
    435 
    436 

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-29-c33cd66a240c>", line 16, in __getitem__
    mask = np.array(Image.open(mask_path).convert('L'), dtype=np.float32)
  File "/usr/local/lib/python3.7/dist-packages/PIL/Image.py", line 2843, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: 'masks/2018_74969_1-1_2019-02-2100_48_39-lv1-35186-14908-3285-3747.jpg'

I cannot understand why it is showing right directory for wrong files (or otherwise) when images and masks are in the right directories. I've also checked my custom dataset and seems that it is working right (I can open this images).

img_path = os.path.join(train_dir, os.listdir(train_dir)[0])
mask_path = os.path.join(train_mask_dir, os.listdir(train_mask_dir)[3])
image = np.array(Image.open(img_path).convert('RGB'))
mask = np.array(Image.open(mask_path).convert('L'), dtype=np.float32)
mask = np.round(mask / 255).astype(np.float32)
print(mask_path)
print(img_path)

Output:

masks/18-09530A_2019-05-0723_50_03-lv1-34626-18358-3736-6181_mask.jpg
images/18-09530A_2019-05-0723_50_03-lv1-34626-18358-3736-6181.jpg

I will really appreciate any help or tip on this issue.



Solution 1:[1]

You can use exact paths like "C:\sample_folder\masks\example.jpg" in order to use relative paths like "masks/example.jpg".

Please check the values of these assignments

img_path = os.path.join(train_dir, os.listdir(train_dir)[0])
mask_path = os.path.join(train_mask_dir, os.listdir(train_mask_dir)[3])

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 ?lker Kara