'How to grab image links correctly? My scraper only make blank folders
My code is only making empty folders and not downloading images.
So, I think I need it to be modified so that the images can be clearly downloaded.
I tried to fix it by myself, but can't figure it out how to do.
Anyone please help me. Thank you!
import requests
import parsel
import os
import time
for page in range(1, 310): # Total 309pages
print(f'======= Scraping data from page {page} =======')
url = f'https://www.bikeexif.com/page/{page}'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
response = requests.get(url, headers=headers)
html_data = response.text
selector = parsel.Selector(html_data)
containers = selector.xpath('//div[@class="container"]/div/article[@class="smallhalf"]')
for v in containers:
old_title = v.xpath('.//div[2]/h2/a/text()').get()#.replace(':', ' -')
if old_title is not None:
title = old_title.replace(':', ' -')
title_url = v.xpath('.//div[2]/h2/a/@href').get()
print(title, title_url)
if not os.path.exists('img\\' + title):
os.mkdir('img\\' + title)
response_image = requests.get(url=title_url, headers=headers).text
selector_image = parsel.Selector(response_image)
# Full Size Images
images_url = selector_image.xpath('//div[@class="image-context"]/a[@class="download"]/@href').getall()
for title_url in images_url:
image_data = requests.get(url=title_url, headers=headers).content
file_name = title_url.split('/')[-1]
time.sleep(1)
with open(f'img\\{title}\\' + file_name, mode='wb') as f:
f.write(image_data)
print('Download complete!!:', file_name)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
