How to scrape the image URLs of comments?
The following code scrapes the comments and the customer's country from each product page — for example, this product from the AliExpress website.
Code:
"""Scrape reviewer country and comment text from an AliExpress product's
review iframe and write the result to a CSV file."""
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# Product page to scrape.
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f=%7B%22sku_id%22%3A%2212000027213624098%22%7D&pdp_pi=-1%3B40.81%3B-1%3B-1%40salePrice%3BMAD%3Bsearch-mainSearch'

# Selenium 4 deprecated passing the driver path positionally; wrap it in a
# Service object instead.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
driver.get(url)
wait = WebDriverWait(driver, 10)

# Scroll the review tab into view so the lazy-loaded evaluation iframe is
# created, then navigate straight into the iframe's own URL (simpler than
# switching frames).
driver.execute_script(
    "arguments[0].scrollIntoView();",
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))),
)
driver.get(
    wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))
    ).get_attribute('src')
)

data = []
while True:
    for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
        # Some reviews lack a country or comment; record None in that case.
        try:
            country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
        except NoSuchElementException:
            country = None
        try:
            comment = e.find_element(By.CSS_SELECTOR, '.buyer-feedback span').text
        except NoSuchElementException:
            comment = None
        data.append({
            'country': country,
            'comment': comment,
        })
    # Advance to the next review page; stop once the "next" link no longer
    # appears within the wait timeout.
    try:
        wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))
        ).click()
    except TimeoutException:
        break

pd.DataFrame(data).to_csv('filename.csv', index=False)
I need code that scrapes the URLs of the images included in each customer's comment, but I really don't know how to get them.
Solution 1:[1]
It works the same way as for the comment and country, so simply add another try/except that stores a list of image URLs in your dict:
try:
    # find_elements() returns [] when nothing matches, so this rarely
    # raises, but keep the guard symmetric with the lookups above.
    # Narrowed from a bare `except:` which would also swallow
    # KeyboardInterrupt/SystemExit.
    images = [i.get_attribute('src') for i in e.find_elements(By.CSS_SELECTOR, '.r-photo-list img')]
except Exception:
    images = []
data.append({
    'country': country,
    'comment': comment,
    'images': images,
})
Example
"""Scrape reviewer country, comment text, and review-photo URLs from an
AliExpress product's review iframe and write the result to a CSV file."""
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd

# Product page to scrape.
url = 'https://www.aliexpress.com/item/1005003801507855.html?spm=a2g0o.productlist.0.0.1e951bc72xISfE&algo_pvid=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad&algo_exp_id=6d3ed61e-f378-43d0-a429-5f6cddf3d6ad-8&pdp_ext_f=%7B%22sku_id%22%3A%2212000027213624098%22%7D&pdp_pi=-1%3B40.81%3B-1%3B-1%40salePrice%3BMAD%3Bsearch-mainSearch'

# Selenium 4 deprecated passing the driver path positionally; wrap it in a
# Service object instead.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.maximize_window()
driver.get(url)
wait = WebDriverWait(driver, 10)

# Scroll the review tab into view so the lazy-loaded evaluation iframe is
# created, then navigate straight into the iframe's own URL (simpler than
# switching frames).
driver.execute_script(
    "arguments[0].scrollIntoView();",
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.tab-content'))),
)
driver.get(
    wait.until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, '#product-evaluation'))
    ).get_attribute('src')
)

data = []
while True:
    for e in driver.find_elements(By.CSS_SELECTOR, 'div.feedback-item'):
        # Some reviews lack a country or comment; record None in that case.
        try:
            country = e.find_element(By.CSS_SELECTOR, '.user-country > b').text
        except NoSuchElementException:
            country = None
        try:
            comment = e.find_element(By.CSS_SELECTOR, '.buyer-feedback span').text
        except NoSuchElementException:
            comment = None
        # find_elements() yields [] when the review has no photos, so
        # `images` is simply an empty list in that case.
        images = [
            i.get_attribute('src')
            for i in e.find_elements(By.CSS_SELECTOR, '.r-photo-list img')
        ]
        data.append({
            'country': country,
            'comment': comment,
            'images': images,
        })
    # Advance to the next review page; stop once the "next" link no longer
    # appears within the wait timeout.
    try:
        wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, '#complex-pager a.ui-pagination-next'))
        ).click()
    except TimeoutException:
        break

pd.DataFrame(data).to_csv('filename.csv', index=False)
Output
| country | comment | images |
|---|---|---|
| ES | ICH empfehlen sie, ICH geliebt sie und sie arbeiten sehr gut. | ['https://ae01.alicdn.com/kf/Ue579837c61094df2af612e27b3f5b415R.jpg', 'https://ae01.alicdn.com/kf/U3fd665774a6d4aebbee85d35da080350f.jpg', 'https://ae01.alicdn.com/kf/U18d93ae3598c482e8023d0663e0a6fc9U.jpg'] |
| TR | Angekommen in Der Türkei so bald wie 10 tage ICH hatte gekauft für meine mutter nicht wie zu hören sehr laut erfüllt die erwartung. Ganz tiny und süße dank verkäufer. | ['https://ae01.alicdn.com/kf/Ue4dc72f3a91b43248ad6ae2f4444e5b9g.jpg', 'https://ae01.alicdn.com/kf/U1ec20cd1f09f421a80a96d6a6cc0d6a7C.jpg'] |
| RU | Die besten kopfhörer! Sehr lange halten die batterie! Super design!! | [] |
| MA | | ['https://ae01.alicdn.com/kf/Ua0c22f565f0648c7943169ffbb74dd792.jpg'] |
| SA | It is an amazing, good for use…!!!! | ['https://ae01.alicdn.com/kf/U000b27dca5874bd681c6e285b69e0e71p.jpg', 'https://ae01.alicdn.com/kf/U0020e8a4147047ec9646cf64bb7b18cay.jpg'] |
| SA | Gute | [] |
| CL | Sie sind kleine, ICH nicht versucht es noch. | [] |
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | HedgeHog |
