Want to remove some text from the line
I need only the address, not the Tel, Fax, or Email lines. When I run the code it returns all of the data, but I want only the address. This is the page link: https://all.accor.com/hotel/8392/index.de.shtml
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Follow every hotel teaser link on the listing page and yield name + address.

    ``parse`` collects the ``Teaser-link`` hrefs from the start URL and
    schedules one request per hotel page; ``parse_book`` extracts the first
    ``<h3>`` text as the name and the postal-address lines from the
    ``infos__content`` block, leaving out the Tel/Fax/Email contact lines.
    """

    name = 'pushpa'
    start_urls = ['https://all.accor.com/de/region/hotels-sachsen-dsn.shtml']
    # Not read anywhere in this class; kept so any external reference still resolves.
    page_number = 0
    custom_settings = {
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 1,
        'USER_AGENT': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }

    # Leading tokens (lower-cased, trailing ":" / "." removed) that mark the
    # start of the contact section.
    # NOTE(review): inferred from the "tel, Fax, Email" wording of the task,
    # not from the page markup -- confirm against a live hotel page.
    _CONTACT_LABELS = frozenset({'tel', 'telefon', 'fax', 'email', 'e-mail'})

    def parse(self, response):
        """Yield one request per hotel linked from the listing page."""
        hotel_links = response.xpath("//a[@class='Teaser-link']//@href").extract()
        for link in hotel_links:
            # hrefs may be relative, so resolve them against the page URL.
            yield Request(response.urljoin(link), callback=self.parse_book)

    def parse_book(self, response):
        """Yield ``{'name': ..., 'address': [...]}`` for a single hotel page.

        ``name`` is the first ``<h3>`` text node (``None`` when there is none).
        ``address`` is the list of non-empty, stripped text lines that come
        before the first contact line.
        """
        title = response.xpath("//h3//text()").get()
        raw_texts = response.xpath(
            "//div[@class='infos__content']//p//text()"
        ).getall()
        yield {
            'name': title,
            'address': self._address_lines(raw_texts),
        }

    @classmethod
    def _address_lines(cls, texts):
        """Return the cleaned lines of *texts* that precede the contact section.

        The previous version dropped the last three *raw* text nodes before
        whitespace-only nodes were removed, so the slice counted blank nodes
        and did not reliably remove the Tel/Fax/Email lines. Here the text is
        cleaned first and then cut at the first line whose leading token is a
        known contact label.
        """
        address = []
        for text in texts:
            line = text.strip()
            if not line:
                continue
            # Compare only the first token so that street names which merely
            # begin with the same letters (e.g. "Telemannstrasse") are kept.
            tokens = line.casefold().replace(':', ' ').split()
            if tokens and tokens[0].rstrip('.') in cls._CONTACT_LABELS:
                break
            address.append(line)
        return address
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|

