Trying to collect names
I am trying to collect the first and last names listed under "responsable", "président" and "salariés de la fédération" on this page: http://www.normandie.cuma.fr/fiches/federation-des-cuma-normandie
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Spider that follows the links found on the start page and yields names.

    ``parse`` schedules one request per ``//h2/a`` link; ``parse_book``
    extracts the first/last name fields from each fetched page.
    """

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Yield a ``Request`` for every ``//h2/a`` href on the page."""
        books = response.xpath("//h2/a/@href").extract()
        for book in books:
            # Resolve the href against the URL of the current response.
            url = response.urljoin(book)
            yield Request(url, callback=self.parse_book)

    def parse_book(self, response):
        """Yield a ``{'first': ..., 'last': ...}`` dict per profile article."""
        name = response.xpath("//article[@class='profile']")
        for names in name:
            # NOTE(review): both expressions below start with "//", so they
            # are evaluated against the whole document rather than relative
            # to `names`; every iteration sees the same nodes. A leading
            # ".//" would scope the search to the current article.
            first_name = names.xpath("//div[@class='field-item even'][1]//text()").getall()
            last_name = names.xpath("//div[@class='field-item even'][3]//text()").getall()
            yield {
                'first': first_name,
                'last': last_name
            }
Solution 1:[1]
See the comments in the code for an explanation.
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Spider that follows the links found on the start page and yields names.

    ``parse`` schedules one request per ``//h2/a`` link; ``parse_book``
    extracts the first/last name of every profile article on the fetched
    page using XPath expressions relative to that article.
    """

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Yield a ``Request`` for every ``//h2/a`` href on the page."""
        books = response.xpath("//h2/a/@href").extract()
        for book in books:
            # Resolve the href against the URL of the current response.
            url = response.urljoin(book)
            yield Request(url, callback=self.parse_book)

    def parse_book(self, response):
        """Yield a ``{'first': str, 'last': str}`` dict per profile article."""
        name = response.xpath("//article[@class='profile']")
        for names in name:
            # use relative xpath: names.xpath('.//div..........')
            #
            # The two options are alternatives; only one should be active,
            # otherwise the second simply overwrites the first.
            #
            # Option 1 (alternative, not active):
            # I'm using /div/div/text() instead of //text() to avoid whitespace
            #   first_name = names.xpath('.//div[contains(@class, "field-name-field-user-txt-prenom")]/div/div/text()').get(default='').strip()
            #   last_name = names.xpath('.//div[contains(@class, "field-name-field-user-txt-nom")]/div/div/text()').get(default='').strip()
            #
            # Option 2 (active): collect every descendant text node, join
            # them and strip the surrounding whitespace.
            first_name = names.xpath('.//div[contains(@class, "field-name-field-user-txt-prenom")]//text()').getall()
            last_name = names.xpath('.//div[contains(@class, "field-name-field-user-txt-nom")]//text()').getall()
            first_name = ''.join(first_name).strip()
            last_name = ''.join(last_name).strip()
            yield {
                'first': first_name,
                'last': last_name
            }
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | SuperUser |

