Error when scraping a website after building the script into an .exe file
I am trying to scrape a website using Scrapy. I wrote the logic in Python, and it works properly when I run it from the Python terminal. However, when I build an .exe file from it and run that .exe file, it shows an error. I have searched for an answer but could not find one. Can anyone please help me? The error is:
This is the error I get after running the .exe file
import scrapy
class BBCUrduSpider(scrapy.Spider):
    """Crawl news categories and yield one item per article.

    Request flow: ``parse`` (main page) -> ``getIntoLinks`` (one category
    listing, followed page by page) -> ``getTheFullDetail`` (one article).
    Each yielded item is a dict with the keys ``'Category: '``,
    ``'Heading: '`` and ``'Full News: '``.
    """

    name = 'bbcurdu'
    # NOTE(review): the start URL is empty in this listing; presumably it must
    # be set to the site's main page before the spider can crawl - confirm.
    url = ""
    # Not referenced by any method in this class.
    fileName = 'dataFileForAll.csv'
    index = 0

    def start_requests(self):
        """Issue the single initial request for ``self.url``."""
        yield scrapy.Request(url=self.url)

    # Creating code for the 100 news for each category.
    # To BBC Main Page.
    def parse(self, response):
        """Follow every category link found in the main-page navigation list.

        Only list items 2 through 7 of the navigation ``<ul>`` are used.
        """
        links = response.xpath("//ul[@class='bbc-11krpir e1lim4kn4']/li[position() > 1 and position() < 8]/a")
        for link in links:
            self.index = 0
            href = link.xpath(".//@href").get()  # Link of one category.
            if not href:
                # An anchor without href would otherwise resolve to the
                # current page and be followed as if it were a category.
                continue
            yield response.follow(url=href, callback=self.getIntoLinks)

    # To Every Individual Category.
    def getIntoLinks(self, response):
        """Follow each article on a category page, then the next listing page.

        The category title is forwarded to the article callback through
        ``meta['Label']``. Pagination stops when there is no "next" button or
        when the next URL ends with ``"age/12"``.
        """
        ctgry = response.xpath("//h1[@class='topic-title gel-trafalgar-bold gs-u-pt-alt+ gs-u-pb+']//text()").get()  # ctgry = pakistan
        for anchor in response.xpath("//article/header/div/h3/a"):
            href = anchor.xpath(".//@href").get()
            if not href:
                continue
            yield response.follow(url=href, callback=self.getTheFullDetail, meta={'Label': ctgry})

        # Test the raw href BEFORE joining: joining a missing href yields the
        # current page URL, which is always truthy and hid the "no next page"
        # case in the previous version.
        next_href = response.xpath("//a[@class='lx-pagination__btn gs-u-mr+ qa-pagination-next-page lx-pagination__btn--rtl lx-pagination__btn--active']/@href").get()
        if next_href:
            nextLink = response.urljoin(next_href)
            if not nextLink.endswith("age/12"):
                yield scrapy.Request(url=nextLink, callback=self.getIntoLinks)

    # Third function to get the headline and detail of the news.
    def getTheFullDetail(self, response):
        """Extract category, headline and body text from one article page.

        For each body paragraph, the first text node inside a ``<b>`` element
        is used when the paragraph contains one; otherwise the paragraph's
        first text node is used. Paragraphs with no text contribute an empty
        string instead of raising ``TypeError``.
        """
        ctgry = response.meta['Label']
        heading = response.xpath("//h1[@id='content']//text()").get()
        paragraphs = response.xpath("//div[@class='bbc-4wucq3 e57qer20']/p[@class='bbc-1sy09mr e1cc2ql70']")

        pieces = []
        for paragraph in paragraphs:
            query = ".//b/text()" if paragraph.xpath(".//b") else ".//text()"
            # default='' guards against paragraphs without a matching text
            # node, where a plain .get() would return None.
            pieces.append(paragraph.xpath(query).get(default=''))
        fullnews = ''.join(pieces)

        yield {
            'Category: ': ctgry,
            'Heading: ': heading,
            'Full News: ': fullnews,
        }
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
