'im getting json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
import json
import re
import scrapy
import ast
class Scraper(scrapy.spiders.Spider):
name = 'scraper'
#mandatory=None
def __init__(self, page=None, config=None, *args, **kwargs):
self.page =page
self.config = json.loads(config)
print(type(self.config))
#self.mandatory_fields = mandatory.split(',')
super(Scraper, self).__init__(*args, **kwargs)
def start_requests(self):
self.logger.info('Start url: %s' % self.page)
yield scrapy.Request(url=self.page, callback=self.parse)
def parse(self, response):
item = dict(url=response.url)
# iterate over all keys in config and extract value for each of thems
for key in self.config:
print("++"+key)
# extract the data for the key from the html response
#print("++++++++++"+type(key))
print("+++"+self.config)
res = response.css(self.config[key]).extract()
# if the label is any type of url then make sure we have an absolute url instead of a relative one
if bool(re.search('url', key.lower())):
res = self.get_absolute_url(response, res)
item[key] = ' '.join(elem for elem in res).strip()
# ensure that all mandatory fields are present, else discard this scrape
mandatory_fileds_present = True
for key in self.mandatory_fields:
if not item[key]:
mandatory_fileds_present = False
if mandatory_fileds_present:
yield dict(data=item)
@staticmethod
def get_absolute_url(response, urls):
final_url = []
for url in urls:
if not bool(re.match('^http', url)):
final_url.append(response.urljoin(url))
else:
final_url.append(url)
return final_url
im getting this error : json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
im passing css selector as argument in config : scrapy crawl scraper -a page=appeloffres.com/appels-offres/telecom -a config='{"Nom":".table_taille td > b::text","des":".desc_text b::text"}'
when im doing : self.config = json.loads(config) any solution???
Solution 1:[1]
This means that you are trying to convert a variable into dict that loads method cannot convert.
json.loads() converts a string into a dictionary.
For example:
>>> import json
>>>
>>> my_str = '{"key1": "value1", "key2": "value2"}'
>>>
>>> loaded_dict = json.loads(my_str)
>>>
>>> loaded_dict
{'key1': 'value1', 'key2': 'value2'}
>>> type(loaded_dict)
<class 'dict'>
This is how json.loads work, i.e., converting a string to dict.
However, if you try something like this:
>>> import json
>>>
>>> some_var = 'cannot be converted to dict'
>>>
>>> loaded_dict = json.loads(some_var)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/Cellar/[email protected]/3.9.9/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py", line 346, in loads
return _default_decoder.decode(s)
File "/usr/local/Cellar/[email protected]/3.9.9/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/Cellar/[email protected]/3.9.9/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
This means that you not every string can be converted into a dictionary object. You should debug and carefully check what config variable holds.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Burakhan Aksoy |
