I'm getting json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)

import json
import re
import scrapy
import ast
class Scraper(scrapy.spiders.Spider):
    """Config-driven spider that scrapes a single page with CSS selectors.

    Spider arguments (passed with ``-a name=value``):

    * ``page``      -- URL of the page to scrape.
    * ``config``    -- JSON object mapping an output label to a CSS selector,
      e.g. ``{"Nom": ".table_taille td > b::text"}``.
    * ``mandatory`` -- optional comma-separated list of labels that must be
      non-empty for the scraped item to be emitted.
    """

    name = 'scraper'

    def __init__(self, page=None, config=None, *args, mandatory=None, **kwargs):
        """Store the spider arguments.

        Args:
            page: URL to request in ``start_requests``.
            config: JSON string (or an already-built dict) mapping labels to
                CSS selectors. ``None`` is treated as an empty mapping.
            mandatory: Comma-separated labels that must have a non-empty
                value. Keyword-only so the positional signature is unchanged.

        Raises:
            json.JSONDecodeError: If ``config`` is a string that is neither
                valid JSON nor a Python dict literal.
            ValueError: If ``config`` parses to something other than a dict.
        """
        super().__init__(*args, **kwargs)
        self.page = page
        self.config = self._parse_config(config)
        self.mandatory_fields = self._parse_mandatory(mandatory)

    @staticmethod
    def _parse_config(config):
        """Turn the ``config`` spider argument into a label -> selector dict."""
        if config is None:
            return {}
        if isinstance(config, dict):
            return config
        try:
            parsed = json.loads(config)
        except json.JSONDecodeError:
            # Fall back to a Python dict literal such as {'Nom': '...'}.
            # literal_eval only evaluates literals, never arbitrary code.
            try:
                parsed = ast.literal_eval(config)
            except (ValueError, SyntaxError, TypeError):
                parsed = None
            if not isinstance(parsed, dict):
                # Re-raise the original JSONDecodeError so callers see the
                # same exception type and position information as before.
                raise
        if not isinstance(parsed, dict):
            raise ValueError(
                'config must be a JSON object mapping labels to CSS '
                'selectors, got %s' % type(parsed).__name__
            )
        return parsed

    @staticmethod
    def _parse_mandatory(mandatory):
        """Split the comma-separated ``mandatory`` argument into a list of labels."""
        if not mandatory:
            return []
        if isinstance(mandatory, str):
            mandatory = mandatory.split(',')
        return [field.strip() for field in mandatory if field.strip()]

    def start_requests(self):
        """Yield the single request for ``self.page``."""
        self.logger.info('Start url: %s', self.page)
        yield scrapy.Request(url=self.page, callback=self.parse)

    def parse(self, response):
        """Extract every configured label from ``response``.

        Yields ``{'data': item}`` where ``item`` holds the response URL plus
        one space-joined string per configured label. Nothing is yielded when
        any mandatory field is missing or empty.
        """
        item = dict(url=response.url)
        for key, selector in self.config.items():
            self.logger.debug('Extracting %r with selector %r', key, selector)
            res = response.css(selector).extract()
            # If the label is any kind of URL, make sure the values are
            # absolute rather than relative.
            if 'url' in key.lower():
                res = self.get_absolute_url(response, res)
            item[key] = ' '.join(res).strip()
        # Discard this scrape unless all mandatory fields are present;
        # .get() also covers mandatory labels absent from the config.
        if all(item.get(key) for key in self.mandatory_fields):
            yield dict(data=item)

    @staticmethod
    def get_absolute_url(response, urls):
        """Return ``urls`` with every entry not starting with ``http`` joined to the response URL."""
        return [
            url if re.match('^http', url) else response.urljoin(url)
            for url in urls
        ]

I'm getting this error: json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)

I'm passing the CSS selectors as a JSON argument in config: scrapy crawl scraper -a page=appeloffres.com/appels-offres/telecom -a config='{"Nom":".table_taille td > b::text","des":".desc_text b::text"}'

The error is raised when I do: self.config = json.loads(config). Is there a solution?



Solution 1:[1]

This means that you are passing json.loads() a string that it cannot convert into a dict.

json.loads() parses a JSON-formatted string; a JSON object in that string becomes a Python dictionary.

For example:

>>> import json
>>> 
>>> my_str = '{"key1": "value1", "key2": "value2"}'
>>> 
>>> loaded_dict = json.loads(my_str)
>>> 
>>> loaded_dict
{'key1': 'value1', 'key2': 'value2'}
>>> type(loaded_dict)
<class 'dict'>

This is how json.loads works, i.e., it converts a JSON string to a dict.

However, if you try something like this:

>>> import json
>>> 
>>> some_var = 'cannot be converted to dict'
>>> 
>>> loaded_dict = json.loads(some_var)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/local/Cellar/python@3.9/3.9.9/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/usr/local/Cellar/python@3.9/3.9.9/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/usr/local/Cellar/python@3.9/3.9.9/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

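This means that not every string can be converted into a dictionary object. In your case, the message "Expecting property name enclosed in double quotes: line 1 column 2 (char 1)" says that the string starts with `{` but the key that follows is not wrapped in double quotes, which can happen, for example, when the shell strips or alters the quotes in the command-line argument. You should debug and carefully check what the config variable actually holds (e.g. print(repr(config)) before calling json.loads).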

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Burakhan Aksoy