Extracting Data Using Scrapy to a JSON File
I'm trying to collect specific data using Scrapy and export it to a JSON file.
I've created a class specifying the URL and defining the parse function. I can see the desired info in the terminal, but when I try to export it there's only an empty file:
def parse(self, response):
title = response.css('title::title').get()
yield {'name: ': title}
I run the `scrapy crawl name -o filename.json` command in the terminal to export the data.
Also, there is additional data that I want to extract, which is inside a JavaScript script tag:
<script type="text/javascript">
var dataLayerV2Json = {"mkt":{"recoverCartUrl":null,"criteo":{"account":"2820"}},"server":{"environment":"pro","application":"shop","sessionID":"E3F7DD02491DF62ED679483F0E2A2D4C"},"viewType":{"type":"pageview"},"shop":{"countryID":"006","countryISO":"GB","countryName":"reino unido","onSale":true,"warehouse":["001"],"languageID":"IN","languageISO":"en","device":"desktop","access":"web","isApp":false,"isVOTF":false,"isDoubleOptIn":false,"currency":"GBP"},"user":{"logged":false,"userID":null,"externalID":null,"navigationID":"S000000000000005UWRHGDUR80M2PC30NSD5TXEL","mail":null,"emailMD5":null,"countryID":"006","countryISO":"GB","address":"United Kingdom","firstName":null,"newsletter":"no","registered":"no","firstSession":"si","showPopupRGPD":false,"loyalty":false,"freeDelivery":"si","appsClientId":null,"anonymousUserID":"TKIHP5GRIRQYAGWXPH3J05JX"},"page":{"brand":"mango","brandID":"she","brandEN":"she","pageType":"ficha","pdpVersion":2,"categoryID":"20","canonical":"https://shop.mango.com/gb/women/skirts-midi/midi-satin-skirt_17042020.html","womanVisit":1},"ecommerce":{"detail":{"availability":true,"products":[{"id":"17042020","name":"m-laura:falda midi satinada","price":15.44,"brand":"mango","dimension117":"mango","gender":"m","category":"faldas","categoryID":"20","dimension22":null,"dimension124":null,"dimension123":"Regular","dimension21":"99","dimension107":"negro","variant":"99","colorId":"99","dimension92":"00","collection":"OI","productType":"PE","simpleName":"falda midi satinada","description":"flowy fabric, satin, midi design, flared design, elastic waist, party collection, the garments labelled as committed are products that have been produced using sustainable fibres or processes, reducing their environmental impact. 
mango's goal is to support the implementation of practices more committed to the environment, and therefore increase the number of sustainable garments in the collection.","categories":[{"name":"prendas"},{"name":"faldas"},{"name":"midi"}],"salePrice":12.99,"originalPrice":39.99,"currency":"GBP","photos":{"bodegon_b3":"https://st.mngbcn.com/rcs/pics/static/T1/fotos/S6/17042020_99_B3.jpg?ts=1627998509472","bodegon":"https://st.mngbcn.com/rcs/pics/static/T1/fotos/S6/17042020_99_B.jpg?ts=1624269403091","frontal":"https://st.mngbcn.com/rcs/pics/static/T1/fotos/S6/17042020_99.jpg?ts=1624265668193","outfit":"https://st.mngbcn.com/rcs/pics/static/T1/fotos/outfit/S6/17042020_99-99999999_01.jpg?ts=1624269403091"},"exclusiveOnline":"no","stock":"destallado","sizeAvailability":"M","sizeNoAvailability":"XS,S,L,XL","priceType":"rebajas","personalizable":"no"}]}}};
var dataLayer = [dataLayerV2Json];
</script>
I need only the salePrice attribute.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
