Web scraping VIVINO using Python

I have written a script that scrapes all red wines on Vivino's site for price, ratings, country, grapes, etc. When I run the code below, I get everything I need until I reach around 2000 wines; after that, the loop just repeats itself and returns wines I have already collected. Since there are clearly many more red wines available on Vivino, I would appreciate any help with retrieving as many wines as possible with the stated variables.

import json
import time
import urllib
import urllib.request

import pandas as pd
import requests

_EXPLORE_URL = "https://www.vivino.com/api/explore/explore"
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
}


def _dig(data, *keys):
    """Follow *keys* through nested containers; return None if any step is missing."""
    for key in keys:
        try:
            data = data[key]
        except (KeyError, IndexError, TypeError):
            # Missing key, short list, or a None/scalar where a container
            # was expected: treat the whole path as absent.
            return None
    return data


def _wine_row(match):
    """Flatten one entry of the "matches" list into a row of plain values.

    The first nine positions are always present (None when the field is
    missing); one grape name per grape follows.
    """
    vintage = _dig(match, "vintage")
    wine = _dig(vintage, "wine")
    row = [
        _dig(vintage, "name"),                           # name
        _dig(vintage, "year"),                           # year
        _dig(wine, "region", "country", "name"),         # country
        _dig(wine, "winery", "name"),                    # winery
        _dig(wine, "region", "name"),                    # region
        _dig(vintage, "statistics", "ratings_average"),  # rating
        _dig(vintage, "statistics", "ratings_count"),    # number of ratings
        _dig(match, "price", "amount"),                  # price
        _dig(wine, "vintage_type"),                      # vintage type
    ]
    grapes = _dig(wine, "style", "grapes")
    if isinstance(grapes, list):
        row.extend(_dig(grape, "name") for grape in grapes)  # grape(s)
    return row


def scrape(max_pages=500, output_path="Red_wine_Data3.xlsx"):
    """Download red-wine listings from the Vivino explore endpoint into Excel.

    Pages are requested one at a time, starting at page 1.  Scraping stops
    early when a request fails, or when a page contributes no wine that has
    not been collected already (an empty page, or a page that only repeats
    earlier results).

    Args:
        max_pages: Upper bound on the number of pages requested.
        output_path: Path of the Excel workbook to write.

    Returns:
        The pandas DataFrame that was written to ``output_path``; one row
        per wine, columns in the order produced by ``_wine_row``.
    """
    results = []
    seen = set()
    print("Retrieved Wines:")
    print("---", 0)

    for page in range(1, max_pages + 1):
        try:
            response = requests.get(
                _EXPLORE_URL,
                params={
                    "min_rating": "1",
                    "order": "asc",
                    "page": page,
                    "price_range_max": "500",
                    "price_range_min": "0",
                    "wine_type_ids[]": "1",
                },
                headers=_REQUEST_HEADERS,
                timeout=30,
            )
            response.raise_for_status()
            # Decode the body once per page instead of once per field.
            payload = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Keep what has been collected so far rather than losing it.
            print("Stopping at page", page, "-", exc)
            break

        matches = _dig(payload, "explore_vintage", "matches")
        if not isinstance(matches, list):
            matches = []

        new_rows = 0
        for match in matches:
            row = _wine_row(match)
            # repr() gives a hashable fingerprint whatever the field types are.
            fingerprint = repr(row)
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            results.append(row)
            new_rows += 1

        print("-- ", len(results))

        if not new_rows:
            # The endpoint has been observed to start repeating earlier
            # results after a while; stop instead of looping over duplicates.
            break

    df_results = pd.DataFrame(results)
    df_results.to_excel(output_path)
    return df_results


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    scrape()



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source