How to speed up a BeautifulSoup for loop

I have a for loop that goes through 6 URLs to get the text of the first element with the class "GARawf". The loop works; however, I've noticed that the page now takes about 9 seconds to load, compared to 1 second before. As I am new to Django and BeautifulSoup, I was wondering if there is a way I could refactor the code so that the view loads faster.

views.py

def results(request, result1, result2, result3, result4, result5, result6, broad_variable1, broad_variable2, broad_variable3, specific_variable_dictionary, user_city):
    """Render the results page for six cities, including a scraped flight price.

    For every city name the matching ``City`` row is loaded, the value of each
    user-selected variable is looked up, and a flight price is scraped from a
    Google search for "<user_city> to <city> Google Flights".

    Args:
        request: The incoming Django request.
        result1, result2, result3, result4, result5, result6: City names; each
            one is matched against ``City.city``.
        broad_variable1: Name of the first selected ``City`` field. It is
            always looked up.
        broad_variable2, broad_variable3: Names of the optional second and
            third fields. The literal string "Nothing" means no field was
            chosen, and the corresponding values stay ``None``.
        specific_variable_dictionary: String form of a dictionary; it is parsed
            with ``ast.literal_eval`` before being passed to the template.
        user_city: Origin city used in the flight search.

    Returns:
        The rendered ``Discovery_App/results.html`` response.
    """
    city_names = [result1, result2, result3, result4, result5, result6]
    broad_variables = [broad_variable1, broad_variable2, broad_variable3]

    context = {
        "broad_variable1": broad_variable1,
        "broad_variable2": broad_variable2,
        "broad_variable3": broad_variable3,
    }

    # create list of cities while filling in the per-city context entries
    city_list = []
    for position, name in enumerate(city_names, start=1):
        city_obj = City.objects.filter(city=name).first()
        context["result{}".format(position)] = city_obj
        city_list.append(city_obj.city)

        for slot, field in enumerate(broad_variables, start=1):
            # the first variable is always looked up; the second and third
            # stay None when the user picked "Nothing"
            if slot > 1 and field == "Nothing":
                value = None
            else:
                value = City.objects.filter(city=city_obj.city).values(field)[0][field]
            context["result{}_value{}".format(position, slot)] = value

    # create price list
    prices_list = []

    # set origin for flight
    origin = user_city

    # set headers
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"
    }

    for city in city_list:

        # set search query; passing it through ``params`` lets requests
        # URL-encode it, so names containing "&", "#" or "+" stay intact
        query = origin + " to " + city + " Google Flights"

        try:
            # without a timeout a single stalled connection blocks the view
            response = requests.get(
                "https://google.com/search",
                params={"q": query},
                headers=headers,
                timeout=10,
            )
        except requests.RequestException:
            # a failed lookup is reported the same way as a missing price
            prices_list.append("Not Available")
            continue

        soup = BeautifulSoup(response.text, 'lxml')

        # get price element
        price_tag = soup.find("span", attrs={"class": "GARawf"})
        if price_tag is not None:
            prices_list.append(price_tag.text.strip())
        else:
            prices_list.append("Not Available")

    # change string dictionary into actual dictionary
    context["specific_variable_dictionary"] = ast.literal_eval(specific_variable_dictionary)
    context["prices_list"] = prices_list

    return render(request, 'Discovery_App/results.html', context)


Solution 1:[1]

I would use threading to call the function doing the requests, so that the requests run concurrently. You will want to import concurrent.futures.

Then pass the items of city_list to the threaded function, as shown below. Read more about using thread pools.

Update 2/11: Due to your recent comments, I have changed the code below. In your original post you stated that the code works, but it's very slow. What you were doing is using a for loop to go through the cities one by one. This is slow because the program waits for the previous request to finish and return before making a new request.

What I have changed in the code below is that I separated your code into 2 functions. The first function ends right after you define city_list, because that is the last information collected before you open a new request.

After city_list is defined as a list, we then define the ThreadPoolExecutor and pass whatever values you need into the next function; in this case it's the items from city_list and user_city.

This will concurrently call the function get_price for each item in the city_list. No need for the for loop. But speaking of for loops, your first function could definitely use some to cut down the amount of lines.

import concurrent.futures

def results(request, result1, result2, result3, result4, result5, result6, broad_variable1, broad_variable2, broad_variable3, specific_variable_dictionary, user_city):
    """Render the results page for six cities, fetching flight prices concurrently.

    For every city name the matching ``City`` row is loaded and the value of
    each user-selected variable is looked up. The flight prices are then
    fetched by ``get_price`` in a thread pool, one call per city, so the six
    HTTP requests overlap instead of running one after another.

    Args:
        request: The incoming Django request.
        result1, result2, result3, result4, result5, result6: City names; each
            one is matched against ``City.city``.
        broad_variable1: Name of the first selected ``City`` field. It is
            always looked up.
        broad_variable2, broad_variable3: Names of the optional second and
            third fields. The literal string "Nothing" means no field was
            chosen, and the corresponding values stay ``None``.
        specific_variable_dictionary: String form of a dictionary; it is parsed
            with ``ast.literal_eval`` before being passed to the template.
        user_city: Origin city used in the flight search.

    Returns:
        The rendered ``Discovery_App/results.html`` response.
    """
    city_names = [result1, result2, result3, result4, result5, result6]
    broad_variables = [broad_variable1, broad_variable2, broad_variable3]

    context = {
        "broad_variable1": broad_variable1,
        "broad_variable2": broad_variable2,
        "broad_variable3": broad_variable3,
    }

    # create list of cities while filling in the per-city context entries
    city_list = []
    for position, name in enumerate(city_names, start=1):
        city_obj = City.objects.filter(city=name).first()
        context["result{}".format(position)] = city_obj
        city_list.append(city_obj.city)

        for slot, field in enumerate(broad_variables, start=1):
            # the first variable is always looked up; the second and third
            # stay None when the user picked "Nothing"
            if slot > 1 and field == "Nothing":
                value = None
            else:
                value = City.objects.filter(city=city_obj.city).values(field)[0][field]
            context["result{}_value{}".format(position, slot)] = value

    # define threadpool executor
    # executor.map() zips its iterables, so passing the user_city string as a
    # second iterable would hand get_price one *character* per call; bind the
    # origin with a lambda instead. map() yields results in input order, so
    # prices_list lines up with city_list.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        prices_list = list(
            executor.map(lambda city: get_price(city, user_city), city_list)
        )

    # change string dictionary into actual dictionary
    context["specific_variable_dictionary"] = ast.literal_eval(specific_variable_dictionary)
    context["prices_list"] = prices_list

    return render(request, 'Discovery_App/results.html', context)


def get_price(cities, user_city):
    """Scrape the flight price for one origin/destination pair.

    Args:
        cities: Name of a single destination city. ``results`` calls this
            function once per item of its city list; the plural parameter
            name is kept as-is for compatibility.
        user_city: Name of the origin city.

    Returns:
        The stripped text of the first ``<span class="GARawf">`` element on
        the search result page, or "Not Available" when the request fails or
        the page contains no such element.
    """
    # set headers
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36"
    }

    # set search query; passing it through ``params`` lets requests
    # URL-encode it, so names containing "&", "#" or "+" stay intact
    query = user_city + " to " + cities + " Google Flights"

    try:
        # without a timeout a single stalled connection would keep its worker
        # thread (and therefore the whole view) waiting indefinitely
        response = requests.get(
            "https://google.com/search",
            params={"q": query},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException:
        # a failed lookup is reported the same way as a missing price
        return "Not Available"

    soup = BeautifulSoup(response.text, 'lxml')

    # get price element
    price_tag = soup.find("span", attrs={"class": "GARawf"})
    if price_tag is None:
        return "Not Available"
    return price_tag.text.strip()

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1