'can i make my scraping (pandas read html) script faster?

i have a very simple script, it just scrapes some tables off the internet and inserts it into a db. however: tickerlist contains about 8000 rows. and the script takes about 2 hours to complete (80ms per ticker). is there anyway to speed it up?

this is my code:

import pandas as pd import weslib as wl from datetime import datetime

wl.joblog('AQstats', '', '','Starting AQ scrape stats')
con = wl.condb()

tickerlist = wl.tickerlist(['NASDAQ','NYSE'])
#tickerlist = ['AAPL', 'MSFT', 'T','INTC']

scrapedate = datetime.now()

for n, stock in enumerate(tickerlist):
    wl.joblog('AQstats', stock, '','Starting: ' + str(n+1) + '/' + str(len(tickerlist)) + ' ' + str(stock))
    try:
        url = 'https://www.alphaquery.com/stock/' + str(stock) + '/all-data-variables/'
        df = pd.read_html(url)[0]
        df['numvalue'] = wl.convertnumbers(df[1])
        df['scrapedate'] = scrapedate
        df['symbol'] = stock
        df.columns=['Label','orgvalue', 'numvalue','scrapedate','symbol']
        df.reset_index(inplace=True) 
    except (Exception) as error:
        wl.joblog('AQstats', stock, '','error: scraping \n' + str(error) )
        continue

    try:
        wl.insert_values(con,df,'AQstats',stock,'AQstats')
    except (Exception) as error:
        wl.joblog('AQstats', stock, '','error: scraping \n' + str(error) )
        continue
        
con.close()
wl.joblog('AQstats', '', '','Ending AQ scrape stats')

and these are the two relevant functions from my own lib.

import psycopg2 as db
import psycopg2.extras as extras
def insert_values(conn, df, table, ticker, call):
    """ Using psycopg2.extras.execute_values() to insert the dataframe """
    df.replace('None',0,inplace=True)
    # Create a list of tupples from the dataframe values
    tuples = [tuple(x) for x in df.to_numpy()]
    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))
    # SQL quert to execute
    query  = "INSERT INTO %s(%s) VALUES %%s on conflict do nothing" % (table, cols)
    cursor = conn.cursor()
    try:
        extras.execute_values(cursor, query, tuples)
        conn.commit()
    except (Exception, db.DatabaseError) as error:
        joblog(call, ticker, table, "db error: %s" % error)
        print("DB Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
        joblog(call, ticker, table,"insert values done for: %s %s", call, ticker)
    cursor.close()



def joblog(job, ticker, table, note):
    print('JOBLOG', job, ticker, table, note)
    conn = condb()
    query = "insert into log (logdate, logtable, logjob, logticker, note) values ( current_timestamp, %s, %s, %s, %s ) on conflict do nothing" 
    cursor = conn.cursor()
    try:
        cursor.execute(query, (table, job, ticker, note))
    except (Exception, db.DatabaseError) as error:
        print(query)
        dblog(job, ticker, 'log' ,"DB Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    conn.commit()
    cursor.close()
    conn.close()


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source