How do I properly paginate the results from the polygon.io API?

I'm trying to download all the minute bars between two dates for stock symbols using polygon.io. According to their documentation, each API request is limited to 50,000 results.

From their Github issues, I've found the following comment:

The aggregates endpoint does not have the next_url for pagination. Instead, if there are more than 50,000 messages in the response, you will need to query a smaller time frame of data. I recommend querying 1 months worth of minute bars per query.

So here's what I've done so far:

Return a list of symbols:

from polygon import RESTClient
import os.path
from IPython.display import display
import pandas as pd

# Polygon.io API key passed to RESTClient; blank here.
key = ''

# Module-level accumulators; each one starts out as its own empty list.
all_tickers, df_list, final_df = [], [], []

# Date range to download, as "YYYY-MM-DD" strings.
from_, to = '2021-05-01', '2022-12-01'


def get_tickers():
    """Fetch every page of ``type="CS"`` tickers into ``all_tickers``.

    The first page comes from ``client.reference_tickers_v3``; each later
    page is fetched by following the ``next_url`` of the previous response.
    Rows are appended to the module-level ``all_tickers`` list and nothing
    is returned.
    """
    with RESTClient(key) as client:
        tickers = client.reference_tickers_v3(type="CS")
        while True:
            # A page without results contributes nothing instead of raising.
            all_tickers.extend(getattr(tickers, 'results', None) or [])

            # A missing *or empty* ``next_url`` marks the last page.  Testing
            # only for the attribute's presence would let a ``None`` link
            # fall back to the first-page request and repeat the download
            # forever.
            next_url = getattr(tickers, 'next_url', None)
            if not next_url:
                break
            tickers = client._handle_response("ReferenceTickersV3ApiResponse", next_url, {})
    

file_name = 'tickers.csv'
if os.path.exists(file_name):
    # A saved copy exists: load it and keep only its 'ticker' column.
    all_tickers = pd.read_csv(file_name)['ticker']
else:
    # Nothing saved yet: download the tickers, then write them to disk.
    get_tickers()

    all_tickers_copy = pd.DataFrame(all_tickers)
    all_tickers_copy.to_csv(file_name, index=False)

Return a list with the start and the end days of the months, between the from_ and to dates:

import pandas as pd

# Split the from_..to range into one (first day, last day) pair per calendar
# month that the range touches.
start_date, end_date = from_, to
# 'D' is the documented alias for calendar-day frequency; the lowercase
# spelling is deprecated in newer pandas releases.
dtrange = pd.date_range(start=start_date, end=end_date, freq='D')
months = pd.Series(dtrange.month)

# A day opens a month when the previous day's month number differs and closes
# one when the next day's differs.  The NaN that shift() leaves at either edge
# compares unequal, so the first and last days of the range always qualify.
starts = months.ne(months.shift(1))
ends = months.ne(months.shift(-1))
df = pd.DataFrame({
    'month_starting_date': dtrange[starts.to_numpy()].strftime('%Y-%m-%d'),
    'month_ending_date': dtrange[ends.to_numpy()].strftime('%Y-%m-%d'),
})

# Re-wrap as a frame whose row 0 holds the two column names and whose
# remaining rows hold the date pairs; its columns are labelled 0 and 1.
months = pd.DataFrame([df.columns.values.tolist()] + df.values.tolist())

I then have a function which loops through my symbols and makes an API request for every month between from_ and to:

def _bars_to_frame(results):
    """Turn raw aggregate rows into a frame with named columns plus time/date strings."""
    df = pd.DataFrame(results)
    df = df[['t', 'v', 'o', 'c', 'h', 'l', 'vw']]
    df.columns = ['datetime', 'volume', 'open', 'close', 'high', 'low', 'vwap']
    # 't' is converted as a millisecond epoch timestamp.
    df['datetime'] = pd.to_datetime(df['datetime'], unit='ms')
    df['time'] = df['datetime'].dt.strftime("%H:%M:%S")
    df['date'] = df['datetime'].dt.strftime("%Y-%m-%d")
    return df


def get_daily_agg(from_, to, ticker):
    """Download minute bars for ``ticker`` one month at a time and save them as CSV.

    One aggregates request is made per row of the module-level ``months``
    table (row 0, which holds the column names, is skipped).  The monthly
    frames are concatenated and written to
    ``intraday_bars_gapped_new/<ticker>.csv``; the folder is created when
    missing.  A month that fails to download or parse is reported and
    skipped, so the remaining months are still fetched.  If no month yields
    data, an empty frame is written.

    Args:
        from_: Accepted for interface compatibility.  The request windows
            come from ``months``, not from this argument.
        to: Accepted for interface compatibility; see ``from_``.
        ticker: Symbol to request; also used as the CSV file name.

    Returns:
        None.
    """
    folder_name = 'intraday_bars_gapped_new'
    # DataFrame.append never modified its receiver (and no longer exists in
    # pandas 2), so the monthly frames are gathered here and joined once.
    frames = []

    with RESTClient(key) as client:
        # skip the header and loop through the rows
        for _, row in months[1:].iterrows():
            month_start = row[0]
            month_end = row[1]
            print(f'{month_end} and {month_start}')
            try:
                r = client.stocks_equities_aggregates(ticker, 1, "minute", month_start, month_end, unadjusted=False, limit='50000')
                print(f'downloading {ticker} from {month_start} to {month_end}')
                frames.append(_bars_to_frame(r.results))
            except Exception as exc:
                # Best effort per month: report the failure and move on so
                # that one bad window does not discard the others.
                print(f'nothing found for {ticker} from {month_start} to {month_end}: {exc!r}')
                continue

    if frames:
        final_df = pd.concat(frames, ignore_index=True)
    else:
        final_df = pd.DataFrame([])

    os.makedirs(folder_name, exist_ok=True)
    final_df.to_csv('{}/{}.csv'.format(folder_name, ticker), index=False)

import glob
from pathlib import Path

# Download minute bars for at most 20 of the CSV files matched in
# daily_bars_filtered/, using each file's name without extension as the ticker.
folder = "daily_bars_filtered/*.csv"
for ticker in (Path(fname).stem for fname in glob.glob(folder)[:20]):
    get_daily_agg(from_, to, ticker)

My question is: how do I properly paginate the results from the polygon.io API?

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution	Source

How do I properly paginate the results from the polygon.io API?

Sources

Related Questions