How do I properly paginate the results from the polygon.io API?
I'm trying to download all the minute bars between two dates for stock symbols using polygon.io. According to their documentation, the API is limited to 50,000 results.
From their Github issues, I've found the following comment:
The aggregates endpoint does not have the next_url for pagination. Instead, if there are more than 50,000 messages in the response, you will need to query a smaller time frame of data. I recommend querying 1 months worth of minute bars per query.
So here's what I've done so far:
Return a list of symbols:
from polygon import RESTClient
import os.path
from IPython.display import display
import pandas as pd
# API key handed to RESTClient; an empty placeholder here.
key = ''
# Accumulator that get_tickers() extends with each page of results.
all_tickers = []
# Not referenced by any other code visible in this file.
df_list = []
# Module-level placeholder; get_daily_agg() works on its own local `final_df`.
final_df = []
# Bounds of the date range to download, as 'YYYY-MM-DD' strings.
from_ = '2021-05-01'
to = '2022-12-01'
def get_tickers():
    """Download every page of ``type="CS"`` tickers into the global ``all_tickers``.

    The first request goes through ``reference_tickers_v3``; every following
    page is fetched from the ``next_url`` reported by the previous response.
    Paging stops as soon as a response carries no usable ``next_url``.

    Returns nothing; results are appended to the module-level ``all_tickers``.
    """
    with RESTClient(key) as client:
        next_url = None
        while True:
            if next_url is None:
                tickers = client.reference_tickers_v3(type="CS")
            else:
                # NOTE(review): _handle_response is a private client method;
                # it may change between client releases.
                tickers = client._handle_response("ReferenceTickersV3ApiResponse", next_url, {})
            # A page without results contributes nothing instead of raising.
            all_tickers.extend(getattr(tickers, 'results', None) or [])
            # Treat a missing, None or empty next_url the same way: resetting
            # next_url to None and looping would re-request the first page forever.
            next_url = getattr(tickers, 'next_url', None)
            if not next_url:
                break
# Load the ticker list, using 'tickers.csv' as an on-disk cache so the
# reference endpoint is only queried when the file does not exist yet.
file_name = 'tickers.csv'
if not os.path.exists(file_name):
    get_tickers()
    all_tickers_copy = pd.DataFrame(all_tickers)
    all_tickers_copy.to_csv(file_name, index=False)
else:
    all_tickers_copy = pd.read_csv(file_name)
# Both paths end with the same thing: the 'ticker' column. Previously the
# fresh-download path left `all_tickers` as the raw result list while the
# cached path turned it into a column of symbols.
all_tickers = all_tickers_copy['ticker']
Return a list with the start and the end days of the months, between the from_ and to dates:
import pandas as pd
# One entry per calendar day between the two bounds.
start_date = from_
end_date = to
dtrange = pd.date_range(start=start_date, end=end_date, freq='d')

# A day opens a month when the previous day belongs to another month, and
# closes one when the following day does (the range edges count as both).
month_numbers = pd.Series(dtrange.month)
starts = month_numbers != month_numbers.shift(1)
ends = month_numbers != month_numbers.shift(-1)

df = pd.DataFrame({
    'month_starting_date': dtrange[starts].strftime('%Y-%m-%d'),
    'month_ending_date': dtrange[ends].strftime('%Y-%m-%d'),
})

# Row 0 holds the column names, followed by one [start, end] row per month;
# consumers skip the header with months[1:].
header_row = [list(df.columns)]
months = pd.DataFrame(header_row + df.values.tolist())
I then have a function which loops through my symbols and makes an API request for every month between from_ and to:
def _month_windows(from_, to):
    """Split the inclusive range from_..to into one (start, end) pair per calendar month.

    Both bounds of every pair are 'YYYY-MM-DD' strings. The first window
    starts on ``from_`` and the last one ends on ``to``, so partial months at
    either edge are kept. An inverted range yields an empty list.
    """
    window_start = pd.Timestamp(from_).normalize()
    range_end = pd.Timestamp(to).normalize()
    windows = []
    while window_start <= range_end:
        # MonthEnd(0) rolls forward to the last day of the month and leaves a
        # date that already is a month end untouched.
        window_end = min(window_start + pd.offsets.MonthEnd(0), range_end)
        windows.append((window_start.strftime('%Y-%m-%d'), window_end.strftime('%Y-%m-%d')))
        window_start = window_end + pd.Timedelta(days=1)
    return windows


def get_daily_agg(from_, to, ticker):
    """Download 1-minute bars for ``ticker`` between ``from_`` and ``to`` and save them as CSV.

    The range is requested one calendar month at a time so each response stays
    within the per-request result limit. All months are concatenated and
    written to ``intraday_bars_gapped_new/<ticker>.csv``.

    Args:
        from_: First day to download, 'YYYY-MM-DD'.
        to: Last day to download, 'YYYY-MM-DD'.
        ticker: Symbol to request.

    A month that fails or returns no bars is reported and skipped; the
    remaining months are still downloaded. The CSV is written even when no
    month produced data (it is then empty).
    """
    folder_name = 'intraday_bars_gapped_new'
    # API field name -> output column name, in output order.
    column_names = {
        't': 'datetime',
        'v': 'volume',
        'o': 'open',
        'c': 'close',
        'h': 'high',
        'l': 'low',
        'vw': 'vwap',
    }
    monthly_frames = []

    with RESTClient(key) as client:
        for month_start, month_end in _month_windows(from_, to):
            print(f'downloading {ticker} from {month_start} to {month_end}')
            try:
                r = client.stocks_equities_aggregates(
                    ticker, 1, "minute", month_start, month_end,
                    unadjusted=False, limit='50000',
                )
                bars = getattr(r, 'results', None)
                if not bars:
                    print(f'nothing found for {ticker} from {month_start} to {month_end}')
                    continue
                df = pd.DataFrame(bars)[list(column_names)].rename(columns=column_names)
            except Exception as exc:
                # Best effort per month: the client's exception types are not
                # visible here, so report the error and move on to the next
                # month instead of abandoning the whole ticker.
                print(f'nothing found for {ticker} from {month_start} to {month_end}: {exc!r}')
                continue

            df['datetime'] = pd.to_datetime(df['datetime'], unit='ms')
            df['time'] = df['datetime'].dt.strftime("%H:%M:%S")
            df['date'] = df['datetime'].dt.strftime("%Y-%m-%d")
            monthly_frames.append(df)

    # DataFrame.append returned a new frame (and no longer exists in pandas 2);
    # collecting the pieces and concatenating once actually keeps the data.
    if monthly_frames:
        final_df = pd.concat(monthly_frames, ignore_index=True)
    else:
        final_df = pd.DataFrame([])

    os.makedirs(folder_name, exist_ok=True)
    final_df.to_csv('{}/{}.csv'.format(folder_name, ticker), index=False)
import glob
from pathlib import Path
# Each CSV in the folder is named after a ticker; download minute bars for
# the first 20 files that glob returns.
folder = "daily_bars_filtered/*.csv"
matched_files = glob.glob(folder)
for fname in matched_files[:20]:
    ticker = Path(fname).stem
    get_daily_agg(from_, to, ticker)
My question is: how do I properly paginate the results from the polygon.io API?
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
