Get Historical Price Data from Polygon.io

Learn how to use the Polygon.io Python API to get historical price data for crypto, stocks, futures, and forex.

You can download the code at the Analyzing Alpha GitHub Repo.

Install Polygon API

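The first thing we’ll want to do is activate our virtual environment and install the Polygon Python API REST client. If you don’t know how to create a virtual environment, set one up first (for example with `python -m venv`) and activate it before running the install command below.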

!pip install polygon-api-client

Add Imports

Import the polygon RESTClient and the local_settings file, which should contain your api_key.

from polygon import RESTClient
from local_settings import polygon as settings

Inherit RESTClient and Add Retry Strategy

Here we inherit the functionality from Polygon’s RESTClient. We call super().__init__ to initialize our class using RESTClient’s own __init__, which creates the _session attribute; we can then modify that session by mounting an adapter that carries our retry strategy.

# from datetime import date
from datetime import date, datetime
from typing import Any, Optional
import pandas as pd
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Market identifiers that the MyRESTClient helper methods accept; any other
# value passed as ``market`` is rejected with an exception.
markets = ['crypto', 'stocks', 'fx']

class MyRESTClient(RESTClient):
    """Polygon ``RESTClient`` whose HTTP session retries failed requests."""

    def __init__(self, auth_key: str=settings['api_key'], timeout:int=5):
        """Initialise the parent client and mount a retrying HTTPS adapter.

        Args:
            auth_key: Polygon API key. Defaults to the ``api_key`` entry of
                the imported ``settings`` mapping.
            timeout: Request timeout forwarded to ``RESTClient``. Previously
                this argument was accepted but never used.
                NOTE(review): assumes the installed ``RESTClient`` accepts a
                ``timeout`` keyword, as the legacy 0.2.x client does - confirm.
        """
        super().__init__(auth_key, timeout=timeout)
        # Retry up to 10 times, with exponential backoff, on rate limiting
        # (429) and on transient server errors (5xx).
        retry_strategy = Retry(total=10,
                               backoff_factor=10,
                               status_forcelist=[429, 500, 502, 503, 504])
        adapter = HTTPAdapter(max_retries=retry_strategy)
        # Every https:// request made through the session uses the adapter.
        self._session.mount('https://', adapter)

Create Get Tickers Method

Let’s create a client from our new child class, MyRESTClient, and see what we need to use the reference tickers method.

# Build a client from the subclass, then print the built-in help for the
# reference tickers method to see which arguments it takes.
client = MyRESTClient(settings['api_key'])
help(client.reference_tickers_v3)

After reviewing the help, we can now create our get_tickers method.

class MyRESTClient(RESTClient):
    """Polygon ``RESTClient`` with automatic retries and a ticker listing helper."""

    def __init__(self, auth_key: str=settings['api_key'], timeout:int=5):
        """Initialise the parent client and mount a retrying HTTPS adapter.

        Args:
            auth_key: Polygon API key. Defaults to the ``api_key`` entry of
                the imported ``settings`` mapping.
            timeout: Request timeout forwarded to ``RESTClient``. Previously
                this argument was accepted but never used.
                NOTE(review): assumes the installed ``RESTClient`` accepts a
                ``timeout`` keyword, as the legacy 0.2.x client does - confirm.
        """
        super().__init__(auth_key, timeout=timeout)
        # Retry up to 10 times, with exponential backoff, on rate limiting
        # (429) and on transient server errors (5xx).
        retry_strategy = Retry(total=10,
                               backoff_factor=10,
                               status_forcelist=[429, 500, 502, 503, 504])
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self._session.mount('https://', adapter)

    def get_tickers(self, market:str=None) -> Optional[pd.DataFrame]:
        """Return every ticker of ``market`` as one de-duplicated DataFrame.

        Follows ``next_url`` on each response until a response no longer
        carries one, collecting the ``results`` of every page.

        Args:
            market: One of the values in the module-level ``markets`` list.

        Returns:
            A DataFrame with one row per unique ticker, or ``None`` when the
            first response has no ``results`` attribute. For ``'crypto'`` only
            USD pairings are kept and the columns are reduced to
            ``ticker``, ``name``, ``market`` and ``active``.

        Raises:
            ValueError: If ``market`` is not in ``markets``.
        """
        if market not in markets:
            raise ValueError(f'Market must be one of {markets}.')

        resp = self.reference_tickers_v3(market=market)
        if not hasattr(resp, 'results'):
            return None

        # Collect pages in a list and concatenate once: DataFrame.append was
        # removed in pandas 2.0 and re-copied the frame on every page anyway.
        pages = [pd.DataFrame(resp.results)]
        while hasattr(resp, 'next_url'):
            resp = self.reference_tickers_v3(next_url=resp.next_url)
            if hasattr(resp, 'results'):
                pages.append(pd.DataFrame(resp.results))
        df = pd.concat(pages, ignore_index=True)

        # An empty result set has no columns to filter or de-duplicate on.
        if df.empty:
            return df

        if market == 'crypto':
            # Only use USD pairings. Copy so the column assignment below
            # writes to an independent frame rather than a filtered view.
            df = df[df['currency_symbol'] == 'USD'].copy()
            df['name'] = df['base_currency_name']
            df = df[['ticker', 'name', 'market', 'active']]

        df = df.drop_duplicates(subset='ticker')
        return df
# Re-create the client from the updated class and fetch the crypto tickers.
# The trailing bare ``df`` shows the frame when this runs as a notebook cell.
client = MyRESTClient(settings['api_key'])
df = client.get_tickers(market='crypto')
df

    ticker      name         market  active
0   X:1INCHUSD  1inch        crypto  True
1   X:AAVEUSD   Aave         crypto  True
2   X:ACATUSD   Alphacat     crypto  True
3   X:ACHUSD    Alchemy Pay  crypto  True
4   X:ACTUSD    Achain       crypto  True
16  X:ZECUSD    Zcash        crypto  True
17  X:ZENUSD    Horizen      crypto  True
18  X:ZILUSD    Zilliqa      crypto  True
19  X:ZRXUSD    0x           crypto  True
20  X:ZSCUSD    Zeusshield   crypto  True

Create Get Minute Bars Method

Let’s do the same thing for the get minute bars method. We need to keep looping until we have all the data, making sure we only append data we haven’t seen before.

# Print the built-in help for the stock aggregates method to see its arguments.
help(client.stocks_equities_aggregates)
class MyRESTClient(RESTClient):
    """Polygon ``RESTClient`` with retries plus ticker and bar download helpers."""

    def __init__(self, auth_key: str=settings['api_key'], timeout:int=5):
        """Initialise the parent client and mount a retrying HTTPS adapter.

        Args:
            auth_key: Polygon API key. Defaults to the ``api_key`` entry of
                the imported ``settings`` mapping.
            timeout: Request timeout forwarded to ``RESTClient``. Previously
                this argument was accepted but never used.
                NOTE(review): assumes the installed ``RESTClient`` accepts a
                ``timeout`` keyword, as the legacy 0.2.x client does - confirm.
        """
        super().__init__(auth_key, timeout=timeout)
        # Retry up to 10 times, with exponential backoff, on rate limiting
        # (429) and on transient server errors (5xx).
        retry_strategy = Retry(total=10,
                               backoff_factor=10,
                               status_forcelist=[429, 500, 502, 503, 504])
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self._session.mount('https://', adapter)

    def get_tickers(self, market:str=None) -> Optional[pd.DataFrame]:
        """Return every ticker of ``market`` as one de-duplicated DataFrame.

        Follows ``next_url`` on each response until a response no longer
        carries one, collecting the ``results`` of every page.

        Args:
            market: One of the values in the module-level ``markets`` list.

        Returns:
            A DataFrame with one row per unique ticker, or ``None`` when the
            first response has no ``results`` attribute. For ``'crypto'`` only
            USD pairings are kept and the columns are reduced to
            ``ticker``, ``name``, ``market`` and ``active``.

        Raises:
            ValueError: If ``market`` is not in ``markets``.
        """
        if market not in markets:
            raise ValueError(f'Market must be one of {markets}.')

        resp = self.reference_tickers_v3(market=market)
        if not hasattr(resp, 'results'):
            return None

        # Collect pages in a list and concatenate once: DataFrame.append was
        # removed in pandas 2.0 and re-copied the frame on every page anyway.
        pages = [pd.DataFrame(resp.results)]
        while hasattr(resp, 'next_url'):
            resp = self.reference_tickers_v3(next_url=resp.next_url)
            if hasattr(resp, 'results'):
                pages.append(pd.DataFrame(resp.results))
        df = pd.concat(pages, ignore_index=True)

        # An empty result set has no columns to filter or de-duplicate on.
        if df.empty:
            return df

        if market == 'crypto':
            # Only use USD pairings. Copy so the column assignment below
            # writes to an independent frame rather than a filtered view.
            df = df[df['currency_symbol'] == 'USD'].copy()
            df['name'] = df['base_currency_name']
            df = df[['ticker', 'name', 'market', 'active']]

        df = df.drop_duplicates(subset='ticker')
        return df

    def get_bars(self, market:str=None, ticker:str=None, multiplier:int=1,
                 timespan:str='minute', from_:date=None, to:date=None) -> Optional[pd.DataFrame]:
        """Download aggregate bars for ``ticker`` between ``from_`` and ``to``.

        Each request asks for at most 50000 bars, so the method keeps
        re-requesting from the date of the last bar received until a response
        brings no bar newer than the ones already collected.

        Args:
            market: One of the values in the module-level ``markets`` list.
                Only ``'crypto'`` is implemented here.
            ticker: Ticker symbol, e.g. ``'X:BTCUSD'``.
            multiplier: Size of each bar in units of ``timespan``.
            timespan: Bar unit passed to the API (default ``'minute'``).
            from_: First day to request; defaults to 2000-01-01.
            to: Last day to request; defaults to today.

        Returns:
            A DataFrame with columns ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume``. ``None`` is returned for markets other
            than ``'crypto'`` and when the first response contains no bars.

        Raises:
            ValueError: If ``market`` is not in ``markets`` or ``ticker`` is
                ``None``.
        """
        if market not in markets:
            raise ValueError(f'Market must be one of {markets}.')

        if ticker is None:
            raise ValueError('Ticker must not be None.')

        from_ = from_ if from_ else date(2000,1,1)
        to = to if to else date.today()

        if market != 'crypto':
            return None

        to_str = to.strftime('%Y-%m-%d')
        resp = self.crypto_aggregates(ticker, multiplier, timespan,
                                      from_.strftime('%Y-%m-%d'), to_str,
                                      limit=50000)
        # A response without bars would otherwise fail on results[-1].
        results = getattr(resp, 'results', None)
        if not results:
            return None

        pages = [pd.DataFrame(results)]
        last_minute = 0
        while results and results[-1]['t'] > last_minute:
            last_minute = results[-1]['t']  # Last minute in response
            # Derive the restart date in UTC, matching the UTC conversion of
            # the 'date' column below, instead of the machine's local zone.
            # NOTE(review): assumes Polygon reads crypto date bounds as UTC - confirm.
            last_minute_date = pd.to_datetime(last_minute, unit='ms').strftime('%Y-%m-%d')
            resp = self.crypto_aggregates(ticker, multiplier, timespan,
                                          last_minute_date, to_str,
                                          limit=50000)
            results = getattr(resp, 'results', None)
            if results:
                new_bars = pd.DataFrame(results)
                # The restart date re-fetches bars already held; keep new ones only.
                unseen = new_bars[new_bars['t'] > last_minute]
                if not unseen.empty:
                    pages.append(unseen)

        # Single concatenation; DataFrame.append was removed in pandas 2.0.
        df = pd.concat(pages, ignore_index=True)

        df['date'] = pd.to_datetime(df['t'], unit='ms')
        df = df.rename(columns={'o':'open',
                                'h':'high',
                                'l':'low',
                                'c':'close',
                                'v':'volume',
                                'vw':'vwap',
                                'n':'transactions'})
        df = df[['date','open','high','low','close','volume']]

        return df
# Download BTC/USD bars from 2021-01-01 onwards using the get_bars defaults
# (multiplier=1, timespan='minute', no explicit end date).
# The trailing bare ``df`` shows the frame when this runs as a notebook cell.
start = datetime(2021,1,1)
client = MyRESTClient(settings['api_key'])
df = client.get_bars(market='crypto', ticker='X:BTCUSD', from_=start)
df