Source code for analysis_engine.scripts.inspect_datasets

#!/usr/bin/env python

"""
Tool for inspecting cached pricing data to find common errors.
This tool uses the
`Extraction API <https://stock-analysis-engine.
readthedocs.io/en/latest/extract.html>`__ to look for dates
that are not in sync with the redis cached date.

.. note:: This tool requires redis to be running with
    fetched datasets already stored in supported
    keys

**Examples**

**Inspect Minute Datasets for a Ticker**

::

    inspect_datasets.py -t SPY

**Inspect Daily Datasets for a Ticker**

::

    inspect_datasets.py -t AAPL -g daily
    # or
    # inspect_datasets.py -t AAPL -g day

**Usage**

::

    inspect_datasets.py -h
    usage: inspect_datasets.py [-h] [-t TICKER] [-g DATASETS] [-s START_DATE]

    Inspect datasets looking for dates in redis that look incorrect

    optional arguments:
    -h, --help     show this help message and exit
    -t TICKER      ticker
    -g DATASETS    optional - datasets: minute or min = examine IEX Cloud
                    intraday minute data, daily or day = examine IEX Cloud
                    daily data, quote = examine IEX Cloud quotes data,
                    stats = examine IEX Cloud key stats data, peers =
                    examine IEX Cloud peers data, news = examine IEX Cloud
                    news data, fin = examine IEX Cloud financials data,
                    earn = examine IEX Cloud earnings data, div = examine
                    IEX Cloud dividends data, comp = examine IEX Cloud
                    company data, calls = examine Tradier calls data,
                    puts = examine Tradier puts data, and comma delimited
                    is supported as well
    -s START_DATE  start date format YYYY-MM-DD (default is 2019-01-01)
"""

import datetime
import argparse
import analysis_engine.consts as ae_consts
import analysis_engine.utils as ae_utils
import analysis_engine.extract as ae_extract
import spylunking.log.setup_logging as log_utils

log = log_utils.build_colorized_logger(
    name='inspect-redis-data',
    handler_name='no_date_colors')


def inspect_datasets(
        ticker=None,
        start_date=None,
        datasets=None):
    """inspect_datasets

    Loop over all cached data in redis by going sequentially per
    date and examine the latest ``date`` value in the cache to
    check if it matches the redis key's date.

    For IEX Cloud minute data errors, running this function will
    print out commands to fix any issues (if possible):

    ::

        fetch -t TICKER -g iex_min -F DATE_TO_FIX

    Each dataset is checked per date as follows:

    - a dataset that is not a dataframe, or has no ``date`` column,
      is logged as an error
    - for the ``daily`` dataset a mismatch is retried after shifting
      the cached date forward by one day (three days when the date
      being inspected is a Monday) before it is reported
    - any remaining mismatch is logged as critical, and for the
      ``minute`` dataset a ``fetch`` command is queued as a
      suggested fix

    Weekend dates are skipped when advancing to the next date.

    :param ticker: optional - string ticker
        (default is ``SPY``)
    :param start_date: optional - datetime
        start date for the loop
        (default is ``2019-01-01``)
    :param datasets: optional - list of strings
        to extract specific, supported datasets
        (default is ``['minute']``)
    """

    if not start_date:
        start_date = datetime.datetime(
            year=2019,
            month=1,
            day=1)
    if not datasets:
        datasets = [
            'minute'
        ]
    if not ticker:
        ticker = 'SPY'

    date_format = ae_consts.COMMON_DATE_FORMAT
    one_day = datetime.timedelta(days=1)

    fix_suggestions = []
    last_close = ae_utils.last_close()

    cur_date = start_date
    while True:
        cur_date_str = cur_date.strftime(date_format)
        weekday_name = cur_date.strftime('%A')

        log.info(
            f'extracting {ticker} date={cur_date_str}')

        # the formatted date is always set here, so the extract
        # is always pinned to the date being inspected
        res = ae_extract.extract(
            ticker=ticker,
            date=cur_date_str,
            datasets=datasets)

        for ds_name in datasets:
            df = res[ticker][ds_name]

            if not ae_consts.is_df(df=df):
                log.error(
                    f'Missing {ds_name} df on {cur_date_str}')
                continue

            if 'date' not in df:
                log.error(
                    f'{ds_name} df does not have a date column '
                    f'on {cur_date_str}')
                continue

            latest_date = df['date'].iloc[-1]
            latest_date_str = latest_date.strftime(date_format)

            if latest_date_str != cur_date_str and ds_name == 'daily':
                # daily data gets a second chance: compare against the
                # cached date shifted forward one day, or three days
                # on a Monday to step over the weekend
                days_forward = 3 if weekday_name == 'Monday' else 1
                shifted_date = (
                    latest_date + datetime.timedelta(days=days_forward))
                latest_date_str = shifted_date.strftime(date_format)

            if latest_date_str == cur_date_str:
                log.info(
                    f'valid - {ds_name} latest dates match '
                    f'{weekday_name}: '
                    f'{latest_date_str} == {cur_date_str}')
                continue

            log.critical(
                f'{ds_name} latest dates does '
                f'NOT match on '
                f'{weekday_name} {cur_date_str} found: '
                f'{latest_date_str}')

            # only minute data has a known fetch command to repair it
            if ds_name == 'minute':
                fix_suggestions.append(
                    f'fetch -t {ticker} -g iex_min '
                    f'-F {cur_date_str}')
        # end of inspecting datasets

        # NOTE(review): this check runs after the inspection, so the
        # first weekday past last_close is still inspected once before
        # the loop stops - confirm this is intended
        if cur_date > last_close:
            break

        cur_date += one_day
        while cur_date.date().weekday() > 4:
            log.debug(
                'SKIP weekend day: '
                f'{cur_date.strftime("%A on %Y-%m-%d")}')
            cur_date += one_day
    # end for all dates

    if fix_suggestions:
        print('-------------------------------')
        print(
            'Detected invalid dates - below are the suggested fixes '
            'to run using the fetch command.')
        print(
            ' - Please be aware fetching data may incur usages and '
            'costs on your account')
        for suggestion in fix_suggestions:
            print(suggestion)
    else:
        log.info(
            'done')
# end inspect_datasets


if __name__ == '__main__':
    # command line entry point: parse the arguments, translate any
    # dataset aliases and run the inspection when everything is valid
    parser = argparse.ArgumentParser(
        description=(
            'Inspect datasets looking for dates in redis '
            'that look incorrect'))
    parser.add_argument(
        '-t',
        help=(
            'ticker'),
        required=False,
        dest='ticker')
    parser.add_argument(
        '-g',
        help=(
            'optional - datasets: '
            'minute or min = examine IEX Cloud intraday minute data, '
            'daily or day = examine IEX Cloud daily data, '
            'quote = examine IEX Cloud quotes data, '
            'stats = examine IEX Cloud key stats data, '
            'peers = examine IEX Cloud peers data, '
            'news = examine IEX Cloud news data, '
            'fin = examine IEX Cloud financials data, '
            'earn = examine IEX Cloud earnings data, '
            'div = examine IEX Cloud dividends data, '
            'comp = examine IEX Cloud company data, '
            'calls = examine Tradier calls data, '
            'puts = examine Tradier puts data, '
            'and comma delimited is supported as well'),
        required=False,
        dest='datasets')
    parser.add_argument(
        '-s',
        help=(
            'start date format YYYY-MM-DD (default is 2019-01-01)'),
        required=False,
        dest='start_date')
    args = parser.parse_args()

    # defaults used when an argument is not provided
    start_date = datetime.datetime(
        year=2019,
        month=1,
        day=1)
    datasets = [
        'minute'
    ]
    ticker = 'SPY'
    valid = True

    # short command line aliases mapped to the dataset names
    # passed on to inspect_datasets
    dataset_aliases = {
        'news': 'news1',
        'min': 'minute',
        'day': 'daily',
        'fin': 'financials',
        'earn': 'earnings',
        'div': 'dividends',
        'comp': 'company',
        'calls': 'tdcalls',
        'puts': 'tdputs',
    }

    if args.ticker:
        ticker = args.ticker.upper()

    if args.datasets:
        datasets = []
        for raw_key in args.datasets.lower().split(','):
            # tolerate spaces around the commas: -g "min, day"
            key = raw_key.strip()
            if key in dataset_aliases:
                datasets.append(dataset_aliases[key])
            elif key in ae_consts.BACKUP_DATASETS:
                datasets.append(key)
            else:
                log.error(
                    f'unsupported dataset key: {key} '
                    'please use a supported key: '
                    f'{ae_consts.BACKUP_DATASETS}')
                valid = False

    if args.start_date:
        try:
            start_date = datetime.datetime.strptime(
                args.start_date,
                '%Y-%m-%d')
        except ValueError:
            # report a bad date the same way as a bad dataset key
            # instead of exiting with a traceback
            log.error(
                f'invalid start date: {args.start_date} '
                'please use the format: YYYY-MM-DD')
            valid = False

    if valid:
        inspect_datasets(
            ticker=ticker,
            start_date=start_date,
            datasets=datasets)