79154122

Date: 2024-11-04 03:30:29
Score: 1.5
Natty:
Report link

Well, for what it's worth, I have put something together that I hope will work tomorrow, when I have live trade data.

I'm new to Python, and it would be interesting to get some feedback on the code below. I'm sure there are a ton of optimizations to be had. Pandas and DataFrames are subjects I don't have a handle on yet.

import yfinance as yf
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import datetime
import pytz


def fetch_stock_hist_data(ticker):
    """Summarise the most recent one-minute volume bars for one ticker.

    Requests today's history at a 1-minute interval via ``yf.Ticker``,
    keeps the last five bars, and returns a single-row DataFrame built from
    the newest bar with the volume traded over the last 1, 2 and 5 bars.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.

    Returns:
        A one-row DataFrame whose first columns are ``Symbol``,
        ``Lst1MinVol``, ``Lst2MinVol`` and ``Lst5MinVol``, or ``None`` if
        anything goes wrong (the error is printed, not raised).
    """
    try:
        stock = yf.Ticker(ticker)
        hist_data = stock.history(period="1d", interval='1m')
        if hist_data.empty:
            raise ValueError("no intraday data returned")

        # Only the last 5 bars are needed; the timestamp index is discarded.
        recent = hist_data.tail(5).reset_index(drop=True)
        recent.insert(0, 'Symbol', ticker)

        # Slice from the end so the sums stay correct even when fewer than
        # five bars are available (e.g. right after the market opens).
        volumes = recent['Volume']
        one_min_vol = volumes.iloc[-1:].sum()
        two_min_vol = volumes.iloc[-2:].sum()
        five_min_vol = volumes.sum()

        # Keep only the newest bar and strip the raw price/volume columns.
        # errors='ignore' keeps a missing optional column from discarding
        # the whole result.
        drop_columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']
        new_df = recent.iloc[[-1]].drop(columns=drop_columns, errors='ignore')

        # Add columns for 1, 2 and 5 minute volumes
        new_df.insert(1, 'Lst1MinVol', one_min_vol)
        new_df.insert(2, 'Lst2MinVol', two_min_vol)
        new_df.insert(3, 'Lst5MinVol', five_min_vol)

        return new_df
    except Exception as e:
        # Runs inside a worker thread: report and signal failure with None
        # so one bad ticker does not take down the whole batch.
        print(f"Error fetching data for {ticker}: {e}")
        return None

def fetch_curr_stock_data(ticker):
    """Return ``[ticker, price, volume]`` for one symbol.

    The price and volume are read from the ``'currentPrice'`` and
    ``'volume'`` entries of the ticker's ``info`` mapping and are returned
    as strings.
    """
    ticker_info = yf.Tickers(ticker).tickers[ticker].info
    price_text = f"{ticker_info['currentPrice']}"
    volume_text = f"{ticker_info['volume']}"
    return [ticker, price_text, volume_text]


def fetch_multiple_stocks(tickers):
    """Fetch the recent-volume summary for several tickers concurrently.

    Each ticker is handed to ``fetch_stock_hist_data`` on a thread pool.
    Tickers whose fetch returned ``None`` are skipped.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        The per-ticker one-row frames concatenated in completion order.
        When there is nothing to concatenate (no tickers, or every fetch
        failed) an empty DataFrame with the ``Symbol``, ``Lst1MinVol``,
        ``Lst2MinVol`` and ``Lst5MinVol`` columns is returned.
    """
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_stock_hist_data, ticker) for ticker in tickers]
        fetched = (future.result() for future in as_completed(futures))
        frames = [frame for frame in fetched if frame is not None]

    if not frames:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame with the expected columns so callers can still select them.
        return pd.DataFrame(columns=['Symbol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol'])

    return pd.concat(frames)


# Total volume seen per symbol on the previous call to fetch_curr_stocks.
# Kept at module level so successive polls can report the volume traded
# between them.
_LAST_TOTAL_VOLUME = {}


def fetch_curr_stocks(tickers):
    """Fetch the current price and volume for each ticker.

    Calls ``fetch_curr_stock_data`` for every ticker on a thread pool and
    assembles the results into one DataFrame. ``CurrVol`` is the change in
    total volume since the previous call for that symbol; on the first call
    for a symbol there is no earlier reading, so ``PrevVolume`` equals
    ``TotVolume`` and ``CurrVol`` is 0.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        DataFrame with columns ``Symbol``, ``Price``, ``CurrVol``,
        ``TotVolume`` and ``PrevVolume``, one row per ticker in input order.

    Raises:
        ValueError: if a volume could not be parsed as a number (the
            integer conversion rejects the resulting NaN).
    """
    table_title = ['Symbol', 'Price', 'TotVolume']

    with ThreadPoolExecutor() as executor:
        rows = list(executor.map(fetch_curr_stock_data, tickers))

    df = pd.DataFrame(rows, columns=table_title)

    # Convert TotVolume from string to number
    df['TotVolume'] = pd.to_numeric(df['TotVolume'], errors='coerce')

    # Look up what each symbol's total was last time; fall back to the
    # current total when the symbol has not been seen before.
    df['PrevVolume'] = [
        _LAST_TOTAL_VOLUME.get(symbol, total)
        for symbol, total in zip(df['Symbol'], df['TotVolume'])
    ]

    curr_volume = df['TotVolume'].astype(int) - df['PrevVolume'].astype(int)
    df.insert(2, 'CurrVol', curr_volume)

    # Remember this poll's totals for the next call. Done last so a failed
    # conversion above never stores a bad reading.
    _LAST_TOTAL_VOLUME.update(zip(df['Symbol'], df['TotVolume']))

    return df


if __name__ == "__main__":
    # Poll a fixed list of tickers every 10 seconds, append one CSV row per
    # ticker to /tmp/yf_data.csv and print a human-readable table.

    new_york_tz = pytz.timezone('America/New_York')
    tickers = ["AAPL", "GOOG", "MSFT"]

    # Columns taken from the historical summary, and the final column order.
    hist_columns = ['Symbol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol']
    output_columns = ['Symbol', 'Price', 'CurrVol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol', 'TotVolume']
    volume_columns = ['CurrVol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol', 'TotVolume']

    while True:
        # Get current New York time and format as 09:30:00. Taken before the
        # fetches, so it marks when the poll started.
        curr_time = datetime.datetime.now(new_york_tz).strftime('%H:%M:%S')

        # Get stock info for tickers
        df_curr = fetch_curr_stocks(tickers)

        # Get stock historical data for last 5 minutes today.
        df_hist = fetch_multiple_stocks(tickers)

        # Merge on Symbol so every volume lands on the right ticker no matter
        # what order the two frames come back in. A left join keeps every
        # ticker from df_curr even when its historical fetch failed.
        merged_df = pd.merge(df_curr, df_hist[hist_columns], on='Symbol', how='left')

        # Keep only the reported columns (PrevVolume is left out) in order.
        final_df = merged_df[output_columns].copy()

        # Insert time stamp as a new column
        final_df.insert(1, 'Time', curr_time)

        # Write data to csv file
        final_df.to_csv('/tmp/yf_data.csv', mode='a', header=False, index=False)
#       Output:
#       AAPL,22:01:16,222.91,0,979377,1403850,2299514,63519990
#       GOOG,22:01:16,172.65,0,387421,727605,1237449,21385165
#       MSFT,22:01:16,410.37,0,432180,558932,861389,23745361

        # Format volume data with a thousands separator for readability when
        # printing to screen. Done on a copy, after the CSV has been written,
        # so the file keeps plain numbers.
        display_df = final_df.copy()
        for column in volume_columns:
            display_df[column] = display_df[column].map("{:,}".format)

        print(display_df)
#       Output:
#         Symbol      Time   Price CurrVol Lst1MinVol Lst2MinVol Lst5MinVol   TotVolume
#       0   AAPL  22:06:38  222.91       0    979,377  1,403,850  2,299,514  63,519,990
#       1   GOOG  22:06:38  172.65       0    387,421    727,605  1,237,449  21,385,165
#       2   MSFT  22:06:38  410.37       0    432,180    558,932    861,389  23,745,361

        time.sleep(10)

Reasons:
  • RegEx Blacklisted phrase (1.5): I'm new
  • Long answer (-1):
  • Has code block (-0.5):
  • Self-answer (0.5):
  • Low reputation (1):
Posted by: Tex