Well, for what it's worth, I have put something together that I hope will work tomorrow when I have live trade data.
I'm new to Python, and it would be interesting to get some feedback on the code below. I'm sure there are a ton of optimizations to be had. I find pandas and DataFrames to be subjects I don't have a handle on yet.
import yfinance as yf
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
import time
import datetime
import pytz
def fetch_stock_hist_data(ticker, history=None):
    """Summarise the traded volume of the last 1, 2 and 5 bars for *ticker*.

    Args:
        ticker: Symbol to look up, e.g. "AAPL".
        history: Optional DataFrame of 1-minute bars with a 'Volume' column.
            When omitted, the bars are downloaded with
            ``yf.Ticker(ticker).history(period="1d", interval="1m")``.
            The frame is not modified.
            NOTE(review): rows are assumed to be ordered oldest first, as
            the tail()-based windows require - confirm for custom input.

    Returns:
        A one-row DataFrame with the columns 'Symbol', 'Lst1MinVol',
        'Lst2MinVol' and 'Lst5MinVol', or None when the data could not be
        fetched or contained no bars (a message is printed in that case).
    """
    # Worker-thread boundary: any failure is reported and turned into None
    # so that one bad symbol does not abort the whole batch.
    try:
        if history is None:
            history = yf.Ticker(ticker).history(period="1d", interval="1m")
        if history.empty:
            print(f"Error fetching data for {ticker}: no 1-minute bars returned")
            return None

        volume = history["Volume"]
        row = {"Symbol": ticker}
        # tail(n) counts from the end, so every window stays correct even
        # when fewer than five bars are available.
        for column, bars in (("Lst1MinVol", 1), ("Lst2MinVol", 2), ("Lst5MinVol", 5)):
            row[column] = int(volume.tail(bars).sum())
        return pd.DataFrame([row])
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return None
def fetch_curr_stock_data(ticker):
    """Return the current price and volume reported for one ticker.

    Args:
        ticker: Symbol to look up, e.g. "AAPL".

    Returns:
        A three-item list ``[ticker, price, volume]`` in which price and
        volume are the ``info['currentPrice']`` and ``info['volume']``
        values rendered as strings.

    Raises:
        KeyError: if the info mapping has no 'currentPrice' or 'volume'.
    """
    # Query the single Ticker directly instead of building a yf.Tickers
    # container and indexing it with the raw input.
    # NOTE(review): yf.Tickers appears to upper-case its keys, so that
    # lookup could fail for a lower-case symbol - confirm against yfinance.
    info = yf.Ticker(ticker).info
    return [ticker, str(info['currentPrice']), str(info['volume'])]
def fetch_multiple_stocks(tickers):
    """Fetch recent-volume summaries for several tickers concurrently.

    Args:
        tickers: Iterable of symbols; each one is handed to
            fetch_stock_hist_data on a worker thread.

    Returns:
        A DataFrame with one row per ticker whose fetch succeeded, in
        completion order and with a fresh 0..n-1 index. When no ticker
        produced data (or *tickers* is empty) an empty DataFrame with the
        columns 'Symbol', 'Lst1MinVol', 'Lst2MinVol' and 'Lst5MinVol' is
        returned so callers can still select and merge on those columns.
    """
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_stock_hist_data, ticker) for ticker in tickers]
        fetched = [future.result() for future in as_completed(futures)]

    # fetch_stock_hist_data signals a failed fetch with None.
    frames = [frame for frame in fetched if frame is not None]
    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(columns=['Symbol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol'])
    return pd.concat(frames, ignore_index=True)
# Cumulative volume last seen per symbol, kept between calls so that
# consecutive polls can report the volume traded since the previous poll.
_LAST_TOTAL_VOLUME = {}


def fetch_curr_stocks(tickers, prev_volumes=None, fetch_quote=None):
    """Fetch current quotes and the volume traded since the previous call.

    Args:
        tickers: Iterable of symbols to query.
        prev_volumes: Optional dict mapping symbol -> total volume seen on
            the previous poll. It is updated in place with the totals from
            this call. Defaults to a module-level dict shared by all calls.
        fetch_quote: Optional callable taking a symbol and returning
            ``[symbol, price, total_volume]``. Defaults to
            fetch_curr_stock_data.

    Returns:
        A DataFrame with the columns 'Symbol', 'Price', 'CurrVol',
        'TotVolume' and 'PrevVolume', one row per ticker in input order.
        'CurrVol' is 'TotVolume' minus 'PrevVolume'; it is 0 the first time
        a symbol is seen and whenever the total could not be parsed.
    """
    if prev_volumes is None:
        prev_volumes = _LAST_TOTAL_VOLUME
    if fetch_quote is None:
        fetch_quote = fetch_curr_stock_data

    with ThreadPoolExecutor() as executor:
        rows = list(executor.map(fetch_quote, tickers))

    df = pd.DataFrame(rows, columns=['Symbol', 'Price', 'TotVolume'])
    # Totals arrive as strings; anything unparsable becomes NaN.
    df['TotVolume'] = pd.to_numeric(df['TotVolume'], errors='coerce')

    # A symbol without a remembered total is compared with itself, which
    # yields a delta of 0 on its first poll.
    df['PrevVolume'] = [
        prev_volumes.get(symbol, total)
        for symbol, total in zip(df['Symbol'], df['TotVolume'])
    ]
    delta = (df['TotVolume'] - df['PrevVolume']).fillna(0).astype(int)
    df.insert(2, 'CurrVol', delta)

    # Remember this poll's totals for the next call; skip unparsable ones
    # so a bad reading does not wipe out the last good value.
    for symbol, total in zip(df['Symbol'], df['TotVolume']):
        if pd.notna(total):
            prev_volumes[symbol] = int(total)
    return df
def main():
    """Poll a fixed watch list every 10 seconds, logging and printing volumes.

    Each cycle fetches current quotes and recent-volume summaries, merges
    them on 'Symbol', appends one row per symbol to /tmp/yf_data.csv
    (no header row, no index) and prints the same table with thousands
    separators. The loop never exits on its own.
    """
    new_york_tz = pytz.timezone('America/New_York')
    tickers = ["AAPL", "GOOG", "MSFT"]
    hist_columns = ['Symbol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol']
    volume_columns = ['CurrVol', 'Lst1MinVol', 'Lst2MinVol', 'Lst5MinVol', 'TotVolume']
    # 'PrevVolume' is only needed to derive 'CurrVol', so it is not output.
    output_columns = ['Symbol', 'Price'] + volume_columns

    while True:
        # Current New York time formatted like 09:30:00.
        curr_time = datetime.datetime.now(new_york_tz).strftime('%H:%M:%S')

        df_curr = fetch_curr_stocks(tickers)
        df_hist = fetch_multiple_stocks(tickers)

        # Left merge keeps every quoted symbol even if its history is missing.
        merged_df = pd.merge(df_curr, df_hist[hist_columns], on='Symbol', how='left')
        final_df = merged_df[output_columns].copy()
        final_df.insert(1, 'Time', curr_time)

        # Sample rows written to the CSV file:
        # AAPL,22:01:16,222.91,0,979377,1403850,2299514,63519990
        # GOOG,22:01:16,172.65,0,387421,727605,1237449,21385165
        # MSFT,22:01:16,410.37,0,432180,558932,861389,23745361
        final_df.to_csv('/tmp/yf_data.csv', mode='a', header=False, index=False)

        # Format volumes with a thousands separator for on-screen
        # readability; work on a copy so the numeric frame stays intact.
        display_df = final_df.copy()
        for column in volume_columns:
            display_df[column] = display_df[column].apply(lambda x: f"{x:,}")
        # Sample printed output:
        #   Symbol      Time   Price CurrVol Lst1MinVol Lst2MinVol Lst5MinVol   TotVolume
        # 0   AAPL  22:06:38  222.91       0    979,377  1,403,850  2,299,514  63,519,990
        # 1   GOOG  22:06:38  172.65       0    387,421    727,605  1,237,449  21,385,165
        # 2   MSFT  22:06:38  410.37       0    432,180    558,932    861,389  23,745,361
        print(display_df)

        time.sleep(10)


if __name__ == "__main__":
    main()