import pandas as pd
import numpy as np
import seaborn as sns
import math
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline
from helpers import *
import tqdm as tq
pl.seed_everything(123)
import yfinance as yf
tq.tqdm.pandas()
INFO:lightning_fabric.utilities.seed:Global seed set to 123


SECTIONS: Top | Compile Stocks | The Data | Add Change Column | Feature Engineering | Splitting & Scaling | Creating Sequences | PyTorch Datasets | LSTM | Trainer | Results |


compile_stocks()

  • Yahoo Finance returns at most 7 days' worth of data per request at 1-minute intervals
  • This function downloads the data in `day_window`-day increments (e.g. 7 days) and compiles the resulting dataframes into one dataframe covering the entire date range
  • Yahoo Finance also only provides 1-minute data for up to 30 days in the past
  • The function can also be used with other intervals, which allow longer time periods
def compile_stocks(symbol, end, start, day_window, interval):
	"""Download price history for ``symbol`` in consecutive date windows.

	Yahoo Finance limits how much history one request may span at
	fine-grained intervals (e.g. 7 days at 1-minute resolution), so the
	range from ``start`` to ``end`` is fetched in chunks of at most
	``day_window`` days, walking backwards from ``end``, and the chunks are
	concatenated into a single dataframe.

	Args:
		symbol: Ticker symbol(s), passed through to ``yf.download``.
		end: End of the overall date range (anything ``pd.to_datetime``
			accepts).
		start: Start of the overall date range (anything
			``pd.to_datetime`` accepts).
		day_window: Maximum number of days covered by one request; must be
			positive.
		interval: Bar interval string, passed through to ``yf.download``.

	Returns:
		A dataframe holding every downloaded chunk, sorted by its index.

	Raises:
		ValueError: If ``day_window`` is not positive or ``start`` is not
			earlier than ``end``.
	"""
	import datetime

	range_start = pd.to_datetime(start)
	range_end = pd.to_datetime(end)

	# A non-positive window would never move towards ``start`` (the loop
	# would not terminate), so reject it up front.
	if day_window <= 0:
		raise ValueError(f"day_window must be positive, got {day_window!r}")
	if range_start >= range_end:
		raise ValueError(
			f"start ({start!r}) must be earlier than end ({end!r})"
		)

	# Imported only after validation so that bad arguments fail fast without
	# needing the download dependency.
	import yfinance as yf

	step = datetime.timedelta(days=day_window)
	frames = []

	# The first request forwards the caller's ``end`` value untouched; every
	# later request ends where the previous window started.
	request_end = end
	window_end = range_end
	while window_end > range_start:
		# Clamp the final (oldest) window so it never reaches before
		# ``start``; this also covers ranges shorter than one window.
		window_start = max(window_end - step, range_start)
		frames.append(
			yf.download(
				symbol,
				start=window_start,
				end=request_end,
				interval=interval,
			)
		)
		window_end = request_end = window_start

	# Sort on the index itself rather than on a hard-coded level name, so the
	# result is ordered whatever the index happens to be called.
	return pd.concat(frames).sort_index()