Learn practical skills, build real-world projects, and advance your career
import pandas as pd
import numpy as np
import seaborn as sns
import math
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline
from helpers import *
import tqdm as tq
# Fix the global random seed to 123 so repeated runs of the notebook are comparable
# (Lightning logs the seed it applied).
pl.seed_everything(123)
import yfinance as yf
# Hook tqdm into pandas — presumably so progress_apply-style progress bars are
# available on DataFrames later in the notebook; confirm against actual usage.
tq.tqdm.pandas()
INFO:lightning_fabric.utilities.seed:Global seed set to 123


SECTIONS: Top | Compile Stocks | The Data | Add Change Column | Feature Engineering | Splitting & Scaling | Creating Sequences | PyTorch Datasets | LSTM | Trainer | Results |


compile_stocks()

  • Yahoo Finance only returns up to 7 days' worth of data per request at 1-minute intervals
  • This function downloads the data in consecutive windows of `day_window` days (e.g. 7) and compiles them into a single dataframe covering the entire date range
  • Yahoo also only provides 1-minute data for roughly the most recent 30 days
  • The same function can be used with other intervals, which allow longer time periods
def compile_stocks(symbol, end, start, day_window, interval):
    """Download price history for ``symbol`` in consecutive date windows.

    The range ``[start, end)`` is walked backwards from ``end`` in windows of
    at most ``day_window`` days; each window is fetched with
    ``yfinance.download`` and the pieces are concatenated into one frame.
    This works around per-request range limits (e.g. the 7-day cap the
    notebook notes for 1-minute data).

    Args:
        symbol: Ticker symbol(s), passed straight through to ``yf.download``.
        end: End of the overall range; anything ``pd.to_datetime`` accepts.
        start: Start of the overall range; anything ``pd.to_datetime`` accepts.
        day_window: Maximum length of a single request, in days. Must be > 0.
        interval: Bar interval string, passed straight through to
            ``yf.download`` (e.g. ``"1m"``).

    Returns:
        A single DataFrame holding every downloaded window, sorted by its
        index in ascending order.

    Raises:
        ValueError: If ``day_window`` is not positive or ``start`` is not
            strictly before ``end``.
    """
    range_start = pd.to_datetime(start)
    range_end = pd.to_datetime(end)

    # A non-positive window can never make progress towards ``start``
    # (a negative one previously looped forever).
    if day_window <= 0:
        raise ValueError(f"day_window must be positive, got {day_window!r}")
    # An empty range used to surface as pandas' opaque
    # "No objects to concatenate"; report the real problem instead.
    if range_start >= range_end:
        raise ValueError(
            f"start ({start!r}) must be earlier than end ({end!r})"
        )

    # Imported lazily: only needed once the arguments are known to be valid.
    import yfinance as yf

    window = pd.Timedelta(days=day_window)
    frames = []
    window_end = range_end

    while window_end > range_start:
        # Clamp the final (oldest) window so it never reaches before
        # ``start``; this also covers ranges shorter than one window, which
        # previously produced no downloads at all.
        window_start = max(window_end - window, range_start)
        frames.append(
            yf.download(
                symbol,
                start=window_start,
                end=window_end,
                interval=interval,
            )
        )
        window_end = window_start

    # Sort on the index itself rather than by the name "Datetime": intraday
    # frames name their index "Datetime" but daily-and-longer intervals use
    # "Date", which made the by-name sort fail for those intervals.
    return pd.concat(frames).sort_index()