Updated 2 years ago
SECTIONS: Top | Compile Stocks | The Data | Add Change Column | Feature Engineering | Splitting & Scaling | Creating Sequences | PyTorch Datasets | LSTM | Trainer | Results |
import pandas as pd
import numpy as np
import seaborn as sns
import math
import torch
import torch.nn as nn
import torch.optim as optim
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline
from helpers import *
import tqdm as tq
pl.seed_everything(123)
import yfinance as yf
tq.tqdm.pandas()
INFO:lightning_fabric.utilities.seed:Global seed set to 123
SECTIONS: Top | Compile Stocks | The Data | Add Change Column | Feature Engineering | Splitting & Scaling | Creating Sequences | PyTorch Datasets | LSTM | Trainer | Results |
compile_stocks()
- Yahoo Finance returns at most 7 days' worth of data per request at 1-minute intervals
- This function downloads the data in `day_window`-day increments (e.g. 7 days) and compiles the pieces into a single dataframe covering the entire range of dates
- Yahoo Finance also only provides 1-minute data for roughly the most recent 30 days
- The same function can be used with other intervals, which allow for longer time periods
def compile_stocks(symbol, end, start, day_window, interval):
    """Download price history for ``symbol`` in consecutive date windows.

    The range from ``start`` to ``end`` is walked backwards from ``end`` in
    steps of ``day_window`` days. Each step is fetched with its own
    ``yf.download`` call, and the pieces are concatenated into one frame.
    The oldest window is shortened so it never reaches before ``start``.

    Args:
        symbol: Ticker symbol, passed through to ``yf.download``.
        end: End of the overall range; anything ``pd.to_datetime`` accepts.
        start: Start of the overall range; anything ``pd.to_datetime``
            accepts. Must be earlier than ``end``.
        day_window: Maximum number of days covered by a single download.
            Must be positive.
        interval: Bar interval string, passed through to ``yf.download``.

    Returns:
        A single DataFrame holding every downloaded window, sorted by its
        index in ascending order.

    Raises:
        ValueError: If ``day_window`` is not positive or ``start`` is not
            earlier than ``end``.
    """
    import datetime
    import yfinance as yf

    # A non-positive window would never make progress toward ``start``.
    if day_window <= 0:
        raise ValueError("day_window must be a positive number of days")

    range_start = pd.to_datetime(start)
    window_end = pd.to_datetime(end)
    if range_start >= window_end:
        raise ValueError("start must be earlier than end")

    step = datetime.timedelta(days=day_window)
    dfs = []
    while window_end > range_start:
        # Clamp so the final (oldest) window stops exactly at ``start``; this
        # also covers ranges shorter than one full ``day_window``.
        window_start = max(window_end - step, range_start)
        dfs.append(
            yf.download(
                symbol,
                start=window_start,
                end=window_end,
                interval=interval,
            )
        )
        # The next (older) window ends where this one began.
        window_end = window_start

    # Sort on the index itself rather than a hard-coded "Datetime" label so
    # the result is ordered whatever the downloaded index is named.
    return pd.concat(dfs).sort_index()