Updated 4 years ago
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_error
import joblib as jb
from fbprophet import Prophet
from sklearn.linear_model import Ridge
def create_features(df, label=None):
    """
    Creates time series features from datetime index.

    The index of ``df`` is copied into a ``date`` column and the calendar
    feature columns are derived from it. All of these columns are written
    onto ``df`` itself, so the caller's frame is modified in place.

    Args:
        df: DataFrame with a datetime-like index (the ``.dt`` accessor is
            applied to a column copy of the index).
        label: Optional name of the target column in ``df``. When falsy,
            only the feature frame is returned.

    Returns:
        ``X`` (the eight feature columns) when ``label`` is falsy,
        otherwise the tuple ``(X, y)`` with ``y = df[label]``.
    """
    df['date'] = df.index
    df['hour'] = df['date'].dt.hour
    df['dayofweek'] = df['date'].dt.dayofweek
    df['quarter'] = df['date'].dt.quarter
    df['month'] = df['date'].dt.month
    df['year'] = df['date'].dt.year
    df['dayofyear'] = df['date'].dt.dayofyear
    df['dayofmonth'] = df['date'].dt.day
    # Series.dt.weekofyear is deprecated (and removed in pandas 2.0);
    # isocalendar().week yields the same ISO week number. It comes back as
    # the nullable UInt32 dtype, so cast to int64 to keep a plain integer
    # column like the other features.
    df['weekofyear'] = df['date'].dt.isocalendar().week.astype('int64')
    X = df[['hour', 'dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth', 'weekofyear']]
    if label:
        y = df[label]
        return X, y
    return X
# train test split
# NOTE(review): jb.load unpickles the file; only load pickles from a trusted source.
init_data = jb.load('1000535.pkl')
data = init_data.drop(columns=['is_estimated', 'device_id', 'tag', 'date', 'weekday'])
data.rename(columns={'measured_at': 'ds', 'd_act_total': 'y'}, inplace=True)

# Positional split: rows 70%-90% -> train, 90%-95% -> validation, last 5% -> test.
# NOTE(review): the first 70% of rows are not part of any split - confirm this is intended.
# Each slice is copied explicitly so the 'ds' assignments below write to an
# independent frame instead of a slice of `data` (avoids SettingWithCopyWarning).
X_train = data[int(0.7*len(data)):int(0.9*len(data))].copy()
X_val = data[int(0.9*len(data)):int(0.95*len(data))].copy()
X_test = data[int(0.95*len(data)):].copy()

# tz_convert(None) converts to UTC and drops the timezone, leaving naive timestamps.
# Assumes 'ds' is timezone-aware (tz_convert raises on naive data) - TODO confirm.
X_train['ds'] = X_train.ds.dt.tz_convert(None)
X_val['ds'] = X_val.ds.dt.tz_convert(None)
X_test['ds'] = X_test.ds.dt.tz_convert(None)
# <ipython-input-3-9cd674806561>:8: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# X_train['ds'] = X_train.ds.dt.tz_convert(None)
# <ipython-input-3-9cd674806561>:9: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# X_val['ds'] = X_val.ds.dt.tz_convert(None)
# <ipython-input-3-9cd674806561>:10: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# X_test['ds'] = X_test.ds.dt.tz_convert(None)
# Build the (features, target) pair for every split, using the 'ds'
# timestamps as the index that create_features derives its columns from.
(Xtrain, ytrain), (Xval, yval), (Xtest, ytest) = (
    create_features(split.set_index('ds'), label='y')
    for split in (X_train, X_val, X_test)
)
# <ipython-input-2-568ff9107aea>:13: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.
# df['weekofyear'] = df['date'].dt.weekofyear