Learn practical skills, build real-world projects, and advance your career
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_error
import joblib as jb
from fbprophet import Prophet
from sklearn.linear_model import Ridge
def create_features(df, label=None):
    """
    Creates time series features from datetime index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds datetimes. NOTE: the frame is modified in
        place -- a ``date`` column (copy of the index) and one column per
        calendar feature are added to it.
    label : optional
        Name of the target column in ``df``. When given (truthy), the target
        column is returned alongside the features.

    Returns
    -------
    X : pandas.DataFrame
        Columns ``hour, dayofweek, quarter, month, year, dayofyear,
        dayofmonth, weekofyear`` derived from the index.
    y : pandas.Series
        ``df[label]``; only returned when ``label`` is given, in which case
        the result is the tuple ``(X, y)``.
    """
    df['date'] = df.index
    dates = df['date'].dt

    df['hour'] = dates.hour
    df['dayofweek'] = dates.dayofweek
    df['quarter'] = dates.quarter
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['dayofmonth'] = dates.day
    # ``Series.dt.weekofyear`` is deprecated and removed in pandas 2.0;
    # ``isocalendar().week`` is the documented replacement. It returns the
    # nullable UInt32 dtype, so cast back to a plain integer column.
    df['weekofyear'] = dates.isocalendar().week.astype('int64')

    X = df[['hour', 'dayofweek', 'quarter', 'month', 'year',
            'dayofyear', 'dayofmonth', 'weekofyear']]
    if label:
        y = df[label]
        return X, y
    return X
# train test split
# Load the pickled frame, drop the columns that are not used below, and rename
# the timestamp/target columns to 'ds' / 'y'.
# NOTE(review): jb.load unpickles the file -- only load pickles from a trusted source.
init_data = jb.load('1000535.pkl')
data = init_data.drop(columns=['is_estimated', 'device_id', 'tag', 'date', 'weekday'])
data.rename(columns={'measured_at': 'ds', 'd_act_total': 'y'}, inplace=True)

# Split by row position: rows in [70%, 90%) are train, [90%, 95%) are
# validation and the last 5% are test. The first 70% of the rows are not used
# by any split -- NOTE(review): confirm this is intentional.
# Each slice is copied explicitly so the 'ds' assignments below write to an
# independent frame instead of a slice of `data` (this is what raised
# SettingWithCopyWarning).
X_train = data[int(0.7*len(data)):int(0.9*len(data))].copy()
X_val = data[int(0.9*len(data)):int(0.95*len(data))].copy()
X_test = data[int(0.95*len(data)):].copy()

# tz_convert(None) converts the timestamps to UTC and removes the timezone
# information, leaving tz-naive datetimes.
X_train['ds'] = X_train.ds.dt.tz_convert(None)
X_val['ds'] = X_val.ds.dt.tz_convert(None)
X_test['ds'] = X_test.ds.dt.tz_convert(None)
<ipython-input-3-9cd674806561>:8: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['ds'] = X_train.ds.dt.tz_convert(None)
<ipython-input-3-9cd674806561>:9: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_val['ds'] = X_val.ds.dt.tz_convert(None)
<ipython-input-3-9cd674806561>:10: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['ds'] = X_test.ds.dt.tz_convert(None)
# Build the (features, target) pair for each split: index the split by its
# 'ds' timestamp column and derive the calendar features from that index.
(Xtrain, ytrain), (Xval, yval), (Xtest, ytest) = (
    create_features(split.set_index('ds'), label='y')
    for split in (X_train, X_val, X_test)
)
<ipython-input-2-568ff9107aea>:13: FutureWarning: Series.dt.weekofyear and Series.dt.week have been deprecated. Please use Series.dt.isocalendar().week instead.
  df['weekofyear'] = df['date'].dt.weekofyear

LightGBM