#importing the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import random

#importing the dataset

data = pd.read_csv(r"C:\Users\T\Desktop\UCT Integrated Data Energy Analytics\Energy Data Sets\Delhi 5 Minute Electricity Demand for Forecasting Dataset\Delhi_electrictity_demand_data.csv")

data

#checking the basic informatrion about the data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 393440 entries, 0 to 393439
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Unnamed: 0    393440 non-null  int64  
 1   datetime      393440 non-null  object 
 2   Power demand  393440 non-null  float64
 3   temp          393440 non-null  float64
 4   dwpt          393440 non-null  float64
 5   rhum          393440 non-null  float64
 6   wdir          392900 non-null  float64
 7   wspd          393440 non-null  float64
 8   pres          393440 non-null  float64
 9   year          393440 non-null  int64  
 10  month         393440 non-null  int64  
 11  day           393440 non-null  int64  
 12  hour          393440 non-null  int64  
 13  minute        393440 non-null  int64  
 14  moving_avg_3  393438 non-null  float64
dtypes: float64(8), int64(6), object(1)
memory usage: 45.0+ MB

#changing the timestamp to a date time format
data['datetime'] = pd.to_datetime(data['datetime'])

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 393440 entries, 0 to 393439
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   Unnamed: 0    393440 non-null  int64         
 1   datetime      393440 non-null  datetime64[ns]
 2   Power demand  393440 non-null  float64       
 3   temp          393440 non-null  float64       
 4   dwpt          393440 non-null  float64       
 5   rhum          393440 non-null  float64       
 6   wdir          392900 non-null  float64       
 7   wspd          393440 non-null  float64       
 8   pres          393440 non-null  float64       
 9   year          393440 non-null  int64         
 10  month         393440 non-null  int64         
 11  day           393440 non-null  int64         
 12  hour          393440 non-null  int64         
 13  minute        393440 non-null  int64         
 14  moving_avg_3  393438 non-null  float64       
dtypes: datetime64[ns](1), float64(8), int64(6)
memory usage: 45.0 MB

#dropping the first column which contains only numbers and not useful data
data = data.drop(columns=['Unnamed: 0'])

data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 393440 entries, 0 to 393439
Data columns (total 14 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   datetime      393440 non-null  datetime64[ns]
 1   Power demand  393440 non-null  float64       
 2   temp          393440 non-null  float64       
 3   dwpt          393440 non-null  float64       
 4   rhum          393440 non-null  float64       
 5   wdir          392900 non-null  float64       
 6   wspd          393440 non-null  float64       
 7   pres          393440 non-null  float64       
 8   year          393440 non-null  int64         
 9   month         393440 non-null  int64         
 10  day           393440 non-null  int64         
 11  hour          393440 non-null  int64         
 12  minute        393440 non-null  int64         
 13  moving_avg_3  393438 non-null  float64       
dtypes: datetime64[ns](1), float64(8), int64(5)
memory usage: 42.0 MB

#setting the datetime to the index column
data = data.set_index('datetime')

data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 393440 entries, 2021-01-01 00:30:00 to 2024-12-12 00:30:00
Data columns (total 13 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Power demand  393440 non-null  float64
 1   temp          393440 non-null  float64
 2   dwpt          393440 non-null  float64
 3   rhum          393440 non-null  float64
 4   wdir          392900 non-null  float64
 5   wspd          393440 non-null  float64
 6   pres          393440 non-null  float64
 7   year          393440 non-null  int64  
 8   month         393440 non-null  int64  
 9   day           393440 non-null  int64  
 10  hour          393440 non-null  int64  
 11  minute        393440 non-null  int64  
 12  moving_avg_3  393438 non-null  float64
dtypes: float64(8), int64(5)
memory usage: 42.0 MB

data

#summary statrictics of a few columns
data[['Power demand','temp','rhum']].describe()

#exploring how many missing values there are
data.isnull().sum()

Power demand      0
temp              0
dwpt              0
rhum              0
wdir            540
wspd              0
pres              0
year              0
month             0
day               0
hour              0
minute            0
moving_avg_3      2
dtype: int64

data = data.drop(columns=['wdir'])

#show the records that contain any null values
data[data.isna().any(axis=1)]

#drop all records that contain all missing values
data = data.dropna(how = 'all')
data[data.isna().all(axis=1)]

data.tail()

#making sure that the index is in the datetime formart
data.index = pd.to_datetime(data.index)

#adding the "dayofweek" and "dayofyear" columns
data['dayofweek'] = data.index.dayofweek + 1 #monday as 1 and Sunday as 7
data['dayofyear'] = data.index.dayofyear #numbering the days from day 1 to day 366

data.tail()

#Feature engineering, creating of year quaters
data['quarter'] = data.index.quarter
data.head()

data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 393440 entries, 2021-01-01 00:30:00 to 2024-12-12 00:30:00
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Power demand  393440 non-null  float64
 1   temp          393440 non-null  float64
 2   dwpt          393440 non-null  float64
 3   rhum          393440 non-null  float64
 4   wspd          393440 non-null  float64
 5   pres          393440 non-null  float64
 6   year          393440 non-null  int64  
 7   month         393440 non-null  int64  
 8   day           393440 non-null  int64  
 9   hour          393440 non-null  int64  
 10  minute        393440 non-null  int64  
 11  moving_avg_3  393438 non-null  float64
 12  dayofweek     393440 non-null  int32  
 13  dayofyear     393440 non-null  int32  
 14  quarter       393440 non-null  int32  
dtypes: float64(7), int32(3), int64(5)
memory usage: 43.5 MB

#create another column for the "weekofyear"
data.insert(7,'weekofyear', data.index.isocalendar().week.astype(int))

data.head(1000)

#create anew column to check if the date is a weekend or not
data.insert(7, 'is_weekend', data.index.dayofweek.isin([6,7]))

data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 393440 entries, 2021-01-01 00:30:00 to 2024-12-12 00:30:00
Data columns (total 17 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Power demand  393440 non-null  float64
 1   temp          393440 non-null  float64
 2   dwpt          393440 non-null  float64
 3   rhum          393440 non-null  float64
 4   wspd          393440 non-null  float64
 5   pres          393440 non-null  float64
 6   year          393440 non-null  int64  
 7   is_weekend    393440 non-null  bool   
 8   weekofyear    393440 non-null  int64  
 9   month         393440 non-null  int64  
 10  day           393440 non-null  int64  
 11  hour          393440 non-null  int64  
 12  minute        393440 non-null  int64  
 13  moving_avg_3  393438 non-null  float64
 14  dayofweek     393440 non-null  int32  
 15  dayofyear     393440 non-null  int32  
 16  quarter       393440 non-null  int32  
dtypes: bool(1), float64(7), int32(3), int64(6)
memory usage: 46.9 MB

data['is_weekend'] = data['is_weekend'].astype(int)

data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 393440 entries, 2021-01-01 00:30:00 to 2024-12-12 00:30:00
Data columns (total 17 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   Power demand  393440 non-null  float64
 1   temp          393440 non-null  float64
 2   dwpt          393440 non-null  float64
 3   rhum          393440 non-null  float64
 4   wspd          393440 non-null  float64
 5   pres          393440 non-null  float64
 6   year          393440 non-null  int64  
 7   is_weekend    393440 non-null  int64  
 8   weekofyear    393440 non-null  int64  
 9   month         393440 non-null  int64  
 10  day           393440 non-null  int64  
 11  hour          393440 non-null  int64  
 12  minute        393440 non-null  int64  
 13  moving_avg_3  393438 non-null  float64
 14  dayofweek     393440 non-null  int32  
 15  dayofyear     393440 non-null  int32  
 16  quarter       393440 non-null  int32  
dtypes: float64(7), int32(3), int64(7)
memory usage: 49.5 MB

#install and importing the holidays package for checking if the particular day falls on a public holiday or not
!pip install holidays
import holidays

Collecting holidays
  Downloading holidays-0.92-py3-none-any.whl.metadata (50 kB)
Requirement already satisfied: python-dateutil<3,>=2.9.0.post0 in c:\users\t\anaconda3\lib\site-packages (from holidays) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in c:\users\t\anaconda3\lib\site-packages (from python-dateutil<3,>=2.9.0.post0->holidays) (1.17.0)
Downloading holidays-0.92-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 10.4 MB/s  0:00:00
Installing collected packages: holidays
Successfully installed holidays-0.92

holidays.IN(years= data.year)

{datetime.date(2024, 1, 26): 'Republic Day', datetime.date(2024, 8, 15): 'Independence Day', datetime.date(2024, 10, 2): 'Gandhi Jayanti', datetime.date(2024, 5, 23): 'Buddha Purnima', datetime.date(2024, 10, 31): 'Diwali', datetime.date(2024, 8, 26): 'Janmashtami', datetime.date(2024, 10, 12): 'Dussehra', datetime.date(2024, 4, 21): 'Mahavir Jayanti', datetime.date(2024, 3, 8): 'Maha Shivaratri', datetime.date(2024, 11, 15): 'Guru Nanak Jayanti', datetime.date(2024, 7, 17): 'Muharram', datetime.date(2024, 9, 16): 'Milad-un-Nabi', datetime.date(2024, 4, 11): 'Id-ul-Fitr', datetime.date(2024, 6, 17): 'Bakrid', datetime.date(2024, 3, 29): 'Good Friday', datetime.date(2024, 12, 25): 'Christmas', datetime.date(2021, 1, 26): 'Republic Day', datetime.date(2021, 8, 15): 'Independence Day', datetime.date(2021, 10, 2): 'Gandhi Jayanti', datetime.date(2021, 5, 26): 'Buddha Purnima', datetime.date(2021, 11, 4): 'Diwali', datetime.date(2021, 8, 30): 'Janmashtami', datetime.date(2021, 10, 15): 'Dussehra', datetime.date(2021, 4, 25): 'Mahavir Jayanti', datetime.date(2021, 3, 11): 'Maha Shivaratri', datetime.date(2021, 11, 19): 'Guru Nanak Jayanti', datetime.date(2021, 8, 20): 'Muharram', datetime.date(2021, 10, 19): 'Milad-un-Nabi', datetime.date(2021, 5, 14): 'Id-ul-Fitr', datetime.date(2021, 7, 21): 'Bakrid', datetime.date(2021, 4, 2): 'Good Friday', datetime.date(2021, 12, 25): 'Christmas', datetime.date(2022, 1, 26): 'Republic Day', datetime.date(2022, 8, 15): 'Independence Day', datetime.date(2022, 10, 2): 'Gandhi Jayanti', datetime.date(2022, 5, 16): 'Buddha Purnima', datetime.date(2022, 10, 24): 'Diwali', datetime.date(2022, 8, 19): 'Janmashtami', datetime.date(2022, 10, 5): 'Dussehra', datetime.date(2022, 4, 14): 'Mahavir Jayanti', datetime.date(2022, 3, 1): 'Maha Shivaratri', datetime.date(2022, 11, 8): 'Guru Nanak Jayanti', datetime.date(2022, 8, 9): 'Muharram', datetime.date(2022, 10, 9): 'Milad-un-Nabi', datetime.date(2022, 5, 3): 'Id-ul-Fitr', datetime.date(2022, 7, 10): 'Bakrid', datetime.date(2022, 4, 15): 'Good Friday', datetime.date(2022, 12, 25): 'Christmas', datetime.date(2023, 1, 26): 'Republic Day', datetime.date(2023, 8, 15): 'Independence Day', datetime.date(2023, 10, 2): 'Gandhi Jayanti', datetime.date(2023, 5, 5): 'Buddha Purnima', datetime.date(2023, 11, 12): 'Diwali', datetime.date(2023, 9, 7): 'Janmashtami', datetime.date(2023, 10, 24): 'Dussehra', datetime.date(2023, 4, 4): 'Mahavir Jayanti', datetime.date(2023, 2, 18): 'Maha Shivaratri', datetime.date(2023, 11, 27): 'Guru Nanak Jayanti', datetime.date(2023, 7, 29): 'Muharram', datetime.date(2023, 9, 28): 'Milad-un-Nabi', datetime.date(2023, 4, 22): 'Id-ul-Fitr', datetime.date(2023, 6, 29): 'Bakrid', datetime.date(2023, 4, 7): 'Good Friday', datetime.date(2023, 12, 25): 'Christmas'}

#saving the new data in its own column
data['Holidays'] = holidays.IN(years= data.year)
data.head(3)

# Dropping some columns we do not need
columns_to_drop = ['dwpt', 'wspd', 'pres', 'minute', 'day', 'moving_avg_3']

# Drop the columns
data.drop(columns=columns_to_drop, inplace=True)

# Check the result of the remaining columns
print(data.columns.tolist())

['Power demand', 'temp', 'rhum', 'year', 'is_weekend', 'weekofyear', 'month', 'hour', 'dayofweek', 'dayofyear', 'quarter', 'Holidays']

data.head()

#Creating some laggged features
#These are past demand values: strong predictors which can be used as input to predict future values
#enables the models to learn from the historical data and improve from historical accurancy

#creating a demand lag feature from the same hour yesterday, and from the same hour last week
data['demand_lagged_24hr'] = data ['Power demand'].shift(24)

#creating another feature fro the demand lagged one week
#24hrs * 7 = 168 hours for a weekly shift
data['demand_lagged_168hr'] = data ['Power demand'].shift(186)

#creating two columns, one for the moving average and another for the standard deviation

data['demand_rolling_mean_24hr'] = data['Power demand'].rolling(window=24).mean()

data['demand_rolling_std_24hr'] = data['Power demand'].rolling(window=24).std()

#dropping the holiday column since we will ont use it
data = data.drop('Holidays',axis=1)

#dropping all the rows having nan values
data = data.dropna()
data

#plot raw data over time (demand over time)
data['Power demand'].plot(figsize=(15,6), title = 'Electricity Demand Over Time')
plt.xlabel('Year')
plt.ylabel('Demand (in MW)')
plt.show()

#visualize the demand by hour of the day
plt.figure(figsize = (15,6))
sns.boxplot(data = data, x = 'hour', y = 'Power demand')
plt.title("Demand by the Hour")
plt.show()

#visualize the demand by month
plt.figure (figsize=(15,6))
sns.boxplot(data = data, x = 'month', y ='Power demand')
plt.title("Demand by Month")
plt.show()

plt.figure(figsize=(10,10))
sns.regplot(data=data, x='temp', y='Power demand', 
            scatter_kws={'alpha':0.5},
            line_kws={'color': 'black', 'linewidth': 2})
plt.title("Demand vs Temperature with Trend Line")
plt.show()

#showinng the correlation matrix between the different features of the dataframe
plt.figure(figsize=(12,8))
sns.heatmap(data.corr(), annot=True, fmt=".2f",cmap='YlGnBu')
plt.title('Correlation Matrix of Features')
plt.show()

#Building the prediction model, at the end of the day, we need to predict the power demand

#renaming the demand column 
data.rename(columns={'Power demand': 'demand'}, inplace=True)

data.head()

#defining the target variable (Y) dependent variable
Y = data.demand

#defining the features (X) predictors/ features/ variables
X = data.drop('demand', axis = 1)

print(Y)

datetime
2021-01-01 16:00:00    3449.66
2021-01-01 16:05:00    3415.85
2021-01-01 16:10:00    3420.58
2021-01-01 16:15:00    3406.31
2021-01-01 16:20:00    3417.71
                        ...   
2024-12-12 00:10:00    2146.84
2024-12-12 00:15:00    2116.66
2024-12-12 00:20:00    2082.77
2024-12-12 00:25:00    2059.17
2024-12-12 00:30:00    2049.66
Name: demand, Length: 393254, dtype: float64

X

#splitting the data into training and testing data by 20% to 80%
X_train = X.loc[: '2024-02-29'] #taking 80% of the data as testing data

X_train

Y_train = Y.loc[:'2024-02-29'] #taking 80% of the data as the training data

X_test = X.loc['2024-03-01':] #20% of testing dataset

Y_test = Y.loc['2024-03-01':]

print (X_train.shape)
print(Y_train.shape)

(314928, 14)
(314928,)

print (X_test.shape)
print(Y_test.shape)

(78326, 14)
(78326,)

#XGBoost Model
#XGboost used for time series forecasting, as it can handle non-linear data easily.
#it can handle seasonality and trends in time series data effectively.

!pip install xgboost
from xgboost import XGBRegressor

Collecting xgboost
  Downloading xgboost-3.2.0-py3-none-win_amd64.whl.metadata (2.1 kB)
Requirement already satisfied: numpy in c:\users\t\anaconda3\lib\site-packages (from xgboost) (2.3.5)
Requirement already satisfied: scipy in c:\users\t\anaconda3\lib\site-packages (from xgboost) (1.16.3)
Downloading xgboost-3.2.0-py3-none-win_amd64.whl (101.7 MB)
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.3/101.7 MB ? eta -:--:--
   - -------------------------------------- 3.4/101.7 MB 13.6 MB/s eta 0:00:08
   -- ------------------------------------- 5.8/101.7 MB 12.8 MB/s eta 0:00:08
   --- ------------------------------------ 7.9/101.7 MB 11.6 MB/s eta 0:00:09
   ---- ----------------------------------- 10.5/101.7 MB 11.7 MB/s eta 0:00:08
   ----- ---------------------------------- 13.1/101.7 MB 11.8 MB/s eta 0:00:08
   ----- ---------------------------------- 14.9/101.7 MB 11.6 MB/s eta 0:00:08
   ----- ---------------------------------- 15.2/101.7 MB 9.8 MB/s eta 0:00:09
   ------- -------------------------------- 19.1/101.7 MB 10.8 MB/s eta 0:00:08
   -------- ------------------------------- 22.0/101.7 MB 11.1 MB/s eta 0:00:08
   --------- ------------------------------ 24.4/101.7 MB 11.1 MB/s eta 0:00:07
   ---------- ----------------------------- 26.2/101.7 MB 10.8 MB/s eta 0:00:07
   ----------- ---------------------------- 28.8/101.7 MB 11.0 MB/s eta 0:00:07
   ------------ --------------------------- 31.5/101.7 MB 11.1 MB/s eta 0:00:07
   ------------- -------------------------- 34.3/101.7 MB 11.3 MB/s eta 0:00:06
   -------------- ------------------------- 37.2/101.7 MB 11.4 MB/s eta 0:00:06
   --------------- ------------------------ 39.3/101.7 MB 11.3 MB/s eta 0:00:06
   ---------------- ----------------------- 41.9/101.7 MB 11.4 MB/s eta 0:00:06
   ----------------- ---------------------- 44.3/101.7 MB 11.3 MB/s eta 0:00:06
   ------------------ --------------------- 46.7/101.7 MB 11.3 MB/s eta 0:00:05
   ------------------- -------------------- 49.0/101.7 MB 11.3 MB/s eta 0:00:05
   -------------------- ------------------- 51.4/101.7 MB 11.3 MB/s eta 0:00:05
   --------------------- ------------------ 54.0/101.7 MB 11.3 MB/s eta 0:00:05
   ---------------------- ----------------- 56.4/101.7 MB 11.3 MB/s eta 0:00:05
   ----------------------- ---------------- 59.2/101.7 MB 11.4 MB/s eta 0:00:04
   ------------------------ --------------- 61.9/101.7 MB 11.4 MB/s eta 0:00:04
   ------------------------- -------------- 64.7/101.7 MB 11.5 MB/s eta 0:00:04
   -------------------------- ------------- 67.1/101.7 MB 11.5 MB/s eta 0:00:04
   -------------------------- ------------- 67.4/101.7 MB 11.5 MB/s eta 0:00:03
   ---------------------------- ----------- 71.6/101.7 MB 11.4 MB/s eta 0:00:03
   ---------------------------- ----------- 73.7/101.7 MB 11.4 MB/s eta 0:00:03
   ------------------------------ --------- 76.5/101.7 MB 11.4 MB/s eta 0:00:03
   ------------------------------- -------- 78.9/101.7 MB 11.4 MB/s eta 0:00:02
   ------------------------------- -------- 80.7/101.7 MB 11.3 MB/s eta 0:00:02
   -------------------------------- ------- 82.6/101.7 MB 11.2 MB/s eta 0:00:02
   -------------------------------- ------- 83.9/101.7 MB 11.1 MB/s eta 0:00:02
   --------------------------------- ------ 85.7/101.7 MB 11.0 MB/s eta 0:00:02
   ---------------------------------- ----- 88.3/101.7 MB 11.0 MB/s eta 0:00:02
   ----------------------------------- ---- 91.2/101.7 MB 11.1 MB/s eta 0:00:01
   ------------------------------------ --- 93.8/101.7 MB 11.1 MB/s eta 0:00:01
   ------------------------------------- -- 95.7/101.7 MB 11.1 MB/s eta 0:00:01
   -------------------------------------- - 98.3/101.7 MB 11.1 MB/s eta 0:00:01
   --------------------------------------  100.9/101.7 MB 11.1 MB/s eta 0:00:01
   --------------------------------------  101.4/101.7 MB 11.1 MB/s eta 0:00:01
   ---------------------------------------- 101.7/101.7 MB 10.8 MB/s  0:00:09
Installing collected packages: xgboost
Successfully installed xgboost-3.2.0

#from sklearn library importing two fundamental metrics to calculate the performance of the model
from sklearn.metrics import mean_squared_error, mean_absolute_error
#from model_selection module of scikit-learn's library, import timeseries split class
#which perfomrs appropriate cross validation when dealing with time series data
from sklearn.model_selection import TimeSeriesSplit

#Initializing and training the model
model_xgb = XGBRegressor(n_estimators=1000,
                        early_stopping_rounds=50,
                        learning_rate=0.01,
                        random_state=42,
                        objective='reg:squarederror')

model_xgb.fit(X_train, Y_train, eval_set = [(X_train, Y_train), (X_test,Y_test)], verbose=False)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=50,
             enable_categorical=False, eval_metric=None, feature_types=None,
             feature_weights=None, gamma=None, grow_policy=None,
             importance_type=None, interaction_constraints=None,
             learning_rate=0.01, max_bin=None, max_cat_threshold=None,
             max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
             max_leaves=None, min_child_weight=None, missing=nan,
             monotone_constraints=None, multi_strategy=None, n_estimators=1000,
             n_jobs=None, num_parallel_tree=None, ...)

#making predictions
predictions_xgb = model_xgb.predict(X_test)

#evaluating the model
rmse_xgb = np.sqrt(mean_squared_error(Y_test,predictions_xgb))
mae_xgb = mean_absolute_error(Y_test,predictions_xgb)

print('XGBoost RMSE:', rmse_xgb)
print('XGBoost MAE:', mae_xgb)

XGBoost RMSE: 191.6282277687719
XGBoost MAE: 88.88229013144982

#visualize the predictions
plt.figure(figsize = (15,6))
plt.plot(Y_test.index,Y_test, label= 'Actual demand')
plt.plot(Y_test.index, predictions_xgb, label='Predicted Demand', color = 'Red')
plt.title('XGBoost Electricity demand Prediction')
plt.xlabel('Date')
plt.ylabel('Power Demand')
plt.legend()
plt.show()

#saving the model
import joblib
joblib.dump(model_xgb, 'electricity_xbg_prediction_model.pkl')

['electricity_xbg_prediction_model.pkl']

	Power demand	temp	rhum
count	393440.000000	393440.000000	393440.000000
mean	3960.736469	25.527913	63.435767
std	1300.473773	7.981563	24.850663
min	1302.080000	4.000000	5.000000
25%	3074.900000	20.000000	44.000000
50%	3832.320000	27.000000	67.000000
75%	4870.465000	31.000000	84.000000
max	8631.530000	46.400000	100.000000

	Power demand	temp	dwpt	rhum	wspd	pres	year	weekofyear	month	day	hour	minute	moving_avg_3	dayofweek	dayofyear	quarter
datetime
2021-01-01 00:30:00	2014.00	8.0	6.9	93.0	0.0	1017.0	2021	53	1	1	0	30	NaN	5	1	1
2021-01-01 00:35:00	2005.63	8.0	6.9	93.0	0.0	1017.0	2021	53	1	1	0	35	NaN	5	1	1
2021-01-01 00:40:00	1977.60	8.0	6.9	93.0	0.0	1017.0	2021	53	1	1	0	40	1999.076667	5	1	1
2021-01-01 00:45:00	1976.44	8.0	6.9	93.0	0.0	1017.0	2021	53	1	1	0	45	1986.556667	5	1	1
2021-01-01 00:50:00	1954.37	8.0	6.9	93.0	0.0	1017.0	2021	53	1	1	0	50	1969.470000	5	1	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2021-01-04 11:25:00	3505.59	16.0	14.0	88.0	9.4	1018.0	2021	1	1	4	11	25	3489.023333	1	4	1
2021-01-04 11:30:00	3471.75	17.8	16.5	92.0	7.6	1017.3	2021	1	1	4	11	30	3489.096667	1	4	1
2021-01-04 11:35:00	3487.28	17.8	16.5	92.0	7.6	1017.3	2021	1	1	4	11	35	3488.206667	1	4	1
2021-01-04 11:40:00	3501.53	17.8	16.5	92.0	7.6	1017.3	2021	1	1	4	11	40	3486.853333	1	4	1
2021-01-04 11:45:00	3481.15	17.8	16.5	92.0	7.6	1017.3	2021	1	1	4	11	45	3489.986667	1	4	1

	Power demand	temp	rhum	year	is_weekend	weekofyear	month	hour	dayofweek	dayofyear	quarter	Holidays
datetime
2021-01-01 00:30:00	2014.00	8.0	93.0	2021	0	53	1	0	5	1	1	NaN
2021-01-01 00:35:00	2005.63	8.0	93.0	2021	0	53	1	0	5	1	1	NaN
2021-01-01 00:40:00	1977.60	8.0	93.0	2021	0	53	1	0	5	1	1	NaN
2021-01-01 00:45:00	1976.44	8.0	93.0	2021	0	53	1	0	5	1	1	NaN
2021-01-01 00:50:00	1954.37	8.0	93.0	2021	0	53	1	0	5	1	1	NaN

	temp	rhum	year	is_weekend	weekofyear	month	hour	dayofweek	dayofyear	quarter	demand_lagged_24hr	demand_lagged_168hr	demand_rolling_mean_24hr	demand_rolling_std_24hr
datetime
2021-01-01 16:00:00	18.0	42.0	2021	0	53	1	16	5	1	1	3938.71	2014.00	3648.217083	159.107017
2021-01-01 16:05:00	18.0	42.0	2021	0	53	1	16	5	1	1	3918.13	2005.63	3627.288750	155.042152
2021-01-01 16:10:00	18.0	42.0	2021	0	53	1	16	5	1	1	3905.85	1977.60	3607.069167	148.645444
2021-01-01 16:15:00	18.0	42.0	2021	0	53	1	16	5	1	1	3867.80	1976.44	3587.840417	143.200310
2021-01-01 16:20:00	18.0	42.0	2021	0	53	1	16	5	1	1	3855.52	1954.37	3569.598333	135.285687
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2024-02-29 23:25:00	18.0	64.0	2024	0	9	2	23	4	60	1	2926.06	3766.91	2627.903750	168.803709
2024-02-29 23:30:00	18.0	75.0	2024	0	9	2	23	4	60	1	2894.40	3832.04	2605.032917	168.310756
2024-02-29 23:35:00	18.0	75.0	2024	0	9	2	23	4	60	1	2841.57	3787.23	2582.132500	172.076109
2024-02-29 23:40:00	18.0	75.0	2024	0	9	2	23	4	60	1	2865.73	3792.01	2557.773333	171.563203
2024-02-29 23:45:00	18.0	75.0	2024	0	9	2	23	4	60	1	2821.97	3833.37	2534.717083	171.696401

	Power demand	temp	dwpt	rhum	wspd	pres	year	month	day	hour	minute	moving_avg_3
datetime
2024-12-12 00:10:00	2146.84	12.3	6.8	69.0	1.8	1019.4	2024	12	12	0	10	2174.893333
2024-12-12 00:15:00	2116.66	12.3	6.8	69.0	1.8	1019.4	2024	12	12	0	15	2139.416667
2024-12-12 00:20:00	2082.77	12.3	6.8	69.0	1.8	1019.4	2024	12	12	0	20	2115.423333
2024-12-12 00:25:00	2059.17	12.3	6.8	69.0	1.8	1019.4	2024	12	12	0	25	2086.200000
2024-12-12 00:30:00	2049.66	12.3	6.8	69.0	1.8	1019.4	2024	12	12	0	30	2063.866667

	Power demand	temp	rhum	year	is_weekend	weekofyear	month	hour	dayofweek	dayofyear	quarter	demand_lagged_24hr	demand_lagged_168hr	demand_rolling_mean_24hr	demand_rolling_std_24hr
datetime
2021-01-01 16:00:00	3449.66	18.0	42.0	2021	0	53	1	16	5	1	1	3938.71	2014.00	3648.217083	159.107017
2021-01-01 16:05:00	3415.85	18.0	42.0	2021	0	53	1	16	5	1	1	3918.13	2005.63	3627.288750	155.042152
2021-01-01 16:10:00	3420.58	18.0	42.0	2021	0	53	1	16	5	1	1	3905.85	1977.60	3607.069167	148.645444
2021-01-01 16:15:00	3406.31	18.0	42.0	2021	0	53	1	16	5	1	1	3867.80	1976.44	3587.840417	143.200310
2021-01-01 16:20:00	3417.71	18.0	42.0	2021	0	53	1	16	5	1	1	3855.52	1954.37	3569.598333	135.285687
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2024-12-12 00:10:00	2146.84	12.3	69.0	2024	0	50	12	0	4	347	4	3446.53	3588.77	3120.540417	373.904837
2024-12-12 00:15:00	2116.66	12.3	69.0	2024	0	50	12	0	4	347	4	3402.37	3568.09	3066.969167	420.919382
2024-12-12 00:20:00	2082.77	12.3	69.0	2024	0	50	12	0	4	347	4	3348.17	3457.49	3014.244167	461.464483
2024-12-12 00:25:00	2059.17	12.3	69.0	2024	0	50	12	0	4	347	4	3311.41	3469.89	2962.067500	495.911949
2024-12-12 00:30:00	2049.66	12.3	69.0	2024	0	50	12	0	4	347	4	3297.00	3413.51	2910.095000	523.858573

	objective	'reg:squarederror'
	base_score	None
	booster	None
	callbacks	None
	colsample_bylevel	None
	colsample_bynode	None
	colsample_bytree	None
	device	None
	early_stopping_rounds	50
	enable_categorical	False
	eval_metric	None
	feature_types	None
	feature_weights	None
	gamma	None
	grow_policy	None
	importance_type	None
	interaction_constraints	None
	learning_rate	0.01
	max_bin	None
	max_cat_threshold	None
	max_cat_to_onehot	None
	max_delta_step	None
	max_depth	None
	max_leaves	None
	min_child_weight	None
	missing	nan
	monotone_constraints	None
	multi_strategy	None
	n_estimators	1000
	n_jobs	None
	num_parallel_tree	None
	random_state	42
	reg_alpha	None
	reg_lambda	None
	sampling_method	None
	scale_pos_weight	None
	subsample	None
	tree_method	None
	validate_parameters	None
	verbosity	None