Forecasting Monthly Electric Production using ARIMA and Prophet (Part 1)

#Import Packages
import pandas as pd
from matplotlib import pyplot as plt #time series plotting
# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")
#ARIMA and SARIMA function
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.stattools import adfuller #stationary test
from statsmodels.tsa.seasonal import seasonal_decompose
from pmdarima import auto_arima
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Calculating Metrics
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse
# Load the dataset; the DATE column is parsed as dates and used as the index
DATA_FILE = "Electric_Production.csv"
DATE_COLUMN = 'DATE'
df = pd.read_csv(
    DATA_FILE,
    parse_dates=[DATE_COLUMN],
    index_col=[DATE_COLUMN],
)
df.head()
Dataset of Electric Production 1985–2018
# Two stacked panels: the whole series on top, a 2010-2017 window below
fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=(11, 10))

# Top panel: monthly plot 1985-2018, with the mean of IPG2211A2N as a red line
ax_full.plot(df)
ax_full.axhline(y=df['IPG2211A2N'].mean(), color='r', linestyle='-')
ax_full.set_title('Image 1a. Monthly Electric Production (Jan 1985 - Jan 2018)')
ax_full.set_xlabel('Date')
ax_full.set_ylabel('Elec_Prod')
ax_full.grid()
ax_full.legend(['monthly observation', 'mean'], loc='upper left')

# Bottom panel: monthly plot 2010-2017
start, end = '2010-01', '2017-12'
ax_zoom.plot(df.loc[start:end, 'IPG2211A2N'])
ax_zoom.set_title('Image 1b. Monthly Electric Production (Jan 2010 - Dec 2017)')
ax_zoom.set_xlabel('Date')
ax_zoom.set_ylabel('Elec_Prod')
ax_zoom.grid()

plt.show()
Figure 1a and 1b. Monthly Electric Production 1985–2018 and 2010–2017
def adf_test(dataset):
    """Run the Augmented Dickey-Fuller test on *dataset* and print the outcome.

    Prints the test statistic, the p-value and each critical value taken
    from the tuple returned by ``adfuller``. Nothing is returned.

    Args:
        dataset: The observations to test; passed unchanged to ``adfuller``.
    """
    result = adfuller(dataset)
    print(f'ADF Statistic: {result[0]:f}')
    print(f'p-value: {result[1]:f}')
    print('Critical Values:')
    # result[4] maps a significance-level label to its critical value
    for key, value in result[4].items():
        print(f'\t{key}: {value:.4f}')
# Run the stationarity test on the raw production series
raw_series = df['IPG2211A2N']
adf_test(raw_series)
ADF Test for Electric Production Data
# Work on a copy: a plain `df2 = df` only binds a second name to the same
# frame, so adding the diff_1 column would silently modify df as well.
df2 = df.copy()
df2['diff_1'] = df2['IPG2211A2N'].diff()
# The first row has no predecessor, so its difference is NaN; set it to 0 as
# before, but locate the column by name instead of by a hard-coded position.
df2.iloc[0, df2.columns.get_loc('diff_1')] = 0
adf_test(df2['diff_1'])
ADF Test for Differenced Data
# Compare the original series (top) with its first difference (bottom).
# The panels are labelled Image 2a/2b: the ACF/PACF figures further down
# already use the Image 3 and Image 4 labels.
fig, (ax_original, ax_diff) = plt.subplots(2, 1, figsize=(11, 10))

# Monthly plot 1985-2018
ax_original.plot(df['IPG2211A2N'])
ax_original.set_title('Image 2a. Monthly Electric Production (Jan 1985 - Jan 2018)')
ax_original.set_xlabel('Date')
ax_original.set_ylabel('Elec_Prod')
ax_original.grid()

# Plot of the differencing result
ax_diff.plot(df.IPG2211A2N.diff())
ax_diff.set_title('Image 2b. Differentiation of Monthly Electricity Production Data')
ax_diff.set_xlabel('Date')
ax_diff.set_ylabel('Diff_Elec_Prod')
ax_diff.grid()

plt.show()
# ACF and PACF plots of the original data, one panel each
fig, (ax_acf, ax_pacf) = plt.subplots(2, 1, figsize=(10, 8))

plot_acf(df['IPG2211A2N'], ax=ax_acf)
ax_acf.set_title('Image 3a. ACF')

plot_pacf(df['IPG2211A2N'], ax=ax_pacf)
ax_pacf.set_title('Image 3b. PACF')

plt.show()
# ACF and PACF plots of the differenced data, one panel each
fig, (ax_acf, ax_pacf) = plt.subplots(2, 1, figsize=(10, 8))

plot_acf(df2['diff_1'], ax=ax_acf)
ax_acf.set_title('Image 4a. ACF Hasil Diferensiasi 1')

plot_pacf(df2['diff_1'], ax=ax_pacf)
ax_pacf.set_title('Image 4b. PACF Hasil Diferensiasi 1')

plt.show()
ACF and PACF plots for Original Data
ACF and PACF plots for Differenced Data

ARIMA (Auto-Regressive Integrated Moving Average)

# Split into train and test data; the test set is the last 12 rows
# (the final year of the monthly series)
TEST_ROWS = 12
df_train = df.iloc[:-TEST_ROWS]
df_test = df.iloc[-TEST_ROWS:]
# Fit the ARIMA model with the stepwise algorithm
search_options = {
    'start_p': 0,
    'start_q': 0,
    'm': 12,
    'start_P': 0,
    'D': 1,
    'stationary': False,        # known to be non-stationary
    'seasonal': True,           # the data has a seasonal effect
    'trace': True,
    'error_action': 'ignore',
    'suppress_warnings': True,
    'stepwise': True,
}
stepwise_fit = auto_arima(df_train['IPG2211A2N'], **search_options)
stepwise_fit.summary()
Model determined by auto_arima function
Predictions of SARIMA (1,1,1)x(2,1,1,12)
Predictions of SARIMA (1,1,1)x(2,1,2,12)
# Fit the model to the full dataset
ORDER = (1, 1, 1)
SEASONAL_ORDER = (2, 1, 1, 12)
model = SARIMAX(
    df['IPG2211A2N'],
    order=ORDER,
    seasonal_order=SEASONAL_ORDER,
)
result = model.fit()
result.summary()
# Forecast the data for the next 12 months: positions len(df) .. len(df) + 11,
# i.e. the 12 steps immediately after the last observation.
# String literals use plain ASCII quotes; the typographic quotes that were
# here are not valid Python.
# NOTE(review): newer statsmodels releases may warn about or reject the `typ`
# keyword on SARIMAX results - confirm against the installed version.
forecast = result.predict(start=len(df),
                          end=(len(df) - 1) + 12,
                          typ='levels').rename('Forecast')
df_forecast = pd.DataFrame(forecast)
# `display` is not imported in this file; presumably this runs in a
# Jupyter/IPython session where it is available - verify before running as a script.
display(df_forecast)
# Plot the forecast values for the next 12 months on top of the observed series.
# String literals use plain ASCII quotes; the typographic quotes that were
# here are not valid Python.
df['IPG2211A2N'].plot(figsize=(12, 5), legend=True)
forecast.plot(legend=True)
plt.title('Image 5. Forecast Monthly Electric Production (Feb 2018 — Jan 2019)')
plt.xlabel('Date')
plt.ylabel('Elec_Prod')
plt.grid()
plt.show()
Forecast of Electric Production for Feb 2018 — Jan 2019

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store