in

Calculate Accuracy measures of Arima Forecasting in python


I’m trying to forecast non-seasonal data using Auto Arima to find best order then apply it in forecasting. Forecasting code is based on Source1. Following the steps in source1, the accuracy measures calculation seem to be incorrect ( returning nan values), hence I call it approach one. In Approach2, I tried to use the existing models in available packages as suggested by source2, it worked but showing negative values for R2. Any suggestions? or what I’m missing here?

1- Following code can be used to prepare the data:

#-----------------------------------------------------
# load data
#--------------
# links to data in csv file:
#----------------------------
# 1- https://data.humdata.org/dataset/covid-19-vaccinations
import pandas as pd
df = pd.read_csv('vaccinations.csv',low_memory=False)
df_ind = df.loc[df['iso_code'] == 'IND']
df_indnew = df_ind[['date','daily_vaccinations']]
#df.reset_index(drop=True, inplace=True)
df_indnew.to_csv("India.csv", index = False)
# read it again
data = pd.read_csv('India.csv')
data.dropna(inplace=True)
df = data.reset_index(drop=True)
df.date=pd.to_datetime(df.date)
print(df)
print(df)
df.info()

2- Code:

from statsmodels.tsa.arima_model import *
import pmdarima as pm
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_percentage_error

#suppress warnings
#----------------
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore")


# load data
data = pd.read_csv('India.csv')
data.dropna(inplace=True)
df = data.reset_index(drop=True)
df.date=pd.to_datetime(df.date)

print(df)
df.info()


model = pm.auto_arima(df.daily_vaccinations,
                      start_p=1,
                      start_q=1,
                      test="adf",  # use adftest to find optimal 'd'
                      max_p=5, max_q=5,  # maximum p and q
                      m=1,  # frequency of series
                      d=None,  # let model determine 'd'
                      seasonal=False,  # No Seasonality
                      start_P=0,
                      D=0,
                      trace=True,
                      error_action='ignore',
                      suppress_warnings=True,
                      stepwise=True)

# ------------------------------
# Choose best model
# ------------------------------
model_fit = ARIMA(df.daily_vaccinations, order=model.order).fit()

#-------------------------------
# Forecast: 
#-------------------------------
print(" ============= Forecast ===========")

n_periods = 60
fc,confint = model.predict(n_periods=n_periods, return_conf_int=True)
index_of_fc = np.arange(len(df.daily_vaccinations), len(df.daily_vaccinations) + n_periods)

# make series for plotting purpose
fc_series = pd.Series(fc, index=index_of_fc)
lower_series = pd.Series(confint[:, 0], index=index_of_fc)
upper_series = pd.Series(confint[:, 1], index=index_of_fc)

forcast_date = [(df.date.iloc[-1] + datetime.timedelta(days=x+1)).strftime("%Y-%m-%d") for x in range(n_periods)]
previous_index = df.index.tolist()
new_index = previous_index + index_of_fc.tolist()
new_date = data.date.tolist() + forcast_date

fig, ax= plt.subplots(figsize=(15,8))
ax.plot(df.daily_vaccinations,marker=">")
ax.plot(fc_series,marker="o",fillstyle="none",linewidth=1, markersize=1)
ax.fill_between(lower_series.index,
                lower_series,
                upper_series,
                color="k", alpha=.15)
ax.legend(loc="upper left", fontsize=8)
ax.set_title("Forecast")
ax.set_xticks(new_index,new_date)
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%b'))
for label in ax.get_xticklabels(which="major"):
    label.set(rotation=60, horizontalalignment="right")
plt.show()



print("======================================")
print("============Accuracy metrics=========")
print("======================================")
print("Length of Forecast: ", n_periods)
print("Length of actual: ", len(df.daily_vaccinations))
diffInSize= len(df.daily_vaccinations)-n_periods
print("Slice Test data From",diffInSize)
test= df['daily_vaccinations'][diffInSize:]

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Approach 1
#%%%%%%%%%%%%%%%%%
print("===========")
print(" Approach 1")
print("===========")
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Accuracy metrics-Approach 1
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast - actual)/np.abs(actual))  # MAPE
    me = np.mean(forecast - actual)             # ME
    mae = np.mean(np.abs(forecast - actual))    # MAE
    mpe = np.mean((forecast - actual)/actual)   # MPE
    rmse = np.mean((forecast - actual)**2)**.5  # RMSE
    corr = np.corrcoef(forecast, actual)[0,1]   # corr
    mins = np.amin(np.hstack([forecast[:,None],
                              actual[:,None]]), axis=1)
    maxs = np.amax(np.hstack([forecast[:,None],
                              actual[:,None]]), axis=1)
    minmax = 1 - np.mean(mins/maxs)             # minmax
    #acf1 = acf(fc-test)[1]                      # ACF1
    return({'mape':mape, 'me':me, 'mae': mae,
            'mpe': mpe, 'rmse':rmse,
            'corr':corr, 'minmax':minmax})

print(forecast_accuracy(fc_series, test))

print("===========")
print(" Approach 2")
print("===========")
MSE=mean_squared_error(test, fc_series, squared = False)
print("MSE",MSE)

MAPE =mean_absolute_percentage_error(test, fc_series)
print("MAPE = ",MAPE)

from sklearn.metrics import r2_score
R2=r2_score(test, fc_series)
print("R2 = ",R2)

from sklearn.metrics import mean_absolute_error
MAE= mean_absolute_error(test, fc_series)
print("MAE = ",MAE)

from sklearn.metrics import median_absolute_error
print("Median Absolute Error = ",median_absolute_error(test, fc_series))

from math import sqrt
RMSE = sqrt(mean_squared_error(test, fc_series))
print("RMSE = ",RMSE)



Source: https://stackoverflow.com/questions/70552808/calculate-accuracy-measures-of-arima-forecasting-in-python

Typescript Error with Networks not having the correct property

GTK TextView has extra padding at the bottom when text is wrapped