
%%time

import numpy as np
import pandas as pd

df_vale2 = pd.read_csv('C:/Users/Cesar/Documents/LSTM-Sales-Sample01/Forecast/SalesData_1.csv')
df_vale2

feature_list2 = list(df_vale2.columns)

features_2 = np.array(df_vale2)

import datetime

year1 = features_2[:, feature_list2.index('Year')]

month1 = features_2[:, feature_list2.index('Month')]

dates1 = [str(int(Year)) + '-' + str(int(Month)) for Year, Month in zip(year1, month1)]

dates2_1 = [datetime.datetime.strptime(date, '%Y-%m') for date in dates1]

Fecha_1 = pd.DataFrame(data={'Fecha': dates2_1})

Fecha2_1 = Fecha_1.values

data02 = df_vale2.values

Real_1 = pd.concat([Fecha_1,df_vale2],axis=1)

Real_1['Fecha'] = pd.to_datetime(Real_1['Fecha'])

Real_1.sort_values(by=['Fecha'],inplace=True)

Real2_1 = Real_1.copy()

Real2_1.set_index(Real_1.Fecha, inplace=True)

Real3_1 = Real2_1.copy()

Real3_1.drop(['Fecha'], axis = 1,inplace=True)

Real3_1

# convert an array of values into a dataset matrix


def create_dataset2(dataset, look_back=1):
    # pad the start of the series with `look_back` zeros so every row has a full window
    dataset = np.insert(dataset, [0] * look_back, 0)
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    dataY = np.array(dataY)
    dataY = np.reshape(dataY, (dataY.shape[0], 1))
    # each row: look_back lagged values followed by the current value as target
    dataset = np.concatenate((dataX, dataY), axis=1)
    return dataset

look_back= 6
# create dataset x,y
dataset = Real3_1.Sales.values
dataset = create_dataset2(dataset,look_back)
dataset
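
# A minimal sanity check (illustrative only; the `toy` name is not from the original
# notebook): create_dataset2 pads the start of the series with `look_back` zeros and
# returns rows of [t-look_back, ..., t-1, t], so the last column is the target and the
# preceding columns are its lags.
toy = np.array([10, 20, 30, 40, 50])
create_dataset2(toy, look_back=2)
# expected: a (5, 3) array whose first row is [0, 0, 10] and last row is [30, 40, 50]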

dataset_Dataframe2 = pd.DataFrame(dataset, columns=['Sales_6', 'Sales_5', 'Sales_4', 'Sales_3', 'Sales_2', 'Sales_1', 'Sales'])
dataset_Dataframe2

Ventas_Anteriores2 = dataset_Dataframe2.copy()

data01 = df_vale2.iloc[:,[0,1]]
data02 = df_vale2.iloc[:,[2]]
data03 = df_vale2.iloc[:, [3]]
data04 = Ventas_Anteriores2.iloc[:,[-7]]
Datos_Combinados2 = pd.concat([data01, data02 ,data04 , data03],axis=1)
Datos_Combinados2

Datos_Combinados3 = Datos_Combinados2.copy()
a_series = ~(Datos_Combinados3 == 0).any(axis=1)
new_df = Datos_Combinados3.loc[a_series]
new_df
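
# For reference, a tiny illustration of the row filter used above (toy frame with
# made-up values): ~(df == 0).any(axis=1) keeps only rows with no zero in any column.
toy_df = pd.DataFrame({'a': [1, 0, 3], 'b': [4, 5, 0]})
toy_df.loc[~(toy_df == 0).any(axis=1)]
# only the first row survives, since the other two rows each contain a zero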

new_df2 = new_df.copy()
new_df2 = new_df2.reset_index()
new_df2.drop(['index'], axis = 1,inplace=True)
new_df2

features = new_df2.copy()
features

feature_list2 = list(features.columns)

features_2 = np.array(features)

import datetime

year1 = features_2[:, feature_list2.index('Year')]

month1 = features_2[:, feature_list2.index('Month')]

dates1 = [str(int(Year)) + '-' + str(int(Month)) for Year, Month in zip(year1, month1)]

dates2_1 = [datetime.datetime.strptime(date, '%Y-%m') for date in dates1]


Fecha_1 = pd.DataFrame(data={'Fecha': dates2_1})

Fecha2_1 = Fecha_1.values

data02 = features.values

Real_1 = pd.concat([Fecha_1,features],axis=1)

Real_1['Fecha'] = pd.to_datetime(Real_1['Fecha'])

Real_1.sort_values(by=['Fecha'],inplace=True)

Real2_1 = Real_1.copy()

Real2_1.set_index(Real_1.Fecha, inplace=True)

Real3_1 = Real2_1.copy()

Real3_1.drop(['Fecha'], axis = 1,inplace=True)

Real3_1

# grid search ARIMA parameters for time series


import warnings
from math import sqrt
from pandas import read_csv
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# evaluate an ARIMA model for a given order (p,d,q)


def evaluate_arima_model(X, arima_order):
    # prepare training dataset
    train_size = int(len(X) * 0.66)
    train, test = X[0:train_size], X[train_size:]
    history = [x for x in train]
    # make predictions
    predictions = list()
    for t in range(len(test)):
        model = ARIMA(history, order=arima_order)
        model_fit = model.fit()
        yhat = model_fit.forecast()[0]
        predictions.append(yhat)
        history.append(test[t])
    # calculate out of sample error
    rmse = sqrt(mean_squared_error(test, predictions))
    return rmse

# evaluate combinations of p, d and q values for an ARIMA model


def evaluate_models(dataset, p_values, d_values, q_values):
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                    if rmse < best_score:
                        best_score, best_cfg = rmse, order
                    print('ARIMA%s RMSE=%.3f' % (order, rmse))
                except:
                    continue
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

# evaluate parameters
p_values = [0, 1, 2, 4, 6, 8, 10,12,14,16,18,20]
d_values = range(0, 3)
q_values = range(0, 3)
warnings.filterwarnings("ignore")
evaluate_models(Real3_1.Sales.values, p_values, d_values, q_values)
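
# As a rough cross-check on the walk-forward RMSE grid search above (a sketch, not part
# of the original notebook), a few candidate orders can also be ranked by in-sample AIC
# using the same statsmodels ARIMA class; the candidate list here is arbitrary.
for order in [(1, 0, 1), (6, 0, 1), (18, 0, 1)]:
    try:
        aic = ARIMA(Real3_1.Sales.values.astype('float32'), order=order).fit().aic
        print('ARIMA%s AIC=%.1f' % (order, aic))
    except Exception as e:
        print('ARIMA%s failed: %s' % (order, e))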

# fit an ARIMA model and plot residual errors


from datetime import datetime
from pandas import read_csv
from pandas import DataFrame
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot

# fit model
model = ARIMA(Real3_1.Sales, order=(18, 0, 1))
model_fit = model.fit()
# summary of fit model
print(model_fit.summary())
# line plot of residuals
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
# density plot of residuals
residuals.plot(kind='kde')
pyplot.show()
# summary stats of residuals
print(residuals.describe())

from pandas.plotting import autocorrelation_plot

autocorrelation_plot(Real3_1.Sales_6)
pyplot.show()
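
# A possible complement to the autocorrelation plot above (sketch only): ACF and PACF
# plots of the ARIMA(18, 0, 1) residuals held in model_fit, to see how much structure
# the fitted model leaves behind; the lag count of 12 is an arbitrary choice.
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

plot_acf(model_fit.resid, lags=12)
pyplot.show()
plot_pacf(model_fit.resid, lags=12)
pyplot.show()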

import matplotlib.pyplot as plt


plt.rcParams["figure.figsize"] = (20,15)

from pandas import read_csv


from datetime import datetime
from matplotlib import pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from math import sqrt

# split into train and test sets


X = Real3_1.Sales.values
size = int(len(X) * 0.80)
train, test = X[0:size], X[size:len(X)]
history = [x for x in train]
history2 = [x for x in test]

predictions_test = list()
# walk-forward validation
for t in range(len(test)):
    model = ARIMA(history, order=(18, 0, 1))
    model_fit = model.fit()
    #print("Model")
    #print(model_fit)
    output = model_fit.forecast()
    print("Output")
    print(output)
    yhat = output[0]
    print("yhat")
    print(yhat)
    predictions_test.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
# evaluate forecasts
rmse_test = sqrt(mean_squared_error(test, predictions_test))
mape_test = mean_absolute_percentage_error(test, predictions_test)
print('Test RMSE: %.3f' % rmse_test)
print('Test MAPE: %.3f' % mape_test)
# plot forecasts against actual outcomes
pyplot.plot(test)
pyplot.plot(predictions_test, color='red')
pyplot.show()

mape_test * 100
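
# A small manual check (illustrative only) that the sklearn MAPE above matches the mean
# of |actual - predicted| / |actual| over the test window.
manual_mape = np.mean(np.abs((np.array(test) - np.array(predictions_test)) / np.array(test)))
print('Manual test MAPE: %.3f' % manual_mape)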

predictions_train = list()
# walk-forward validation
for t in range(len(train)):
    model = ARIMA(history, order=(18, 0, 1))
    model_fit = model.fit()
    #print("Model")
    #print(model_fit)
    output = model_fit.forecast()
    print("Output")
    print(output)
    yhat = output[0]
    print("yhat")
    print(yhat)
    predictions_train.append(yhat)
    obs = train[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
# evaluate forecasts
rmse_train = sqrt(mean_squared_error(train, predictions_train))
print('Train RMSE: %.3f' % rmse_train)
mape_train = mean_absolute_percentage_error(train, predictions_train)
print('Train MAPE: %.3f' % mape_train)
# plot forecasts against actual outcomes
pyplot.plot(train)
pyplot.plot(predictions_train, color='red')
pyplot.show()

mape_train * 100

predictions_data_concat = np.concatenate((predictions_train, predictions_test), axis=0)
predictions_data_concat

import matplotlib.pyplot as plt


plt.rcParams["figure.figsize"] = (20,15)
# line plot of observed vs predicted
fig, ax = plt.subplots(1)
ax.title.set_text('Comparison between predicted and actual values for ItemA - ARIMA')
ax.plot(new_df2.Sales, label='original', color='blue')
ax.plot(predictions_data_concat, label='predictions', color='red')
ax.axvline(x=len(train)+1, color='k', linestyle='--')
ax.legend(loc='upper right')
ax.set_xlabel('Time', fontsize=16)
ax.set_ylabel('ItemA demand ' + r'$(KG)$', fontsize=16)
plt.show()
