
Amazon ARMA Model

Loading and Visualizing Data


# Load the Amazon stock data and set a daily frequency

import pandas as pd

df = pd.read_csv("AMZN.csv", parse_dates=['Date'], index_col='Date')

df = df.asfreq('D').ffill()

closing_prices = df['Close']

# Visualizing the closing prices over time using matplotlib.

import matplotlib.pyplot as plt

plt.figure(figsize=(12, 6))

plt.plot(closing_prices, label='Closing Prices')

plt.title('Amazon Stock Closing Prices Over Time')

plt.xlabel('Date')

plt.ylabel('Closing Price')

plt.legend()

plt.show()
Splitting Data
train_size = int(len(closing_prices) * 0.8)

train, test = closing_prices[:train_size], closing_prices[train_size:]

Time Series Analysis


# Plot ACF and PACF of the original time series to see if the data is stationary

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

acf_original = plot_acf(train)

pacf_original = plot_pacf(train)
# Perform Augmented Dickey-Fuller test for stationarity

from statsmodels.tsa.stattools import adfuller

adf_test = adfuller(train)

print(f'p-value: {adf_test[1]}')
p-value: 0.6863921026685542
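The p-value of roughly 0.69 is well above 0.05, so the null hypothesis of a unit root cannot be rejected and the raw closing prices are treated as non-stationary. A small helper (a sketch; the adf_report name is my own) makes the full ADF output easier to read than the p-value alone:

# Sketch: print the ADF statistic, p-value, and critical values together
def adf_report(series, name='series'):
    result = adfuller(series)
    print(f'{name}: ADF statistic = {result[0]:.3f}, p-value = {result[1]:.4f}')
    for level, value in result[4].items():
        print(f'  critical value ({level}): {value:.3f}')

adf_report(train, 'train (price levels)')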

Making the Data Stationary


# Differencing to make the series stationary

train_diff = train.diff().dropna()

# Visualizing the differenced data

plt.figure(figsize=(12, 6))

plt.plot(train_diff, label='1st Order Differenced Closing Prices')

plt.title('1st Order Differenced Amazon Stock Closing Prices')

plt.xlabel('Date')

plt.ylabel('Differenced Closing Price')

plt.legend()

plt.show()
# Re-check stationarity with the ACF and PACF of the differenced series

acf_diff = plot_acf(train_diff)

pacf_diff = plot_pacf(train_diff)
# Perform Augmented Dickey-Fuller test for stationarity

adf_test_diff = adfuller(train_diff)

print(f'p-value: {adf_test_diff[1]}')
p-value: 0.0

Auto ARMA Model


from pmdarima import auto_arima

# Use auto_arima to find the best model

model = auto_arima(train_diff, suppress_warnings=True, seasonal=False)

# Get the best order

best_order = model.get_params()['order']

print("Best Order:", best_order)

Best Order: (2, 0, 2)


ARMA Model Fitting
import itertools

from statsmodels.tsa.arima.model import ARIMA

# Fit ARMA model with the best order

p, q = 2, 2

model = ARIMA(train_diff, order=(p, 0, q))

results = model.fit()

# Check AIC and BIC values

aic = results.aic

bic = results.bic

print(f'AIC: {aic}')

print(f'BIC: {bic}')

AIC: 6244.2862338379855
BIC: 6275.995159313397
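Note that itertools is imported in the fitting cell above but never used; presumably a manual grid search over small (p, q) values was intended as a cross-check on auto_arima. A minimal sketch of such a search on train_diff, selecting the order with the lowest AIC (the variable names are my own):

# Sketch: brute-force search over small (p, q) orders, keeping the lowest AIC
best_aic, best_pq = float('inf'), None
for cand_p, cand_q in itertools.product(range(4), range(4)):
    try:
        cand_fit = ARIMA(train_diff, order=(cand_p, 0, cand_q)).fit()
    except Exception:
        continue
    if cand_fit.aic < best_aic:
        best_aic, best_pq = cand_fit.aic, (cand_p, cand_q)

print('Best (p, q) by AIC:', best_pq, 'AIC:', best_aic)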

# Check the residuals for randomness and stationarity

residuals = results.resid

plt.figure(figsize=(12, 6))

plt.plot(residuals)

plt.title('Residuals of ARMA Model')

plt.xlabel('Date')

plt.ylabel('Residuals')

plt.show()
Forecasting and Evaluation
# Forecast on the test values

forecast_steps = len(test)

forecast = results.forecast(steps=forecast_steps)

# Plot actual vs predicted values for the test set

plt.figure(figsize=(12, 6))

plt.plot(test.index, test.values, label='Actual')

plt.plot(test.index, forecast, label='Predicted')


plt.title('ARIMA Model - Actual vs Predicted on Test Set')

plt.xlabel('Date')

plt.ylabel('Closing Price')

plt.legend()

plt.show()
MSE and MAE
# Calculate Mean Squared Error (MSE) and Mean Absolute Error (MAE)

from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(test, forecast)

mae = mean_absolute_error(test, forecast)

print(f'Mean Squared Error (MSE): {mse}')


print(f'Mean Absolute Error (MAE): {mae}')
Mean Squared Error (MSE): 14503.066111204844
Mean Absolute Error (MAE): 118.31058324851284
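These errors look large because the model was fitted on the differenced series, so its forecasts are day-to-day changes rather than price levels, while test holds raw closing prices. A sketch (my own addition, not part of the original run) of undoing the differencing before evaluating: the predicted changes are cumulatively summed onto the last training price.

# Sketch: convert differenced forecasts back to price levels before scoring
forecast_prices = train.iloc[-1] + forecast.cumsum()

mse_levels = mean_squared_error(test, forecast_prices)
mae_levels = mean_absolute_error(test, forecast_prices)
print(f'MSE on price levels: {mse_levels}')
print(f'MAE on price levels: {mae_levels}')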

Forecasting the closing value


# Predict the closing value of the next day

last_observation = closing_prices.index[-1]

next_day = last_observation + pd.Timedelta(days=1)

predicted_next_day = results.forecast(steps=1)[0]

print(f'Predicted Closing Value for {next_day.date()}: {predicted_next_day}')

Predicted Closing Value for 2023-02-18: 0.3408541885702173
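For the same reason, the value of about 0.34 is a predicted change, not a price. Under that reading, a sketch of converting it into a predicted closing price (assuming the last observed close is the right baseline):

# Sketch: add the one-step-ahead predicted change to the last observed close
predicted_change = results.forecast(steps=1).iloc[0]
predicted_price = closing_prices.iloc[-1] + predicted_change
print(f'Predicted closing price for {next_day.date()}: {predicted_price:.2f}')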

PSD
import numpy as np

from scipy.fft import fft

import matplotlib.pyplot as plt

closing_prices = df['Close'].values # Convert pandas Series to NumPy array

# Compute the FFT

fft_values = fft(closing_prices)
# Compute the Power Spectral Density (PSD)

psd_values = np.abs(fft_values) ** 2

# Plot the PSD

plt.figure(figsize=(12, 6))

plt.plot(psd_values)

plt.title('Power Spectral Density (PSD)')

plt.xlabel('Frequency')

plt.ylabel('Power')

plt.show()

# The PSD suggests that the data may not exhibit strong periodic behavior or significant frequency components.
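The PSD above is plotted against the raw FFT bin index, which makes the x-axis hard to interpret and also shows the spectrum's mirror image. A sketch (assuming one sample per calendar day, as implied by asfreq('D') earlier) that restricts the plot to positive frequencies with a labelled axis:

from scipy.fft import fftfreq

# Sketch: plot the PSD over positive frequencies only, in cycles per day
n = len(closing_prices)
freqs = fftfreq(n, d=1.0)   # d=1 day between samples
pos = freqs > 0             # keep the positive half of the spectrum

plt.figure(figsize=(12, 6))
plt.semilogy(freqs[pos], psd_values[pos])
plt.title('Power Spectral Density (positive frequencies)')
plt.xlabel('Frequency (cycles per day)')
plt.ylabel('Power')
plt.show()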

Spectrogram
from scipy.signal import spectrogram

closing_prices = df['Close']

# Compute the spectrogram

frequencies, times, spectrogram_values = spectrogram(closing_prices)

print("times shape:", times.shape)


print("frequencies shape:", frequencies.shape)

print("spectrogram_values.T shape:", spectrogram_values.T.shape)


times shape: (8,)
frequencies shape: (129,)
spectrogram_values.T shape: (8, 129)

# Plot the spectrogram

plt.figure(figsize=(12, 6))

plt.pcolormesh(times, frequencies, np.log(spectrogram_values + 1e-10), shading='auto')

plt.title('Spectrogram')

plt.xlabel('Time')

plt.ylabel('Frequency')

plt.colorbar(label='Log Power')

plt.show()
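With the default window length (nperseg=256) the spectrogram has only eight time bins, so most temporal detail is lost. A sketch using a shorter window (the choice of 64 samples with 50% overlap is an assumption, not something tuned here):

# Sketch: shorter window for finer time resolution in the spectrogram
f_short, t_short, Sxx_short = spectrogram(closing_prices.values, fs=1.0, nperseg=64, noverlap=32)

plt.figure(figsize=(12, 6))
plt.pcolormesh(t_short, f_short, np.log(Sxx_short + 1e-10), shading='auto')
plt.title('Spectrogram (nperseg=64)')
plt.xlabel('Time (days)')
plt.ylabel('Frequency (cycles per day)')
plt.colorbar(label='Log Power')
plt.show()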

Amazon Neural Network

Import Libraries and Load Data


import pandas as pd

import numpy as np

from sklearn.preprocessing import MinMaxScaler


from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, LSTM

from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

Load and Preprocess Data


# Load the data

data = pd.read_csv('AMZN.csv')

closing_prices = data['Close'].values.reshape(-1, 1)

# Normalize the data

scaler = MinMaxScaler(feature_range=(0, 1))

closing_prices_normalized = scaler.fit_transform(closing_prices)

# Create a function to prepare the data for the LSTM model

def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

Create LSTM Dataset


# Define the look-back period

look_back = 10

# Prepare the dataset


X, Y = create_dataset(closing_prices_normalized, look_back)

Split Data into Training and Testing Sets


# Split the data into training and testing sets

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
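Note that train_test_split shuffles the windows, so test windows sit in between (and overlap) training windows in time, which leaks information from the future into training. A chronological split (a sketch of the alternative, not what was run above; the *_chrono names are my own) would look like:

# Sketch: chronological split that keeps the last 20% of windows for testing
split_index = int(len(X) * 0.8)
X_train_chrono, X_test_chrono = X[:split_index], X[split_index:]
Y_train_chrono, Y_test_chrono = Y[:split_index], Y[split_index:]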

Reshape Data for LSTM Model


# Reshape input to be [samples, time steps, features] for LSTM model

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

Build and Train the LSTM Model


# Build the LSTM model

model = Sequential()

model.add(LSTM(units=50, input_shape=(look_back, 1)))

model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model

model.fit(X_train, Y_train, epochs=50, batch_size=32, verbose=2)


Epoch 1/50
32/32 - 3s - loss: 0.0684 - 3s/epoch - 88ms/step
Epoch 2/50
32/32 - 0s - loss: 0.0053 - 268ms/epoch - 8ms/step
Epoch 3/50
32/32 - 0s - loss: 0.0023 - 277ms/epoch - 9ms/step
Epoch 4/50
32/32 - 0s - loss: 0.0021 - 266ms/epoch - 8ms/step
Epoch 5/50
32/32 - 0s - loss: 0.0021 - 267ms/epoch - 8ms/step
Epoch 6/50
32/32 - 0s - loss: 0.0020 - 267ms/epoch - 8ms/step
Epoch 7/50
32/32 - 0s - loss: 0.0020 - 278ms/epoch - 9ms/step
Epoch 8/50
32/32 - 0s - loss: 0.0020 - 262ms/epoch - 8ms/step
Epoch 9/50
32/32 - 0s - loss: 0.0020 - 277ms/epoch - 9ms/step
Epoch 10/50
32/32 - 0s - loss: 0.0019 - 261ms/epoch - 8ms/step
Epoch 11/50
32/32 - 0s - loss: 0.0019 - 273ms/epoch - 9ms/step
Epoch 12/50
32/32 - 0s - loss: 0.0019 - 267ms/epoch - 8ms/step
Epoch 13/50
32/32 - 0s - loss: 0.0019 - 267ms/epoch - 8ms/step
Epoch 14/50
32/32 - 0s - loss: 0.0019 - 284ms/epoch - 9ms/step
Epoch 15/50
32/32 - 0s - loss: 0.0018 - 314ms/epoch - 10ms/step
Epoch 16/50
32/32 - 0s - loss: 0.0018 - 264ms/epoch - 8ms/step
Epoch 17/50
32/32 - 0s - loss: 0.0017 - 247ms/epoch - 8ms/step
Epoch 18/50
32/32 - 0s - loss: 0.0017 - 281ms/epoch - 9ms/step
Epoch 19/50
32/32 - 0s - loss: 0.0016 - 326ms/epoch - 10ms/step
Epoch 20/50
32/32 - 0s - loss: 0.0016 - 289ms/epoch - 9ms/step
Epoch 21/50
32/32 - 0s - loss: 0.0018 - 285ms/epoch - 9ms/step
Epoch 22/50
32/32 - 0s - loss: 0.0016 - 262ms/epoch - 8ms/step
Epoch 23/50
32/32 - 0s - loss: 0.0015 - 311ms/epoch - 10ms/step
Epoch 24/50
32/32 - 0s - loss: 0.0016 - 263ms/epoch - 8ms/step
Epoch 25/50
32/32 - 0s - loss: 0.0015 - 315ms/epoch - 10ms/step
Epoch 26/50
32/32 - 0s - loss: 0.0014 - 270ms/epoch - 8ms/step
Epoch 27/50
32/32 - 0s - loss: 0.0014 - 270ms/epoch - 8ms/step
Epoch 28/50
32/32 - 0s - loss: 0.0014 - 266ms/epoch - 8ms/step
Epoch 29/50
32/32 - 0s - loss: 0.0014 - 265ms/epoch - 8ms/step
Epoch 30/50
32/32 - 0s - loss: 0.0014 - 280ms/epoch - 9ms/step
Epoch 31/50
32/32 - 0s - loss: 0.0014 - 275ms/epoch - 9ms/step
Epoch 32/50
32/32 - 0s - loss: 0.0013 - 291ms/epoch - 9ms/step
Epoch 33/50
32/32 - 0s - loss: 0.0012 - 292ms/epoch - 9ms/step
Epoch 34/50
32/32 - 0s - loss: 0.0013 - 293ms/epoch - 9ms/step
Epoch 35/50
32/32 - 0s - loss: 0.0013 - 308ms/epoch - 10ms/step
Epoch 36/50
32/32 - 0s - loss: 0.0013 - 293ms/epoch - 9ms/step
Epoch 37/50
32/32 - 0s - loss: 0.0012 - 301ms/epoch - 9ms/step
Epoch 38/50
32/32 - 0s - loss: 0.0013 - 285ms/epoch - 9ms/step
Epoch 39/50
32/32 - 0s - loss: 0.0012 - 287ms/epoch - 9ms/step
Epoch 40/50
32/32 - 0s - loss: 0.0011 - 282ms/epoch - 9ms/step
Epoch 41/50
32/32 - 0s - loss: 0.0012 - 263ms/epoch - 8ms/step
Epoch 42/50
32/32 - 0s - loss: 0.0011 - 254ms/epoch - 8ms/step
Epoch 43/50
32/32 - 0s - loss: 0.0011 - 260ms/epoch - 8ms/step
Epoch 44/50
32/32 - 0s - loss: 0.0011 - 265ms/epoch - 8ms/step
Epoch 45/50
32/32 - 0s - loss: 0.0010 - 264ms/epoch - 8ms/step
Epoch 46/50
32/32 - 0s - loss: 0.0010 - 267ms/epoch - 8ms/step
Epoch 47/50
32/32 - 0s - loss: 0.0011 - 271ms/epoch - 8ms/step
Epoch 48/50
32/32 - 0s - loss: 0.0010 - 289ms/epoch - 9ms/step
Epoch 49/50
32/32 - 0s - loss: 9.9199e-04 - 257ms/epoch - 8ms/step
Epoch 50/50
32/32 - 0s - loss: 9.7514e-04 - 258ms/epoch - 8ms/step

Out[37]:
<keras.src.callbacks.History at 0x16408a60110>

Evaluate Model Performance on Test Data


# Make predictions on the test data
predictions = model.predict(X_test)
8/8 [==============================] - 1s 4ms/step

# Inverse transform the predictions and actual values to the original scale

predictions = scaler.inverse_transform(predictions)

Y_test_original = scaler.inverse_transform(Y_test.reshape(-1, 1))

# Calculate the root mean squared error (RMSE)

rmse = np.sqrt(mean_squared_error(Y_test_original, predictions))

print('Root Mean Squared Error:', rmse)

Root Mean Squared Error: 3.300441897865308

Visualize Predictions
# Plot the predictions against actual values

plt.plot(Y_test_original, label='Actual')

plt.plot(predictions, label='Predicted')

plt.legend()

plt.show()
Predict Next Day's Closing Value
# Prepare the last 'look_back' days of data

last_data = closing_prices[-look_back:]

last_data_normalized = scaler.transform(last_data.reshape(-1, 1))

# Reshape and predict

last_data_reshaped = last_data_normalized.reshape(1, look_back, 1)

next_day_prediction_normalized = model.predict(last_data_reshaped)
1/1 [==============================] - 0s 31ms/step

# Inverse transform to get the predicted closing value

next_day_prediction = scaler.inverse_transform(next_day_prediction_normalized)

print('Predicted Next Day Closing Value:', next_day_prediction[0, 0])


Predicted Next Day Closing Value: 98.60197

J&J ARMA Model

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from statsmodels.tsa.stattools import adfuller, kpss

from statsmodels.tsa.seasonal import seasonal_decompose

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from statsmodels.tsa.arima.model import ARIMA

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_absolute_error, mean_squared_error
Loading and Visualizing Data
# Step 1: Load the CSV file

df = pd.read_csv('jj.csv', parse_dates=['date'], index_col='date')

# Step 2: Decompose the time series to understand trends, patterns, and seasonality

result = seasonal_decompose(df['data'], model='additive', period=4)

result.plot()

plt.show()

Check for Stationarity


# Step 3: Check for stationarity

def check_stationarity(timeseries):
    adf_result = adfuller(timeseries, autolag='AIC')
    kpss_result = kpss(timeseries)
    print('ADF Statistic:', adf_result[0])
    print('p-value (ADF):', adf_result[1])
    print('Critical Values (ADF):', adf_result[4])
    print('\nKPSS Statistic:', kpss_result[0])
    print('p-value (KPSS):', kpss_result[1])
    print('Critical Values (KPSS):', kpss_result[3])

check_stationarity(df['data'])
ADF Statistic: 2.7420165734574735
p-value (ADF): 1.0
Critical Values (ADF): {'1%': -3.524624466842421, '5%': -2.9026070739026064,
'10%': -2.5886785262345677}

KPSS Statistic: 1.3634938602266964


p-value (KPSS): 0.01
Critical Values (KPSS): {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

C:\Users\PMLS\AppData\Local\Temp\ipykernel_2036\3808783141.py:4:
InterpolationWarning: The test statistic is outside of the range of p-values
available in the
look-up table. The actual p-value is smaller than the p-value returned.

kpss_result = kpss(timeseries)
# apply differencing

df['data_diff'] = df['data'] - df['data'].shift(1)

check_stationarity(df['data_diff'].dropna())
ADF Statistic: -0.40740976363804116
p-value (ADF): 0.9088542416911313
Critical Values (ADF): {'1%': -3.524624466842421, '5%': -2.9026070739026064,
'10%': -2.5886785262345677}

KPSS Statistic: 0.22139627096747283


p-value (KPSS): 0.1
Critical Values (KPSS): {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

C:\Users\PMLS\AppData\Local\Temp\ipykernel_2036\3808783141.py:4:
InterpolationWarning: The test statistic is outside of the range of p-values
available in the
look-up table. The actual p-value is greater than the p-value returned.

kpss_result = kpss(timeseries)
# Drop rows with missing values

df = df.dropna()

Log differencing
# Apply log differencing

df['data_log_diff'] = np.log(df['data']) - np.log(df['data'].shift(1))

# Check stationarity of log-differenced data

check_stationarity(df['data_log_diff'].dropna())

df = df.dropna()
ADF Statistic: -4.210044890698209
p-value (ADF): 0.0006335725359760862
Critical Values (ADF): {'1%': -3.5194805351545413, '5%': -2.9003945086747343,
'10%': -2.5874984279778395}

KPSS Statistic: 0.20166745754525875


p-value (KPSS): 0.1
Critical Values (KPSS): {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

C:\Users\PMLS\AppData\Local\Temp\ipykernel_2036\3808783141.py:4:
InterpolationWarning: The test statistic is outside of the range of p-values
available in the
look-up table. The actual p-value is greater than the p-value returned.

kpss_result = kpss(timeseries)

ACF and PACF Plots


from statsmodels.tsa.arima.model import ARIMA
# Step 4: ACF and PACF analysis for log-differenced data

plot_acf(df['data_log_diff'].dropna(), lags=20)

plt.show()

plot_pacf(df['data_log_diff'].dropna(), lags=20)

plt.show()
Splitting Data
# Step 5: Split data into training and test sets

train_size = int(len(df) * 0.7)

train, test = df['data_log_diff'][:train_size], df['data_log_diff'][train_size:]

Auto ARIMA
from pmdarima import auto_arima

# Use auto-arima to find the best model

autoarima_model = auto_arima(train, seasonal=True, suppress_warnings=True)

print(autoarima_model.summary())
SARIMAX Results
==============================================================================
Dep. Variable:                      y   No. Observations:                   57
Model:               SARIMAX(3, 0, 2)   Log Likelihood                  54.168
Date:                Sat, 16 Mar 2024   AIC                            -94.335
Time:                        11:24:33   BIC                            -80.034
Sample:                             0   HQIC                           -88.777
                                 - 57
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      0.1618      0.025      6.490      0.000       0.113       0.211
ar.L1         -0.9634      0.088    -10.895      0.000      -1.137      -0.790
ar.L2         -0.9728      0.066    -14.730      0.000      -1.102      -0.843
ar.L3         -0.9187      0.036    -25.298      0.000      -0.990      -0.848
ma.L1          0.2672      0.166      1.612      0.107      -0.058       0.592
ma.L2          0.3396      0.154      2.202      0.028       0.037       0.642
sigma2         0.0079      0.002      4.197      0.000       0.004       0.012
===================================================================================
Ljung-Box (L1) (Q):            0.01   Jarque-Bera (JB):                 0.06
Prob(Q):                       0.93   Prob(JB):                         0.97
Heteroskedasticity (H):        1.09   Skew:                             0.08
Prob(H) (two-sided):           0.85   Kurtosis:                         3.02
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

Fitting the ARMA Model


print(autoarima_model.order)

(3, 0, 2)

# 6. Model Fitting

arima_model = ARIMA(train, order=autoarima_model.order)

arima_fit = arima_model.fit()

# 7. Check residuals

residuals = arima_fit.resid

plt.plot(residuals)

plt.show()
Forecast Accuracy
# 8. Evaluate forecast accuracy

arima_forecast = arima_fit.forecast(steps=len(test))

mae = mean_absolute_error(test, arima_forecast)

mse = mean_squared_error(test, arima_forecast)

rmse = np.sqrt(mse)

print(f'MAE: {mae:.2f}')

print(f'MSE: {mse:.2f}')

print(f'RMSE: {rmse:.2f}')
MAE: 0.14
MSE: 0.03
RMSE: 0.18

# 9. Plotting predictions on test data

plt.figure(figsize=(12, 6))

plt.plot(df.index[train_size:], df['data_log_diff'][train_size:], label='Actual Log-Differenced')

plt.plot(df.index[train_size:], arima_forecast, label='ARIMA Forecast')

plt.legend()

plt.title('ARIMA Model Forecast on Test Data')

plt.show()

# 10. Forecast next 24 months

forecast_steps = 24

forecast_index = pd.date_range(df.index[-1], periods=forecast_steps + 1, freq='M')[1:]

arima_next_24_forecast = arima_fit.forecast(steps=forecast_steps)

# 11. Plotting forecasted values on log differenced data

plt.figure(figsize=(12, 6))

plt.plot(df.index, df['data_log_diff'], label='Actual Log-Differenced')

plt.plot(forecast_index, arima_next_24_forecast, label='ARIMA Forecast Next 24 Months')

plt.legend()

plt.title('ARIMA Model Forecast for Next 24 Months')

plt.show()
# 10. Forecast next 24 months on actual data

n_forecast = 24

predict = arima_fit.get_prediction(end=arima_model.nobs + n_forecast)

idx = np.arange(len(predict.predicted_mean))

fig, ax = plt.subplots()

ax.plot(df['data'], 'blue')

ax.plot(idx[-n_forecast:], predict.predicted_mean[-n_forecast:], 'k--')

ax.set(title='Forecast of Johnson&Johnson Sales')

plt.show()
predictions_int = arima_fit.get_forecast(steps=24)

predictions_int.predicted_mean
57 -0.117143
58 0.126009
59 0.079427
60 0.070450
61 -0.099362
62 0.115915
63 0.081700
64 0.061709
65 -0.083857
66 0.107373
67 0.082908
68 0.054579
69 -0.070298
70 0.100121
71 0.083281
72 0.048791
73 -0.058409
74 0.093943
75 0.083007
76 0.044120
77 -0.047955
78 0.088657
79 0.082240
80 0.040376
Name: predicted_mean, dtype: float64
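These forecasts are still on the log-differenced scale. A sketch (my own addition) of mapping them back to actual sales: cumulatively sum the forecast onto the log of the last observed value, then exponentiate.

# Sketch: invert the log-differencing to recover forecasts in sales units
last_log_value = np.log(df['data'].iloc[-1])
forecast_log_levels = last_log_value + predictions_int.predicted_mean.cumsum()
forecast_levels = np.exp(forecast_log_levels)
print(forecast_levels)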

Power Spectral Density


import numpy as np

from scipy.fft import fft

import matplotlib.pyplot as plt

# Compute the FFT of the sales series (the univariate 'data' column, not the whole DataFrame)

fft_values = fft(df['data'].values)

# Compute the Power Spectral Density (PSD)

psd_values = np.abs(fft_values) ** 2

# Plot the periodogram

plt.figure(figsize=(12, 6))

plt.plot(psd_values)

plt.title('Power Spectral Density (Periodogram)')

plt.xlabel('Frequency')

plt.ylabel('Power')

plt.show()

# Plot only the first half of the PSD (due to symmetry)

plt.figure(figsize=(12, 6))

plt.plot(psd_values[:len(df) // 2])

plt.title('Power Spectral Density (Periodogram)')

plt.xlabel('Frequency')

plt.ylabel('Power')

plt.show()
Spectrogram
from scipy.signal import spectrogram

# Compute the spectrogram

frequencies, times, spectrogram_values = spectrogram(df)

print("times shape:", times.shape)

print("frequencies shape:", frequencies.shape)

print("spectrogram_values.T shape:", spectrogram_values.T.shape)


times shape: (1,)
frequencies shape: (2,)
spectrogram_values.T shape: (1, 2, 82)

# Reshape the spectrogram_values to match the dimensions expected by pcolormesh

spectrogram_values_reshaped = spectrogram_values.T.squeeze()

times = np.arange(len(df))

# Plot the spectrogram

plt.figure(figsize=(12, 6))

plt.pcolormesh(times, frequencies, np.log(spectrogram_values_reshaped + 1e-10), shading='auto')

plt.title('Spectrogram')

plt.xlabel('Time')

plt.ylabel('Frequency')

plt.colorbar(label='Log Power')

plt.show()
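The shapes printed above (two frequency bins and a single time bin) arise because spectrogram was applied to the whole DataFrame, so SciPy treats each row's three columns as a tiny signal. A sketch of computing the spectrogram on the univariate sales series instead (the window length of 16 and fs=4 samples per year are assumptions for this quarterly series):

# Sketch: spectrogram of the univariate sales series with a short window
f_jj, t_jj, Sxx_jj = spectrogram(df['data'].values, fs=4.0, nperseg=16, noverlap=8)

plt.figure(figsize=(12, 6))
plt.pcolormesh(t_jj, f_jj, np.log(Sxx_jj + 1e-10), shading='auto')
plt.title('Spectrogram of the sales series (nperseg=16)')
plt.xlabel('Time (years)')
plt.ylabel('Frequency (cycles per year)')
plt.colorbar(label='Log Power')
plt.show()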

J&J Neural Network

Importing Libraries:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import matplotlib.pyplot as plt

Load and Preprocess Data:


# Load the data from the CSV file

data = pd.read_csv('jj.csv')

sales_data = data['data'].values.astype(float).reshape(-1, 1)

# Normalize the data using Min-Max scaling

scaler = MinMaxScaler(feature_range=(0, 1))

sales_data_normalized = scaler.fit_transform(sales_data)

# Function to create sequences for time series forecasting

def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length]
        sequences.append(seq)
    return np.array(sequences)

# Set the sequence length

sequence_length = 12

# Create sequences for training

training_sequences = create_sequences(sales_data_normalized, sequence_length)

Split Data for Training and Testing:


# Split data into training and testing sets

training_size = int(len(training_sequences) * 0.8)

training_data = training_sequences[:training_size]

testing_data = training_sequences[training_size:]

# Split the data into input (X) and output (y)


X_train, y_train = training_data[:, :-1], training_data[:, -1]

X_test, y_test = testing_data[:, :-1], testing_data[:, -1]

Prepare Input and Output Data:


# Reshape input to be 3D [samples, timesteps, features]

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

Build the LSTM Model:


# Build the LSTM model

model = Sequential()

model.add(LSTM(50, input_shape=(X_train.shape[1], 1)))

model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

Train the Model:


# Train the model

model_fit = model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=2)
Epoch 1/50
2/2 - 0s - loss: 0.0011 - 23ms/epoch - 11ms/step
Epoch 2/50
2/2 - 0s - loss: 0.0011 - 21ms/epoch - 10ms/step
Epoch 3/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 4/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 5/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 6/50
2/2 - 0s - loss: 0.0011 - 33ms/epoch - 16ms/step
Epoch 7/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 8/50
2/2 - 0s - loss: 0.0011 - 22ms/epoch - 11ms/step
Epoch 9/50
2/2 - 0s - loss: 0.0011 - 31ms/epoch - 15ms/step
Epoch 10/50
2/2 - 0s - loss: 0.0011 - 29ms/epoch - 14ms/step
Epoch 11/50
2/2 - 0s - loss: 0.0011 - 21ms/epoch - 10ms/step
Epoch 12/50
2/2 - 0s - loss: 0.0011 - 23ms/epoch - 11ms/step
Epoch 13/50
2/2 - 0s - loss: 0.0011 - 28ms/epoch - 14ms/step
Epoch 14/50
2/2 - 0s - loss: 0.0011 - 20ms/epoch - 10ms/step
Epoch 15/50
2/2 - 0s - loss: 0.0011 - 31ms/epoch - 15ms/step
Epoch 16/50
2/2 - 0s - loss: 0.0010 - 29ms/epoch - 14ms/step
Epoch 17/50
2/2 - 0s - loss: 0.0010 - 20ms/epoch - 10ms/step
Epoch 18/50
2/2 - 0s - loss: 0.0010 - 22ms/epoch - 11ms/step
Epoch 19/50
2/2 - 0s - loss: 0.0010 - 10ms/epoch - 5ms/step
Epoch 20/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 21/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 22/50
2/2 - 0s - loss: 0.0010 - 38ms/epoch - 19ms/step
Epoch 23/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 24/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 25/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 26/50
2/2 - 0s - loss: 0.0010 - 22ms/epoch - 11ms/step
Epoch 27/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 28/50
2/2 - 0s - loss: 0.0010 - 31ms/epoch - 16ms/step
Epoch 29/50
2/2 - 0s - loss: 0.0010 - 22ms/epoch - 11ms/step
Epoch 30/50
2/2 - 0s - loss: 0.0010 - 23ms/epoch - 11ms/step
Epoch 31/50
2/2 - 0s - loss: 0.0010 - 23ms/epoch - 12ms/step
Epoch 32/50
2/2 - 0s - loss: 0.0010 - 16ms/epoch - 8ms/step
Epoch 33/50
2/2 - 0s - loss: 9.9725e-04 - 33ms/epoch - 17ms/step
Epoch 34/50
2/2 - 0s - loss: 9.9585e-04 - 31ms/epoch - 16ms/step
Epoch 35/50
2/2 - 0s - loss: 9.9480e-04 - 16ms/epoch - 8ms/step
Epoch 36/50
2/2 - 0s - loss: 9.8967e-04 - 38ms/epoch - 19ms/step
Epoch 37/50
2/2 - 0s - loss: 9.8387e-04 - 31ms/epoch - 16ms/step
Epoch 38/50
2/2 - 0s - loss: 9.8935e-04 - 16ms/epoch - 8ms/step
Epoch 39/50
2/2 - 0s - loss: 9.7819e-04 - 16ms/epoch - 8ms/step
Epoch 40/50
2/2 - 0s - loss: 9.7617e-04 - 38ms/epoch - 19ms/step
Epoch 41/50
2/2 - 0s - loss: 9.7232e-04 - 16ms/epoch - 8ms/step
Epoch 42/50
2/2 - 0s - loss: 9.7557e-04 - 16ms/epoch - 8ms/step
Epoch 43/50
2/2 - 0s - loss: 9.8442e-04 - 22ms/epoch - 11ms/step
Epoch 44/50
2/2 - 0s - loss: 9.6747e-04 - 31ms/epoch - 16ms/step
Epoch 45/50
2/2 - 0s - loss: 9.6187e-04 - 31ms/epoch - 16ms/step
Epoch 46/50
2/2 - 0s - loss: 9.6344e-04 - 31ms/epoch - 16ms/step
Epoch 47/50
2/2 - 0s - loss: 9.5741e-04 - 22ms/epoch - 11ms/step
Epoch 48/50
2/2 - 0s - loss: 9.5320e-04 - 31ms/epoch - 16ms/step
Epoch 49/50
2/2 - 0s - loss: 9.4968e-04 - 31ms/epoch - 16ms/step
Epoch 50/50
2/2 - 0s - loss: 9.4901e-04 - 22ms/epoch - 11ms/step

Test the Model:


# Test the model

test_loss = model.evaluate(X_test, y_test, verbose=0)

print(f'Test Loss: {test_loss}')

Test Loss: 0.01699160970747471
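The test loss above is measured on the normalized (0-1) scale. A sketch (my own addition; variable names are new) of reporting the error in original sales units by inverse-transforming the predictions first:

# Sketch: evaluate test RMSE in original sales units
test_predictions = scaler.inverse_transform(model.predict(X_test))
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))
rmse_original = np.sqrt(mean_squared_error(y_test_actual, test_predictions))
print('Test RMSE (original units):', rmse_original)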

Make Future Predictions


# Make predictions for the next 24 months

future_predictions = []

current_sequence = list(X_test[-1].flatten())

for i in range(24):
    input_sequence = np.array(current_sequence[-(sequence_length - 1):]).reshape(1, sequence_length - 1, 1)
    predicted_value = model.predict(input_sequence)[0][0]
    future_predictions.append(predicted_value)
    current_sequence.append(predicted_value)

1/1 [==============================] - 0s 23ms/step


1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 29ms/step
1/1 [==============================] - 0s 33ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 28ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 29ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 23ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step
1/1 [==============================] - 0s 31ms/step

Inverse Transform Predictions:


# Inverse transform the predictions to get actual sales values
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

Display Predictions:
# Display the predictions for the next 24 months
print(f'Predictions for the next 24 months: {future_predictions}')
Predictions for the next 24 months: [[13.525593 ]
[13.94045 ]
[14.161354 ]
[14.346481 ]
[14.5128765]
[14.775693 ]
[14.8436775]
[14.963552 ]
[15.025752 ]
[15.252812 ]
[15.261148 ]
[15.34032 ]
[15.458272 ]
[15.556854 ]
[15.646535 ]
[15.729873 ]
[15.808032 ]
[15.8783865]
[15.947483 ]
[16.013062 ]
[16.07672 ]
[16.133318 ]
[16.190027 ]
[16.244 ]]

# Plotting training loss

plt.figure(figsize=(10, 6))

plt.plot(model_fit.history['loss'], label='Training Loss')

plt.title('Training Loss')

plt.xlabel('Epoch')

plt.ylabel('Loss')

plt.legend()

plt.show()
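To place the 24 forecast steps on a date axis alongside the history, a sketch (assuming jj.csv has a 'date' column, as used in the ARMA section, and that the series is quarterly, as the period=4 decomposition there suggests):

# Sketch: continue a quarterly date index past the last observation and plot
dates = pd.to_datetime(data['date'])
future_index = pd.date_range(dates.iloc[-1], periods=len(future_predictions) + 1, freq='Q')[1:]

plt.figure(figsize=(12, 6))
plt.plot(dates, sales_data, label='Historical sales')
plt.plot(future_index, future_predictions, label='Forecast (next 24 steps)')
plt.legend()
plt.title('LSTM Forecast Appended to Historical Sales')
plt.show()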
