import pandas as pd
import matplotlib.pyplot as plt

# Load the AMZN data ('Date' column name assumed), resample to a daily frequency, and forward-fill gaps
df = pd.read_csv('AMZN.csv', parse_dates=['Date'], index_col='Date')
df = df.asfreq('D').ffill()
closing_prices = df['Close']
plt.figure(figsize=(12, 6))
plt.plot(closing_prices, label='Close')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()
Splitting Data
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Hold out the last 20% of observations as a test set
train_size = int(len(closing_prices) * 0.8)
train, test = closing_prices[:train_size], closing_prices[train_size:]
acf_original = plot_acf(train)
pacf_original = plot_pacf(train)
# Perform Augmented Dickey-Fuller test for stationarity
from statsmodels.tsa.stattools import adfuller
adf_test = adfuller(train)
print(f'p-value: {adf_test[1]}')
p-value: 0.6863921026685542
# A p-value this far above 0.05 means we cannot reject the unit-root null, so difference once
train_diff = train.diff().dropna()
plt.figure(figsize=(12, 6))
plt.plot(train_diff, label='Differenced Close')
plt.xlabel('Date')
plt.ylabel('Daily Change')
plt.legend()
plt.show()
# Check the ACF and PACF of the differenced series for stationarity
acf_diff = plot_acf(train_diff)
pacf_diff = plot_pacf(train_diff)
# Perform Augmented Dickey-Fuller test for stationarity
adf_test_diff = adfuller(train_diff)
print(f'p-value: {adf_test_diff[1]}')
p-value: 0.0
# The differenced series is stationary (p ≈ 0), so fit an ARIMA with one difference;
# the AR and MA orders used here are p = q = 2
from statsmodels.tsa.arima.model import ARIMA
p, d, q = 2, 1, 2
model = ARIMA(train, order=(p, d, q))
results = model.fit()
aic = results.aic
bic = results.bic
print(f'AIC: {aic}')
print(f'BIC: {bic}')
AIC: 6244.2862338379855
BIC: 6275.995159313397
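AIC and BIC are most useful comparatively. One way to arrive at an order such as (2, 1, 2) is a small grid search over candidate orders; a minimal sketch (the search ranges here are illustrative, not from the original analysis):

import itertools
from statsmodels.tsa.arima.model import ARIMA

best_aic, best_order = float('inf'), None
for p_, q_ in itertools.product(range(3), range(3)):
    candidate = ARIMA(train, order=(p_, 1, q_)).fit()
    if candidate.aic < best_aic:
        best_aic, best_order = candidate.aic, (p_, 1, q_)
print(f'Lowest AIC: {best_aic:.1f} at order {best_order}')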
residuals = results.resid
plt.figure(figsize=(12, 6))
plt.plot(residuals)
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.show()
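Beyond eyeballing the residual plot, a Ljung-Box test quantifies whether the residuals behave like white noise; a short sketch using statsmodels:

from statsmodels.stats.diagnostic import acorr_ljungbox

# Large p-values mean no evidence of leftover autocorrelation in the residuals
print(acorr_ljungbox(residuals, lags=[10]))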
Forecasting and Evaluation
# Forecast over the test horizon
forecast_steps = len(test)
forecast = results.forecast(steps=forecast_steps)
plt.figure(figsize=(12, 6))
plt.plot(train, label='Train')
plt.plot(test, label='Test')
plt.plot(forecast, label='Forecast')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()
MSE and MAE
# Calculate Mean Squared Error (MSE) and Mean Absolute Error (MAE) on the test set
from sklearn.metrics import mean_squared_error, mean_absolute_error
print(f'MSE: {mean_squared_error(test, forecast)}')
print(f'MAE: {mean_absolute_error(test, forecast)}')
last_observation = closing_prices.index[-1]
predicted_next_day = results.forecast(steps=1).iloc[0]
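To attach a date to the one-step forecast, a small sketch (a plain calendar day is assumed; a real trading calendar would skip weekends and holidays):

next_day = last_observation + pd.Timedelta(days=1)
print(f'Predicted close for {next_day.date()}: {predicted_next_day:.2f}')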
PSD
import numpy as np
from scipy.fft import fft

fft_values = fft(closing_prices.values)
# Compute the Power Spectral Density (PSD)
psd_values = np.abs(fft_values) ** 2
plt.figure(figsize=(12, 6))
plt.plot(psd_values)
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.show()
# It suggests that the data may not exhibit strong periodic behavior or
# significant frequency components.
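The raw index on the x-axis can be mapped to physical frequencies with fftfreq, which also makes it easy to read off a dominant period; a sketch under the assumption of one sample per day:

from scipy.fft import fftfreq

n = len(closing_prices)
freqs = fftfreq(n, d=1.0)  # d=1.0 assumes one sample per day
half = n // 2
peak_idx = 1 + np.argmax(psd_values[1:half])  # skip the DC component at index 0
print(f'Dominant period: {1 / freqs[peak_idx]:.1f} days')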
Spectrogram
from scipy.signal import spectrogram

# Compute the spectrogram of the closing prices
frequencies, times, Sxx = spectrogram(closing_prices.values)
plt.figure(figsize=(12, 6))
plt.pcolormesh(times, frequencies, np.log(Sxx + 1e-10))  # small offset avoids log(0)
plt.title('Spectrogram')
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.colorbar(label='Log Power')
plt.show()
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
warnings.filterwarnings("ignore", category=FutureWarning)
data = pd.read_csv('AMZN.csv')
closing_prices = data['Close'].values.reshape(-1, 1)
scaler = MinMaxScaler()
closing_prices_normalized = scaler.fit_transform(closing_prices)
# Build supervised windows: each sample spans 'look_back' days, the target is the next day
def create_dataset(series, look_back):
    dataX, dataY = [], []
    for i in range(len(series) - look_back):
        dataX.append(series[i:i + look_back, 0])
        dataY.append(series[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
look_back = 10
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))  # LSTM layer assumed, mirroring the jj model below
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
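Before the History output below, the windows need a chronological split and the model a training call; a minimal sketch (the 80/20 ratio, epoch count, and batch size are assumptions):

X, Y = create_dataset(closing_prices_normalized, look_back)
X = X.reshape((X.shape[0], look_back, 1))
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
Y_train, Y_test = Y[:train_size], Y[train_size:]
model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0)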
Out[37]:
<keras.src.callbacks.History at 0x16408a60110>
# Inverse transform the predictions and actual values to the original scale
predictions = scaler.inverse_transform(model.predict(X_test))
Y_test_original = scaler.inverse_transform(Y_test.reshape(-1, 1))
Visualize Predictions
# Plot the predictions against actual values
plt.plot(Y_test_original, label='Actual')
plt.plot(predictions, label='Predicted')
plt.legend()
plt.show()
Predict Next Day's Closing Value
# Prepare the last 'look_back' days of data (normalized, shaped for the LSTM input)
last_data = closing_prices_normalized[-look_back:]
last_data_reshaped = last_data.reshape((1, look_back, 1))
next_day_prediction_normalized = model.predict(last_data_reshaped)
1/1 [==============================] - 0s 31ms/step
next_day_prediction = scaler.inverse_transform(next_day_prediction_normalized)
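Printing the de-normalized result (the (1, 1) array shape follows from the single-sample prediction above):

print(f'Predicted next-day closing price: {next_day_prediction[0][0]:.2f}')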
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

df = pd.read_csv('jj.csv', parse_dates=['date'], index_col='date')
# 'result' is taken to be a seasonal decomposition of the series (an assumption)
result = seasonal_decompose(df['data'])
result.plot()
plt.show()
from statsmodels.tsa.stattools import adfuller, kpss

def check_stationarity(timeseries):
    # ADF: null hypothesis is a unit root (non-stationary); KPSS: null is stationarity
    adf_result = adfuller(timeseries)
    kpss_result = kpss(timeseries)
    print(f'ADF Statistic: {adf_result[0]}')
    print(f'p-value (ADF): {adf_result[1]}')
    print(f'Critical Values (ADF): {adf_result[4]}')
    print(f'p-value (KPSS): {kpss_result[1]}')  # KPSS print assumed; only ADF lines survive in the output

check_stationarity(df['data'])
ADF Statistic: 2.7420165734574735
p-value (ADF): 1.0
Critical Values (ADF): {'1%': -3.524624466842421, '5%': -2.9026070739026064, '10%': -2.5886785262345677}
InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is smaller than the p-value returned.
  kpss_result = kpss(timeseries)
# Apply first differencing
df['data_diff'] = df['data'].diff()
check_stationarity(df['data_diff'].dropna())
ADF Statistic: -0.40740976363804116
p-value (ADF): 0.9088542416911313
Critical Values (ADF): {'1%': -3.524624466842421, '5%': -2.9026070739026064, '10%': -2.5886785262345677}
InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is greater than the p-value returned.
  kpss_result = kpss(timeseries)
# Drop rows with missing values
df = df.dropna()
Log differencing
# First differencing alone leaves a high ADF p-value, so apply log differencing
df['data_log_diff'] = np.log(df['data']).diff()
check_stationarity(df['data_log_diff'].dropna())
df = df.dropna()
ADF Statistic: -4.210044890698209
p-value (ADF): 0.0006335725359760862
Critical Values (ADF): {'1%': -3.5194805351545413, '5%': -2.9003945086747343, '10%': -2.5874984279778395}
InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is greater than the p-value returned.
  kpss_result = kpss(timeseries)
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plot_acf(df['data_log_diff'].dropna(), lags=20)
plt.show()
plot_pacf(df['data_log_diff'].dropna(), lags=20)
plt.show()
Splitting Data
# Step 5: Split data into training and test sets (chronological; the model
# summary below shows 57 training observations)
train = df['data_log_diff'][:57]
test = df['data_log_diff'][57:]
Auto Arima
from pmdarima import auto_arima
# Search defaults assumed; the selected model is shown in the summary
autoarima_model = auto_arima(train)
print(autoarima_model.summary())
                               SARIMAX Results
==============================================================================
Dep. Variable:                      y   No. Observations:                  57
Model:               SARIMAX(3, 0, 2)   Log Likelihood                 54.168
Date:                Sat, 16 Mar 2024   AIC                           -94.335
Time:                        11:24:33   BIC                           -80.034
Sample:                             0   HQIC                          -88.777
                                 - 57
Covariance Type:                  opg
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      0.1618      0.025      6.490      0.000       0.113       0.211
ar.L1         -0.9634      0.088    -10.895      0.000      -1.137      -0.790
ar.L2         -0.9728      0.066    -14.730      0.000      -1.102      -0.843
ar.L3         -0.9187      0.036    -25.298      0.000      -0.990      -0.848
ma.L1          0.2672      0.166      1.612      0.107      -0.058       0.592
ma.L2          0.3396      0.154      2.202      0.028       0.037       0.642
sigma2         0.0079      0.002      4.197      0.000       0.004       0.012
===================================================================================
Ljung-Box (L1) (Q):                 0.01   Jarque-Bera (JB):                0.06
Prob(Q):                            0.93   Prob(JB):                        0.97
Heteroskedasticity (H):             1.09   Skew:                            0.08
Prob(H) (two-sided):                0.85   Kurtosis:                        3.02
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
print(autoarima_model.order)
(3, 0, 2)
# 6. Model Fitting: fit the order chosen by auto_arima with statsmodels
from statsmodels.tsa.arima.model import ARIMA
arima_model = ARIMA(train, order=(3, 0, 2))
arima_fit = arima_model.fit()
# 7. Check residuals
residuals = arima_fit.resid
plt.plot(residuals)
plt.show()
Forecast Accuracy
# 8. Evaluate forecast accuracy on the test set (errors are on the log-differenced scale)
from sklearn.metrics import mean_absolute_error, mean_squared_error
arima_forecast = arima_fit.forecast(steps=len(test))
mae = mean_absolute_error(test, arima_forecast)
mse = mean_squared_error(test, arima_forecast)
rmse = np.sqrt(mse)
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
MAE: 0.14
MSE: 0.03
RMSE: 0.18
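For context, these errors can be compared with a naive last-value baseline; a minimal sketch reusing the split above:

naive_forecast = np.full(len(test), train.iloc[-1])
print(f'Naive MAE: {mean_absolute_error(test, naive_forecast):.2f}')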
plt.figure(figsize=(12, 6))
plt.plot(test, label='Actual')
plt.plot(arima_forecast, label='Forecast')
plt.legend()
plt.show()
# 9. Forecast 24 periods beyond the training window
forecast_steps = 24
arima_next_24_forecast = arima_fit.forecast(steps=forecast_steps)
plt.figure(figsize=(12, 6))
plt.plot(arima_next_24_forecast, label='24-step forecast')
plt.legend()
plt.show()
# 10. Forecast the next 24 periods against the actual data
n_forecast = 24
predict = arima_fit.get_prediction(end=arima_model.nobs + n_forecast)
idx = np.arange(len(predict.predicted_mean))
fig, ax = plt.subplots()
ax.plot(df['data'], 'blue')
ax.plot(idx[-n_forecast:], predict.predicted_mean[-n_forecast:], 'k--')
plt.show()
predictions_int = arima_fit.get_forecast(steps=24)
predictions_int.predicted_mean
57 -0.117143
58 0.126009
59 0.079427
60 0.070450
61 -0.099362
62 0.115915
63 0.081700
64 0.061709
65 -0.083857
66 0.107373
67 0.082908
68 0.054579
69 -0.070298
70 0.100121
71 0.083281
72 0.048791
73 -0.058409
74 0.093943
75 0.083007
76 0.044120
77 -0.047955
78 0.088657
79 0.082240
80 0.040376
Name: predicted_mean, dtype: float64
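The predicted_mean values are log-differences, not earnings levels; to read them as levels they must be accumulated onto a base level and exponentiated. A sketch, with the base level taken (as an assumption) from the end of the training window:

base = np.log(df['data'].iloc[len(train) - 1])  # last training-window level (assumption)
level_forecast = np.exp(base + predictions_int.predicted_mean.cumsum())
print(level_forecast.head())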
from scipy.fft import fft
fft_values = fft(df['data'].values)
psd_values = np.abs(fft_values) ** 2
plt.figure(figsize=(12, 6))
plt.plot(psd_values)
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.show()
# Real-valued data has a mirror-symmetric spectrum, so plot only the first half
plt.figure(figsize=(12, 6))
plt.plot(psd_values[:len(df) // 2])
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.show()
Spectrogram
from scipy.signal import spectrogram

frequencies, times, spectrogram_values = spectrogram(df['data'].values)
plt.figure(figsize=(12, 6))
plt.pcolormesh(times, frequencies, np.log(spectrogram_values + 1e-10))  # offset avoids log(0)
plt.title('Spectrogram')
plt.xlabel('Time')
plt.ylabel('Frequency')
plt.colorbar(label='Log Power')
plt.show()
jj Neural Network
Importing Libraries:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
data = pd.read_csv('jj.csv')
sales_data = data['data'].values.astype(float).reshape(-1, 1)
scaler = MinMaxScaler()
sales_data_normalized = scaler.fit_transform(sales_data)
# Function to create overlapping sequences for time series forecasting
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length + 1):
        seq = data[i:i + seq_length]
        sequences.append(seq)
    return np.array(sequences)

sequence_length = 12
training_sequences = create_sequences(sales_data_normalized, sequence_length)
training_size = int(len(training_sequences) * 0.8)  # 80/20 split is an assumption
training_data = training_sequences[:training_size]
testing_data = training_sequences[training_size:]
# Within each 12-step sequence, the first 11 values are inputs and the last is the target
X_train, y_train = training_data[:, :-1], training_data[:, -1]
X_test, y_test = testing_data[:, :-1], testing_data[:, -1]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
model = Sequential()
model.add(LSTM(50, input_shape=(X_train.shape[1], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
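A training call belongs between compilation and the forecasting loop below; a one-line sketch (epochs and batch size are assumptions) that also keeps the loss history for the final plot:

history = model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=0)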
# Roll the model forward 24 steps, feeding each prediction back in as the newest value
future_predictions = []
current_sequence = list(X_test[-1].flatten())
for i in range(24):
    input_sequence = np.array(current_sequence[-(sequence_length - 1):]).reshape(1, sequence_length - 1, 1)
    predicted_value = model.predict(input_sequence)[0][0]
    future_predictions.append(predicted_value)
    current_sequence.append(predicted_value)
Display Predictions:
# Map the normalized predictions back to the original scale and display them
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))
print(f'Predictions for the next 24 months: {future_predictions}')
Predictions for the next 24 months: [[13.525593 ]
[13.94045 ]
[14.161354 ]
[14.346481 ]
[14.5128765]
[14.775693 ]
[14.8436775]
[14.963552 ]
[15.025752 ]
[15.252812 ]
[15.261148 ]
[15.34032 ]
[15.458272 ]
[15.556854 ]
[15.646535 ]
[15.729873 ]
[15.808032 ]
[15.8783865]
[15.947483 ]
[16.013062 ]
[16.07672 ]
[16.133318 ]
[16.190027 ]
[16.244 ]]
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
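As a closing check, the held-out windows can be scored on the original scale with the mean_squared_error import from above (variable names follow the split earlier in this section):

test_predictions = scaler.inverse_transform(model.predict(X_test))
y_test_original = scaler.inverse_transform(y_test.reshape(-1, 1))
print(f'Test MSE: {mean_squared_error(y_test_original, test_predictions):.4f}')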