Professional Documents
Culture Documents
ipynb - Colaboratory
PRE-TRAITEMENT
Dans cette partie, on va nettoyer et préparer nos jeux de données pour le travail.
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import time
from scipy.signal import find_peaks
from scipy.stats import gamma
import statsmodels.api as sm
import warnings
from datetime import datetime
import statsmodels.api as sm
import scipy.stats as stats
import os
# List every file found under the Kaggle input directory. os.walk yields
# nothing when the directory does not exist, so this is a no-op elsewhere.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Load the training set into a DataFrame.
data = pd.read_csv("/content/train_ML_IOT.csv")
# Bare expression: shown as the cell output when run in a notebook.
data
Pour atteindre votre objectif, vous devrez explorer, analyser, et modéliser ces données de manière judicieuse. Si vous avez des questions
spécifiques sur les données ou sur la marche à suivre, n'hésitez pas à me les poser !
# Summarise the DataFrame. Both calls sit in one tuple expression, so the
# displayed value is (describe() result, None): info() prints its report
# as a side effect and returns None.
data.describe(), data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48120 entries, 0 to 48119
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 DateTime 48120 non-null object
1 Junction 48120 non-null int64
2 Vehicles 48120 non-null int64
3 ID 48120 non-null int64
dtypes: int64(3), object(1)
memory usage: 1.5+ MB
( Junction Vehicles ID
count 48120.000000 48120.000000 4.812000e+04
mean 2.180549 22.791334 2.016330e+10
std 0.966955 20.750063 5.944854e+06
min 1.000000 1.000000 2.015110e+10
25% 1.000000 9.000000 2.016042e+10
50% 2.000000 15.000000 2.016093e+10
75% 3.000000 29.000000 2.017023e+10
max 4.000000 180.000000 2.017063e+10,
None)
https://colab.research.google.com/drive/1Ku6VepHeAlZC-cd0Oeb9cqLksd0ueLGw#scrollTo=_Y_x-3ofvvxx&printMode=true 1/3
11/25/23, 2:51 PM HAMZAKHAWLA.ipynb - Colaboratory
# Parse the timestamp strings once so the .dt accessor can be used below.
data['DateTime'] = pd.to_datetime(data['DateTime'])

# Calendar features, computed column-wise through the vectorized .dt
# accessor instead of one Python-level loop over every row per feature.
timestamps = data['DateTime'].dt
data['Weekday'] = timestamps.weekday  # Monday = 0 ... Sunday = 6
data['Year'] = timestamps.year
data['Month'] = timestamps.month
data['Day'] = timestamps.day
data['Hour'] = timestamps.hour
# isocalendar() yields the ISO week number (same value as Timestamp.week)
# as a nullable UInt32 column; cast it to a plain NumPy integer dtype so it
# behaves like the other feature columns.
data['Week'] = timestamps.isocalendar().week.astype('int64')
data['Quarter'] = timestamps.quarter

# Weekday 5 and 6 are Saturday and Sunday.
data["IsWeekend"] = data["Weekday"] >= 5
data.head(10)
   DateTime             Junction  Vehicles  ID           Weekday  Year  Month  Day  Hour  Week
0  2015-11-01 00:00:00  1         15        20151101001  6        2015  11     1    0     44
1  2015-11-01 01:00:00  1         13        20151101011  6        2015  11     1    1     44
2  2015-11-01 02:00:00  1         10        20151101021  6        2015  11     1    2     44
3  2015-11-01 03:00:00  1         7         20151101031  6        2015  11     1    3     44
4  2015-11-01 04:00:00  1         9         20151101041  6        2015  11     1    4     44
missing values
if (data.isnull().sum()).sum() != 0 :
# Fill Missing Values with Forward Fill (Previous Row Value as Current Row in Table) :
if "Forward_Fill" in fill_types :
data[ column ] = data[ column ].ffill(axis = 0)
# Fill Missing Values with Backward Fill (Next Row Value as Current Row in Table) :
if "Backward_Fill" in fill_types :
data[ column ] = data[ column ].bfill(axis = 0)
return data
# Names of the missing-value strategies to apply; the filling code tests
# membership of "Forward_Fill" / "Backward_Fill" in this list.
fill_types = ["Forward_Fill"]
# NOTE(review): presumably the constant used for a fixed-value fill; no
# visible code reads it, so confirm before relying on it.
fill_value = 0
https://colab.research.google.com/drive/1Ku6VepHeAlZC-cd0Oeb9cqLksd0ueLGw#scrollTo=_Y_x-3ofvvxx&printMode=true 2/3
11/25/23, 2:51 PM HAMZAKHAWLA.ipynb - Colaboratory
https://colab.research.google.com/drive/1Ku6VepHeAlZC-cd0Oeb9cqLksd0ueLGw#scrollTo=_Y_x-3ofvvxx&printMode=true 3/3