You are on page 1 of 3

11/25/23, 2:51 PM HAMZAKHAWLA.ipynb - Colaboratory

PRE-TRAITEMENT

Dans cette partie, on va nettoyer et préparer nos data sets pour le travail.

import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import time
from scipy.signal import find_peaks
from scipy.stats import gamma
import statsmodels.api as sm
import warnings
from datetime import datetime
import statsmodels.api as sm
import scipy.stats as stats

import os
# Kaggle starter snippet: walk the input directory tree and print the full
# path of every file found in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, filename) for filename in filenames):
        print(full_path)

# Load the training CSV into a DataFrame.
data = pd.read_csv("/content/train_ML_IOT.csv")
# Bare expression as the last statement of the cell: the notebook renders the
# DataFrame as the cell output.
data

DateTime Junction Vehicles ID

0 2015-11-01 00:00:00 1 15 20151101001

1 2015-11-01 01:00:00 1 13 20151101011

2 2015-11-01 02:00:00 1 10 20151101021

3 2015-11-01 03:00:00 1 7 20151101031

4 2015-11-01 04:00:00 1 9 20151101041

... ... ... ... ...

48115 2017-06-30 19:00:00 4 11 20170630194

48116 2017-06-30 20:00:00 4 30 20170630204

48117 2017-06-30 21:00:00 4 16 20170630214

48118 2017-06-30 22:00:00 4 22 20170630224

48119 2017-06-30 23:00:00 4 12 20170630234

48120 rows × 4 columns

Pour atteindre votre objectif, vous devrez explorer, analyser, et modéliser ces données de manière judicieuse. Si vous avez des questions
spécifiques sur les données ou sur la marche à suivre, n'hésitez pas à me les poser !

# Show summary statistics and the column dtypes / non-null counts in one cell.
# data.info() prints its report and returns None, which is why the displayed
# tuple ends with None.
data.describe(), data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48120 entries, 0 to 48119
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 DateTime 48120 non-null object
1 Junction 48120 non-null int64
2 Vehicles 48120 non-null int64
3 ID 48120 non-null int64
dtypes: int64(3), object(1)
memory usage: 1.5+ MB
( Junction Vehicles ID
count 48120.000000 48120.000000 4.812000e+04
mean 2.180549 22.791334 2.016330e+10
std 0.966955 20.750063 5.944854e+06
min 1.000000 1.000000 2.015110e+10
25% 1.000000 9.000000 2.016042e+10
50% 2.000000 15.000000 2.016093e+10
75% 3.000000 29.000000 2.017023e+10
max 4.000000 180.000000 2.017063e+10,
None)

https://colab.research.google.com/drive/1Ku6VepHeAlZC-cd0Oeb9cqLksd0ueLGw#scrollTo=_Y_x-3ofvvxx&printMode=true 1/3
11/25/23, 2:51 PM HAMZAKHAWLA.ipynb - Colaboratory
# Parse the timestamp strings once, then derive every calendar feature with
# the vectorised `.dt` accessor instead of one Python-level loop per column.
data['DateTime'] = pd.to_datetime(data['DateTime'])
stamp = data['DateTime'].dt
calendar_features = {
    'Weekday': stamp.weekday,  # Monday=0 ... Sunday=6
    'Year': stamp.year,
    'Month': stamp.month,
    'Day': stamp.day,
    'Hour': stamp.hour,
    'Week': stamp.isocalendar().week,  # ISO week number
    'Quarter': stamp.quarter,
}
for feature_name, feature_values in calendar_features.items():
    # Cast explicitly so the column dtype does not depend on the pandas
    # version (the accessor returns int32/UInt32 on recent releases).
    data[feature_name] = feature_values.astype('int64')
# Saturday (5) and Sunday (6) count as the weekend.
data["IsWeekend"] = data["Weekday"] >= 5

data.head(10)

DateTime Junction Vehicles ID Weekday Year Month Day Hour Week Q

0 2015-11-01 00:00:00 1 15 20151101001 6 2015 11 1 0 44

1 2015-11-01 01:00:00 1 13 20151101011 6 2015 11 1 1 44

2 2015-11-01 02:00:00 1 10 20151101021 6 2015 11 1 2 44

3 2015-11-01 03:00:00 1 7 20151101031 6 2015 11 1 3 44

4 2015-11-01 04:00:00 1 9 20151101041 6 2015 11 1 4 44

missing values

def fill_missing_values(data, fill_value, fill_types, columns, dataframe_name):
    """Report and fill missing values in the selected columns of ``data``.

    The DataFrame is modified in place and the same object is returned.
    The total count of missing values is always shown first; the filling
    and the per-column report afterwards only run when that count is
    non-zero.

    Args:
        data: DataFrame to clean (mutated in place).
        fill_value: Replacement used by the "Value_Fill" strategy.
        fill_types: Collection of strategy names. Recognised names are
            "Value_Fill", "Forward_Fill" and "Backward_Fill"; when several
            are given they are applied in that order, whatever their order
            in ``fill_types``. Other names are ignored.
        columns: Labels of the columns to fill.
        dataframe_name: Label used in the printed messages.

    Returns:
        The same ``data`` object that was passed in.
    """
    # `display` only exists inside IPython/Jupyter sessions; fall back to
    # `print` so the function also works in a plain Python interpreter.
    try:
        show = display
    except NameError:
        show = print

    print("Missing Values BEFORE REMOVAL in ",dataframe_name," data")
    total_missing = data.isnull().sum().sum()
    show(total_missing)

    # Nothing to fill: skip the work and the "AFTER" report.
    if total_missing == 0:
        return data

    # The strategy selection does not depend on the column, so test it once.
    use_value_fill = "Value_Fill" in fill_types
    use_forward_fill = "Forward_Fill" in fill_types
    use_backward_fill = "Backward_Fill" in fill_types

    for column in columns:
        # Replace missing entries with the given constant.
        if use_value_fill:
            data[column] = data[column].fillna(fill_value)
        # Propagate the previous row's value downwards.
        if use_forward_fill:
            data[column] = data[column].ffill(axis=0)
        # Propagate the next row's value upwards.
        if use_backward_fill:
            data[column] = data[column].bfill(axis=0)

    print("Missing Values AFTER REMOVAL in ",dataframe_name," data")
    show(data.isnull().sum())

    return data

# Cleaning configuration: only forward fill is enabled; the constant is the
# replacement the "Value_Fill" strategy would use if it were selected.
fill_value = 0
fill_types = ["Forward_Fill"]

# The function fills `data` in place and returns that same object, so `train`
# refers to the same DataFrame as `data`.
train = fill_missing_values(
    data=data,
    fill_value=fill_value,
    fill_types=fill_types,
    columns=data.columns,
    dataframe_name="train",
)

Missing Values BEFORE REMOVAL in train data


0

https://colab.research.google.com/drive/1Ku6VepHeAlZC-cd0Oeb9cqLksd0ueLGw#scrollTo=_Y_x-3ofvvxx&printMode=true 2/3
11/25/23, 2:51 PM HAMZAKHAWLA.ipynb - Colaboratory

https://colab.research.google.com/drive/1Ku6VepHeAlZC-cd0Oeb9cqLksd0ueLGw#scrollTo=_Y_x-3ofvvxx&printMode=true 3/3

You might also like