You are on page 1 of 4

Entrée [49]: import pandas as pd

import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

Entrée [36]: # Load the data


# Location of the waste-statistics CSV. The original notebook hard-coded an
# absolute path on the author's machine; that path is kept as the fallback so
# existing behaviour is unchanged, but it can now be overridden by setting the
# WASTE_CSV_PATH environment variable before starting the kernel.
import os

DATA_PATH = os.environ.get(
    "WASTE_CSV_PATH", "C:/Users/PC/Desktop/2018_2020_waste.csv"
)
df = pd.read_csv(DATA_PATH)

Entrée [37]: df.head()

Out[37]:
Waste Type Total Generated ('000 tonnes) Total Recycled ('000 tonnes) Year

0 Construction& Demolition 1624 1618 2018

1 Ferrous Metal 1269 126 2018

2 Paper/Cardboard 1054 586 2018

3 Plastics 949 41 2018

4 Food 763 126 2018

Entrée [38]: df.shape

Out[38]: (45, 4)

Entrée [39]: df.describe()

Out[39]:
Total Generated ('000 tonnes) Total Recycled ('000 tonnes) Year

count 45.000000 45.000000 45.000000

mean 1073.644444 508.688889 2019.000000

std 1951.504154 1035.912023 0.825723

min 23.000000 6.000000 2018.000000

25% 168.000000 25.000000 2018.000000

50% 313.000000 126.000000 2019.000000

75% 949.000000 428.000000 2020.000000

max 7695.000000 4726.000000 2020.000000


Entrée [40]: df.tail()

Out[40]:
Waste Type Total Generated ('000 tonnes) Total Recycled ('000 tonnes) Year

40 Non-ferrous metal 75 73 2020

41 Glass 66 7 2020

42 Scrap tyres 23 22 2020

43 Others (stones, ceramics, etc.) 193 21 2020

44 Overall 5880 3040 2020

Entrée [41]: sns.pairplot(df);


Entrée [42]: label_encode = LabelEncoder()
# Turn each distinct waste-type name into an integer code, store the codes in
# a new 'sortie' column, then remove the text column now that its encoded form
# exists.
# Caveat: the drop mutates df in place, so running this cell a second time
# fails because 'Waste Type' is no longer present.
labels = label_encode.fit_transform(df['Waste Type'])
df['sortie'] = labels
df.drop(columns='Waste Type', inplace=True)
df.head()

Out[42]:
Total Generated ('000 tonnes) Total Recycled ('000 tonnes) Year sortie

0 1624 1618 2018 3

1 1269 126 2018 4

2 1054 586 2018 14

3 949 41 2018 15

4 763 126 2018 6

Entrée [43]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Total Generated ('000 tonnes) 45 non-null int64
1 Total Recycled ('000 tonnes) 45 non-null int64
2 Year 45 non-null int64
3 sortie 45 non-null int32
dtypes: int32(1), int64(3)
memory usage: 1.4 KB

Entrée [44]: label_encode = LabelEncoder()


# Replace the 'Total Recycled' column with integer codes stored in 'sortie1'.
# NOTE(review): this column is numeric (int64 in the df.info() output), so the
# encoder just maps each tonnage to the position of its value among the sorted
# distinct values, which discards the actual magnitudes. scikit-learn documents
# LabelEncoder as intended for target labels rather than input features --
# confirm this transformation is really wanted.
labels = label_encode.fit_transform(df["Total Recycled ('000 tonnes)"])
df['sortie1'] = labels
df.drop(columns="Total Recycled ('000 tonnes)", inplace=True)
df.head()

Out[44]:
Total Generated ('000 tonnes) Year sortie sortie1

0 1624 2018 3 36

1 1269 2018 4 18

2 1054 2018 14 31

3 949 2018 15 14

4 763 2018 6 18
Entrée [45]: sns.scatterplot(x=df['sortie1'],y=df['sortie'], hue=df['Year'])

Out[45]: <AxesSubplot:xlabel='sortie1', ylabel='sortie'>

Entrée [50]: # Split the data into features (X) and target (y)
# Features are every column except 'Year'; 'Year' is the prediction target.
X = df.drop('Year', axis=1)
y = df['Year']

# Hold out 20% of the rows as a test set. The seed is fixed so that the split,
# and therefore every downstream result, is identical after
# Restart Kernel -> Run All instead of changing on each execution.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Standardise the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows, so no test-set statistics leak into
# training. Both calls return NumPy arrays, which replace the DataFrames
# previously bound to X_train / X_test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

(36, 3) (36,) (9, 3) (9,)

You might also like