You are on page 1 of 3

In 

[3]: import numpy as np


import matplotlib.pyplot as plt
import pandas as pd

In [4]: dataset = pd.read_csv("D:\\Course\\Python\\Datasets\\Churn_Modelling.csv")


In [5]: dataset

...

In [6]: X = dataset.iloc[:, 3:13].values


X
# Target array: the column at position 13 of `dataset`, taken as a NumPy array.
y = dataset.iloc[:, 13].values
y  # last expression in the cell, so it is what the notebook displays

Out[6]: array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

In [7]: X

...

In [8]: X

# Wrap the raw array X in a DataFrame so it can be inspected as a table.
test1 = pd.DataFrame(X)
test1
...

In [11]: from sklearn.preprocessing import LabelEncoder, OneHotEncoder # import methods an



# Convert the categorical data into numbers (0, 1, 2)
# by creating an encoder object from the imported class

# One LabelEncoder per categorical column, so each keeps its own label mapping.
labelencoder_X_1, labelencoder_X_2 = LabelEncoder(), LabelEncoder()

# fit_transform() combines two separate steps in a single call:
#   .fit(values)       - learns the distinct labels present in the data
#   .transform(values) - replaces each label with its integer code
# i.e. the two-step equivalent for column 1 would be:
#   labelencoder_X_1.fit(X[:, 1])
#   labelencoder_X_1.transform(X[:, 1])
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])  # column 1
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])  # column 2



In [12]: X
# DataFrame view of X after columns 1 and 2 were label-encoded, for inspection.
test2 = pd.DataFrame(X)
test2
...

In [ ]: # Creating 3 dummy variables for country (factor level of 3: Spain, France and Germany)

#onehotencoder = OneHotEncoder()
#X = onehotencoder.fit_transform(X).toarray()
#X = X[:, 1:]


#onehotencoder = OneHotEncoder()
#X = onehotencoder.fit_transform(X).toarray()
#X = X[:, 1:]

In [13]: # ColumnTransformer is used to encode a column that has more than 2 factor levels


from sklearn.compose import ColumnTransformer

# One-hot encode column 1 and keep every other column.
# ColumnTransformer's default is remainder="drop", which would discard all
# columns not listed in a transformer and leave only the dummy columns.
# remainder="passthrough" appends the untouched columns after the encoded ones.
ct = ColumnTransformer(
    [("Geography", OneHotEncoder(), [1])],
    remainder="passthrough",
)

# NOTE: this overwrites X, so re-running the cell would feed the
# already-transformed array back into the encoder.
X = ct.fit_transform(X)

In [11]: abc=pd.DataFrame(X)
abc

Out[11]: 0 1 2 3 4 5 6 7 8 9 10 11

0 1 0 0 619 0 42 2 0 1 1 1 101349

1 0 0 1 608 0 41 1 83807.9 1 0 1 112543

2 1 0 0 502 0 42 8 159661 3 1 0 113932

3 1 0 0 699 0 39 1 0 2 0 0 93826.6

4 0 0 1 850 0 43 2 125511 1 1 1 79084.1

... ... ... ... ... ... ... ... ... ... ... ... ...

9995 1 0 0 771 1 39 5 0 2 1 0 96270.6

9996 1 0 0 516 1 35 10 57369.6 1 1 1 101700

9997 1 0 0 709 0 36 7 0 1 0 1 42085.6

9998 0 1 0 772 1 42 3 75075.3 2 1 0 92888.5

9999 1 0 0 792 0 28 4 130143 1 1 0 38190.8

10000 rows × 12 columns


In [ ]: # LabelEncoder - converts the data into 0 and 1 (factor level 2)
# OneHotEncoder and ColumnTransformer - help to create the dummy variables (more than 2 factor
# levels) and convert the data into 0 and 1

You might also like