You are on page 1 of 7

8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

import seaborn as sns

# Load the employee-attrition dataset from the YBI Foundation GitHub repository.
# NOTE(review): the URL was truncated in this export (no closing quote or
# parenthesis); the '.csv' file name is reconstructed -- confirm against the repo.
attrition = pd.read_csv(
    'https://github.com/ybifoundation/Dataset/raw/main/EmployeeAttrition.csv'
)

attrition.head()

Age Attrition BusinessTravel DailyRate Department DistanceFromHome Educati

0 41 Yes Travel_Rarely 1102 Sales 1

1 49 No Travel_Frequently 279 Research & Development 8

2 37 Yes Travel_Rarely 1373 Research & Development 2

3 33 No Travel_Frequently 1392 Research & Development 3

4 27 No Travel_Rarely 591 Research & Development 2

5 rows × 35 columns

attrition.info()

<class 'pandas.core.frame.DataFrame'>

RangeIndex: 1470 entries, 0 to 1469

Data columns (total 35 columns):

# Column Non-Null Count Dtype

--- ------ -------------- -----

0 Age 1470 non-null int64

1 Attrition 1470 non-null object

2 BusinessTravel 1470 non-null object

3 DailyRate 1470 non-null int64

4 Department 1470 non-null object

5 DistanceFromHome 1470 non-null int64

6 Education 1470 non-null int64

7 EducationField 1470 non-null object

8 EmployeeCount 1470 non-null int64

9 EmployeeNumber 1470 non-null int64

10 EnvironmentSatisfaction 1470 non-null int64

11 Gender 1470 non-null object

12 HourlyRate 1470 non-null int64

13 JobInvolvement 1470 non-null int64

14 JobLevel 1470 non-null int64

15 JobRole 1470 non-null object

16 JobSatisfaction 1470 non-null int64

17 MaritalStatus 1470 non-null object

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 1/7
8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

18 MonthlyIncome 1470 non-null int64

19 MonthlyRate 1470 non-null int64

20 NumCompaniesWorked 1470 non-null int64

21 Over18 1470 non-null object

22 OverTime 1470 non-null object

23 PercentSalaryHike 1470 non-null int64

24 PerformanceRating 1470 non-null int64

25 RelationshipSatisfaction 1470 non-null int64

26 StandardHours 1470 non-null int64

27 StockOptionLevel 1470 non-null int64

28 TotalWorkingYears 1470 non-null int64

29 TrainingTimesLastYear 1470 non-null int64

30 WorkLifeBalance 1470 non-null int64

31 YearsAtCompany 1470 non-null int64

32 YearsInCurrentRole 1470 non-null int64

33 YearsSinceLastPromotion 1470 non-null int64

34 YearsWithCurrManager 1470 non-null int64

dtypes: int64(26), object(9)

memory usage: 402.1+ KB

attrition.describe()

Age DailyRate DistanceFromHome Education EmployeeCount Employe

count 1470.000000 1470.000000 1470.000000 1470.000000 1470.0 147

mean 36.923810 802.485714 9.192517 2.912925 1.0 102

std 9.135373 403.509100 8.106864 1.024165 0.0 60

min 18.000000 102.000000 1.000000 1.000000 1.0

25% 30.000000 465.000000 2.000000 2.000000 1.0 49

50% 36.000000 802.000000 7.000000 3.000000 1.0 102

75% 43.000000 1157.000000 14.000000 4.000000 1.0 155

max 60.000000 1499.000000 29.000000 5.000000 1.0 206

8 rows × 26 columns

attrition.isna().sum()

Age 0

Attrition 0

BusinessTravel 0

DailyRate 0

Department 0

DistanceFromHome 0

Education 0

EducationField 0

EmployeeCount 0

EmployeeNumber 0

EnvironmentSatisfaction 0

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 2/7
8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

Gender 0

HourlyRate 0

JobInvolvement 0

JobLevel 0

JobRole 0

JobSatisfaction 0

MaritalStatus 0

MonthlyIncome 0

MonthlyRate 0

NumCompaniesWorked 0

Over18 0

OverTime 0

PercentSalaryHike 0

PerformanceRating 0

RelationshipSatisfaction 0

StandardHours 0

StockOptionLevel 0

TotalWorkingYears 0

TrainingTimesLastYear 0

WorkLifeBalance 0

YearsAtCompany 0

YearsInCurrentRole 0

YearsSinceLastPromotion 0

YearsWithCurrManager 0

dtype: int64

attrition.nunique()

Age 43

Attrition 2

BusinessTravel 3

DailyRate 886

Department 3

DistanceFromHome 29

Education 5

EducationField 6

EmployeeCount 1

EmployeeNumber 1470

EnvironmentSatisfaction 4

Gender 2

HourlyRate 71

JobInvolvement 4

JobLevel 5

JobRole 9

JobSatisfaction 4

MaritalStatus 3

MonthlyIncome 1349

MonthlyRate 1427

NumCompaniesWorked 10

Over18 1

OverTime 2

PercentSalaryHike 15

PerformanceRating 2

RelationshipSatisfaction 4

StandardHours 1

StockOptionLevel 4

TotalWorkingYears 40

TrainingTimesLastYear 7

WorkLifeBalance 4

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 3/7
8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

YearsAtCompany 37

YearsInCurrentRole 19

YearsSinceLastPromotion 16

YearsWithCurrManager 18

dtype: int64

sns.pairplot(attrition)

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 4/7
8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

<seaborn.axisgrid.PairGrid at 0x7f96303e11d0>

attrition.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'DailyRate', 'Department',

'DistanceFromHome', 'Education', 'EducationField', 'EmployeeCount',

'EmployeeNumber', 'EnvironmentSatisfaction', 'Gender', 'HourlyRate',

'JobInvolvement', 'JobLevel', 'JobRole', 'JobSatisfaction',

'MaritalStatus', 'MonthlyIncome', 'MonthlyRate', 'NumCompaniesWorked',

'Over18', 'OverTime', 'PercentSalaryHike', 'PerformanceRating',

'RelationshipSatisfaction', 'StandardHours', 'StockOptionLevel',

'TotalWorkingYears', 'TrainingTimesLastYear', 'WorkLifeBalance',

'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',

'YearsWithCurrManager'],

dtype='object')

# Target: the 'Attrition' label column (reported as 2 unique values, Yes/No).
y = attrition['Attrition']

# Features: the 26 int64 columns reported by attrition.info(); the object-typed
# (categorical) columns are left out because they are not encoded here.
X = attrition[[
    'Age',
    'DailyRate',
    'DistanceFromHome',
    'Education',
    'EmployeeCount',
    'EmployeeNumber',
    'EnvironmentSatisfaction',
    'HourlyRate',
    'JobInvolvement',
    'JobLevel',
    'JobSatisfaction',
    'MonthlyIncome',
    'MonthlyRate',
    'NumCompaniesWorked',
    'PercentSalaryHike',
    'PerformanceRating',
    'RelationshipSatisfaction',
    'StandardHours',
    'StockOptionLevel',
    'TotalWorkingYears',
    'TrainingTimesLastYear',
    'WorkLifeBalance',
    'YearsAtCompany',
    'YearsInCurrentRole',
    'YearsSinceLastPromotion',
    'YearsWithCurrManager',
]]

from sklearn.model_selection import train_test_split 

# Split features and target into train and evaluation partitions with a fixed
# seed so the split is reproducible.
# NOTE(review): train_size=.30 fits on only 30% of the rows and evaluates on the
# remaining 70% -- confirm that test_size=.30 was not the intent.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=.30,
    random_state=2529,
)

X_train.shape,X_test.shape,y_train.shape,y_test.shape

((441, 26), (1029, 26), (441,), (1029,))

from sklearn.ensemble import RandomForestClassifier

model=RandomForestClassifier()

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 5/7
8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

model.fit(X_train,y_train)

RandomForestClassifier()

y_pred=model.predict(X_test)

from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

accuracy_score(y_test,y_pred)

0.8435374149659864

confusion_matrix(y_test,y_pred)

array([[856, 5],

[156, 12]])

print(classification_report(y_test,y_pred))

precision recall f1-score support

No 0.85 0.99 0.91 861

Yes 0.71 0.07 0.13 168

accuracy 0.84 1029

macro avg 0.78 0.53 0.52 1029

weighted avg 0.82 0.84 0.79 1029

sample=attrition.sample()

sample

Age Attrition BusinessTravel DailyRate Department DistanceFromHome Educati

72 31 No Travel_Rarely 1082 Research & Development 1

1 rows × 35 columns

X_new=sample.loc[:,X.describe().columns]
X_new

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 6/7
8/29/22, 11:07 PM Untitled7.ipynb - Colaboratory

Age DailyRate DistanceFromHome Education EmployeeCount EmployeeNumber Envi

72 31 1082 1 4 1 95

1 rows × 26 columns

model.predict(X_new)

array(['No'], dtype=object)

Colab paid products


-
Cancel contracts here

check 0s completed at 10:57 PM

https://colab.research.google.com/drive/1u09TB-hT0mvGYRTydvGBKX65XQRWRCEc#scrollTo=ecTMjB3IrXKD&printMode=true 7/7

You might also like