Professional Documents
Culture Documents
In [4]:
import pandas as pd
path =r"C:\Users\shrut\Desktop\employees.csv"
df=pd.read_csv(path)
df.head()
Out[4]: employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id
In [ ]:
#_1 create a full name column in df which is first_name+last_name
#_2 drop the column first_name and last_name and email column
#_6 sort the df based on salary ascending if salary same then full_name desending
#_7 count the number of employees whose manager is with manager_id= 103
#_9 look for any outlier in the boxplot if any then remove it
#_10 find out the department_id which is getting maximum salary,minimum salary from the firm
In [7]:
df['full_name']=df['first_name']+df['last_name']
df
Out[7]: employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id full_name
2 102.0 Lex De Haan lex.de haan@sqltutorial.org 515.123.4569 1/13/1993 5 17000 100.0 9 LexDe Haan
employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id full_name
employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id full_name
In [8]:
df.drop(['first_name','last_name','email'],axis=1)
In [10]:
df.isnull().sum()
employee_id 1
Out[10]:
first_name 0
last_name 0
email 0
phone_number 6
hire_date 0
job_id 0
salary 0
manager_id 1
department_id 0
full_name 0
dtype: int64
In [11]:
df.dropna()
Out[11]: employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id full_name
2 102.0 Lex De Haan lex.de haan@sqltutorial.org 515.123.4569 1/13/1993 5 17000 100.0 9 LexDe Haan
employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id full_name
employee_id first_name last_name email phone_number hire_date job_id salary manager_id department_id full_name
In [13]:
df.sort_index(axis=1,inplace=True)
df
Out[13]: department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
2 9 lex.de haan@sqltutorial.org 102.0 Lex LexDe Haan 1/13/1993 5 De Haan 100.0 515.123.4569 17000
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
Out[14]: department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
2 9 lex.de haan@sqltutorial.org 102.0 Lex LexDe Haan 1/13/1993 5 De Haan 100.0 515.123.4569 17000
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
In [15]:
df['manager_id'].value_counts()[103]
4
Out[15]:
In [19]:
import seaborn as sns
df
Out[19]: department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
2 9 lex.de haan@sqltutorial.org 102.0 Lex LexDe Haan 1/13/1993 5 De Haan 100.0 515.123.4569 17000
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
In [35]:
#fig=plt.figure(figsize=(1500,1500))
sns.set(rc = {'figure.figsize':(15,8)})
plt.show()
In [36]:
import seaborn as sns
df
Out[36]: department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
2 9 lex.de haan@sqltutorial.org 102.0 Lex LexDe Haan 1/13/1993 5 De Haan 100.0 515.123.4569 17000
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
In [38]:
a=df['salary'].quantile(0.75)
b=df['salary'].quantile(0.25)
c=a+1.5*(a-b)
df[df['salary']<c]
Out[38]: department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
In [39]:
df[(df['salary']==df['salary'].max())|(df['salary']==df['salary'].min())]
Out[39]: department_id email employee_id first_name full_name hire_date job_id last_name manager_id phone_number salary
In [40]:
sns.barplot(data=df,x='department_id')
<AxesSubplot:xlabel='department_id'>
Out[40]:
In [ ]: