You are on page 1of 4

Applied Exercise 4

1. 8 years. Excluding employees with only a high school or elementary school education, most of
those with 8 years of employment have higher education.
2. According to the survival analysis, after 15 years employees tend to leave the firm in pursuit
of other opportunities.
3. Gender and educational attainment affect how long employees stay employed, as do regional
effects.
4. The educational level, location, and gender of an employee all affect the number of years
that employee has worked.
5. In addition to the factors above, salary, working environment, and room for promotion also
influence whether staff leave or stay.
Code

library(ggplot2)
library(dplyr)
library(psych) # describe
library(GGally) # ggpairs plot
library(reshape)
library(purrr) # Organizing
library(tidyr) # Organize/tidy data
library(forcats) # fct_reorder
library(MASS) # stepAIC
library(descr) # for pseudo R2
library(pROC)
library(survival)

# Read the employee records from the CSV export (the path is machine-specific).
dataset <- read.csv("/Users/phyllisxiong/Downloads/employeeDataset.csv")

# Drop every row that contains a missing value, then show the column
# structure of what remains (attributes suppressed in the listing).
dataset2 <- na.omit(dataset)
str(dataset2, give.attr = FALSE)

# Recode employeeid into four labelled bands using fixed cut points
# (<= 8554, <= 16875, <= 33380, above). as.factor() on the character
# labels yields alphabetically ordered levels.
#
# Fix: the third threshold used to test dataset2$age, so rows above 16875
# were classified by an unrelated column. All three comparisons now use
# employeeid.
dataset2$employeeid <- as.factor(
  ifelse(dataset2$employeeid <= 8554, "Normal",
    ifelse(dataset2$employeeid <= 16875, "Medium",
      ifelse(dataset2$employeeid <= 33380, "High", "Very High")
    )
  )
)

# Recode three numeric predictors into labelled bands.
# findInterval(..., left.open = TRUE) returns how many cut points lie
# strictly below each value, so a value equal to a cut point stays in the
# lower band (the same "<=" rule as a nested ifelse ladder). Adding 1 turns
# that count into an index into the label vector. as.factor() on the
# resulting character vector gives alphabetically ordered levels.
# NOTE(review): assumes these columns were read as numeric; confirm.

# locationcode: <= 80, <= 104, <= 180, above.
dataset2$locationcode <- as.factor(
  c("Normal", "Medium", "High", "Very High")[
    findInterval(dataset2$locationcode, c(80, 104, 180), left.open = TRUE) + 1L
  ]
)

# urbanicity: <= 0.74, <= 0.949, above.
dataset2$urbanicity <- as.factor(
  c("Normal", "Medium", "High")[
    findInterval(dataset2$urbanicity, c(0.74, 0.949), left.open = TRUE) + 1L
  ]
)

# totalcertificationhours: <= 47, <= 88, <= 336, above.
dataset2$totalcertificationhours <- as.factor(
  c("Low", "Medium", "High", "Very High")[
    findInterval(dataset2$totalcertificationhours, c(47, 88, 336),
                 left.open = TRUE) + 1L
  ]
)

# Set column types for the survival analysis.
#
# The tenure columns are coerced to numeric. Every other column except the
# event indicator is converted to a factor so it enters the model as a
# categorical predictor.
#
# Fix: previously *all* columns were converted to factors, which undid the
# numeric coercion just above and turned yearsatfirm and quit into factors.
# Surv() needs a numeric follow-up time, and a factor event is interpreted
# as a multi-state endpoint, so these columns are now left out of the
# factor conversion.
dataset2$yearsincurrentposition <- as.numeric(dataset2$yearsincurrentposition)
dataset2$yearsatfirm <- as.numeric(dataset2$yearsatfirm)

col_names <- names(dataset2)
# NOTE(review): quit is kept exactly as read from the CSV; this assumes it
# is already coded 0/1 (or logical). Confirm against the data.
numeric_cols <- c("yearsincurrentposition", "yearsatfirm", "quit")
factor_cols <- setdiff(col_names, numeric_cols)
dataset2[factor_cols] <- lapply(dataset2[factor_cols], factor)

summary(dataset2)
str(dataset2)

# Build the survival response: follow-up time is yearsatfirm and the event
# indicator is quit.
survObj <- with(dataset2, Surv(yearsatfirm, quit))
plot(survObj)

# Kaplan-Meier estimate for the whole sample (intercept-only formula, so no
# grouping), followed by its printed summary.
fitKMSimple <- survfit(survObj ~ 1)
print(fitKMSimple)

# Summarise the distribution of every column in dataset2 and register the
# summary under the name "dd" through the `datadist` option, which is
# where rms fitting functions look it up.
#
# Fix: datadist() is provided by the rms package, which is not attached in
# the library() block visible at the top of this script. The call is
# namespace-qualified so it resolves as long as rms is installed.
dd <- rms::datadist(dataset2)
options(datadist = "dd")

# Plot the Kaplan-Meier curve without confidence bands.
#
# Fix: the argument was spelled `cond.int`, which plot.survfit() does not
# know; it was forwarded as an unknown graphical parameter instead of
# switching off the confidence interval. The correct name is `conf.int`.
plot(fitKMSimple, conf.int = FALSE, xlab = "Time Since firm",
     ylab = "Survival function", main = "Survival function")

# Fit a Cox proportional hazards model of time-to-quit on the employee
# predictors. x = TRUE, y = TRUE and surv = TRUE ask cph() to keep the
# design matrix, the response and the survival estimates in the fit.
#
# Fix: cph() is provided by the rms package, which is not attached in the
# library() block visible at the top of this script. The call is
# namespace-qualified so it resolves as long as rms is installed.
#
# NOTE(review): `currentnlyinschool` looks like a misspelling of
# "currentlyinschool"; confirm the actual column name in the CSV before
# changing it.
fitCPH <- rms::cph(
  Surv(yearsatfirm, quit) ~ locationcode + urbanicity + gender +
    hiredonexperience + currentnlyinschool +
    education + degreefield +
    employeesinfirm + company_type + totalcertificationhours,
  data = dataset2, x = TRUE, y = TRUE, surv = TRUE
)
print(fitCPH)

# Exponentiated coefficients, i.e. hazard ratios for each model term.
exp(fitCPH$coefficients)

You might also like