# Human Capital and Economic Development:
# Effect of higher education on GDP per capita
# Panel data: 11 countries, 15 years (2000-2014)
#
# getwd()
# NOTE(review): machine-specific working directory; kept so the script's
# side effects are unchanged, but nothing below reads a relative path.
setwd("/Users/nune/Desktop/EC571/Panel Data")

# Read the wide-format data files and convert them to long format
library(foreign)

# Directory that holds the four input CSV files.
data_dir <- "~/Desktop/EC571/Panel Data"

GDP <- read.csv(file = file.path(data_dir, "GDP.csv"), header = TRUE)
EDUCX <- read.csv(file = file.path(data_dir, "EDUCX.csv"), header = TRUE)
EDUC <- read.csv(file = file.path(data_dir, "EDUC.csv"), header = TRUE)
# Only the first 15 rows of HE.csv are read.
HE <- read.csv(file = file.path(data_dir, "HE.csv"), header = TRUE, nrows = 15)
#
# Country columns of the wide tables, in the order they are stacked.
countries <- c("BR", "CA", "ID", "JP", "LA", "MA", "PH", "SI", "KR", "TH", "VN")

# Stack the country columns of a wide table into one column `value_name`.
# reshape() numbers the stacked columns 1..11 in the "country" column
# (no `times` argument is given), following the order of `countries`.
to_long <- function(wide, value_name) {
  reshape(wide,
          varying = countries,
          v.names = value_name,
          timevar = "country",
          idvar = "year",
          direction = "long")
}

GDP.long <- to_long(GDP, "GDP")
EDUC.long <- to_long(EDUC, "EDUC")
EDUCX.long <- to_long(EDUCX, "EDUCX")
HE.long <- to_long(HE, "HE")

# cbind() pastes the tables side by side without matching keys, so make sure
# every table has the same (country, year) sequence before combining them.
keys_match <- vapply(
  list(EDUC.long, EDUCX.long, HE.long),
  function(tbl) {
    nrow(tbl) == nrow(GDP.long) &&
      isTRUE(all(tbl$country == GDP.long$country)) &&
      isTRUE(all(tbl$year == GDP.long$year))
  },
  logical(1)
)
stopifnot(all(keys_match))

# subset() keeps the first column matching each name, i.e. the country/year
# columns of GDP.long.
GEXH <- subset(cbind(GDP.long, EDUCX.long, HE.long),
               select = c("country", "year", "GDP", "EDUCX", "HE"))
summary(GEXH)
GEH <- subset(cbind(GDP.long, EDUC.long, HE.long),
              select = c("country", "year", "GDP", "EDUC", "HE"))
summary(GEH)
#
# Convert data to panel data format
library(plm)
# There are 11 individuals and 15 years
pdata <- pdata.frame(GEH, index = c("country", "year"), row.names = TRUE)
summary(pdata)
pdatax <- pdata.frame(GEXH, index = c("country", "year"), row.names = TRUE)
summary(pdatax)
#
# Model Comparison
# Linear functional form versus log-transformed functional forms
# Using pooled model
## Model 1 (linear): GDP = b0 + b1(EDUC) + b2(HE)
m1 <- lm(GDP ~ EDUC + HE, data = pdata)
summary(m1)
#
# Log-transformed variables; kept as free-standing vectors because the
# formulas below refer to them by name.
lnGDP <- log(pdata$GDP)
lnEDUC <- log(pdata$EDUC)
lnHE <- log(pdata$HE)
## Model 2 (log-linear): ln(GDP) = b0 + b1(EDUC) + b2(HE)
m2 <- lm(lnGDP ~ EDUC + HE, data = pdata)
summary(m2)
#
# Different dependent variables
library(lmtest)

# `gdp` was referenced below without ever being defined (R is case-sensitive
# and `GDP` is the wide input table), so define it from the panel data.
gdp <- as.numeric(pdata$GDP)

# Compare the SSR of the linear model with the SSR of the log model rescaled
# by the squared geometric mean of GDP.
lngdp.ssr <- sum(m2$residuals^2)
lngdp.ssr
gm_gdp <- exp(mean(log(gdp)))  # geometric mean of GDP for model 2
gm_gdp
(gm_gdp^2) * lngdp.ssr
sum(m1$residuals^2)
# The original analysis concluded that model 2 is preferred.

# Non-nested explanatory variables
# J-test
m2a <- lm(lnGDP ~ lnEDUC + lnHE, data = pdata)
summary(m2a)
jtest(m2, m2a)
#
# Model selection criteria to compare
AkaikeIC <- cbind(AIC(m2, k = 2), AIC(m2a, k = 2))
colnames(AkaikeIC) <- c("Model 2", "Model 2a")
rownames(AkaikeIC) <- "AIC"
AkaikeIC  # Akaike's Information Criterion
#
BayesianIC <- cbind(BIC(m2), BIC(m2a))
colnames(BayesianIC) <- c("Model 2", "Model 2a")
rownames(BayesianIC) <- "BIC"
BayesianIC  # Bayesian Information Criterion
modelselection <- rbind(AkaikeIC, BayesianIC)
modelselection
#
# Using graph: based on model 1
plot(pdata$HE, gdp, xlab = 'HE', ylab = 'GDP')
abline(lm(GDP ~ HE, data = pdata), col = 'red')
#
plot(pdata$EDUC, gdp, xlab = 'EDUC', ylab = 'GDP')
abline(lm(GDP ~ EDUC, data = pdata))
# Using graph: based on model 2
plot(lnHE, lnGDP, xlab = 'log(HE)', ylab = 'log(GDP)')
abline(lm(lnGDP ~ lnHE, data = pdata), col = 'blue')
#
plot(lnEDUC, lnGDP, xlab = 'log(EDUC)', ylab = 'log(GDP)')
abline(lm(lnGDP ~ lnEDUC, data = pdata), col = 'green')
#
# Double-log functional form model
lnGDP <- log(pdata$GDP)
lnEDUC <- log(pdata$EDUC)
lnHE <- log(pdata$HE)
# F test for fixed effects versus OLS
pFtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "individual")
pFtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "time")
pFtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "twoways")
## There is no time fixed effect, while an individual fixed effect is
## present in the model
#
# LM test for random effects versus OLS
plmtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "individual",
        type = "bp")
plmtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "time", type = "bp")
plmtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "twoways", type = "bp")
## Individual random effects are present in the model
## No time effects (p-value = 0.2991 > 0.05)
### After all tests, either individual fixed or individual random effects
### will be used
#
# Pooled OLS estimator
pooled <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "pooling")
summary(pooled)
#
# Fixed effects estimator (within estimator)
## Individual effects
idfixed <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "within",
               effect = "individual")
summary(idfixed)
owfixed.ideff <- summary(fixef(idfixed, effect = "individual"))
# Labels assume the fixed effects are returned in this country order --
# TODO confirm against the levels of the "country" index.
rownames(owfixed.ideff) <- c("BR", "CA", "ID", "JP", "LA", "MA", "PH", "SI",
                             "KR", "TH", "VN")
owfixed.ideff
# Dummy variable approach: one intercept per country, no common intercept
lm.idfixed <- lm(lnGDP ~ lnEDUC + lnHE + as.factor(country) - 1, data = pdata)
summary(lm.idfixed)
## Time effects
tfixed <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "within",
              effect = "time")
summary(tfixed)
owfixed.teff <- summary(fixef(tfixed, effect = "time"))
rownames(owfixed.teff) <- c(2000:2014)
owfixed.teff
## Heterogeneity across years
library(gplots)
plotmeans(GDP ~ year, main = "Heterogeineity across years", data = pdata)
#
# Random effects estimator
## Individual effects
idrandom <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "random",
                effect = "individual", random.method = "swar")
summary(idrandom)
#
# Hausman test for fixed versus random effects model
phtest(idfixed, idrandom)
### p-value is less than 0.05 so we reject the null hypothesis
### (H0: random effects)
### Individual fixed effects model is preferred
#
# Alternative Hausman test based on random effects
## Test the significance of group mean coefficients
library(car)
X.means <- cbind(Between(lnEDUC), Between(lnHE))
alt.random <- plm(lnGDP ~ lnEDUC + lnHE + X.means, data = pdata,
                  model = "random")
summary(alt.random)
lht(alt.random, c("X.means1", "X.means2"))
### Reject H0, so the group mean coefficients are not jointly equal to zero
### Fixed effects estimator is preferred
#
# Fixed time effects (year dummies) combined with individual effects,
# estimated once as random and once as fixed individual effects
ft.idrandom <- plm(lnGDP ~ lnEDUC + lnHE + as.factor(year), data = pdata,
                   model = "random", effect = "individual")
summary(ft.idrandom)
ft.idfixed <- plm(lnGDP ~ lnEDUC + lnHE + as.factor(year), data = pdata,
                  model = "within", effect = "individual")
summary(ft.idfixed)
phtest(ft.idrandom, ft.idfixed)
#
# Tests for cross-section dependence
pcdtest(idfixed, test = "cd")  # Pesaran CD test (default)
pcdtest(idfixed, test = "lm")  # Breusch-Pagan test
#
# Tests for autocorrelation
pdwtest(idfixed)  # Durbin-Watson test
pbgtest(idfixed)  # Breusch-Godfrey test
#
# Testing for unit roots/stationarity by Dickey-Fuller test (time series)
library(tseries)
# NOTE(review): this runs a single time-series ADF test on the whole GDP
# column of the panel, i.e. on all countries stacked together. Consider a
# panel unit root test (e.g. plm::purtest) -- confirm the intended test.
adf.test(pdata$GDP, k = 2)
## Since p-value > 0.05, a unit root is present
### If a unit root is present you can take the first difference of the
### variable
#
# Test for general heteroscedasticity
library(lmtest)
bptest(lnGDP ~ lnEDUC + lnHE + as.factor(country), data = pdata,
       studentize = FALSE)
## Since p-value < 0.05, general heteroscedasticity is present
#
# Correcting for heteroscedasticity and autocorrelation
# general heteroscedasticity & autocorrelation
HCV1 <- vcovHC(idfixed, method = "arellano")
# cross-section heteroscedasticity over i
HCV2 <- vcovHC(idfixed, method = "white2")
coeftest(idfixed, vcov. = HCV1)
coeftest(idfixed, vcov. = HCV2)
# For the dummy variable approach.
# NOTE(review): HCV1 is computed from the within model `idfixed`, whereas
# `lm.idfixed` additionally contains the country dummies; verify that
# coeftest() pairs coefficients and variances as intended here.
coeftest(lm.idfixed, vcov. = HCV1)
#
# Normally distributed errors
qqnorm(residuals(lm.idfixed), ylab = 'Residuals')
qqline(residuals(lm.idfixed))
hist(residuals(lm.idfixed), xlab = 'Residuals')
# Constant variance
plot(lm.idfixed$fitted.values, residuals(lm.idfixed),
     ylab = 'Residuals', xlab = 'Fitted value')
#
# Heterogeneity across years
plotmeans(GDP ~ year, data = pdata, main = "Heterogeneity across year")
plot(pdata$EDUC, pdata$GDP, xlab = "EDUC", ylab = "GDP")
abline(lm(pdata$GDP ~ pdata$EDUC), lwd = 3, col = "red")
## Regular OLS regression does not consider heterogeneity across groups
## or time
# Endogeneity is not tested here, because addressing it requires a different estimator.