# You are on page 1 of 4

# EC571 Advanced Econometrics

# Winter term 2017 - Final Project


# Human Capital and Economic Development: Effect of higher education on GDP per capita
# Panel data: 11 countries 15 years (2000-2014)
#
# getwd()
# Directory that holds the four wide-format CSV inputs. It is defined once so
# the location is not repeated (previously in two different spellings) in
# every call below; "~" is expanded by both setwd() and read.csv().
data_dir <- "~/Desktop/EC571/Panel Data"
# NOTE(review): setwd() ties the script to one machine. Nothing in this script
# reads a relative path, so the call can probably be dropped -- confirm first.
setwd(data_dir)
# Read data file and convert data into long format
library(foreign)
# Each file is read with its first row as the column header.
GDP <- read.csv(file = file.path(data_dir, "GDP.csv"), header = TRUE)
EDUCX <- read.csv(file = file.path(data_dir, "EDUCX.csv"), header = TRUE)
EDUC <- read.csv(file = file.path(data_dir, "EDUC.csv"), header = TRUE)
# Only the first 15 data rows of HE are read (presumably the file carries
# extra rows beyond the 2000-2014 sample -- confirm against the CSV).
HE <- read.csv(file = file.path(data_dir, "HE.csv"), header = TRUE, nrows = 15)
#
# Country columns of the wide tables. Their order determines the numeric
# `country` code written by reshape() (its default `times` is 1, 2, ...).
country_cols <- c("BR", "CA", "ID", "JP", "LA", "MA", "PH", "SI", "KR", "TH", "VN")

# Stack the country columns of one wide table into a single column named
# `value_name`, with `year` as the id variable and `country` as the time index.
to_long <- function(wide, value_name) {
  reshape(wide,
          varying = country_cols,
          v.names = value_name,
          timevar = "country",
          idvar = "year",
          direction = "long")
}

GDP.long <- to_long(GDP, "GDP")
EDUC.long <- to_long(EDUC, "EDUC")
EDUCX.long <- to_long(EDUCX, "EDUCX")
HE.long <- to_long(HE, "HE")

# Bind the long tables side by side and keep one copy of the key columns plus
# the three series. This relies on all long tables sharing the same row order.
stacked_x <- cbind(GDP.long, EDUCX.long, HE.long)
GEXH <- subset(stacked_x, select = c("country", "year", "GDP", "EDUCX", "HE"))
summary(GEXH)

stacked <- cbind(GDP.long, EDUC.long, HE.long)
GEH <- subset(stacked, select = c("country", "year", "GDP", "EDUC", "HE"))
summary(GEH)
#
# Turn both combined tables into plm panel data frames.
library(plm)
# The panel is indexed by country (11 units) and year (15 periods).
pdata <- pdata.frame(GEH, index = c("country", "year"), row.names = TRUE)
pdatax <- pdata.frame(GEXH, index = c("country", "year"), row.names = TRUE)
# Summaries are printed in the same order as before: EDUC panel, then EDUCX.
summary(pdata)
summary(pdatax)
#
# Model comparison on the pooled sample: dependent variable in levels versus
# in logs, both with EDUC and HE in levels as regressors.
#
# Model 1 (levels): GDP = b0 + b1*EDUC + b2*HE + e
m1 <- lm(GDP ~ EDUC + HE, data = pdata)
summary(m1)
#
# Logged series. They live in the global environment (not inside pdata) and
# are looked up from there by the formulas further down the script.
lnGDP <- log(pdata$GDP)
lnEDUC <- log(pdata$EDUC)
lnHE <- log(pdata$HE)
# Model 2 (log-level): log(GDP) = b0 + b1*EDUC + b2*HE + e
m2 <- lm(lnGDP ~ EDUC + HE, data = pdata)
summary(m2)
#
# Different dependent variables (GDP vs log GDP): the residual sums of squares
# are not directly comparable, so the log model's SSR is rescaled by the
# squared geometric mean of GDP before being set against model 1's SSR.
library(lmtest)
# SSR of the log-dependent-variable model.
lngdp.ssr <- sum(residuals(m2)^2); lngdp.ssr
# Geometric mean of GDP. The series is taken from pdata$GDP; the script never
# defines a lower-case `gdp` object, so the previous reference could not run.
gm_gdp <- exp(mean(log(pdata$GDP))); gm_gdp
# Rescaled SSR of model 2, comparable with the SSR of model 1 printed below.
(gm_gdp^2) * lngdp.ssr
# SSR of the levels model; residuals() avoids `$` partial matching on the fit.
sum(residuals(m1)^2) # the smaller figure marks the preferred form (author: model 2)
# Non-nested regressors: model 2 (EDUC, HE in levels) against model 2a
# (EDUC, HE in logs), both explaining log(GDP).
m2a <- lm(lnGDP ~ lnEDUC + lnHE, data = pdata)
summary(m2a)
# J test between the two specifications.
jtest(m2, m2a)
#
# Information criteria for the same pair of models, collected in 1 x 2
# matrices and then stacked into one comparison table.
model_labels <- c("Model 2", "Model 2a")
# Akaike's Information Criterion (penalty k = 2, the AIC() default).
AkaikeIC <- matrix(c(AIC(m2), AIC(m2a)), nrow = 1,
                   dimnames = list("AIC", model_labels))
AkaikeIC
#
# Bayesian Information Criterion.
BayesianIC <- matrix(c(BIC(m2), BIC(m2a)), nrow = 1,
                     dimnames = list("BIC", model_labels))
BayesianIC
modelselection <- rbind(AkaikeIC, BayesianIC)
modelselection
#
# Scatter plots with simple-regression lines.
#
# Two fixes relative to the earlier version:
#  * the response is pdata$GDP; the script never defines a lower-case `gdp`;
#  * series are passed through as.numeric(). Columns of a pdata.frame (and
#    their logs) are plm `pseries` objects, and plm provides its own plot()
#    method for that class whose second argument is not a y vector. Dropping
#    the class makes base graphics draw an ordinary x-y scatter.
#
# Levels specification (model 1)
plot(as.numeric(pdata$HE), as.numeric(pdata$GDP), xlab = 'HE', ylab = 'GDP')
abline(lm(GDP ~ HE, data = pdata), col = 'red')
#
plot(as.numeric(pdata$EDUC), as.numeric(pdata$GDP), xlab = 'EDUC', ylab = 'GDP')
abline(lm(GDP ~ EDUC, data = pdata))
# Log specification (the logged series are the globals lnGDP, lnEDUC, lnHE)
plot(as.numeric(lnHE), as.numeric(lnGDP), xlab = 'log(HE)', ylab = 'log(GDP)')
abline(lm(lnGDP ~ lnHE, data = pdata), col = 'blue')
#
plot(as.numeric(lnEDUC), as.numeric(lnGDP), xlab = 'log(EDUC)', ylab = 'log(GDP)')
abline(lm(lnGDP ~ lnEDUC, data = pdata), col = 'green')
#
# Double-log specification: log(GDP) on log(EDUC) and log(HE).
# The logged series are (re)created here so this section can run on its own.
lnGDP <- log(pdata$GDP)
lnEDUC <- log(pdata$EDUC)
lnHE <- log(pdata$HE)

# F tests: fixed effects against pooled OLS, for each effect structure.
pFtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "individual")
pFtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "time")
pFtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "twoways")
## Author's reading: individual fixed effects are present, time fixed
## effects are not.
#
# Breusch-Pagan LM tests: random effects against pooled OLS.
plmtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "individual", type = "bp")
plmtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "time", type = "bp")
plmtest(lnGDP ~ lnEDUC + lnHE, data = pdata, effect = "twoways", type = "bp")
## Author's reading: individual random effects are present;
## no time effects (p-value = 0.2991 > 0.05).
### Conclusion carried forward: use either individual fixed effects or
### individual random effects.
#
# Pooled OLS estimator (no individual or time effects).
pooled <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "pooling")
summary(pooled)
#
# Fixed effects (within) estimator with individual, i.e. country, effects.
idfixed <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "within", effect = "individual")
summary(idfixed)
# Estimated country intercepts, relabelled with the country codes. The labels
# are assigned by position, so they must follow the order of the country
# index (1..11) used when the data were reshaped.
country_labels <- c("BR", "CA", "ID", "JP", "LA", "MA", "PH", "SI", "KR", "TH", "VN")
owfixed.ideff <- summary(fixef(idfixed, effect = "individual"))
rownames(owfixed.ideff) <- country_labels
owfixed.ideff
# Dummy variable approach: the same model fitted by lm() with one dummy per
# country and no common intercept.
lm.idfixed <- lm(lnGDP ~ lnEDUC + lnHE + as.factor(country) - 1, data = pdata)
summary(lm.idfixed)
## Within estimator with time (year) effects instead of country effects.
tfixed <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model= "within", effect = "time")
summary(tfixed)
# Estimated year intercepts, relabelled by position with the sample years.
owfixed.teff <- summary(fixef(tfixed, effect = "time"))
rownames(owfixed.teff) <- 2000:2014
owfixed.teff
## Heterogeneity across years: mean GDP per year (plot title text kept as is).
library(gplots)
plotmeans(GDP ~ year, main="Heterogeineity across years", data = pdata)
#
# Random effects estimator with individual (country) effects, using the
# Swamy-Arora ("swar") variance components method.
idrandom <- plm(lnGDP ~ lnEDUC + lnHE, data = pdata, model = "random",
                effect = "individual", random.method = "swar")
summary(idrandom)
#
# Hausman test: individual fixed effects against individual random effects.
phtest(idfixed, idrandom)
### Author's reading: the p-value is below 0.05, so H0 (random effects) is
### rejected and the individual fixed effects model is preferred.
#
# Alternative Hausman test built on the random effects model: add the
# per-country means of the regressors and test their joint significance.
library(car)
# The object name `X.means` is part of the coefficient labels used below
# ("X.means1", "X.means2"), so it must not be renamed.
X.means <- cbind(Between(lnEDUC), Between(lnHE))
alt.random <- plm(lnGDP ~ lnEDUC + lnHE + X.means, data = pdata, model = "random")
summary(alt.random)
# Joint linear hypothesis: both group-mean coefficients equal zero.
lht(alt.random, c("X.means1", "X.means2"))
### Author's reading: H0 is rejected, so the group-mean coefficients are not
### jointly zero and the fixed effects estimator is preferred.
#
# Year dummies (fixed time effects) combined with individual effects that are
# treated first as random, then as fixed.
ft.idrandom <- plm(lnGDP ~ lnEDUC + lnHE + as.factor(year), data = pdata,
                   model = "random", effect = "individual")
summary(ft.idrandom)
ft.idfixed <- plm(lnGDP ~ lnEDUC + lnHE + as.factor(year), data = pdata,
                  model = "within", effect="individual")
summary(ft.idfixed)
# Hausman test between the two year-dummy specifications.
phtest(ft.idrandom, ft.idfixed)
#
# Cross-sectional dependence tests on the individual fixed effects model.
pcdtest(idfixed, test = "cd") # Pesaran CD test (default)
pcdtest(idfixed, test = "lm") # Breusch-Pagan LM test
#
# Serial correlation tests on the same model.
pdwtest(idfixed) # Durbin-Watson test
pbgtest(idfixed) # Breusch-Godfrey test
#
# Unit root / stationarity check with the augmented Dickey-Fuller test.
# NOTE(review): adf.test() is a single time-series test and is applied here to
# the stacked panel column, so the series runs across country boundaries.
# Consider a panel unit root test instead -- confirm with the author.
library(tseries)
adf.test(pdata$GDP, k=2)
## Author's reading: p-value > 0.05, so a unit root is present.
### If a unit root is present, the variable can be first-differenced.
#
# Breusch-Pagan test for general heteroscedasticity on the dummy variable
# (LSDV) form of the fixed effects model.
library(lmtest)
# studentize = FALSE requests the original, non-studentized statistic.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
bptest(lnGDP ~ lnEDUC + lnHE + as.factor(country), data = pdata, studentize = FALSE)
## Author's reading: p-value < 0.05, so general heteroscedasticity is present.
#
# Robust covariance matrices for the individual fixed effects model, followed
# by coefficient tests that use them.
library(lmtest)
# Arellano estimator: general heteroscedasticity and autocorrelation.
HCV1 <- vcovHC(idfixed, method = "arellano")
# White (white2) estimator: cross-section heteroscedasticity over i.
HCV2 <- vcovHC(idfixed, method = "white2")
coeftest(idfixed, vcov. = HCV1)
coeftest(idfixed, vcov. = HCV2)
# Same test on the dummy variable (LSDV) fit.
# NOTE(review): HCV1 was computed from `idfixed`, which has only the two slope
# coefficients, whereas `lm.idfixed` also carries the country dummies. Confirm
# that the matrix lines up with the coefficients being tested here.
coeftest(lm.idfixed, vcov. = HCV1)
#
# Residual diagnostics for the dummy variable (LSDV) fit.
#
# Normality: Q-Q plot with reference line, then a histogram. The histogram
# argument is kept as an expression because hist() builds its title from it.
qqnorm(residuals(lm.idfixed), ylab = 'Residuals')
qqline(residuals(lm.idfixed))
hist(residuals(lm.idfixed), xlab = 'Residuals')
#
# Constant variance: residuals against fitted values.
plot(lm.idfixed$fitted.values, residuals(lm.idfixed),
     ylab = 'Residuals', xlab = 'Fitted value')
#
# Heterogeneity across years: mean GDP per year.
library(gplots)
plotmeans(GDP ~ year, data = pdata, main = "Heterogeneity across year")
# Pooled scatter of GDP against EDUC with the simple OLS line. The series are
# passed through as.numeric() because pdata columns are plm `pseries` objects
# and plm provides its own plot() method for that class; dropping the class
# makes base graphics draw an ordinary x-y scatter.
plot(as.numeric(pdata$EDUC), as.numeric(pdata$GDP), xlab = "EDUC", ylab = "GDP")
abline(lm(GDP ~ EDUC, data = pdata), lwd = 3, col = "red")
## Regular OLS regression does not consider heterogeneity across groups or time

# Endogeneity is not necessarily tested since it is another story using another estimator

# You might also like