You are on page 1of 8

502_assignment_2.

R
ayur
Tue Feb 19 16:15:54 2019

# EC502 Problem Set #2_2 Ayur Tadimalla U29348077 sdg


# Load the packages used by this script, one per line.
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# silently destroys the caller's objects when the script is sourced into an
# existing session, and it does not reset attached packages or options anyway.
# Run the script in a fresh R session for a clean state instead.
library(foreign)   # read.dta() for the Stata data file
library(sandwich)  # vcovHC() robust covariance estimator
library(lmtest)    # coeftest()
library(pastecs)

## Loading required package: zoo


##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Raise the console print limit so long outputs are not truncated.
options(max.print = 5000)

# Locate the data file without calling setwd(): changing the working directory
# is a session-wide side effect and ties the script to one machine. The
# directory defaults to the original location and can be overridden with the
# EC502_DATA_DIR environment variable.
directory <- Sys.getenv(
  "EC502_DATA_DIR",
  unset = "/Users/ayur/Desktop/EC502/assignment"
)
data_path <- file.path(directory, "PS2_growth_data(2).dta")
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}

# Read the Stata file into a data frame.
data <- read.dta(data_path)

# Question 1: Summarize the data

# a) Summary stats for various variables


# Constants added to population growth in the n + g + d term
# (presumably technology growth and depreciation -- confirm against the
# problem set).
g <- 0.02
d <- 0.03

# p is divided by 100 (presumably a percentage converted to a fraction).
data$detrend_pop <- data$p / 100

# Level change in GDP per capita between the two years.
data$gdppc_diff <- data$gdppc00 - data$gdppc60

# Log change in GDP per capita, computed as a difference of logs.
# log(gdppc00 - gdppc60) is undefined whenever income fell (gdppc_diff < 0)
# and produced NaNs; the difference of logs is defined for every country with
# both observations and matches the growth measure used in the regressions.
data$ln_gdppc_diff <- log(data$gdppc00) - log(data$gdppc60)

# Log of s / 100 and log of (n + g + d).
data$detrend_ln_s <- log(data$s / 100)
data$ln_n_g_d <- log(data$detrend_pop + g + d)

# Log of school / 100.
data$ln_school <- log(data$school / 100)

# log(s) - log(n + g + d): the combined regressor used in the first plot.
data$s_n_g_d <- data$detrend_ln_s - data$ln_n_g_d

summary(data, digits = 5)

## cname ccode gdppc60


## Length:169 Length:169 Min. : 407.82
## Class :character Class :character 1st Qu.: 1137.61
## Mode :character Mode :character Median : 2463.70
## Mean : 3612.75
## 3rd Qu.: 4304.32
## Max. :16010.25
## NA's :61
## gdppc00 s africa asia

1
## Min. : 245.82 Min. : 2.0657 Min. :1 Min. :1
## 1st Qu.: 1932.70 1st Qu.: 9.9700 1st Qu.:1 1st Qu.:1
## Median : 5232.87 Median :14.8935 Median :1 Median :1
## Mean : 8868.48 Mean :15.6105 Mean :1 Mean :1
## 3rd Qu.:12111.79 3rd Qu.:20.9248 3rd Qu.:1 3rd Qu.:1
## Max. :47020.31 Max. :41.1998 Max. :1 Max. :1
## NA's :12 NA's :58 NA's :119 NA's :141
## weurope school p nonoil
## Min. :1 Min. : 0.0500 Min. : 0.11396 Min. :1
## 1st Qu.:1 1st Qu.: 1.3000 1st Qu.: 1.43119 1st Qu.:1
## Median :1 Median : 3.7000 Median : 2.35997 Median :1
## Mean :1 Mean : 4.1574 Mean : 2.24310 Mean :1
## 3rd Qu.:1 3rd Qu.: 6.1875 3rd Qu.: 2.83555 3rd Qu.:1
## Max. :1 Max. :20.1500 Max. :10.02759 Max. :1
## NA's :154 NA's :67 NA's :22
## oecd inter detrend_pop gdppc_diff
## Min. :1 Min. :1 Min. :0.0011396 Min. :-1518.4
## 1st Qu.:1 1st Qu.:1 1st Qu.:0.0143119 1st Qu.: 370.6
## Median :1 Median :1 Median :0.0235997 Median : 2486.5
## Mean :1 Mean :1 Mean :0.0224310 Mean : 5839.8
## 3rd Qu.:1 3rd Qu.:1 3rd Qu.:0.0283555 3rd Qu.:11254.6
## Max. :1 Max. :1 Max. :0.1002758 Max. :34510.2
## NA's :148 NA's :45 NA's :61
## ln_gdppc_diff detrend_ln_s ln_n_g_d ln_school
## Min. : 4.3723 Min. :-3.87968 Min. :-2.9732 Min. :-7.6009
## 1st Qu.: 7.0736 1st Qu.:-2.30560 1st Qu.:-2.7440 1st Qu.:-4.3428
## Median : 8.0838 Median :-1.90424 Median :-2.6091 Median :-3.2968
## Mean : 8.0052 Mean :-2.01632 Mean :-2.6392 Mean :-3.6332
## 3rd Qu.: 9.5005 3rd Qu.:-1.56424 3rd Qu.:-2.5465 3rd Qu.:-2.7826
## Max. :10.4490 Max. :-0.88674 Max. :-1.8953 Max. :-1.6020
## NA's :78 NA's :58 NA's :67
## s_n_g_d
## Min. :-1.34672
## 1st Qu.: 0.27310
## Median : 0.65926
## Mean : 0.62313
## 3rd Qu.: 1.11816
## Max. : 1.73250
## NA's :58
#print(data, max = NULL)

# b) Plotting key relationships of interest


# Scatter of log GDP per capita in 2000 against log(s) - log(n + g + d).
# The y axis shows the LOG of gdppc00, so the label says so explicitly.
plot(
  log(gdppc00) ~ s_n_g_d,
  data = data,
  xlab = "log(s) - log(n + g + d)",
  ylab = "log GDP per capita in 2000"
)

2
10
gdp per capita in 2000

9
8
7
6

−1.0 −0.5 0.0 0.5 1.0 1.5

data$s_n_g_d
# This is consistent with the Solow model, since countries with higher savings rates
## and lower population growth have higher GDP per capita.

# Question 2: Regression
# Regress log GDP per capita in 2000 on log(s) and log(n + g + d).
# Columns are referenced by bare name so that `data = data` is actually what
# supplies them; `data$col` inside a formula bypasses the data argument and
# makes predict(newdata = ...) silently ignore the new data.
lm_1 <- lm(
  log(gdppc00) ~ detrend_ln_s + ln_n_g_d,
  data = data,
  na.action = na.omit
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_1, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.00626 1.62081 1.8548 0.06635 .
## data$detrend_ln_s 1.23139 0.13634 9.0318 7.364e-15 ***
## data$ln_n_g_d -3.00882 0.54393 -5.5317 2.234e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_1)$adj.r.squared

## [1] 0.623688
# a) The signs match but the coefficients do not.
# b) No, since as we will see, there is omitted variable bias

# Question 3: Same regression with additional variable


# Same regression as before with log(school / 100) added as a regressor.
# Bare column names are used so that `data = data` supplies the variables and
# predict(newdata = ...) works; `data$col` in a formula bypasses `data`.
lm_2 <- lm(
  log(gdppc00) ~ detrend_ln_s + ln_n_g_d + ln_school,
  data = data,
  na.action = na.omit
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_2, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)

3
## (Intercept) 4.168099 1.316760 3.1654 0.002135 **
## data$detrend_ln_s 0.596075 0.188898 3.1555 0.002201 **
## data$ln_n_g_d -2.797786 0.404348 -6.9193 7.316e-10 ***
## data$ln_school 0.477398 0.094109 5.0728 2.195e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_2)$adj.r.squared

## [1] 0.7877058
# a) Yes, because one is "investing" in "human capital", which should increase output per capita.
# b) It removes omitted variable bias: as we can see, output per capita depends more on schooling
## than on the previous variables.
# c) Yes, the results are consistent wrt the signs, but not otherwise.

# Question 4: Unconditional Convergence


# Scatter of the 1960-2000 change in log GDP per capita against the initial
# (1960) log level. Explicit axis labels replace the default deparsed
# expressions, which are hard to read.
plot(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data,
  xlab = "log GDP per capita in 1960",
  ylab = "Change in log GDP per capita, 1960-2000"
)
(log(data$gdppc00) − log(data$gdppc60))

2
1
0
−1

6 7 8 9

log(data$gdppc60)
# Unconditional convergence: regress the change in log GDP per capita on the
# initial log level. Bare column names are used so that `data = data` supplies
# the variables; `data$col` in a formula bypasses the data argument and makes
# predict(newdata = ...) silently ignore the new data.
lm_3 <- lm(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_3, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.546180 0.474209 -1.1518 0.252006
## log(data$gdppc60) 0.158476 0.057291 2.7662 0.006693 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_3)$adj.r.squared

## [1] 0.03323648
# a) The slope represents how a one-unit increase in log gdppc in 1960 changes the growth in log output over 1960-2000.
## b) It's not consistent with unconditional convergence, since that requires countries with less income to grow faster.

# Question 5: Conditional Convergence


# Conditional convergence: regress the change in log GDP per capita on the
# log(s), log(n + g + d) and log(school) controls plus the initial log level.
# Bare column names are used so that `data = data` supplies the variables;
# `data$col` in a formula bypasses the data argument.
lm_4 <- lm(
  (log(gdppc00) - log(gdppc60)) ~
    detrend_ln_s + ln_n_g_d + ln_school + log(gdppc60),
  data = data,
  na.action = na.omit
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_4, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.99884 1.27071 2.3600 0.020623 *
## data$detrend_ln_s 0.67767 0.21155 3.2033 0.001928 **
## data$ln_n_g_d -1.29943 0.46921 -2.7694 0.006927 **
## data$ln_school 0.21569 0.10546 2.0453 0.043990 *
## log(data$gdppc60) -0.45122 0.10533 -4.2838 4.913e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_4)$adj.r.squared

## [1] 0.5079832
# a) beta 4 is negative, so conditional convergence implies faster convergence for low income countries.
# b) Yes, we have evidence of conditional convergence. Same for MLW.

# Question 6: Sub - sample stability


# Regional sub-samples: keep the rows whose region indicator equals 1.
# %in% never yields NA, so rows with a missing indicator are dropped.
in_weurope <- data$weurope %in% 1
in_asia <- data$asia %in% 1
in_africa <- data$africa %in% 1

data_weurope <- data[in_weurope, ]
data_asia <- data[in_asia, ]
data_africa <- data[in_africa, ]
#print(data_weurope); print(data_asia); print(data_africa)

# Unconditional convergence for western Europe


# Western Europe: change in log GDP per capita against the initial (1960) log
# level. Bare column names make `data = data_weurope` the actual source of the
# variables; explicit labels replace the default deparsed expressions.
plot(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data_weurope,
  xlab = "log GDP per capita in 1960",
  ylab = "Change in log GDP per capita, 1960-2000"
)

5
(log(data_weurope$gdppc00) − log(data_weurope$gdppc60))

1.6
1.4
1.2
1.0
0.8
0.6

8.5 9.0 9.5

log(data_weurope$gdppc60)

# Unconditional convergence regression on the Western Europe sub-sample.
# Bare column names are used so that `data = data_weurope` supplies the
# variables; `data_weurope$col` in a formula bypasses the data argument and
# makes predict(newdata = ...) silently ignore the new data.
lm_5 <- lm(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data_weurope
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_5, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.57372 1.41483 4.6463 0.0004576 ***
## log(data_weurope$gdppc60) -0.60365 0.15832 -3.8128 0.0021543 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_5)$adj.r.squared

## [1] 0.6375867
# Unconditional convergence for Asia
# Asia: change in log GDP per capita against the initial (1960) log level.
# Bare column names make `data = data_asia` the actual source of the
# variables; explicit labels replace the default deparsed expressions.
plot(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data_asia,
  xlab = "log GDP per capita in 1960",
  ylab = "Change in log GDP per capita, 1960-2000"
)

6
(log(data_asia$gdppc00) − log(data_asia$gdppc60))

2.5
2.0
1.5
1.0
0.5

6.5 7.0 7.5 8.0 8.5

log(data_asia$gdppc60)
# Unconditional convergence regression on the Asia sub-sample.
# Bare column names are used so that `data = data_asia` supplies the
# variables; `data_asia$col` in a formula bypasses the data argument and
# makes predict(newdata = ...) silently ignore the new data.
lm_6 <- lm(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data_asia
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_6, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.87944 1.85076 -1.0155 0.3299
## log(data_asia$gdppc60) 0.45412 0.25876 1.7550 0.1047
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_6)$adj.r.squared

## [1] 0.08739205
# Unconditional convergence for Africa
# Africa: change in log GDP per capita against the initial (1960) log level.
# Bare column names make `data = data_africa` the actual source of the
# variables; explicit labels replace the default deparsed expressions.
plot(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data_africa,
  xlab = "log GDP per capita in 1960",
  ylab = "Change in log GDP per capita, 1960-2000"
)

7
(log(data_africa$gdppc00) − log(data_africa$gdppc60))

1.5
0.5
−0.5
−1.5

6.0 6.5 7.0 7.5 8.0 8.5

log(data_africa$gdppc60)
# Unconditional convergence regression on the Africa sub-sample.
# Bare column names are used so that `data = data_africa` supplies the
# variables; `data_africa$col` in a formula bypasses the data argument and
# makes predict(newdata = ...) silently ignore the new data.
lm_7 <- lm(
  (log(gdppc00) - log(gdppc60)) ~ log(gdppc60),
  data = data_africa
)
# Coefficient t tests using the vcovHC covariance estimator.
coeftest(lm_7, vcov. = vcovHC)

##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.810391 1.032669 0.7848 0.4373
## log(data_africa$gdppc60) -0.080718 0.145623 -0.5543 0.5825
# Adjusted R squared
# summary() of an lm fit takes no `data` argument; it was silently ignored,
# so it is dropped here. The value returned is unchanged.
summary(lm_7)$adj.r.squared

## [1] -0.01911344
# b) They show different degrees of convergence, since factors like s, g, n and delta,
## vary across continents but not within continents.

################################################################################################

You might also like