# [document-viewer residue] You are on page 1 of 6

#b2000431 + Le Tran Anh Tuyet

# ngay 23/9/2022

# Chapter 2, p. 64 - Computer Exercise C2 (ceosal2 data)
# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V1 = salary, V6 = ceoten, V10 = log(salary).
#   file:   quoted path of the .csv file to read.
#   header: TRUE takes the first row of the file as column names,
#           FALSE does not.
# A single column is addressed as data$column.
dCEO <- read.csv(file = 'E:/ATUYET/ceosal2.csv', header = FALSE)

# (i) Find the average salary and the average tenure in the sample.
mean(dCEO$V1)
mean(dCEO$V6)

# (ii) How many CEOs are in their first year as CEO (that is, ceoten = 0)?
#      What is the longest tenure as a CEO?
# subset(data, condition) keeps only the rows that satisfy the condition.
subset(dCEO, V6 == 0)
sum(dCEO$V6 == 0)
# The original notes record 6 such CEOs - confirm against the count above.

max(dCEO$V6)

# Rows that attain the longest tenure (37 years in the original notes).
subset(dCEO, V6 == max(V6))

# (iii) Estimate the simple regression model
#       log(salary) = b0 + b1*ceoten + u,
#       and report your results in the usual form.
# Regression syntax: lm(formula, data = data_frame)
regC2.1 <- lm(V10 ~ V6, data = dCEO)
summary(regC2.1)
# b0_hat = 6.505498; b1_hat = 0.009724
# log(salary)_hat = 6.505498 + 0.009724*ceoten

# What is the (approximate) predicted percentage increase in salary
# given one more year as a CEO?
# delta(ceoten) = +1
# delta(log(salary)) = b1_hat*delta(ceoten) = 0.009724*(+1) = 0.009724,
# i.e. roughly a 0.9724% increase in salary.

# Chapter 2, p. 64 - Computer Exercise C3 (sleep75 data)
# The file is read without a header row, so R names the columns V1, V2, ...
# NOTE(review): V21 is taken to be sleep and V26 totwrk (both in minutes),
# as implied by the answers recorded below - confirm against the data file.
dSLEEP <- read.csv(file = 'E:/ATUYET/sleep75.csv', header = FALSE)

# (i) Report your results in equation form along with the number of
#     observations and R2.
#     What does the intercept in this equation mean?
regC3.1 <- lm(V21 ~ V26, data = dSLEEP)
summary(regC3.1)
# sleep_hat = 3586.37695 - 0.15075*totwrk
# Number of observations = 706, R2 = 0.1033
# Intercept: if totwrk = 0 then predicted sleep = 3586.37695.

# (ii) If totwrk increases by 2 hours, by how much is sleep estimated to
#      fall? Do you find this to be a large effect?
# delta(totwrk) = +2 hours = +120 minutes
# delta(sleep_hat) = b1_hat*delta(totwrk) = -0.15075*(+120) = -18.09
# This is not a large effect: when totwrk rises by 120 minutes, predicted
# sleep falls by only 18.09 minutes.

# 30/9/2022
# Chapter 2, p. 64 - Computer Exercise C4 (wage2 data)
# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V1 = wage, V3 = IQ, V17 = log(wage).
dWAGE <- read.csv(file = 'E:/ATUYET/wage2.csv', header = FALSE)

# (i) Find the average salary and average IQ in the sample.
#     What is the sample standard deviation of IQ?
mean(dWAGE$V1)
mean(dWAGE$V3)
sd(dWAGE$V3)

# (ii) Estimate a simple regression model where a one-point increase in IQ
#      changes wage by a constant dollar amount.
#      Use this model to find the predicted increase in wage for an
#      increase in IQ of 15 points.
#      Does IQ explain most of the variation in wage?
regC4.1 <- lm(V1 ~ V3, data = dWAGE)
summary(regC4.1)
# b0_hat = 116.9916, b1_hat = 8.3031
# wage_hat = 116.9916 + 8.3031*IQ
# delta(IQ) = +15
# delta(wage_hat) = b1_hat*delta(IQ) = 8.3031*(+15) = 124.5465
# If IQ increases by 15 points, predicted wage increases by 124.5465.
# NOTE(review): the R-squared printed by summary(regC4.1) answers the
# "most of the variation" question; it was not recorded in the original notes.

# (iii) If IQ increases by 15 points, what is the approximate percentage
#       increase in predicted wage?
regC4.3 <- lm(V17 ~ V3, data = dWAGE)
summary(regC4.3)
# b0_hat = 5.8869942, b1_hat = 0.0088072
# delta(IQ) = +15
# delta(log(wage)) = b1_hat*delta(IQ) = 0.0088072*(+15) = 0.132108,
# i.e. roughly a 13.2% increase in predicted wage
# (0.88072% per single IQ point).

# Chapter 3, p. 110 - Computer Exercise C1 (bwght data)
# bwght = b0 + b1*cigs + b2*faminc + u
# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V1 = faminc, V4 = bwght, V10 = cigs.
dBWGHT <- read.csv(file = 'E:/ATUYET/bwght.csv', header = FALSE)

# (i) What is the most likely sign for b2?
# Positive: with a higher family income the mother's nutrition improves,
# so she is healthier during pregnancy and the newborn's weight rises too.

# (ii) Do you think cigs and faminc are likely to be correlated?
#      Explain why the correlation might be positive or negative.
# Positive: with a higher income people can afford to buy more cigarettes
# to satisfy their demand.
# Negative: a higher income tends to go with better education, so people
# buy other products instead of cigarettes because they know cigarettes
# are harmful to health.

# (iii) Now, estimate the equation with and without faminc, using the data
#       in BWGHT.RAW.
#       Report the results in equation form, including the sample size and
#       R-squared.
#       Discuss your results, focusing on whether adding faminc
#       substantially changes the estimated effect of cigs on bwght.
# Without faminc:
regC1.3.1 <- lm(V4 ~ V10, data = dBWGHT)
summary(regC1.3.1)
# With faminc:
regC1.3.2 <- lm(V4 ~ V10 + V1, data = dBWGHT)
summary(regC1.3.2)

# Sample correlation between faminc and cigs, and the auxiliary regression
# of faminc on cigs whose slope is delta1~ below.
cor(dBWGHT$V1, dBWGHT$V10)
lm(V1 ~ V10, data = dBWGHT)
# b1_cigs^ : slope on cigs from the regression that includes faminc
# b1_cigs~ : slope on cigs from the regression without faminc
# b1_cigs~ = b1_cigs^ + delta1~ * b2_faminc^
# b2_faminc > 0 and cor(faminc, cigs) < 0  ==>  delta1~ * b2 < 0
# Hence b1_cigs^ > b1_cigs^ + delta1~ * b2 = b1_cigs~

# 07/10/2022
# Chapter 3, p. 110 - Computer Exercise C2 (hprice1 data)
# price = b0 + b1*sqrft + b2*bdrms + u
# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V1 = price (thousands of dollars), V3 = bdrms,
# V5 = sqrft.
dprice <- read.csv(file = 'D:/ATuyet/ATUYET/hprice1.csv', header = FALSE)

# (i) Write out the results in equation form.
regC3.2.1 <- lm(V1 ~ V5 + V3, data = dprice)
summary(regC3.2.1)
# b0^ = -19.31500, b1^ (sqrft) = 0.13, b2^ (bdrms) = 15.2
# price_hat = -19.315 + 0.13*sqrft + 15.2*bdrms

# (ii) What is the estimated increase in price for a house with one more
#      bedroom, holding square footage constant?
# delta(bdrms) = +1
# delta(price_hat) = b2^*delta(bdrms) = 15.2*(+1) = 15.2
# The house price increases by $15,200 with one more bedroom.

# (iii) What is the estimated increase in price for a house with an
#       additional bedroom that is 140 square feet in size?
#       Compare this to your answer in part (ii).
# delta(bdrms) = +1
# delta(sqrft) = +140
# delta(price_hat) = b1^*delta(sqrft) + b2^*delta(bdrms)
#                  = (+140)*0.13 + (+1)*15.2 = 33.4
# The house price increases by $33,400 with one more bedroom that is
# 140 square feet in size.
# Adding only a bedroom raises the price by less than adding a bedroom
# together with the extra floor area.

# (iv) What percentage of the variation in price is explained by square
#      footage and number of bedrooms?
# The question concerns both regressors, so R-squared must come from the
# model that contains sqrft and bdrms.
regC3.2.4 <- lm(V1 ~ V5 + V3, data = dprice)
summary(regC3.2.4)
# NOTE(review): the original notes recorded R2 = 0.6208 = 62.08% and
# df = 86, but those came from a fit on sqrft alone (k = 1); re-read R2
# from the summary above.
# Sample size: df = n - k - 1  <=>  n = df + k + 1.
# With the one-regressor figures: n = 86 + 1 + 1 = 88.

# (v) The first house in the sample has sqrft = 2,438 and bdrms = 4.
#     Find the predicted selling price for this house from the OLS
#     regression line.
# price^ = -19.315 + 2438*0.13 + 4*15.2 = 358.425 (thousands of dollars)

# (vi) The actual selling price of the first house in the sample was
#      $300,000 (so price = 300).
#      Find the residual for this house.
#      Does it suggest that the buyer underpaid or overpaid for the house?
# Residual: u_i^ = y_i - y_i^ = 300 - 358.425 = -58.425
# The residual is negative, which suggests the buyer underpaid.

# Chapter 3, p. 111 - Computer Exercise C3 (ceosal2 data)
# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V6 = ceoten, V8 = profits, V10 = log(salary),
# V11 = log(sales), V12 = log(mktval).
dCEO2 <- read.csv(file = 'D:/ATUYET/ceosal2.csv', header = FALSE)

# (i) Estimate a model relating annual salary to firm sales and market
#     value. Make the model of the constant elasticity variety for both
#     independent variables. Write the results out in equation form.
# Level form:               salary = b0 + b1*SALE + b2*MV + u
# Constant-elasticity form: log(salary) = b0 + b1*log(SALE) + b2*log(MV) + u
# where b1 = (delta(salary)/salary) / (delta(SALE)/SALE)
regC3.3.1 <- lm(V10 ~ V11 + V12, data = dCEO2)
summary(regC3.3.1)
# log(salary) = 4.62092 + 0.16213*log(SALE) + 0.10671*log(MV)
# R2 = 0.2991 = 29.91%

# (ii) Add profits to the model from part (i).
#      Why can this variable not be included in logarithmic form?
#      Would you say that these firm performance variables explain most of
#      the variation in CEO salaries?
# log(salary) = b0 + b1*log(SALE) + b2*log(MV) + b3*profits + u
regC3.3.2 <- lm(V10 ~ V11 + V12 + V8, data = dCEO2)
summary(regC3.3.2)
# log(salary) = 4.687 + 0.1614*log(SALE) + 0.0975*log(MV)
#               + 0.00003566*profits
# R2 = 0.2993 = 29.93%
# profits can be negative as well as positive, and the log of a
# non-positive number is undefined, so profits enters in level form.
# With R2 at about 30%, these variables do not explain most of the
# variation in CEO salaries.

# (iii) Add the variable ceoten to the model in part (ii).
#       What is the estimated percentage return for another year of CEO
#       tenure, holding other factors fixed?
# log(salary) = b0 + b1*log(SALE) + b2*log(MV) + b3*profits + b4*ceoten + u
regC3.3.3 <- lm(V10 ~ V11 + V12 + V8 + V6, data = dCEO2)
summary(regC3.3.3)
# log(salary) = 4.558 + 0.1622*log(SALE) + 0.01018*log(MV)
#               + 0.00002905*profits + 0.01168*ceoten
# NOTE(review): 0.01018 on log(MV) is copied from the original notes; next
# to 0.0975 in part (ii) it looks like a dropped digit (0.1018?) - confirm
# against the summary output.
# One more year of tenure: 100*0.01168 = about a 1.17% higher salary.

# (iv) Find the sample correlation coefficient between the variables
#      log(mktval) and profits.
#      Are these variables highly correlated?
#      What does this say about the OLS estimators?
# Correlation syntax: cor(data$variable1, data$variable2)
cor(dCEO2$V12, dCEO2$V8)
# The correlation coefficient between log(mktval) and profits is 0.77.
# The correlation is high, but it does not violate Assumption 3
# (no perfect collinearity), so OLS can still be computed.

# 21/10/2022
# Chapter 3, p. 112 - Computer Exercise C6 (wage2 data)
# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V3 = IQ, V5 = educ, V17 = log(wage).
dwage2 <- read.csv(file = 'D:/ATUYET/wage2.csv', header = FALSE)

# (i) Run a simple regression of IQ on educ to obtain the slope
#     coefficient, say, delta1~.
regC3.6.1 <- lm(V3 ~ V5, data = dwage2)
summary(regC3.6.1)
# delta1~ = 3.5338

# (ii) Run the simple regression of log(wage) on educ, and obtain the
#      slope coefficient, b1~.
regC3.6.2 <- lm(V17 ~ V5, data = dwage2)
summary(regC3.6.2)
# b1~ = 0.059839

# (iii) Run the multiple regression of log(wage) on educ and IQ,
#       and obtain the slope coefficients, b1^ and b2^, respectively.
regC3.6.3 <- lm(V17 ~ V5 + V3, data = dwage2)
summary(regC3.6.3)
# b1^ = 0.0391199
# b2^ = 0.0058631

# (iv) Verify that b1~ = b1^ + b2^ * delta1~
# We have b1~ = 0.059839; b1^ = 0.0391199; b2^ = 0.0058631;
# delta1~ = 3.5338
# 0.0391199 + 0.0058631*3.5338 = 0.059839
# So b1~ = b1^ + b2^ * delta1~ holds.

# Chapter 4, p. 164 - Computer Exercise C1 (vote1 data)
# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u

# (i) What is the interpretation of b1?
# Holding expendB and prtystrA fixed, a 1% increase in candidate A's
# campaign spending changes A's vote share by about b1/100 percentage
# points (level-log model).

# (ii) In terms of the parameters, state the null hypothesis that a 1%
#      increase in A's expenditures is offset by a 1% increase in B's
#      expenditures.
# H0 : b1 + b2 = 0
# H0 says that raising A's spending by z% and B's spending by z% leaves
# A's vote percentage unchanged.
# The alternative hypothesis is:
# H1 : b1 + b2 =/= 0
# H1 says that raising A's spending by z% and B's spending by z% does
# change A's vote percentage.

# The file is read without a header row, so R names the columns V1, V2, ...
# Columns used below: V4 = voteA, V7 = prtystrA, V8 = log(expendA),
# V9 = log(expendB).
dvote1 <- read.csv(file = 'D:/ATUYET/vote1.csv', header = FALSE)

# (iii) Estimate the model and report the results in the usual form.
#       Do A's expenditures affect the outcome?
#       What about B's expenditures? Can you use these results to test the
#       hypothesis in part (ii)?
regC4.1.3 <- lm(V4 ~ V8 + V9 + V7, data = dvote1)
summary(regC4.1.3)
# voteA = 45.07893 + 6.08332*log(expendA) - 6.61542*log(expendB)
#         + 0.15196*prtystrA
# A 1% increase in A's spending raises A's vote share by about
# 6.08332/100 = 0.0608 percentage points.
# A 1% increase in B's spending lowers A's vote share by about
# 6.61542/100 = 0.0662 percentage points.
# n = 173, k = 3 -> df = n - k - 1 = 173 - 3 - 1 = 169, 5% significance level
# t-value_A = 15.924
# t-value_B = -17.46
# c(0.05) = 1.654
# Both t-value_A and |t-value_B| exceed c(0.05), so H0: b_j = 0 is rejected
# for each: A's and B's expenditures both affect the outcome.
# These results alone cannot test H0: b1 + b2 = 0, because the standard
# error of b1^ + b2^ is not reported; see part (iv).

# (iv) Estimate a model that directly gives the t statistic for testing the
#      hypothesis in part (ii).
#      What do you conclude? (Use a two-sided alternative.)
# voteA = b0 + b1*log(expendA) + b2*log(expendB) + b3*prtystrA + u     (1)
# Let theta = b1 + b2  =>  b1 = theta - b2; substitute into (1).
# H0 : b1 + b2 = 0  <=>  H0 : theta = 0
# H1 : b1 + b2 =/= 0  <=>  H1 : theta =/= 0
# voteA = b0 + (theta - b2)*log(expendA) + b2*log(expendB)
#         + b3*prtystrA + u
# voteA = b0 + theta*log(expendA) - b2*[log(expendA) - log(expendB)]
#         + b3*prtystrA + u
# newexp below is log(expendA) - log(expendB), so its coefficient is -b2
# and the coefficient on log(expendA) (V8) is theta.
dvote1$newexp <- dvote1$V8 - dvote1$V9
regC4.1.4 <- lm(V4 ~ V8 + newexp + V7, data = dvote1)
summary(regC4.1.4)
# voteA = 45.07893 - 0.53210*log(expendA)
#         + 6.61542*[log(expendA) - log(expendB)] + 0.15196*prtystrA
# |t| for theta = 0.998 < 1.65 (df > 120) at the 10% significance level
# => fail to reject H0.
# b1 + b2 = 0  =>  b1 = -b2
# The data are consistent with a 1% increase in A's expenditures being
# offset by a 1% increase in B's expenditures.

# [document-viewer residue] You might also like