# You are on page 1 of 9

#CREATE DATE VARIABLE

getwd() # verify your working directory

# Set the working directory to the file location.
# (This note used to trail the setwd() call and had wrapped onto its own line
# as bare text, which R cannot parse.)
setwd("/Users/neerajgupta/Documents/MS - Analytics/Semester - 2/RiskModel&Asses - 515:90/Homework 1")

# Importing Data From Yahoo Finance
#
# For each ticker: read its CSV, inspect it, parse the Date column into a
# Date vector, and finally swap that vector in for the original first column.

# Folder holding one CSV per ticker. Kept on a single line: a path literal
# that wraps across lines contains newline characters and no longer names
# the file.
data.dir <- "/Users/neerajgupta/Documents/MS - Analytics/Semester - 2/RiskModel&Asses - 515:90/Homework 1/Datasets"

# AMZN
data.AMZN <- read.csv(file.path(data.dir, "AMZN.csv"), header = TRUE)
head(data.AMZN) # check data
class(data.AMZN$Date) # check Date variable
date.AMZN <- as.Date(data.AMZN$Date, format = "%Y-%m-%d") # parsed dates
head(date.AMZN) # verify format

# GOOG
data.GOOG <- read.csv(file.path(data.dir, "GOOG.csv"), header = TRUE)
head(data.GOOG) # check data
class(data.GOOG$Date) # check Date variable
date.GOOG <- as.Date(data.GOOG$Date, format = "%Y-%m-%d") # parsed dates
head(date.GOOG) # verify format

# JPM
data.JPM <- read.csv(file.path(data.dir, "JPM.csv"), header = TRUE)
head(data.JPM) # check data
class(data.JPM$Date) # check Date variable
date.JPM <- as.Date(data.JPM$Date, format = "%Y-%m-%d") # parsed dates
head(date.JPM) # verify format

# HSBC
data.HSBC <- read.csv(file.path(data.dir, "HSBC.csv"), header = TRUE)
head(data.HSBC) # check data
class(data.HSBC$Date) # check Date variable
date.HSBC <- as.Date(data.HSBC$Date, format = "%Y-%m-%d") # parsed dates
head(date.HSBC) # verify format

# CVS
data.CVS <- read.csv(file.path(data.dir, "CVS.csv"), header = TRUE)
head(data.CVS) # check data
class(data.CVS$Date) # check Date variable
date.CVS <- as.Date(data.CVS$Date, format = "%Y-%m-%d") # parsed dates
head(date.CVS) # verify format

# UNH
data.UNH <- read.csv(file.path(data.dir, "UNH.csv"), header = TRUE)
head(data.UNH) # check data
class(data.UNH$Date) # check Date variable
date.UNH <- as.Date(data.UNH$Date, format = "%Y-%m-%d") # parsed dates
head(date.UNH) # verify format

# Replace the Date variable in each data frame with its own parsed dates.
# Each ticker uses the vector parsed from its own file; a single shared
# vector would carry the last ticker's dates into every data frame.
# `[, -1]` drops the original Date column; the new first column is `date`.
data.AMZN <- cbind(date = date.AMZN, data.AMZN[, -1])
data.GOOG <- cbind(date = date.GOOG, data.GOOG[, -1])
data.JPM <- cbind(date = date.JPM, data.JPM[, -1])
data.HSBC <- cbind(date = date.HSBC, data.HSBC[, -1])
data.CVS <- cbind(date = date.CVS, data.CVS[, -1])
data.UNH <- cbind(date = date.UNH, data.UNH[, -1])

# Put every data frame into chronological order (a no-op if the rows are
# already ordered by date).
data.AMZN <- data.AMZN[order(data.AMZN[["date"]]), ]
data.GOOG <- data.GOOG[order(data.GOOG[["date"]]), ]
data.JPM <- data.JPM[order(data.JPM[["date"]]), ]
data.HSBC <- data.HSBC[order(data.HSBC[["date"]]), ]
data.CVS <- data.CVS[order(data.CVS[["date"]]), ]
data.UNH <- data.UNH[order(data.UNH[["date"]]), ]

# Class of each object before the conversion
class(data.AMZN)
class(data.GOOG)
class(data.JPM)
class(data.HSBC)
class(data.CVS)
class(data.UNH)

# Convert to xts: columns 2 to 7 become the data, column 1 becomes the
# time index.
library(xts)
data.AMZN <- xts(x = data.AMZN[, 2:7], order.by = data.AMZN[[1]])
data.GOOG <- xts(x = data.GOOG[, 2:7], order.by = data.GOOG[[1]])
data.JPM <- xts(x = data.JPM[, 2:7], order.by = data.JPM[[1]])
data.HSBC <- xts(x = data.HSBC[, 2:7], order.by = data.HSBC[[1]])
data.CVS <- xts(x = data.CVS[, 2:7], order.by = data.CVS[[1]])
data.UNH <- xts(x = data.UNH[, 2:7], order.by = data.UNH[[1]])

# Class of each object after the conversion
class(data.AMZN)
class(data.GOOG)
class(data.JPM)
class(data.HSBC)
class(data.CVS)
class(data.UNH)

# Rename variables: generic column names cannot be used once several stocks
# are handled together, so each column gets its ticker as a prefix.

# Column names before renaming
names(data.AMZN)
names(data.GOOG)
names(data.JPM)
names(data.HSBC)
names(data.CVS)
names(data.UNH)

names(data.AMZN) <- c(
  "AMZN.Open", "AMZN.High", "AMZN.Low",
  "AMZN.Close", "AMZN.Adjusted", "AMZN.Volume"
)
names(data.GOOG) <- c(
  "GOOG.Open", "GOOG.High", "GOOG.Low",
  "GOOG.Close", "GOOG.Adjusted", "GOOG.Volume"
)
names(data.JPM) <- c(
  "JPM.Open", "JPM.High", "JPM.Low",
  "JPM.Close", "JPM.Adjusted", "JPM.Volume"
)
names(data.HSBC) <- c(
  "HSBC.Open", "HSBC.High", "HSBC.Low",
  "HSBC.Close", "HSBC.Adjusted", "HSBC.Volume"
)
names(data.CVS) <- c(
  "CVS.Open", "CVS.High", "CVS.Low",
  "CVS.Close", "CVS.Adjusted", "CVS.Volume"
)
names(data.UNH) <- c(
  "UNH.Open", "UNH.High", "UNH.Low",
  "UNH.Close", "UNH.Adjusted", "UNH.Volume"
)

# First rows after renaming
head(data.AMZN)
head(data.GOOG)
head(data.JPM)
head(data.HSBC)
head(data.CVS)
head(data.UNH)

# Plot each security's closing prices to check for missing data.

# Full series (verify that we have complete data)
plot(data.AMZN$AMZN.Close)
plot(data.GOOG$GOOG.Close)
plot(data.JPM$JPM.Close)
plot(data.HSBC$HSBC.Close)
plot(data.CVS$CVS.Close)
plot(data.UNH$UNH.Close)

# Same plots with a gap (simulate missing data): observations 400 to 500 are
# removed before plotting. `data.missing` is reused for every ticker.
data.missing <- data.AMZN[-(400:500), ]
plot(data.missing$AMZN.Close)

data.missing <- data.GOOG[-(400:500), ]
plot(data.missing$GOOG.Close)

data.missing <- data.JPM[-(400:500), ]
plot(data.missing$JPM.Close)

data.missing <- data.HSBC[-(400:500), ]
plot(data.missing$HSBC.Close)

data.missing <- data.CVS[-(400:500), ]
plot(data.missing$CVS.Close)

data.missing <- data.UNH[-(400:500), ]
plot(data.missing$UNH.Close)

# Check the dimensions (rows and columns) to verify that the data covers
# five years.
dim(data.AMZN)
dim(data.GOOG)
dim(data.JPM)
dim(data.HSBC)
dim(data.CVS)
dim(data.UNH)

# Summary statistics for every column of every series
summary(data.AMZN)
summary(data.GOOG)
summary(data.JPM)
summary(data.HSBC)
summary(data.CVS)
summary(data.UNH)

# Keep only the first row of each series
AMZN.onlyFirst <- data.AMZN[1, ]
GOOG.onlyFirst <- data.GOOG[1, ]
JPM.onlyFirst <- data.JPM[1, ]
HSBC.onlyFirst <- data.HSBC[1, ]
CVS.onlyFirst <- data.CVS[1, ]
UNH.onlyFirst <- data.UNH[1, ]

# Keep everything except the first row of each series
AMZN.delFirst <- data.AMZN[-1, ]
GOOG.delFirst <- data.GOOG[-1, ]
JPM.delFirst <- data.JPM[-1, ]
HSBC.delFirst <- data.HSBC[-1, ]
CVS.delFirst <- data.CVS[-1, ]
UNH.delFirst <- data.UNH[-1, ]

# Look at the first and last row of each series.
# Every series is indexed with its own row count, so the "last row" is
# correct even when the series differ in length.
data.AMZN[c(1, nrow(data.AMZN)), ]
data.GOOG[c(1, nrow(data.GOOG)), ]
data.JPM[c(1, nrow(data.JPM)), ]
data.HSBC[c(1, nrow(data.HSBC)), ]
data.CVS[c(1, nrow(data.CVS)), ]
data.UNH[c(1, nrow(data.UNH)), ]

# Look at the first three rows and the last row of each series.
data.AMZN[c(1:3, nrow(data.AMZN)), ]
data.GOOG[c(1:3, nrow(data.GOOG)), ]
data.JPM[c(1:3, nrow(data.JPM)), ]
data.HSBC[c(1:3, nrow(data.HSBC)), ]
data.CVS[c(1:3, nrow(data.CVS)), ]
data.UNH[c(1:3, nrow(data.UNH)), ]

# Keep a single column per series: column 4, selected once by position
# (`*.onlyPrice`) and once by name (`*.onlyPrice2`), then show the first
# three rows and the last row of the positional selection.

# AMZN
AMZN.onlyPrice <- data.AMZN[, 4]
AMZN.onlyPrice2 <- data.AMZN$AMZN.Close
AMZN.onlyPrice[c(1:3, nrow(AMZN.onlyPrice)), ]

# GOOG
GOOG.onlyPrice <- data.GOOG[, 4]
GOOG.onlyPrice2 <- data.GOOG$GOOG.Close
GOOG.onlyPrice[c(1:3, nrow(GOOG.onlyPrice)), ]

# JPM
JPM.onlyPrice <- data.JPM[, 4]
JPM.onlyPrice2 <- data.JPM$JPM.Close
JPM.onlyPrice[c(1:3, nrow(JPM.onlyPrice)), ]

# HSBC
HSBC.onlyPrice <- data.HSBC[, 4]
HSBC.onlyPrice2 <- data.HSBC$HSBC.Close
HSBC.onlyPrice[c(1:3, nrow(HSBC.onlyPrice)), ]

# CVS
CVS.onlyPrice <- data.CVS[, 4]
CVS.onlyPrice2 <- data.CVS$CVS.Close
CVS.onlyPrice[c(1:3, nrow(CVS.onlyPrice)), ]

# UNH
UNH.onlyPrice <- data.UNH[, 4]
UNH.onlyPrice2 <- data.UNH$UNH.Close
UNH.onlyPrice[c(1:3, nrow(UNH.onlyPrice)), ]

# Converting daily prices to weekly / monthly.
#
# `wk`, `mo`, `data.weekly` and `data.monthly` are scratch objects that are
# overwritten for every ticker; after this section they hold the UNH results.

# to.weekly: convert each series into weekly data and show the first three
# rows and the last row.
wk <- data.AMZN
data.weekly <- to.weekly(wk)
data.weekly[c(1:3, nrow(data.weekly)), ]

wk <- data.GOOG
data.weekly <- to.weekly(wk)
data.weekly[c(1:3, nrow(data.weekly)), ]

wk <- data.JPM
data.weekly <- to.weekly(wk)
data.weekly[c(1:3, nrow(data.weekly)), ]

wk <- data.HSBC
data.weekly <- to.weekly(wk)
data.weekly[c(1:3, nrow(data.weekly)), ]

wk <- data.CVS
data.weekly <- to.weekly(wk)
data.weekly[c(1:3, nrow(data.weekly)), ]

wk <- data.UNH
data.weekly <- to.weekly(wk)
data.weekly[c(1:3, nrow(data.weekly)), ]

# to.monthly: convert each series into monthly data and show the first three
# rows and the last row.
mo <- data.AMZN
data.monthly <- to.monthly(mo)
data.monthly[c(1:3, nrow(data.monthly)), ]

mo <- data.GOOG
data.monthly <- to.monthly(mo)
data.monthly[c(1:3, nrow(data.monthly)), ]

mo <- data.JPM
data.monthly <- to.monthly(mo)
data.monthly[c(1:3, nrow(data.monthly)), ]

mo <- data.HSBC
data.monthly <- to.monthly(mo)
data.monthly[c(1:3, nrow(data.monthly)), ]

mo <- data.CVS
data.monthly <- to.monthly(mo)
data.monthly[c(1:3, nrow(data.monthly)), ]

mo <- data.UNH
data.monthly <- to.monthly(mo)
data.monthly[c(1:3, nrow(data.monthly)), ]

# Plot candlestick charts for one stock from each sector
# (AMZN, JPM and CVS).
#
# Before plotting, build an open-high-low-close (OHLC) object per stock.
# Each OHLC object is derived from that stock's own weekly series; reusing
# the shared `data.weekly` scratch object here would chart whichever ticker
# was converted last under all three titles.

library(quantmod)

# AMZN: weekly bars without the first row and without column 6
OHLC <- to.weekly(data.AMZN)[-1, -6]
AMZN.ohlc <- as.quantmod.OHLC(
  OHLC,
  col.names = c("Open", "High", "Low", "Close", "Volume")
)
AMZN.ohlc[c(1:3, nrow(AMZN.ohlc)), ]

# JPM
OHLC <- to.weekly(data.JPM)[-1, -6]
JPM.ohlc <- as.quantmod.OHLC(
  OHLC,
  col.names = c("Open", "High", "Low", "Close", "Volume")
)
JPM.ohlc[c(1:3, nrow(JPM.ohlc)), ]

# CVS
OHLC <- to.weekly(data.CVS)[-1, -6]
CVS.ohlc <- as.quantmod.OHLC(
  OHLC,
  col.names = c("Open", "High", "Low", "Close", "Volume")
)
CVS.ohlc[c(1:3, nrow(CVS.ohlc)), ]

chartSeries(AMZN.ohlc, theme = "white.mono", name = "AMZN OHLC")
chartSeries(JPM.ohlc, theme = "white.mono", name = "JPM OHLC")
chartSeries(CVS.ohlc, theme = "white.mono", name = "CVS OHLC")

# Develop a plot comparing the capital gains by sector:
# capital gains of multiple securities over time.

# Combine the six closing-price series and keep the 2007 observations only.
Close.Prices <- cbind(
  AMZN.onlyPrice, GOOG.onlyPrice,
  JPM.onlyPrice, HSBC.onlyPrice,
  CVS.onlyPrice, UNH.onlyPrice
)
Close.Prices <- Close.Prices["2007"]

# Sector value index: the sum of the two closes in a sector divided by that
# sum on the first 2007 observation, so every index starts at 1.
Technology <- (Close.Prices$AMZN.Close + Close.Prices$GOOG.Close) /
  as.double(Close.Prices$AMZN.Close[1] + Close.Prices$GOOG.Close[1])
Financial <- (Close.Prices$JPM.Close + Close.Prices$HSBC.Close) /
  as.double(Close.Prices$JPM.Close[1] + Close.Prices$HSBC.Close[1])
Healthcare <- (Close.Prices$CVS.Close + Close.Prices$UNH.Close) /
  as.double(Close.Prices$CVS.Close[1] + Close.Prices$UNH.Close[1])

Multi.Sec.Comp <- cbind(Technology, Financial, Healthcare)
names(Multi.Sec.Comp) <- c("Technology", "Financial", "Healthcare")

# Look at the data before plotting
Multi.Sec.Comp[c(1:3, nrow(Multi.Sec.Comp)), ]

# Plot the three sector indices on one chart: Technology sets up the axes,
# the other two sectors are added as lines, and a horizontal line marks 1.
plot(
  x = index(Multi.Sec.Comp),
  y = Multi.Sec.Comp$Technology,
  type = "l",
  lty = 1,
  lwd = 2,
  col = "black",
  xlab = "Date",
  ylab = "Value of Investment ($)",
  ylim = c(0.8, 1.8),
  main = "Value of $1 Investment in
AMZN, GOOG, JPM, HSBC, CVS, UNH by sector
January 1, 2007 - December 31, 2007"
)
lines(
  x = index(Multi.Sec.Comp),
  y = Multi.Sec.Comp$Financial,
  type = "l",
  lty = 1,
  lwd = 2,
  col = "red"
)
lines(
  x = index(Multi.Sec.Comp),
  y = Multi.Sec.Comp$Healthcare,
  type = "l",
  lty = 1,
  lwd = 2,
  col = "blue"
)
abline(h = 1, lty = 1, col = "black")
legend(
  "topleft",
  c("Technology", "Financial", "Healthcare"),
  col = c("black", "red", "blue"),
  lty = c(1, 1, 1),
  lwd = c(2, 2, 2)
)

# Calculating moving averages for the technology sector portfolio
# (AMZN close plus GOOG close).
Tech.Sec.Port <- AMZN.onlyPrice + GOOG.onlyPrice
names(Tech.Sec.Port) <- "Tech.Sec.Close"

# 50- and 200-observation rolling means of the portfolio close
Tech.Sec.Port$SMA50 <- rollmeanr(Tech.Sec.Port$Tech.Sec.Close, k = 50)
Tech.Sec.Port$SMA200 <- rollmeanr(Tech.Sec.Port$Tech.Sec.Close, k = 200)

# Look at the data before plotting
Tech.Sec.Port[c(1:3, nrow(Tech.Sec.Port)), ]

# Keep the 2009 observations only
Tech.Sec.Port2009 <- Tech.Sec.Port["2009"]

# Y-axis limits: the range over every column of the 2009 subset
y.range <- range(Tech.Sec.Port2009, na.rm = TRUE)
y.range

# Plot the portfolio close, then overlay both moving averages
plot(
  x = index(Tech.Sec.Port2009),
  y = Tech.Sec.Port2009$Tech.Sec.Close,
  type = "l",
  lwd = 2,
  xlab = "Date",
  ylab = "Price ($)",
  ylim = y.range,
  main = "Technical sector - Simple Moving Average
January 1, 2009 - December 31, 2009"
)
lines(
  x = index(Tech.Sec.Port2009),
  y = Tech.Sec.Port2009$SMA50
)
lines(
  x = index(Tech.Sec.Port2009),
  y = Tech.Sec.Port2009$SMA200,
  lty = 2
)
legend(
  "topleft",
  c("Portfolio Price", "50-Day Moving Average", "200-Day Moving Average"),
  lty = c(1, 1, 2),
  lwd = c(2, 1, 1)
)

# Bollinger Bands plot for the six-stock portfolio

# Portfolio value: the sum of the six closing-price series
Port.Val.BB <- AMZN.onlyPrice + GOOG.onlyPrice + JPM.onlyPrice +
  HSBC.onlyPrice + CVS.onlyPrice + UNH.onlyPrice
names(Port.Val.BB) <- "Val.Close"

# Rolling 20-observation mean and standard deviation of the portfolio value
Port.Val.BB$avg <- rollmeanr(Port.Val.BB$Val.Close, k = 20)
Port.Val.BB$sd <- rollapply(Port.Val.BB$Val.Close, width = 20, FUN = sd, fill = NA)

# Look at the data
Port.Val.BB[c(1:3, nrow(Port.Val.BB)), ]

# Bollinger Bands: rolling mean plus / minus two rolling standard deviations
Port.Val.BB$sd2up <- Port.Val.BB$avg + 2 * Port.Val.BB$sd
Port.Val.BB$sd2down <- Port.Val.BB$avg - 2 * Port.Val.BB$sd

# Look at the data before plotting
Port.Val.BB[c(1:3, nrow(Port.Val.BB)), ]

# Y-axis limits must span the bands as well as the price; ranging over the
# price alone clips the bands wherever they lie outside the price range.
y.range <- range(
  Port.Val.BB[, c("Val.Close", "sd2up", "sd2down")],
  na.rm = TRUE
)
y.range

# Plot the portfolio value, its rolling mean, and both bands
plot(
  x = index(Port.Val.BB),
  xlab = "Date",
  y = Port.Val.BB$Val.Close,
  ylim = y.range,
  ylab = "Price ($)",
  type = "l",
  lwd = 3,
  main = "Portfolio - Bollinger Bands (20 days, 2 deviations)
February 28, 2005 - February 25, 2010"
)
lines(
  x = index(Port.Val.BB),
  y = Port.Val.BB$avg,
  lty = 2
)
lines(
  x = index(Port.Val.BB),
  y = Port.Val.BB$sd2up,
  col = "gray40"
)
lines(
  x = index(Port.Val.BB),
  y = Port.Val.BB$sd2down,
  col = "gray40"
)
legend(
  "topleft",
  c("Portfolio Price", "20-Day Moving Average", "Upper Band", "Lower Band"),
  lty = c(1, 2, 1, 1),
  lwd = c(3, 1, 1, 1),
  col = c("black", "black", "gray40", "gray40")
)

# You might also like