# Professional Documents
# Culture Documents
#===================================================
#----------------basics-------------------------
#===================================================
# Three spellings of assignment, shown side by side on purpose:
# `=`, the left arrow `<-`, and the right arrow `->`.
x = 2
x <- 2
3 -> y
# Chained assignment: 4 is stored in y (replacing the 3) and then in z.
z = y = 4
# Remove variable z
rm(z)
# Inspect the class of x before and after converting it to an integer.
class(x)
x = as.integer(x)
class(x)
is.integer(x)
#===================================================
# Character data: nchar() counts the characters in a value.
a <- "A Grade"
# Check length of a
nchar(a)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
# A number is converted to its text form first, so this counts its digits.
nchar(3000)
#===================================================
# NOTE(review): `b` is never assigned anywhere in the visible script, so this
# call cannot succeed as written; the line that defined `b` appears to be
# missing from these notes -- TODO restore it.
class(b)
#===================================================
# Logical values and their class.
c <- TRUE
d <- FALSE
class(c)
class(d)
# Is x greater than y?
x > y
# Is y less than or equal to x?
y <= x
# Are x and y equal?
x == y
# Are x and y different?
x != y
# Three parallel vectors describing ten students.
grades <- c("a", "b", "a", "c", "d", "a", "b", "a", "c", "d")
marks <- c(90, 85, 98, 72, 54, 93, 86, 90, 70, 45)
rno <- 1:10
# Arithmetic is element-wise: every mark gets 2 added.
marks <- marks + 2
marks
# Scale the marks down by a factor of ten.
slab <- marks / 10
slab
# Indexing: a single element, a pair of positions, and a range.
grades[3]
slab[c(3, 5)]
marks[8:10]
#===================================================
# Turn the character grades into a factor, show it, then show the integer
# codes that stand behind the factor levels.
gf <- as.factor(grades)
gf
as.numeric(gf)
#===================================================
#### NA ####
# Append an 11th student whose marks and grade are missing (NA).
rno <- c(rno, 11)
marks <- c(marks, NA)
grades <- c(grades, NA)
# Update slab
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
# NA propagates through arithmetic, so the new slab entry is NA as well.
slab <- marks / 10
slab
#===================================================
library(magrittr)
# Pipe marks into mean(); na.rm = TRUE drops the missing value first.
marks %>% mean(na.rm = TRUE)
#===================================================
# Combine the four vectors into one data frame, one row per student.
myclass <- data.frame(
  Roll_Number = rno,
  Marks = marks,
  CGPA = slab,
  Grades = grades
)
myclass
# Dimensions of myclass
# number of rows
nrow(myclass)
# number of columns
ncol(myclass)
rownames(myclass)
colnames(myclass)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
# Head of dataframe
head(myclass)
# Tail of dataframe
tail(myclass)
# grades?
# The same column selected by position and by name.
myclass[, 4]
myclass[, "Grades"]
# One cell, one whole row, and a pair of columns.
myclass[5, 4]
myclass[5, ]
myclass[, c(1, 4)]
library(ggplot2)
# Work on a copy of the diamonds data set that ships with ggplot2.
data(diamonds)
mydata <- diamonds
head(mydata)
# Base Histogram
hist(mydata$carat, main = "Carat Histogram", xlab = "Carat")
# Base Scatterplot
plot(price ~ carat, data = mydata)
# Boxplot
boxplot(mydata$carat)
# The same histogram drawn with ggplot2.
ggplot(data = mydata) + geom_histogram(aes(x = carat))
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
ggplot(data = mydata) + geom_density(aes(x = carat), fill = "skyblue")
# Scatterplot
ggplot(mydata, aes(x = carat, y = price)) + geom_point()
# Keep the base plot in g so layers can be added to it repeatedly.
g <- ggplot(mydata, aes(x = carat, y = price))
# Add colors to g
g + geom_point(aes(color = color))
# One panel per diamond colour.
g + geom_point(aes(color = color)) + facet_wrap(~color)
# A grid of panels: rows by cut, columns by clarity.
g + geom_point(aes(color = color)) + facet_grid(cut ~ clarity)
ggplot(mydata, aes(x = carat)) + geom_histogram() + facet_wrap(~color)
# Boxplots
ggplot(mydata, aes(y = carat, x = 1)) + geom_boxplot()
ggplot(mydata, aes(y = carat, x = cut)) + geom_boxplot()
# Line plot
ggplot(economics, aes(x = date, y = pop)) + geom_line(color = "red")
library(ggplot2)
# create data
# xValue is the index 1..50; yValue is a random walk built from 50 normal draws.
xValue <- 1:50
yValue <- cumsum(rnorm(50))
data <- data.frame(xValue = xValue, yValue = yValue)
# area Plot
ggplot(data, aes(x = xValue, y = yValue)) +
  geom_area()
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
#data
#barplot
# NOTE(review): the data frame and the ggplot(...) call these layers belong to
# are not present in these notes, so none of the three expressions below is
# attached to a plot -- TODO restore the missing data and ggplot() calls.
geom_bar(stat="identity", fill="steelblue")+
theme_minimal()
# data
# NOTE(review): same gap as above -- no data and no ggplot(...) call precede
# this layer.
geom_bar(stat="identity")
# Use position=position_dodge()
geom_bar(stat="identity", position=position_dodge())
###########dot plot#####################
# Work on a copy of the built-in ToothGrowth data.
dd <- ToothGrowth
head(dd)
class(dd$dose)
class(dd)
# Treat dose as a category so each dose gets its own position on the x axis.
dd$dose <- as.factor(dd$dose)
ggplot(dd, aes(x = dose, y = len, color = supp)) +
  geom_dotplot(binaxis = "y", stackdir = "center")
#data
#barplot
# NOTE(review): no data and no ggplot(...) call precede this layer; the lines
# that built the plot appear to be missing -- TODO restore them.
geom_bar(stat="identity")
# NOTE(review): nothing named `pie` is assigned in the visible script, so this
# refers to whatever `pie` already exists on the search path (presumably the
# graphics::pie function) rather than a plot object -- TODO confirm and restore
# the assignment.
pie
##############correlogram###########
#####install.packages("GGally")
# Install corrplot only when it is missing, so re-running the notes does not
# reinstall the package (or need a network connection) every time.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library(corrplot)
# Correlation matrix of columns 2, 3 and 5 of mtcars, rounded to 2 decimals.
# (The original wrapped this in head(), which changes nothing for a 3-row
# matrix.)
h <- cor(mtcars[, c(2, 3, 5)])
corr <- round(h, 2)
###corrplot(corr,method="circle")
# The same matrix drawn with four different glyph styles.
corrplot(corr, method = "circle")
corrplot(corr, method = "pie")
corrplot(corr, method = "color")
corrplot(corr, method = "number")
#####################################################################
library(dplyr)
## Use of Pipes
class(diamonds)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
head(diamonds)
diamonds
# Keep only the carat and price columns.
select(diamonds, carat, price)
## Print all except carat and price columns w/i and w/o pipes
# Rows with carat above 2 and price below 14000, written two ways: as two
# comma-separated conditions and as one condition joined with `&`.
diamonds %>% filter(carat > 2, price < 14000)
diamonds %>% filter(carat > 2 & price < 14000)
ia <- "Ideal"
# Add two derived columns; `double` is computed from the freshly made `ratio`.
diamonds %>%
  select(carat, price) %>%
  mutate(ratio = price / carat, double = ratio * 2)
diamonds2 <- diamonds
# %<>% (magrittr) pipes `diamonds` through the chain and assigns the result
# back to `diamonds`; `diamonds2` keeps the copy taken on the line above.
diamonds %<>%
  select(carat, price) %>%
  mutate(ratio = price / carat, double = ratio * 2)
diamonds2
###
## summary
summarize(diamonds, mean(price))
## group by
# create a vector
rainfall <- c(12, 43, 32, 23, 23, 31, 56, 20, 30, 12, 45, 78)
# convert to time series object, freq means number of data points per year
# start = c(2017, 6) places the first observation in period 6 of 2017.
rats <- ts(rainfall, start = c(2017, 6), frequency = 12)
rats
rainfall1 <- c(12, 21, 33, 24, 23, 15, 22, 12, 12, 12, 13, 70)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
# Put both series side by side (12 rows, one column each) and turn the matrix
# into a two-column time series with the same start and frequency.
comr <- matrix(c(rainfall, rainfall1), nrow = 12)
rats1 <- ts(comr, start = c(2017, 6), frequency = 12)
library(forecast)
# NOTE(review): the last visible assignment to `dd` is the ToothGrowth data
# frame from the dot-plot section, not a time series; the statement that
# loaded a series into `dd` for this section appears to be missing -- TODO
# restore it before running the lines below.
# Inspect the series: first period, last period, observations per cycle, class.
start(dd)
end(dd)
frequency(dd)
class(dd)
# Fit an automatically selected ARIMA model and predict 3 steps ahead.
d1=auto.arima(dd)
d2=predict(d1,n.ahead=3)
# Plot the series (blue) together with the predictions (red, line type 3).
ts.plot(dd,d2$pred,col=c("blue","red"),lty=c(1,3))
# Install only the text-mining packages that are not already available, so
# re-running the notes does not reinstall everything each time.
local({
  needed <- c("tm", "SnowballC", "topicmodels", "wordcloud", "sentimentr", "syuzhet")
  absent <- needed[!vapply(needed, requireNamespace, logical(1), quietly = TRUE)]
  if (length(absent) > 0) {
    install.packages(absent)
  }
})
library(tm)
library(SnowballC)
library(topicmodels)
library(wordcloud)
# NOTE(review): dplyr is already attached by an earlier library(dplyr) call,
# so attaching plyr here puts it ahead of dplyr on the search path and masks
# same-named verbs such as mutate() and summarize(); the second
# library(dplyr) below does not undo that. plyr is not used in the visible
# code -- consider dropping it or calling dplyr:: explicitly.
library(plyr)
library(dplyr)
library(stringr)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
library(ggplot2)
library(httr)
library(reshape2)
library(sentimentr)
library(scales)
library(RCurl)
library(syuzhet)
# Read every .txt file in the working directory into a list of character
# vectors (one vector of lines per file).
getwd()
# list.files() interprets `pattern` as a regular expression, not a shell glob:
# "*.txt" would also match names that merely contain "txt" after any
# character. Anchor on a literal ".txt" suffix instead.
filenames <- list.files(getwd(), pattern = "\\.txt$")
files <- lapply(filenames, readLines)
# Build a tm corpus with one document per file that was read.
articles.corpus <- Corpus(VectorSource(files))
class(articles.corpus)
##Text Preprocessing
# NOTE(review): the two headings below have no code under them in these
# notes; the cleaning steps they announce appear to be missing -- TODO restore.
# remove punctuation
#remove numbers
# Show the stop-word list that tm provides by default.
stopwords()
#Visualization - Wordcloud
library(wordcloud)
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
wordcloud(articles.corpus, random.order = FALSE)
#Create TDM
# A term-document matrix represents the words in the text as a table (or
# matrix) of numbers. Each row of the matrix is a term (word) taken from the
# text, and each column is one of the documents being analysed.
tdm <- TermDocumentMatrix(articles.corpus)
class(tdm)
# Convert to an ordinary matrix so it can be printed and summed directly.
tdm <- as.matrix(tdm)
tdm
# Rows are terms, so the row sums are the total count of each term across all
# documents. tdm is already a plain matrix here; no second conversion needed.
termfreq <- rowSums(tdm)
termfreq
#Subsetting TDM
# NOTE(review): `termfreqsubset` is never assigned in the visible script; the
# subsetting step this heading announces appears to be missing -- TODO restore.
class(termfreqsubset)
#Creating a dataframe
library(ggplot2)
# NOTE(review): `tdmdf` is never assigned in the visible script; the
# data-frame construction appears to be missing -- TODO restore.
View(tdmdf)
# NOTE(review): the two components below are not added to any ggplot(...) call
# here, and `tdmplot` is never assigned; presumably they were the tail of the
# `tdmplot` definition -- TODO restore the start of that statement.
coord_flip() +
theme(axis.text=element_text(size=6))
tdmplot
#Wordcloud
#Colors
# List the colour names R knows.
colors()
################################################
#Sentiment Analysis
library(sentimentr)
class(articles.corpus)
# Flatten the corpus to a character vector, then check its class. (The
# original inspected `a` before this assignment, which reported the class of
# the unrelated `a` defined near the top of the notes.)
a <- as.character(articles.corpus)
class(a)
# get_nrc_sentiment() is provided by the syuzhet package attached earlier.
mysentiment <- get_nrc_sentiment(a)
# Total score per sentiment category: one row per category, a single column.
SentimentScores <- data.frame(colSums(mysentiment[, ]))
SentimentScores
names(SentimentScores) <- "Score"
SentimentScores
# NOTE(review): only the tail `SentimentScores)` of this statement survived
# in the notes. Reconstructed as copying the row names into a `sentiment`
# column, because the row names are discarded just below and the plot needs
# the category labels -- TODO confirm against the original script.
SentimentScores <- cbind(sentiment = rownames(SentimentScores), SentimentScores)
SentimentScores
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
rownames(SentimentScores) <- NULL
SentimentScores
# NOTE(review): only the theme/xlab/ylab tail of the plot survived, followed
# by a dangling `+`. The ggplot() call and bar layer are reconstructed from
# the axis labels and the hidden legend; anything that followed the trailing
# `+` is unknown -- TODO confirm against the original script.
ggplot(SentimentScores, aes(x = sentiment, y = Score)) +
  geom_bar(aes(fill = sentiment), stat = "identity") +
  theme(legend.position = "none") +
  xlab("Sentiment") + ylab("Score")
#Topic Modeling
#Latent dirichlet allocation (LDA) models are a widely used topic modeling technique.
#Create DTM
# NOTE(review): the original DocumentTermMatrix( call was cut off after its
# first argument; any extra arguments it carried are unknown -- TODO confirm.
articleDtm <- DocumentTermMatrix(articles.corpus)
# Number of topics to fit, and the seed that makes the fit repeatable.
k <- 3
SEED <- 1234
# NOTE(review): only the tail `control=list(seed = SEED))` of the model fit
# survived. Reconstructed as an LDA() call on the DTM with k topics, since
# `article.lda` is used below and is assigned nowhere else; the fitting method
# the original requested, if any, is unknown -- TODO confirm.
article.lda <- LDA(articleDtm, k = k, control = list(seed = SEED))
# Most likely topic for each document.
lda.topics <- as.matrix(topics(article.lda))
lda.topics
# Most likely term for each topic.
lda.terms <- terms(article.lda)
lda.terms
library(caret)
library(kernlab)
library(e1071)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
#install.packages("klaR")
library(klaR)
data1 <- iris
colnames(data1)
# Pick 80% of the rows for training; list = FALSE returns the row indices as a
# matrix. FALSE is spelled out because `F` can be reassigned.
partition <- createDataPartition(data1$Species, p = .80, list = FALSE)
partition
datatr <- data1[partition, ]
datate <- data1[-partition, ]
# Train with caret's "nb" method, using every other column as a predictor.
modelfit <- train(Species ~ ., data = datatr, method = "nb")
# Predict the held-out rows and compare against their true species.
prediction <- predict(modelfit, newdata = datate)
confusionMatrix(prediction, datate$Species)
#install.packages("mlbench")
data(BreastCancer, package = "mlbench")
# Keep only the rows that have no missing values.
bc <- BreastCancer[complete.cases(BreastCancer), ]
# 80/20 train/test split on the Class column. FALSE is spelled out because
# `F` can be reassigned.
partition <- createDataPartition(bc$Class, p = .80, list = FALSE)
partition
datatr <- bc[partition, ]
datate <- bc[-partition, ]
# Logistic regression of Class on Cell.shape alone.
modelfit <- train(
  Class ~ Cell.shape,
  data = datatr,
  method = "glm",
  family = "binomial"
)
prediction <- predict(modelfit, newdata = datate)
confusionMatrix(prediction, datate$Class)
data(spam)
# Updated R Script Compiled by Mr. Anup Sharma (Strictly to be used as class notes)
z1 <- spam
head(z1)
# 75/25 train/test split on the type column. FALSE is spelled out because `F`
# can be reassigned.
z2 <- createDataPartition(z1$type, p = 0.75, list = FALSE)
ztrain <- z1[z2, ]
ztest <- z1[-z2, ]
# Logistic regression of type on every other column.
zfit <- train(type ~ ., data = ztrain, method = "glm", family = "binomial")
pred <- predict(zfit, newdata = ztest)
confusionMatrix(pred, ztest$type)
#Clustering
iris
# Drop the fifth column (Species) so only the four measurements remain.
iris1 <- iris[, -5]
##################################################
plot(iris1$Sepal.Length, iris1$Sepal.Width)
# kmeans() expects the data matrix itself. Passing dist(iris1) made it cluster
# the 150 rows of the 150 x 150 distance matrix instead of the four
# measurements, so the observations are passed directly here.
clu <- kmeans(iris1, centers = 3)
clu$cluster
library(dplyr)
# Attach each row's cluster number as a new `clusters` column.
iris2 <- mutate(iris1, clusters = clu$cluster)
head(iris2)
#########################################
#hierarchical clustering
# Take the four measurement columns straight from iris. The original wrote
# iris1[, -5], but iris1 has only four columns, so that index removed nothing.
iris2 <- iris[, -5]
# Agglomerative clustering on pairwise distances with the "ward.D2" linkage.
h1 <- hclust(dist(iris2), method = "ward.D2")
# Small label size so the 150 leaf labels stay legible.
plot(h1, cex = 0.2)