##You are on page 1 of 6

##This is example code for HICAST students participating in a

##workshop with the University of Florida

##You can use R like a calculator


## Basic arithmetic: R evaluates each expression and prints the result
2 + 2
log(16)    # natural logarithm
exp(2.77)  # e raised to the power 2.77

## The program functions like a normal scientific calculator
## Normal rules in order of operation apply

## You can also write formulas: store a value under a name with the
## assignment arrow (<-) and then use that name in later calculations
x <- 4

## Now R knows that x = 4
x * 5
Matt <- 20 + 3
84 * Matt

##Next we are going to create a vector


## c() combines values into a vector; typing the name prints it
x.vec <- c(1, 2, 3, 4, 5, 6)
x.vec

## When you make a mistake in R, an error message pops up.
## Here the name is misspelled (xvec instead of x.vec). Wrapping the line
## in try() still prints the error message but lets the rest of the
## script keep running.
try(xvec)

## Here the dash of the assignment arrow was forgotten, so R reads "<" as
## a less-than comparison against x.dumm, an object that does not exist
try(x.dumm < c(1, 4, 6, 7))

## With the complete arrow (<-) the assignment works
x.dummy <- c(1, 4, 6, 7)
x.dummy

## Multiplying x.vec by 10 multiplies each individual number
x.vec * 10

## Summary functions take the whole vector and return a single value
sum(x.vec)

max(x.vec)

length(x.vec)

max(x.dummy)

## You can write all sorts of formulas, naming things and creating
## shortcuts for yourself; here the mean of x.vec is computed by hand
x.mean <- sum(x.vec) / length(x.vec)
x.mean

##The above formula calculated the mean for the vector name x.vec

############Make notes when you shift gears#####


##Notes to yourself are key##
##Now we are going to calculate a sequence of numbers###
## seq(from, to, by): count up from 0 to 1000 in steps of 25
seq(from = 0, to = 1000, by = 25)

## The same sequence can be stored under a name and printed
bieber <- seq(from = 0, to = 1000, by = 25)
bieber

## Arithmetic and functions are applied to every element of the sequence
bieber^2

log(bieber)

## rep(value, times) creates a vector that repeats one number
rep(1, times = 20)
##In this case, 1 repeated 20 times##

###############Now let's create a matrix#######################


##These are numbers that are in rows and columns not unlike a data set##
## A matrix holds values in rows and columns. You always have to tell
## matrix() how many rows (nrow) and how many columns (ncol), separated
## by a comma. With no data supplied, every cell starts out as NA.
clooney <- matrix(nrow = 3, ncol = 3)
clooney

## That is just the shell; now fill it one cell at a time as [row, column]
clooney[1, 1] <- 22
clooney[1, 2] <- 6
clooney[1, 3] <- 189
clooney[2, 1] <- 33
clooney[2, 2] <- 89
clooney[2, 3] <- 6789
clooney[3, 1] <- 1
clooney[3, 2] <- 78
clooney[3, 3] <- 954
clooney

## You can also write code so that you don't have to fill manually:
## is.na() selects every empty cell, so all of them become 0 in one step
jay.z <- matrix(nrow = 2, ncol = 3)
jay.z[is.na(jay.z)] <- 0
jay.z

## You can also set one specific cell and ask R to fill the rest.
## Start again from an empty matrix first: after the step above no cell
## is NA any more, so is.na() would find nothing left to fill.
jay.z <- matrix(nrow = 2, ncol = 3)
jay.z[1, 1] <- 99
jay.z[is.na(jay.z)] <- 100
jay.z

########Let's work with data frames now################


##this is how you would work with real data##
## Two vectors of equal length: one numeric, one text
bodymass <- c(140, 220, 450, 235, 500, 340, 375)
bodymass
sex <- c("f", "f", "m", "f", "m", "f", "m")
sex

## data.frame() puts the two vectors side by side as columns
cows <- data.frame(bodymass, sex)
cows

## Now let's subset and manipulate this mini data set.
## Let's say we want to go into the cows dataset and extract only the
## body masses of females: the condition in [ ] picks the matching rows
cows$bodymass[cows$sex == "f"]

## Let's double check to make sure it pulled the right data
cows

## You can even pull really specific data points from a larger set by
## combining conditions with &: here males with a body mass above 350
bulls <- cows$bodymass[cows$sex == "m" & cows$bodymass > 350]
bulls

## Double check
cows

####Now let's bring in a file that contains data####


##First you have to set the working directory##
##First, let's pull in the data file that I gave you to copy to your desktop#
## Set the working directory to the folder that holds the data files.
## This path is the author's own; change it to wherever you saved the files.
setwd('C:\\Users\\mhallett2320\\Desktop\\Nepal Lectures')

## read.table() reads plain text files, so save your data as a text file.
## (.csv files can be read with read.csv(); Excel workbooks cannot be read
## by base R directly and need an add-on package or a save-as-text first.)
grades <- read.table("grades.txt", header = TRUE)
grades

## If you just want to see the top part to check the headings, just
## ask to see the 'head'
head(grades)

## If you attach your data, then R will recognize the headings
## in your dataset as if they were objects of their own
attach(grades)
Final_Grade

## So now you can see only the final letter grades.
## If you don't attach, you need to name the data set and use $
grades$Final_Grade

## Now we can make alterations to just parts of the data.
## For example, let's say that you decided to curve the grades by 3%.
## NOTE(review): adding 0.03 assumes Percent is stored as a proportion
## (0-1); if it is stored on a 0-100 scale this should add 3 - confirm.
curved <- Percent + 0.03
curved

## We can also add these alterations to the dataset right in R
grades2 <- cbind(grades, curved)
grades2
## As you can see it didn't eliminate the original percents, just
## added the new column with the curved grades.
## We could write code to change the letter grades as well (but won't now)

## From here we can perform all of our descriptive statistics if we want
mean(curved)
median(curved)
## R's mode() reports how a value is stored (e.g. "numeric"), not the most
## common value. To get the statistical mode, count each value with table()
## and keep the most frequent one(s). If no value is repeated, every value
## comes back, which means there is no single mode.
curved.counts <- table(curved)
as.numeric(names(curved.counts)[curved.counts == max(curved.counts)])
quantile(curved)
min(curved)
max(curved)
sd(curved)
var(curved)

#################Let's do some graphics################


##I'm bored of grades, so let's read some new data in###

## Read the country-area data and look at it (all of it, then the top rows)
area <- read.table("country.area.txt", header = TRUE)
area
head(area)

## Ok, now let's make some graphs.
## In R a regular scatter plot is 'plot', and you have to tell R what you
## want on the x axis (first) and the y axis (second).
country <- read.table("country.area.txt", header = TRUE)
plot(country$Rank, country$Total.sqkm)
## A text column must be turned into a factor before it can go on an axis;
## plot() then draws one box per country instead of a scatter plot
plot(factor(country$Country), country$Total.sqkm)

## Now obviously we need to change our x and y axis labels
plot(country$Rank, country$Total.sqkm,
     xlab = "Country Rank", ylab = "Total area (km2)")
## Now as you can see our axes are labeled - and you can give them any name

## Now let's say that we want to give the points a different color.
## Let's change the colors from black to blue.
plot(country$Rank, country$Total.sqkm,
     xlab = "Country Rank", ylab = "Total area (km2)", col = "blue")

## Create a boxplot to check the distribution
boxplot(country$Total.sqkm)

## Bar chart of points per student (uses the grades data read in earlier).
## barplot() takes the bar heights first; the label under each bar is
## given through names.arg.
barplot(grades$Points, names.arg = as.character(grades$Student),
        xlab = "Student", ylab = "Total Points")

######Let's try a Chi-squared test on some data######


###First you have to load a package called "MASS" to run the test####
##You can do this by either typing the code below into the r code lines####
###OR you can click on the library drop down menu on the top and find "MASS"####
## Load the MASS package, as the workshop notes ask.
## (chisq.test() itself is part of R's built-in stats package.)
library(MASS)

## Now we have to set our working directory so that we can get our data on
## preference into R.
## Remember that this is wherever you have saved your data, so this part
## of the code will be different than mine.
setwd('C:\\Users\\mhallett2320\\Desktop\\Nepal Lectures')

## Now read the table into R using the following code
pref <- read.table("pref.txt", header = TRUE)

## Attach the data so that you can search around to see what the data
## looks like
attach(pref)

## Now check out the full data set
pref

## Try searching just one of the columns to see what happens
Coke

## Now let's run the Chi-squared test.
## In R, you simply type in the following code, and R does the rest.
chisq.test(pref)
####What do you think of the results? What do they say about the data?####

#####Now let's try running a t-test#####


###First we have to read in the data######
## Read in the data (the file is looked up in the current working directory)
rice <- read.table("rice.txt", header = TRUE)

## Now check the data to make sure that it is correct
rice

## Attach the data so that the column headings can be used by name
attach(rice)

## Check the headings to make sure that they are correct
head(rice)

## Check one of the column headings to make sure that you can see it by
## itself
Nepal.per.km2

## Now let's calculate some descriptive stats
mean(Nepal.per.km2)
median(Nepal.per.km2)
## R's mode() reports how a value is stored, not the most common value,
## so count each value with table() and keep the most frequent one(s).
## If no value is repeated, every value comes back (no single mode).
nepal.counts <- table(Nepal.per.km2)
as.numeric(names(nepal.counts)[nepal.counts == max(nepal.counts)])
min(Nepal.per.km2)
max(Nepal.per.km2)
quantile(Nepal.per.km2)
## There is no quartile() function in R; ask quantile() for the three
## quartiles (25%, 50%, 75%) explicitly
quantile(Nepal.per.km2, probs = c(0.25, 0.5, 0.75))
sd(Nepal.per.km2)
var(Nepal.per.km2)

## Now let's visualize the data using a boxplot to see how it is distributed
boxplot(Nepal.per.km2)

## If you remember, having equal variance is a key assumption in a t-test.
## So let's run a variance test to see if the two variances are equal.
var.test(India.per.km2, Nepal.per.km2)

## Now we need the critical F value to compare with the observed F above.
## NOTE(review): 55 and 55 degrees of freedom presumably mean 56
## observations in each column - confirm against the data.
qf(0.95, 55, 55)

## If the observed F from var.test() is greater than this critical value,
## we reject the null hypothesis of equal variance (the workshop notes
## report that it is). So when we write the code for the t-test, we need
## to indicate that the variances are unequal.
## We also need to let R know that this is not a paired t-test.
t.test(India.per.km2, Nepal.per.km2, var.equal = FALSE, paired = FALSE)
###What was the result?####
####Was it the same as we got in Excel?####

#########Now let's try to run an ANOVA########


## Read in the full tomato data set and print it
tomato.full <- read.table("tomato.full.txt", header = TRUE)
tomato.full

## Attach so that the columns (e.g. treat.1) can be used by name
attach(tomato.full)

head(tomato.full)

## Descriptive statistics for the first treatment column
mean(treat.1)
median(treat.1)
## R's mode() reports how a value is stored, not the most common value,
## so count each value with table() and keep the most frequent one(s).
## If no value is repeated, every value comes back (no single mode).
treat1.counts <- table(treat.1)
as.numeric(names(treat1.counts)[treat1.counts == max(treat1.counts)])
sd(treat.1)
var(treat.1)

## One box per column of the data frame; the second call adds axis labels
## and color
boxplot(tomato.full)
boxplot(tomato.full, xlab = "Treatment", ylab = "Tomato Production(kg)",
        col = "blue")

## To run the ANOVA, the workshop prepares a new table in Excel and reads
## it in as a new file.
## Let's try doing pairwise comparisons.
## First read in the data
tomato4 <- read.table("treat.4.txt", header = TRUE)
## Check it out
tomato4

## The code for ANOVA using the aov function.
## NOTE(review): if `control` is a numeric column, this formula fits
## treat.4 against control as a numeric predictor rather than comparing
## group means - confirm the layout of treat.4.txt.
aov.out <- aov(treat.4 ~ control, data = tomato4)
## Check out the summary
summary(aov.out)
####What is this saying?######

########Now let's try linear regression##############


####So first let's add the data####
## Read in the data (the file is looked up in the current working directory)
temp <- read.table("regress.txt", header = TRUE)

## Attach the data
attach(temp)

## Check the data: print the whole data frame (object names are
## case-sensitive, so it is `temp`, not `Temp`)
temp

## Check the headings
head(temp)

## Ok, so let's plot the data: annual temperature (y) against year (x)
plot(an.temp ~ year, data = temp, xlab = "Year", ylab = "Degrees Celsius")

## Now let's change the color of the points to, say, red
plot(an.temp ~ year, data = temp, xlab = "Year", ylab = "Degrees Celsius",
     col = "red")

## Now let's add a regression line to the plot; data = temp makes the
## model take its columns from the data frame rather than relying on attach()
abline(lm(an.temp ~ year, data = temp))

## Now let's run the model and look at the fitted coefficients
temp.mod <- lm(an.temp ~ year, data = temp)
summary(temp.mod)
#####What do you think? Is it the same as we found in Excel?######

##You might also like