You are on page 1of 7

Intro to R software:

>data entry

bt=c(1,2,3,4,5,5,4,2,3,1)

>length(nameofdata)

this command gives 'n', the number of observations

>nrow(nameofdatatable)

-to check number of rows in data table

>ncol(nameofdatatable)

-to check number of column in data table (data.frame)

>dataname[,-1]

-this command is used to remove a column (here, the first column)

>dataname[c(2,17,8),]

-this command is used to select several rows of the data (here rows 2, 17 and 8)

>dataname[,c(6,4,12)]

-this command is used to locate different column data

>dataname[1:10,]

-this command gives rows from 1 to 10

>dataname[,1:10]

-this command gives columns from 1 to 10

>data.frame(nameofdata,variables)

this command gives data in tabular form

>nameofdata[,1]

-this command locates a column (here, the first) in the given data table

>mean(nameofdata)

to find the mean of a single variable (data vector)

>mean(nameofdatatable[,2])

another method of calculating mean

>mean(nameofdatatable[,2][1:50])

-to calculate mean for column 2 from entries 1 to 50


>apply(datatablename,2,mean), same for median,sd,var

to find the mean of every column (or every row) of the data table at once

2 in the second position means apply over columns

1 in the second position means apply over rows

>aggregate(datatablename[,5]~datatablename[,3],data=datatablename,FUN=mean)

-this command gives the mean for each category in the data table

-place the numeric column first, then the category column

-the ~ 'tilde' sign is used to link one column to another

>aggregate(datatablename[,5]~datatablename[,3],data=datatablename,FUN=sd)

-to get sd against each category

>aggregate(datatablename[,5]~datatablename[,3],data=datatablename,FUN=summary)

-to get summary of these column

>sort(dataname)

to arrange data in ascending order

>unique(dataname)

to exclude repetition of the same values

>plot(dataname)

graphical representation of data in Scatter plot

>plot(dataname,col="green")

to add colors in graph

>plot(dataname,col="red",xlab="bodytemperature",ylab="weights")

to add names on graph

>plot(dataname,col="red",xlab="bodytemperature",ylab="weights",main="graph of bt and weights")

to add name of graph

>boxplot(dataname,col="red",xlab="bt",ylab="weights",main="graph")

this command gives boxplot of data

>summary(nameofdataordatatable)

this command gives information such as the range and quartiles of the data,

and the category and length of the data table columns


>apply(nameofdata,2,mean,na.rm=T)

if the variables have different lengths, we put "NA" in the missing

positions so that all have equal length

then, when computing the mean, add the "na.rm=T" argument to remove NA during calculation

>data()

this commands gives us datasets already available in R software

>head(nameofdata)

this commands gives us some first values from data

>tail(nameofdata)

this command gives us some last values from data

>str(nameofdata)

-this command gives structure of dataset

-like number of observations/ number of variables

>getwd()

this command gives the current working directory of R

>setwd("D:/Nabeela/mphil 2 semester/stat/data sets")

this command sets the working directory to the folder where the datasets are saved

>namethedata=read.csv("D:/Nabeela/mphil 2 semester/stat/data sets/iq_level.csv")

-this command reads a file from the drive that was prepared in Excel

-but first save the Excel file in the file format CSV (Comma delimited)

>namethedata=read.table("D:/Nabeela/mphil 2 semester/stat/data sets/quail_partial_data.txt")

-this command imports into R a data file saved as plain text (Notepad)

>namethedata=read.table("D:/Nabeela/mphil 2 semester/stat/data
sets/quail_partial_data.txt",header=T)

-with header=T the first line of the saved file is used as the column names instead of being read as data

>windows()

-to open an additional graphics window so that graphs can be drawn in separate windows

>par(mfrow=c(2,5))

-to divide the graphics window into a grid (here 2 rows by 5 columns) so it can show more than one graph


>attach(dataname)

-this command makes the columns of the given data table available by name

-after applying this command, just type the name of the column you want to work

on and press enter

>nameofdata[,1]

-same as above

>nameofdata$nameofcolumn

-same as above

>cbind(nameofcolumn,nameofothercolumn)

-this command is used to bind two columns (even from different datasets) side by side

>rbind(nameofcolumn,nameofothercolumn)

-this command is used to bind two columns and represent them in row format

>table(dataname[,2])

-this command gives frequency of values in data

> plot(iris[,1],col="black",ylim=c(0,8))

> points(iris[,2],col="red")

> points(iris[,3],col="blue")

> points(iris[,4],col="green")

-to get plot and add further points on it

>givename=which(variablename=="valueofvariable")

-it gives the positions (row numbers) in the variable column that hold the given value

-for example: f=which(Maternal=="F1"); then p=Maternal[-f] gives the values at all the other positions

-> f

[1] 1 2 3 4 22 23 24 25 26 27 28 29 30 31 32 47 48 67 68

[20] 69 70 71 72 73 74 75 76 77 78 79 80 97 98 99 100 101 102 122

[39] 123 124 125 126 127 128 129 130 144 145 146 147 148 169 170 171 172 173 174

[58] 175 176 177 178 179 180 181 182

-> Maternal[f]

[1] "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1"
[16] "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1"

[31] "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1"

[46] "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1" "F1"

[61] "F1" "F1" "F1" "F1" "F1"

REGRESSION MODEL (SIMPLE OR LINEAR)

STEPS:

1-TO CHECK LINEARITY, PLOT A SCATTER PLOT BETWEEN DEPENDENT VARIABLE (Y) AND INDEPENDENT
VARIABLE (X)

2-DRAW HISTOGRAM TO CHECK NORMALITY.

3-THEN APPLY LINEAR MODEL FUNCTION IN R.

4-INTERPRET RESULTS BY TAKING SUMMARY OF Lm.

>lm(dependentvariable~independentvariable)

-this command is the linear model function

>plot(independentvar,dependentvar)

-to get scatter plot for regression model

>points(independentvar,yhat,col="anycolor")

-to mark points for line of best fit or regression line

>lines(independentvar,yhat,col="anycolor")

-to connect points through lines

>abline(lm(dependentvar~independentvar),col="red")

-to get regression line in scatter plot

>pred=predict(fit,newdata=nameofnewdata)

-to get prediction of new data set based on previous data lm results

>cor(nameofdata)

-to get correlation between variables

>givename=which(nameofcolumn=="nameofcategory")

-for example: Gender=which(Sex=="M")

>givename
-for example:

Gender

1 3 7 9 13 16 17 18 19 20 21 23 24

25 27 30 31 32 33 34 40 41 42 43 48 49 50 51 52 58

-this gives positions where M category is placed

>nameofcolumn[namegiven]

-for example: Sex[Gender]

"M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M"

"M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M" "M"

-this command gives the values of the column at the positions stored under the given name

>cor(nameofdata[,-c(1:3)])

-this command computes correlation after removing columns 1 to 3

-Ho=no relation ;H1=relation

> library(nameofpackage)

-to get package of test you want to perform

-for example

library(ppcor)

>pcor(nameofdata)

-to perform partial correlation

>cor(variableone,variable2,method="spearmen")

-to get rank correlation with the Spearman method (the argument must be spelled method="spearman")

>cor(variableone,variabletwo,method="kendal")

-to get rank correlation with the Kendall method

>cor.test(variableone,variabletwo,method="kendal")

-to get correlation with p value

>cor(nameofdatatabel,method="spearman")

-to get table of Spearman correlation analysis

>ad.test(nameofdata)

-to get Anderson-Darling test for normality (ad.test is not in base R; load an add-on package such as nortest with library() first)


-Ho=normal ;H1=not normal

>shapiro.test(nameofdata)

-to perform the Shapiro-Wilk test of normality

-Ho=normal ;H1=not normal

>

testing of hypothesis:

TESTING OF HYPOTHESIS:

STEPS:

1:Normality

2:Homogeneity

3:tests

normal and homogeneous: t-test with var.equal=TRUE

normal and non-homogeneous: t-test with var.equal=FALSE

non-normal: Wilcoxon test (wilcox.test)

paired data: t-test with paired=TRUE

#########################################AFTER MIDS ###########################

>dataname=rep(1:4,each=5)

-this command repeats each of the numbers 1 to 4 five times

-for example treatment= (1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4)

>overallmean=mean(name of data $ treatment output name )

-to calculate overall mean

>txmean=tapply(name of data $ treatment output name , name of data $ treatment name ,mean)

-to calculate means of treatments individually

>duncanTest(fitt)

-to apply duncan test

>tukeyHSD(fitt)

-to apply Tukey's honest significant difference test (the R function is spelled TukeyHSD, with a capital T)

You might also like