Análisis multivariante en R: aplicación en ecología

Rosana Ferrero 5 de febrero de 2014

Índice
1. Análisis descriptivo de datos multivariantes con R . . . . . . . . . . . . . . . . . . . . . 2
1.1. EJEMPLO EN R . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 2

1

1. Análisis descriptivo de datos multivariantes con R

1.1. EJEMPLO EN R.

Este famoso conjunto de datos iris (de Fisher o Anderson) contiene las medidas, en centímetros, de la longitud y la anchura del sépalo y de la longitud y la anchura del pétalo de 50 flores de cada una de 3 especies de iris. Las especies son Iris setosa, versicolor y virginica. iris es un data frame con 150 casos (filas) y 5 variables (columnas) llamadas Sepal.Length, Sepal.Width, Petal.Length, Petal.Width y Species.

data(iris) #abrimos el archivo de datos

# El archivo contiene un encabezado con los nombres de las variables
# (header=T) y utiliza la coma como decimal (dec=",")
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

tail(iris)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width   Species
## 145          6.7         3.3          5.7         2.5 virginica
## 146          6.7         3.0          5.2         2.3 virginica
## 147          6.3         2.5          5.0         1.9 virginica
## 148          6.5         3.0          5.2         2.0 virginica
## 149          6.2         3.4          5.4         2.3 virginica
## 150          5.9         3.0          5.1         1.8 virginica

#nombres de los datos
names(iris)
## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"
## [5] "Species"

str(iris)

## 'data.frame':	150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# I. explorar cada variable por separado
# 1) variables cuantitativas
summary(iris)
##   Sepal.Length   Sepal.Width    Petal.Length   Petal.Width
##  Min.   :4.30   Min.   :2.00   Min.   :1.00   Min.   :0.1
##  1st Qu.:5.10   1st Qu.:2.80   1st Qu.:1.60   1st Qu.:0.3
##  Median :5.80   Median :3.00   Median :4.35   Median :1.3
##  Mean   :5.84   Mean   :3.06   Mean   :3.76   Mean   :1.2
##  3rd Qu.:6.40   3rd Qu.:3.30   3rd Qu.:5.10   3rd Qu.:1.8
##  Max.   :7.90   Max.   :4.40   Max.   :6.90   Max.   :2.5
##        Species
##  setosa    :50
##  versicolor:50
##  virginica :50

2

var(iris$Sepal.Length)
## [1] 0.6857
hist(iris$Sepal.Length)
plot(density(iris$Sepal.Length))

Histogram of iris$Sepal.Length
30 Frequency 0 4 5 10 15 20 25

5

6 iris$Sepal.Length

7

8

3

density.default(x = iris$Sepal.Length)
0.4 Density 0.0 0.1 0.2 0.3

4

5

6

7

8

N = 150 Bandwidth = 0.2736

# 2) variables cualitativas
table(iris$Species)
##
##     setosa versicolor  virginica
##         50         50         50

# Frequency of each species, drawn first as a pie chart and then as a bar chart.
# The contingency table is computed once and reused by both plots.
species_counts <- table(iris$Species)
pie(species_counts)
barplot(species_counts)

4

setosa

versicolor

virginica

5

0

10

20

30

40

50

setosa

versicolor

virginica

# II. explorar las variables en conjunto
cov(iris[, 1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length      0.68569    -0.04243       1.2743      0.5163
## Sepal.Width      -0.04243     0.18998      -0.3297     -0.1216
## Petal.Length      1.27432    -0.32966       3.1163      1.2956
## Petal.Width       0.51627    -0.12164       1.2956      0.5810

cor(iris[, 1:4])
##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length       1.0000     -0.1176       0.8718      0.8179
## Sepal.Width       -0.1176      1.0000      -0.4284     -0.3661
## Petal.Length       0.8718     -0.4284       1.0000      0.9629
## Petal.Width        0.8179     -0.3661       0.9629      1.0000

aggregate(Sepal.Length ~ Species, summary, data = iris)
##      Species Sepal.Length.Min. Sepal.Length.1st Qu.
## 1     setosa              4.30                 4.80
## 2 versicolor              4.90                 5.60
## 3  virginica              4.90                 6.22
##   Sepal.Length.Median Sepal.Length.Mean Sepal.Length.3rd Qu.
## 1                5.00              5.01                 5.20
## 2                5.90              5.94                 6.30
## 3                6.50              6.59                 6.90
##   Sepal.Length.Max.
## 1              5.80
## 2              7.00
## 3              7.90

6

# Fill colour per observation: the integer codes of the Species factor index
# into a fixed three-colour palette.
species_fill <- c("red", "green3", "blue")[unclass(iris$Species)]

# Distribution of sepal length, one box per species.
boxplot(Sepal.Length ~ Species, data = iris)

# Sepal length against sepal width; both the colour and the plotting symbol
# are taken from the species.
with(
  iris,
  plot(Sepal.Length, Sepal.Width, col = Species, pch = as.numeric(Species))
)

# Petal length against petal width, filled circles coloured by species.
plot(
  iris$Petal.Length, iris$Petal.Width,
  pch = 21,
  bg = species_fill,
  main = "Iris Data"
)

# Scatter-plot matrix of the four numeric columns.
pairs(
  iris[1:4],
  main = "Iris Data",
  pch = 21,
  bg = species_fill
)

4.5

5.0

5.5

6.0

6.5

7.0

7.5

8.0

setosa

versicolor

virginica

7

Sepal.Width

2.0

2.5

3.0

3.5

4.0

4.5

5.0

5.5

6.0

6.5

7.0

7.5

8.0

Sepal.Length

8

Iris Data
2.5 iris$Petal.Width 0.5 1 1.0 1.5 2.0

2

3

4 iris$Petal.Length

5

6

7

9

Iris Data
2.0 2.5 3.0 3.5 4.0 0.5 1.0 1.5 2.0 2.5 7.5 4.0 2.0 3.0

Sepal.Width
7

1.5

2.5

Petal.Width
4.5 5.5 6.5 7.5 1 2 3 4 5 6 7

#' Panel function for pairs(): print the absolute correlation of x and y.
#'
#' The panel's user coordinates are temporarily set to the unit square so the
#' text can be centred at (0.5, 0.5); the previous coordinates are restored
#' when the function exits.
#'
#' @param x,y Numeric vectors supplied by pairs() for the current panel.
#' @param digits Number of significant digits used to format the correlation.
#' @param prefix String pasted in front of the formatted correlation.
#' @param cex.cor Base character expansion; when missing it is derived from
#'   the width of the text so that the label fits the panel.
#' @param ... Accepted so pairs() can pass extra graphical arguments; unused.
#' @return The value of the final text() call (called for its side effect).
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  usr <- par("usr")
  # par() only sets tagged arguments: the previous `par(usr)` passed an
  # untagged numeric vector, which warns and restores nothing.
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))

  r <- abs(cor(x, y))
  # Formatting r together with a long decimal makes format() pick a common
  # representation for both; only the string for r is kept.
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  # Text size is proportional to the strength of the correlation.
  text(0.5, 0.5, txt, cex = cex.cor * r)
}

# Scatter-plot matrix with scatter plots below the diagonal and absolute
# correlations above it.
pairs(
  iris[1:4],
  main = "Iris Data",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)],
  upper.panel = panel.cor
)

0.5

10

1

3

Petal.Length

5

4.5

6.0

Sepal.Length

Iris Data
2.0 2.5 3.0 3.5 4.0 0.5 1.0 1.5 2.0 2.5 7.5
0.12

4.0

2.0

3.0

Sepal.Width

0.43

0.37
7

1.5

2.5

Petal.Width
4.5 5.5 6.5 7.5 1 2 3 4 5 6 7

# 3D plots
library(scatterplot3d)
scatterplot3d(
  x = iris$Petal.Width,
  y = iris$Sepal.Length,
  z = iris$Sepal.Width
)
# Alternative using rgl, left disabled as in the original:
# library(rgl)
# plot3d(iris$Petal.Width, iris$Sepal.Length, iris$Sepal.Width)

0.5

11

1

3

Petal.Length

0.96

5

4.5

6.0

Sepal.Length

0.87

0.82

iris$Sepal.Width

4.0

4.5

8 7 6 5 4 0.0 0.5 1.0 1.5 2.0 2.5

2.0

iris$Petal.Width

# More complex plots: ggplot2
library(ggplot2)

# Sepal length vs. sepal width, one row of panels per species.
qplot(
  Sepal.Length,
  Sepal.Width,
  data = iris,
  facets = Species ~ .
)

# Points plus a linear fit; colour by species, point size by petal width.
qplot(
  x = Sepal.Width,
  y = Sepal.Length,
  data = iris,
  geom = c("point", "smooth"),
  color = Species,
  size = Petal.Width,
  method = "lm"
)

# Same plot, split into panels with the one-sided formula ~Species.
qplot(
  x = Sepal.Width,
  y = Sepal.Length,
  data = iris,
  geom = c("point", "smooth"),
  color = Species,
  size = Petal.Width,
  method = "lm",
  facets = ~Species
)

# Same plot, one row of panels per species (Species ~ .).
qplot(
  x = Sepal.Width,
  y = Sepal.Length,
  data = iris,
  geom = c("point", "smooth"),
  color = Species,
  size = Petal.Width,
  method = "lm",
  facets = Species ~ .
)

12

iris$Sepal.Length

2.5

3.0

3.5

4.5 4.0 3.5 3.0 2.5 2.0 4.5 setosa

Sepal.Width

4.0 versicolor 3.5 3.0 2.5 2.0 4.5 4.0 virginica 3.5 3.0 2.5 2.0 5 6 7 8

Sepal.Length

13

8

Species 7 setosa versicolor

Sepal.Length

virginica 6 Petal.Width 0.5 1.0 1.5 5 2.0 2.5

4 2.0 2.5 3.0 3.5 4.0 4.5

Sepal.Width

14

setosa 8

versicolor

virginica

Species 7 setosa versicolor

Sepal.Length

virginica 6 Petal.Width 0.5 1.0 1.5 5 2.0 2.5

4 2.0 2.5 3.0 3.5 4.0 4.5 2.0 2.5 3.0 3.5 4.0 4.5 2.0 2.5 3.0 3.5 4.0 4.5

Sepal.Width

15

8 7 setosa 6 5 4 8

Species setosa versicolor virginica versicolor

Sepal.Length

7 6 5 4 8 7

Petal.Width 0.5 1.0 1.5 2.0 2.5

virginica

6 5 4 2.0 2.5 3.0 3.5 4.0

4.5

Sepal.Width
# gráficos más complejos: ggplot2
library(reshape2)
iris$flower_id <- rownames(iris)
iris_melted <- melt(iris)
## Using Species, flower_id as id variables
head(iris_melted)
##   Species flower_id     variable value
## 1  setosa         1 Sepal.Length   5.1
## 2  setosa         2 Sepal.Length   4.9
## 3  setosa         3 Sepal.Length   4.7
## 4  setosa         4 Sepal.Length   4.6
## 5  setosa         5 Sepal.Length   5.0
## 6  setosa         6 Sepal.Length   5.4

split_variable <- strsplit(as.character(iris_melted$variable), split = "\\.")
iris_melted$flower_part <- sapply(split_variable, "[", 1)
iris_melted$measurement_type <- sapply(split_variable, "[", 2)
iris_melted$variable <- NULL
head(iris_melted)
##   Species flower_id value flower_part measurement_type
## 1  setosa         1   5.1       Sepal           Length
## 2  setosa         2   4.9       Sepal           Length
## 3  setosa         3   4.7       Sepal           Length
## 4  setosa         4   4.6       Sepal           Length
## 5  setosa         5   5.0       Sepal           Length
## 6  setosa         6   5.4       Sepal           Length

16

iris_cast <- dcast(iris_melted, formula = flower_id + Species + flower_part ~ measurement_type)
qplot(x = Width, y = Length, data = iris_cast, geom = c("point", "smooth"), color = Species, method = "lm", facets = flower_part ~ Species)
head(iris_cast)
##   flower_id    Species flower_part Length Width
## 1         1     setosa       Petal    1.4   0.2
## 2         1     setosa       Sepal    5.1   3.5
## 3        10     setosa       Petal    1.5   0.1
## 4        10     setosa       Sepal    4.9   3.1
## 5       100 versicolor       Petal    4.1   1.3
## 6       100 versicolor       Sepal    5.7   2.8

setosa 8

versicolor

virginica

6 Petal

4

2

Species setosa versicolor virginica

Length

8

6 Sepal

4

2

0

1

2

3

4

0

1

2

3

4

0

1

2

3

4

Width
# Length vs. width with a linear fit, in a grid of panels: one row per
# species, one column per flower part. Each panel gets its own axis ranges.
# `scales` is spelled out in full; the previous `scale = "free"` only worked
# through partial argument matching.
ggplot(data = iris_cast, aes(x = Width, y = Length)) +
  geom_point() +
  facet_grid(Species ~ flower_part, scales = "free") +
  geom_smooth(method = "lm") +
  theme_bw(base_size = 24)

17

6 5 4 3 2 1 7 6 5 4 3 8 7 6 5

Petal

Sepal setosa versicolor virginica

Length

0.0 0.5 1.0 1.5 2.0 2.5 2.0 2.5 3.0 3.5 4.0 4.5

Width

# Length vs. width with a linear fit per flower part; shape and colour both
# encode the flower part, and there is one panel per species.
my_plot <- ggplot(
  data = iris_cast,
  aes(x = Width, y = Length, shape = flower_part, color = flower_part)
) +
  geom_point() +
  facet_grid(~Species) +
  geom_smooth(method = "lm")

# Display the stored plot.
my_plot

18

setosa 8

versicolor

virginica

6

Length

flower_part Petal 4 Sepal

2

0

1

2

3

4

0

1

2

3

4

0

1

2

3

4

Width
# Redraw the stored plot with two themes from the ggthemes package.
library(ggthemes)

my_plot +
  theme_excel(base_size = 24)

my_plot +
  theme_wsj(base_size = 18)

19

setosa 8

versicolor

virginica

6

Length

flower_part Petal 4 Sepal

2

0

1

2

3

4

0

1

2

3

4

0

1

2

3

4

Width

20

flower_part
setosa 8 versicolor

Petal

Sepal virginica

6

4

2

0

1

2

3

4

0

1

2

3

4

0

1

2

3

4

Fuente de algunos gráficos: https://github.com/raphg/Biostat-578/blob/master/Advanced_graphics_in_R.Rpres

21

Sign up to vote on this title
UsefulNot useful