You are on page 1of 37

Taller R

Grupo 9 - Melgar, L., Fiallos, B., Ladines, D., Rodríguez, D., Zambrano, J.

11/8/2021

Estadística Descriptiva univariada/bivariada


Se deben tomar muestras de tamaño n = 20
summary(cars)

## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00

# Draw a simple random sample (without replacement) of n rows from cars.
# The sample size is taken from n instead of repeating the literal 20.
# NOTE(review): no seed is set, so the sample differs on every run; call
# set.seed() beforehand if the printed results must be reproducible.
n = 20
indices = sample(seq_len(nrow(cars)), n)
carsMuestreado = cars[indices, ]
carsMuestreado

## speed dist
## 19 13 46
## 45 23 54
## 28 16 40
## 27 16 32
## 7 10 18
## 15 12 28
## 33 18 56
## 11 11 28
## 42 20 56
## 4 7 22
## 9 10 34
## 36 19 36
## 13 12 20
## 24 15 20
## 16 13 26
## 35 18 84
## 46 24 70
## 37 19 46
## 18 13 34
## 47 24 92
Variable Speed
muestraSpeed = carsMuestreado[,1]

library(fdth)

##
## Attaching package: 'fdth'

## The following objects are masked from 'package:stats':


##
## sd, var

k = nclass.Sturges(muestraSpeed)
tablaS = fdt(muestraSpeed, breaks = "Sturges", right = FALSE)
tablaS

## Class limits f rf rf(%) cf cf(%)


## [6.93,9.815) 1 0.05 5 1 5
## [9.815,12.7) 5 0.25 25 6 30
## [12.7,15.585) 4 0.20 20 10 50
## [15.585,18.47) 4 0.20 20 14 70
## [18.47,21.355) 3 0.15 15 17 85
## [21.355,24.24) 3 0.15 15 20 100

# Histogram of Speed with Sturges classes. hist() returns the class
# information invisibly, so it is kept in tabla for the ogive below instead
# of computing the histogram a second time.
tabla = hist(muestraSpeed, breaks = "Sturges", main = "Histograma de Speed",
             xlab = "Speed", ylab = "Frecuencia Absoluta",
             col = "lightgreen")

# Ogive: cumulative absolute frequency against the class midpoints.
acumulada = cumsum(tabla$counts)
plot(tabla$mids, acumulada, main = "Ojiva de Speed", col = "green",
     xlab = "Speed", ylab = "Frecuencia Absoluta Acumulada")
lines(tabla$mids, acumulada, type = "l", col = "green")

# Horizontal box plot. `breaks` is a hist() argument and has no meaning for
# boxplot(), so it is dropped; with horizontal = TRUE the data run along the
# x axis, so the variable name belongs in xlab.
boxplot(muestraSpeed, horizontal = TRUE, main = "Diagrama de caja",
        xlab = "Speed", col = "lightgreen")
library(modeest)

##
## Attaching package: 'modeest'

## The following object is masked from 'package:fdth':


##
## mfv

mean(muestraSpeed)

## [1] 15.65

median(muestraSpeed)

## [1] 15.5

modaS = mlv(muestraSpeed, method = "mfv")


modaS

## [1] 13

summary(muestraSpeed)

## Min. 1st Qu. Median Mean 3rd Qu. Max.


## 7.00 12.00 15.50 15.65 19.00 24.00

quantile(muestraSpeed, probs = c(0.05,0.95))


## 5% 95%
## 9.85 24.00

RI = IQR(muestraSpeed)
RI

## [1] 7

Q1 = quantile(muestraSpeed, probs = 0.25)


Q3 = quantile(muestraSpeed, probs = 0.75)
cerca_interior= Q1 - 1.5 * RI
cerca_exterior= Q3 + 1.5 * RI
v_logico1 = muestraSpeed < cerca_interior
v_logico1

## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

sum(v_logico1)

## [1] 0

# Report the lower outliers of Speed, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test and would return only the first outlier.
if (sum(v_logico1) == 0) {
  "No existe valores aberrantes inferiores"
} else {
  muestraSpeed[v_logico1]
}

## [1] "No existe valores aberrantes inferiores"

v_logico2 = muestraSpeed > cerca_exterior


v_logico2

## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

sum(v_logico2)

## [1] 0

# Report the upper outliers of Speed, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test and would return only the first outlier.
if (sum(v_logico2) == 0) {
  "No existe valores aberrantes superiores"
} else {
  muestraSpeed[v_logico2]
}

## [1] "No existe valores aberrantes superiores"

var(muestraSpeed)

## [1] 23.92368

sd(muestraSpeed)

## [1] 4.891184

cv = (sd(muestraSpeed)/mean(muestraSpeed)) * 100
cv
## [1] 31.25357

Variable Dist
muestraDist = carsMuestreado[,2]

library(fdth)

k = nclass.Sturges(muestraDist)
tablaD = fdt(muestraDist, breaks = "Sturges", right = FALSE)
tablaD

## Class limits f rf rf(%) cf cf(%)


## [17.82,30.337) 7 0.35 35 7 35
## [30.337,42.853) 5 0.25 25 12 60
## [42.853,55.37) 3 0.15 15 15 75
## [55.37,67.887) 2 0.10 10 17 85
## [67.887,80.403) 1 0.05 5 18 90
## [80.403,92.92) 2 0.10 10 20 100

# Histogram of Dist with Sturges classes. hist() returns the class
# information invisibly, so it is kept in tabla for the ogive below instead
# of computing the histogram a second time.
tabla = hist(muestraDist, breaks = "Sturges", main = "Histograma de Dist",
             xlab = "Dist", ylab = "Frecuencia Absoluta", col = "lightblue")

# Ogive: cumulative absolute frequency against the class midpoints. The
# connecting line now uses the same colour as the points (it was green).
acumulada = cumsum(tabla$counts)
plot(tabla$mids, acumulada, main = "Ojiva de Dist", col = "blue",
     xlab = "Dist", ylab = "Frecuencia Absoluta Acumulada")
lines(tabla$mids, acumulada, type = "l", col = "blue")

# Horizontal box plot. `breaks` is a hist() argument and has no meaning for
# boxplot(), so it is dropped; with horizontal = TRUE the data run along the
# x axis, so the variable name belongs in xlab.
boxplot(muestraDist, horizontal = TRUE, main = "Diagrama de caja",
        xlab = "Dist", col = "lightblue")
library(modeest)

mean(muestraDist)

## [1] 42.1

median(muestraDist)

## [1] 35

modaD = mlv(muestraDist, method = "mfv")


modaD

## [1] 20 28 34 46 56

summary(muestraDist)

## Min. 1st Qu. Median Mean 3rd Qu. Max.


## 18.0 27.5 35.0 42.1 54.5 92.0

quantile(muestraDist, probs = c(0.05,0.95))

## 5% 95%
## 19.9 84.4

RI = IQR(muestraDist)
RI

## [1] 27

Q1 = quantile(muestraDist, probs = 0.25)


Q3 = quantile(muestraDist, probs = 0.75)
cerca_interior= Q1 - 1.5 * RI
cerca_exterior= Q3 + 1.5 * RI
v_logico1 = muestraDist < cerca_interior
v_logico1

## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

sum(v_logico1)

## [1] 0

# Report the lower outliers of Dist, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test and would return only the first outlier.
if (sum(v_logico1) == 0) {
  "No existe valores aberrantes inferiores"
} else {
  muestraDist[v_logico1]
}

## [1] "No existe valores aberrantes inferiores"

v_logico2 = muestraDist > cerca_exterior


v_logico2
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

sum(v_logico2)

## [1] 0

# Report the upper outliers of Dist, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test and would return only the first outlier.
if (sum(v_logico2) == 0) {
  "No existe valores aberrantes superiores"
} else {
  muestraDist[v_logico2]
}

## [1] "No existe valores aberrantes superiores"

var(muestraDist)

## [1] 446.0947

sd(muestraDist)

## [1] 21.12095

cv = (sd(muestraDist)/mean(muestraDist)) * 100
cv

## [1] 50.16854

Descriptivo Bivariado
corrMuestras = cor(muestraSpeed, muestraDist)
corrMuestras

## [1] 0.7839219

covMuestras = cov(muestraSpeed, muestraDist)


covMuestras

## [1] 80.98421

matrizMuestra = pairs(carsMuestreado)
Regresión Lineal con el data set “cars”

Supuestos:
• Linealidad
• Homogeneidad
• Homocedasticidad
• Normalidad
• Independencia

Modelo Lineal
# Simple linear regression of stopping distance on speed for the sampled cars.
# NOTE(review): muestraDist and muestraSpeed are free-standing vectors, not
# columns of carsMuestreado (whose columns are speed and dist), so lm() finds
# them in the calling environment and the `data` argument is not what supplies
# them. The fit is the same as lm(dist ~ speed, data = carsMuestreado).
modeloCars = lm(muestraDist ~ muestraSpeed, data = carsMuestreado)
summary(modeloCars)

##
## Call:
## lm(formula = muestraDist ~ muestraSpeed, data = carsMuestreado)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.900 -8.016 -1.285 6.754 33.945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.8769 10.3381 -1.052 0.307
## muestraSpeed 3.3851 0.6319 5.357 4.32e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.47 on 18 degrees of freedom
## Multiple R-squared: 0.6145, Adjusted R-squared: 0.5931
## F-statistic: 28.7 on 1 and 18 DF, p-value: 4.316e-05

shapiro.test(modeloCars$residuals)

##
## Shapiro-Wilk normality test
##
## data: modeloCars$residuals
## W = 0.95095, p-value = 0.3818

Según el test de normalidad de Shapiro-Wilk, el p-value (0.3818) es mayor que el nivel
de significancia de 0.05, por lo que no se rechaza la hipótesis nula de normalidad de
los residuos; entonces el modelo cumple con el supuesto de la normalidad.
plot(muestraDist ~ muestraSpeed, xlab="Velocidad", ylab="Distancia de
frenado")
abline(modeloCars)
Regresión lineal Simple - Bondad de Ajuste

Modelo de regresión simple:

Yi = B0 + B1Xi + Ei; i = 1, …., N

X: Velocidad de cada auto

Y: Distancia de frenado
Contraste de hipótesis:
H0: B1 = 0
H1: B1 ≠ 0
Se rechaza la hipótesis nula de que este coeficiente es cero debido a que el valor de
(Pr(>|t|) = 4.32e-05) es menor que el nivel de significancia de 0.05; esto significa que
hay una probabilidad muy baja de que el coeficiente estimado de speed en el modelo lineal
esté dentro de una distribución aleatoria de valores “nulos”, es decir, de coeficientes
obtenidos aleatoriamente pero que en realidad no son distintos de cero.

Modelo estimado de regresión lineal simple:

Yi = -10.8769 + 3.3851 Xi + ei; i = 1, …, 20


En nuestro modelo, el R2 es 0.6145 (el R2 corregido es 0.5931), lo que significa que el
61% de la variabilidad de la distancia de frenado se puede explicar por la velocidad a la
que va el coche.
anova(modeloCars)

## Analysis of Variance Table


##
## Response: muestraDist
## Df Sum Sq Mean Sq F value Pr(>F)
## muestraSpeed 1 5208.7 5208.7 28.697 4.316e-05 ***
## Residuals 18 3267.1 181.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(state.x77)

## Population Income Illiteracy Life Exp


## Min. : 365 Min. :3098 Min. :0.500 Min. :67.96
## 1st Qu.: 1080 1st Qu.:3993 1st Qu.:0.625 1st Qu.:70.12
## Median : 2838 Median :4519 Median :0.950 Median :70.67
## Mean : 4246 Mean :4436 Mean :1.170 Mean :70.88
## 3rd Qu.: 4968 3rd Qu.:4814 3rd Qu.:1.575 3rd Qu.:71.89
## Max. :21198 Max. :6315 Max. :2.800 Max. :73.60
## Murder HS Grad Frost Area
## Min. : 1.400 Min. :37.80 Min. : 0.00 Min. : 1049
## 1st Qu.: 4.350 1st Qu.:48.05 1st Qu.: 66.25 1st Qu.: 36985
## Median : 6.850 Median :53.25 Median :114.50 Median : 54277
## Mean : 7.378 Mean :53.11 Mean :104.46 Mean : 70736
## 3rd Qu.:10.675 3rd Qu.:59.15 3rd Qu.:139.75 3rd Qu.: 81162
## Max. :15.100 Max. :67.30 Max. :188.00 Max. :566432

# Draw a simple random sample (without replacement) of n states from the
# state.x77 matrix. The sample size is taken from n instead of repeating the
# literal 20.
# NOTE(review): no seed is set, so the sample differs on every run; call
# set.seed() beforehand if the printed results must be reproducible.
n = 20
indices = sample(seq_len(nrow(state.x77)), n)
stateMuestreado = state.x77[indices, ]
stateMuestreado

## Population Income Illiteracy Life Exp Murder HS Grad


Frost Area
## Minnesota 3921 4675 0.6 72.96 2.3 57.6
160 79289
## Maryland 4122 5299 0.9 70.22 8.5 52.3
101 9891
## Ohio 10735 4561 0.8 70.82 7.4 53.2
124 40975
## Arkansas 2110 3378 1.9 70.66 10.1 39.9
65 51945
## New Hampshire 812 4281 0.7 71.23 3.3 57.6
174 9027
## Alaska 365 6315 1.5 69.31 11.3 66.7
152 566432
## Idaho 813 4119 0.6 71.87 5.3 59.5
126 82677
## Pennsylvania 11860 4449 1.0 70.43 6.1 50.2
126 44966
## Virginia 4981 4701 1.4 70.08 9.5 47.8
85 39780
## Alabama 3615 3624 2.1 69.05 15.1 41.3
20 50708
## Arizona 2212 4530 1.8 70.55 7.8 58.1
15 113417
## Nebraska 1544 4508 0.6 72.60 2.9 59.3
139 76483
## New Mexico 1144 3601 2.2 70.32 9.7 55.2
120 121412
## Connecticut 3100 5348 1.1 72.48 3.1 56.0
139 4862
## Florida 8277 4815 1.3 70.66 10.7 52.6
11 54090
## Washington 3559 4864 0.6 71.72 4.3 63.5
32 66570
## Michigan 9111 4751 0.9 70.63 11.1 52.8
125 56817
## Tennessee 4173 3821 1.7 70.11 11.0 41.8
70 41328
## Kansas 2280 4669 0.6 72.58 4.5 59.9
114 81787
## Nevada 590 5149 0.5 69.03 11.5 65.2
188 109889

Variable Population
muestraPopu = stateMuestreado[,1]

library(fdth)

k = nclass.Sturges(muestraPopu)
tablaP = fdt(muestraPopu, breaks = "Sturges", right = FALSE)
tablaP

## Class limits f rf rf(%) cf cf(%)


## [361.35,2297.558) 9 0.45 45 9 45
## [2297.558,4233.767) 6 0.30 30 15 75
## [4233.767,6169.975) 1 0.05 5 16 80
## [6169.975,8106.183) 0 0.00 0 16 80
## [8106.183,10042.39) 2 0.10 10 18 90
## [10042.39,11978.6) 2 0.10 10 20 100

# Histogram of Population with Sturges classes. hist() returns the class
# information invisibly, so it is kept in tabla for the ogive below instead
# of computing the histogram a second time.
tabla = hist(muestraPopu, breaks = "Sturges",
             main = "Histograma de Population", xlab = "Population",
             ylab = "Frecuencia Absoluta", col = "lightgreen")

# Ogive: cumulative absolute frequency against the class midpoints.
acumulada = cumsum(tabla$counts)
plot(tabla$mids, acumulada, main = "Ojiva de Population", col = "green",
     xlab = "Population", ylab = "Frecuencia Absoluta Acumulada")
lines(tabla$mids, acumulada, type = "l", col = "green")

# Horizontal box plot. `breaks` is a hist() argument and has no meaning for
# boxplot(), so it is dropped; with horizontal = TRUE the data run along the
# x axis, so the variable name belongs in xlab.
boxplot(muestraPopu, horizontal = TRUE, main = "Diagrama de caja",
        xlab = "Population", col = "lightgreen")
library(modeest)

mean(muestraPopu)

## [1] 3966.2

median(muestraPopu)

## [1] 3329.5

modaP = mlv(muestraPopu, method = "mfv")


modaP

## [1] 365 590 812 813 1144 1544 2110 2212 2280 3100 3559
3615
## [13] 3921 4122 4173 4981 8277 9111 10735 11860

summary(muestraPopu)

## Min. 1st Qu. Median Mean 3rd Qu. Max.


## 365 1444 3330 3966 4375 11860

quantile(muestraPopu, probs = c(0.05,0.95))

## 5% 95%
## 578.75 10791.25

RI = IQR(muestraPopu)
RI
## [1] 2931

Q1 = quantile(muestraPopu, probs = 0.25)


Q3 = quantile(muestraPopu, probs = 0.75)
cerca_interior= Q1 - 1.5 * RI
cerca_exterior= Q3 + 1.5 * RI
v_logico1 = muestraPopu < cerca_interior
v_logico1

## Minnesota Maryland Ohio Arkansas New Hampshire


## FALSE FALSE FALSE FALSE FALSE
## Alaska Idaho Pennsylvania Virginia Alabama
## FALSE FALSE FALSE FALSE FALSE
## Arizona Nebraska New Mexico Connecticut Florida
## FALSE FALSE FALSE FALSE FALSE
## Washington Michigan Tennessee Kansas Nevada
## FALSE FALSE FALSE FALSE FALSE

sum(v_logico1)

## [1] 0

# Report the lower outliers of Population, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test and would return only the first outlier.
if (sum(v_logico1) == 0) {
  "No existe valores aberrantes inferiores"
} else {
  muestraPopu[v_logico1]
}

## [1] "No existe valores aberrantes inferiores"

v_logico2 = muestraPopu > cerca_exterior


v_logico2

## Minnesota Maryland Ohio Arkansas New Hampshire


## FALSE FALSE TRUE FALSE FALSE
## Alaska Idaho Pennsylvania Virginia Alabama
## FALSE FALSE TRUE FALSE FALSE
## Arizona Nebraska New Mexico Connecticut Florida
## FALSE FALSE FALSE FALSE FALSE
## Washington Michigan Tennessee Kansas Nevada
## FALSE TRUE FALSE FALSE FALSE

sum(v_logico2)

## [1] 3

# Report the upper outliers of Population, or a message when there are none.
# The original ifelse() call shaped its result after the length-one test, so
# it printed only the first of the three flagged states; if/else returns the
# whole (named) subset.
if (sum(v_logico2) == 0) {
  "No existe valores aberrantes superiores"
} else {
  muestraPopu[v_logico2]
}

## Ohio Pennsylvania Michigan
## 10735 11860 9111

var(muestraPopu)

## [1] 11725047

sd(muestraPopu)
## [1] 3424.186

cv = (sd(muestraPopu)/mean(muestraPopu)) * 100
cv

## [1] 86.33417

Variable Income
muestraIncome = stateMuestreado[,2]

library(fdth)

k = nclass.Sturges(muestraIncome)
tablaI = fdt(muestraIncome, breaks = "Sturges", right = FALSE)
tablaI

## Class limits f rf rf(%) cf cf(%)


## [3344.22,3849.875) 4 0.20 20 4 20
## [3849.875,4355.53) 2 0.10 10 6 30
## [4355.53,4861.185) 9 0.45 45 15 75
## [4861.185,5366.84) 4 0.20 20 19 95
## [5366.84,5872.495) 0 0.00 0 19 95
## [5872.495,6378.15) 1 0.05 5 20 100

# Histogram of Income with Sturges classes. hist() returns the class
# information invisibly, so it is kept in tabla for the ogive below instead
# of computing the histogram a second time.
tabla = hist(muestraIncome, breaks = "Sturges",
             main = "Histograma de Income", xlab = "Income",
             ylab = "Frecuencia Absoluta", col = "lightblue")

# Ogive: cumulative absolute frequency against the class midpoints.
acumulada = cumsum(tabla$counts)
plot(tabla$mids, acumulada, main = "Ojiva de Income", col = "blue",
     xlab = "Income", ylab = "Frecuencia Absoluta Acumulada")
lines(tabla$mids, acumulada, type = "l", col = "blue")

# Horizontal box plot. `breaks` is a hist() argument and has no meaning for
# boxplot(), so it is dropped; with horizontal = TRUE the data run along the
# x axis, so the variable name belongs in xlab.
boxplot(muestraIncome, horizontal = TRUE, main = "Diagrama de caja",
        xlab = "Income", col = "lightblue")
library(modeest)

mean(muestraIncome)

## [1] 4572.9

median(muestraIncome)

## [1] 4615

modaI = mlv(muestraIncome, method = "mfv")


modaI

## [1] 3378 3601 3624 3821 4119 4281 4449 4508 4530 4561 4669 4675 4701
4751 4815
## [16] 4864 5149 5299 5348 6315

summary(muestraIncome)

## Min. 1st Qu. Median Mean 3rd Qu. Max.


## 3378 4240 4615 4573 4827 6315

quantile(muestraIncome, probs = c(0.05,0.95))

## 5% 95%
## 3589.85 5396.35

RI = IQR(muestraIncome)
RI
## [1] 586.75

Q1 = quantile(muestraIncome, probs = 0.25)


Q3 = quantile(muestraIncome, probs = 0.75)
cerca_interior= Q1 - 1.5 * RI
cerca_exterior= Q3 + 1.5 * RI
v_logico1 = muestraIncome < cerca_interior
v_logico1

## Minnesota Maryland Ohio Arkansas New Hampshire


## FALSE FALSE FALSE FALSE FALSE
## Alaska Idaho Pennsylvania Virginia Alabama
## FALSE FALSE FALSE FALSE FALSE
## Arizona Nebraska New Mexico Connecticut Florida
## FALSE FALSE FALSE FALSE FALSE
## Washington Michigan Tennessee Kansas Nevada
## FALSE FALSE FALSE FALSE FALSE

sum(v_logico1)

## [1] 0

# Report the lower outliers of Income, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test and would return only the first outlier.
if (sum(v_logico1) == 0) {
  "No existe valores aberrantes inferiores"
} else {
  muestraIncome[v_logico1]
}

## [1] "No existe valores aberrantes inferiores"

v_logico2 = muestraIncome > cerca_exterior


v_logico2

## Minnesota Maryland Ohio Arkansas New Hampshire


## FALSE FALSE FALSE FALSE FALSE
## Alaska Idaho Pennsylvania Virginia Alabama
## TRUE FALSE FALSE FALSE FALSE
## Arizona Nebraska New Mexico Connecticut Florida
## FALSE FALSE FALSE FALSE FALSE
## Washington Michigan Tennessee Kansas Nevada
## FALSE FALSE FALSE FALSE FALSE

sum(v_logico2)

## [1] 1

# Report the upper outliers of Income, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test: it would return only the first outlier and drop the state
# name that identifies it.
if (sum(v_logico2) == 0) {
  "No existe valores aberrantes superiores"
} else {
  muestraIncome[v_logico2]
}

## Alaska
## 6315

var(muestraIncome)

## [1] 466150.6

sd(muestraIncome)
## [1] 682.7522

cv = (sd(muestraIncome)/mean(muestraIncome)) * 100
cv

## [1] 14.9304

Variable Area
muestraArea = stateMuestreado[,8]

library(fdth)

k = nclass.Sturges(muestraArea)
tablaA = fdt(muestraArea, breaks = "Sturges", right = FALSE)
tablaA

## Class limits f rf rf(%) cf cf(%)


## [4813.38,99360.5367) 16 0.80 80 16 80
## [99360.5367,193907.693) 3 0.15 15 19 95
## [193907.693,288454.85) 0 0.00 0 19 95
## [288454.85,383002.007) 0 0.00 0 19 95
## [383002.007,477549.163) 0 0.00 0 19 95
## [477549.163,572096.32) 1 0.05 5 20 100

# Histogram of Area with Sturges classes. hist() returns the class
# information invisibly, so it is kept in tabla for the ogive below instead
# of computing the histogram a second time.
tabla = hist(muestraArea, breaks = "Sturges", main = "Histograma de Area",
             xlab = "Area", ylab = "Frecuencia Absoluta", col = "red")

# Ogive: cumulative absolute frequency against the class midpoints. The
# title now names Area (it was copied over as "Ojiva de Income").
acumulada = cumsum(tabla$counts)
plot(tabla$mids, acumulada, main = "Ojiva de Area", col = "red",
     xlab = "Area", ylab = "Frecuencia Absoluta Acumulada")
lines(tabla$mids, acumulada, type = "l", col = "red")

# Horizontal box plot. `breaks` is a hist() argument and has no meaning for
# boxplot(), so it is dropped; with horizontal = TRUE the data run along the
# x axis, so the variable name belongs in xlab.
boxplot(muestraArea, horizontal = TRUE, main = "Diagrama de caja",
        xlab = "Area", col = "red")
library(modeest)

mean(muestraArea)

## [1] 85117.25

median(muestraArea)

## [1] 55453.5

# Mode(s) of Area using the most-frequent-value method of mlv(). The result
# is stored in modaA so that modaI, which holds the Income mode, is not
# overwritten.
modaA = mlv(muestraArea, method = "mfv")
modaA

## [1] 4862 9027 9891 39780 40975 41328 44966 50708 51945
54090
## [11] 56817 66570 76483 79289 81787 82677 109889 113417 121412
566432

summary(muestraArea)

## Min. 1st Qu. Median Mean 3rd Qu. Max.


## 4862 41240 55454 85117 82010 566432

quantile(muestraArea, probs = c(0.05,0.95))

## 5% 95%
## 8818.75 143663.00
RI = IQR(muestraArea)
RI

## [1] 40769.75

Q1 = quantile(muestraArea, probs = 0.25)


Q3 = quantile(muestraArea, probs = 0.75)
cerca_interior= Q1 - 1.5 * RI
cerca_exterior= Q3 + 1.5 * RI
v_logico1 = muestraArea < cerca_interior
v_logico1

## Minnesota Maryland Ohio Arkansas New Hampshire


## FALSE FALSE FALSE FALSE FALSE
## Alaska Idaho Pennsylvania Virginia Alabama
## FALSE FALSE FALSE FALSE FALSE
## Arizona Nebraska New Mexico Connecticut Florida
## FALSE FALSE FALSE FALSE FALSE
## Washington Michigan Tennessee Kansas Nevada
## FALSE FALSE FALSE FALSE FALSE

sum(v_logico1)

## [1] 0

# Report the lower outliers of Area, or a message when there are none.
# The subset is taken from muestraArea (the original indexed muestraIncome,
# a copy-paste slip). A plain if/else is used because ifelse() shapes its
# result after the length-one test and would return only the first outlier.
if (sum(v_logico1) == 0) {
  "No existe valores aberrantes inferiores"
} else {
  muestraArea[v_logico1]
}

## [1] "No existe valores aberrantes inferiores"

v_logico2 = muestraArea > cerca_exterior


v_logico2

## Minnesota Maryland Ohio Arkansas New Hampshire


## FALSE FALSE FALSE FALSE FALSE
## Alaska Idaho Pennsylvania Virginia Alabama
## TRUE FALSE FALSE FALSE FALSE
## Arizona Nebraska New Mexico Connecticut Florida
## FALSE FALSE FALSE FALSE FALSE
## Washington Michigan Tennessee Kansas Nevada
## FALSE FALSE FALSE FALSE FALSE

sum(v_logico2)

## [1] 1

# Report the upper outliers of Area, or a message when there are none.
# A plain if/else is used because ifelse() shapes its result after the
# length-one test: it would return only the first outlier and drop the state
# name that identifies it.
if (sum(v_logico2) == 0) {
  "No existe valores aberrantes superiores"
} else {
  muestraArea[v_logico2]
}

## Alaska
## 566432

var(muestraArea)
## [1] 13912448017

sd(muestraArea)

## [1] 117951

cv = (sd(muestraArea)/mean(muestraArea)) * 100
cv

## [1] 138.5748

Regresión Lineal con el data set “state.x77”

Supuestos:
• Linealidad
• Homogeneidad
• Homocedasticidad
• Normalidad
• Independencia

Modelo Lineal
# Multiple linear regression of Population on Income and Area for the
# sampled states. The three variables are free-standing vectors extracted
# from stateMuestreado, so no `data` argument is needed; the original passed
# carsMuestreado, which belongs to the unrelated cars analysis.
modeloState = lm(muestraPopu ~ muestraIncome + muestraArea)
summary(modeloState)

##
## Call:
## lm(formula = muestraPopu ~ muestraIncome + muestraArea)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3807.4 -1995.3 -946.5 541.6 7514.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.056e+02 5.791e+03 -0.036 0.972
## muestraIncome 1.155e+00 1.326e+00 0.871 0.396
## muestraArea -1.301e-02 7.673e-03 -1.696 0.108
##
## Residual standard error: 3348 on 17 degrees of freedom
## Multiple R-squared: 0.1448, Adjusted R-squared: 0.04417
## F-statistic: 1.439 on 2 and 17 DF, p-value: 0.2646

shapiro.test(modeloState$residuals)

##
## Shapiro-Wilk normality test
##
## data: modeloState$residuals
## W = 0.87936, p-value = 0.01723
Según el test de normalidad de Shapiro-Wilk, el p-value (0.01723) es menor que el nivel
de significancia de 0.05, por lo que se rechaza la hipótesis nula de normalidad de los
residuos; entonces el modelo no cumple con el supuesto de la normalidad.
# Scatterplots of the response (Population) against each predictor, with
# the axes labelled by the variable they actually show.
# abline() can only draw a line from one intercept and one slope, so the
# two-predictor model cannot be drawn on these panels (the original call
# used its first two coefficients and issued a warning). Each panel instead
# overlays the simple regression of Population on that predictor alone.
plot(muestraIncome, muestraPopu, xlab = "Income", ylab = "Population")
abline(lm(muestraPopu ~ muestraIncome))

plot(muestraArea, muestraPopu, xlab = "Area", ylab = "Population")
abline(lm(muestraPopu ~ muestraArea))
Regresión lineal Múltiple - Bondad de Ajuste

Modelo de regresión múltiple:

Yi = B0 + B1X1 + B2X2 + Ei; i = 1, …., N


Contraste de hipótesis:
H0: B1 = B2 = 0
H1: al menos un Bj ≠ 0
No se rechaza la hipótesis nula de que estos coeficientes son cero debido a que los
valores de (Pr(>|t|) = 0.396) y (Pr(>|t|) = 0.108) son mayores que el nivel de
significancia de 0.05; esto significa que no hay evidencia suficiente de que los
coeficientes estimados de ingresos y área en el modelo lineal estén fuera de una
distribución aleatoria de valores “nulos”, es decir, podrían ser coeficientes obtenidos
aleatoriamente que en realidad no son distintos de cero. El contraste global F
(p-value = 0.2646) lleva a la misma conclusión.

Modelo estimado de regresión lineal múltiple:

Yi = -2.056e+02 + 1.155e+00 X1i - 1.301e-02 X2i + ei; i = 1, …, 20


En nuestro modelo, el R2 es 0.1448 (el R2 corregido es 0.04417), lo que significa que solo
el 14% de la variabilidad de la población se puede explicar por los ingresos y el área.
anova(modeloState)
## Analysis of Variance Table
##
## Response: muestraPopu
## Df Sum Sq Mean Sq F value Pr(>F)
## muestraIncome 1 10694 10694 0.001 0.9757
## muestraArea 1 32243165 32243165 2.877 0.1081
## Residuals 17 190522042 11207179

#ANOVA
data = iris3
data

## , , Setosa
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## [7,] 4.6 3.4 1.4 0.3
## [8,] 5.0 3.4 1.5 0.2
## [9,] 4.4 2.9 1.4 0.2
## [10,] 4.9 3.1 1.5 0.1
## [11,] 5.4 3.7 1.5 0.2
## [12,] 4.8 3.4 1.6 0.2
## [13,] 4.8 3.0 1.4 0.1
## [14,] 4.3 3.0 1.1 0.1
## [15,] 5.8 4.0 1.2 0.2
## [16,] 5.7 4.4 1.5 0.4
## [17,] 5.4 3.9 1.3 0.4
## [18,] 5.1 3.5 1.4 0.3
## [19,] 5.7 3.8 1.7 0.3
## [20,] 5.1 3.8 1.5 0.3
## [21,] 5.4 3.4 1.7 0.2
## [22,] 5.1 3.7 1.5 0.4
## [23,] 4.6 3.6 1.0 0.2
## [24,] 5.1 3.3 1.7 0.5
## [25,] 4.8 3.4 1.9 0.2
## [26,] 5.0 3.0 1.6 0.2
## [27,] 5.0 3.4 1.6 0.4
## [28,] 5.2 3.5 1.5 0.2
## [29,] 5.2 3.4 1.4 0.2
## [30,] 4.7 3.2 1.6 0.2
## [31,] 4.8 3.1 1.6 0.2
## [32,] 5.4 3.4 1.5 0.4
## [33,] 5.2 4.1 1.5 0.1
## [34,] 5.5 4.2 1.4 0.2
## [35,] 4.9 3.1 1.5 0.2
## [36,] 5.0 3.2 1.2 0.2
## [37,] 5.5 3.5 1.3 0.2
## [38,] 4.9 3.6 1.4 0.1
## [39,] 4.4 3.0 1.3 0.2
## [40,] 5.1 3.4 1.5 0.2
## [41,] 5.0 3.5 1.3 0.3
## [42,] 4.5 2.3 1.3 0.3
## [43,] 4.4 3.2 1.3 0.2
## [44,] 5.0 3.5 1.6 0.6
## [45,] 5.1 3.8 1.9 0.4
## [46,] 4.8 3.0 1.4 0.3
## [47,] 5.1 3.8 1.6 0.2
## [48,] 4.6 3.2 1.4 0.2
## [49,] 5.3 3.7 1.5 0.2
## [50,] 5.0 3.3 1.4 0.2
##
## , , Versicolor
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 7.0 3.2 4.7 1.4
## [2,] 6.4 3.2 4.5 1.5
## [3,] 6.9 3.1 4.9 1.5
## [4,] 5.5 2.3 4.0 1.3
## [5,] 6.5 2.8 4.6 1.5
## [6,] 5.7 2.8 4.5 1.3
## [7,] 6.3 3.3 4.7 1.6
## [8,] 4.9 2.4 3.3 1.0
## [9,] 6.6 2.9 4.6 1.3
## [10,] 5.2 2.7 3.9 1.4
## [11,] 5.0 2.0 3.5 1.0
## [12,] 5.9 3.0 4.2 1.5
## [13,] 6.0 2.2 4.0 1.0
## [14,] 6.1 2.9 4.7 1.4
## [15,] 5.6 2.9 3.6 1.3
## [16,] 6.7 3.1 4.4 1.4
## [17,] 5.6 3.0 4.5 1.5
## [18,] 5.8 2.7 4.1 1.0
## [19,] 6.2 2.2 4.5 1.5
## [20,] 5.6 2.5 3.9 1.1
## [21,] 5.9 3.2 4.8 1.8
## [22,] 6.1 2.8 4.0 1.3
## [23,] 6.3 2.5 4.9 1.5
## [24,] 6.1 2.8 4.7 1.2
## [25,] 6.4 2.9 4.3 1.3
## [26,] 6.6 3.0 4.4 1.4
## [27,] 6.8 2.8 4.8 1.4
## [28,] 6.7 3.0 5.0 1.7
## [29,] 6.0 2.9 4.5 1.5
## [30,] 5.7 2.6 3.5 1.0
## [31,] 5.5 2.4 3.8 1.1
## [32,] 5.5 2.4 3.7 1.0
## [33,] 5.8 2.7 3.9 1.2
## [34,] 6.0 2.7 5.1 1.6
## [35,] 5.4 3.0 4.5 1.5
## [36,] 6.0 3.4 4.5 1.6
## [37,] 6.7 3.1 4.7 1.5
## [38,] 6.3 2.3 4.4 1.3
## [39,] 5.6 3.0 4.1 1.3
## [40,] 5.5 2.5 4.0 1.3
## [41,] 5.5 2.6 4.4 1.2
## [42,] 6.1 3.0 4.6 1.4
## [43,] 5.8 2.6 4.0 1.2
## [44,] 5.0 2.3 3.3 1.0
## [45,] 5.6 2.7 4.2 1.3
## [46,] 5.7 3.0 4.2 1.2
## [47,] 5.7 2.9 4.2 1.3
## [48,] 6.2 2.9 4.3 1.3
## [49,] 5.1 2.5 3.0 1.1
## [50,] 5.7 2.8 4.1 1.3
##
## , , Virginica
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 6.3 3.3 6.0 2.5
## [2,] 5.8 2.7 5.1 1.9
## [3,] 7.1 3.0 5.9 2.1
## [4,] 6.3 2.9 5.6 1.8
## [5,] 6.5 3.0 5.8 2.2
## [6,] 7.6 3.0 6.6 2.1
## [7,] 4.9 2.5 4.5 1.7
## [8,] 7.3 2.9 6.3 1.8
## [9,] 6.7 2.5 5.8 1.8
## [10,] 7.2 3.6 6.1 2.5
## [11,] 6.5 3.2 5.1 2.0
## [12,] 6.4 2.7 5.3 1.9
## [13,] 6.8 3.0 5.5 2.1
## [14,] 5.7 2.5 5.0 2.0
## [15,] 5.8 2.8 5.1 2.4
## [16,] 6.4 3.2 5.3 2.3
## [17,] 6.5 3.0 5.5 1.8
## [18,] 7.7 3.8 6.7 2.2
## [19,] 7.7 2.6 6.9 2.3
## [20,] 6.0 2.2 5.0 1.5
## [21,] 6.9 3.2 5.7 2.3
## [22,] 5.6 2.8 4.9 2.0
## [23,] 7.7 2.8 6.7 2.0
## [24,] 6.3 2.7 4.9 1.8
## [25,] 6.7 3.3 5.7 2.1
## [26,] 7.2 3.2 6.0 1.8
## [27,] 6.2 2.8 4.8 1.8
## [28,] 6.1 3.0 4.9 1.8
## [29,] 6.4 2.8 5.6 2.1
## [30,] 7.2 3.0 5.8 1.6
## [31,] 7.4 2.8 6.1 1.9
## [32,] 7.9 3.8 6.4 2.0
## [33,] 6.4 2.8 5.6 2.2
## [34,] 6.3 2.8 5.1 1.5
## [35,] 6.1 2.6 5.6 1.4
## [36,] 7.7 3.0 6.1 2.3
## [37,] 6.3 3.4 5.6 2.4
## [38,] 6.4 3.1 5.5 1.8
## [39,] 6.0 3.0 4.8 1.8
## [40,] 6.9 3.1 5.4 2.1
## [41,] 6.7 3.1 5.6 2.4
## [42,] 6.9 3.1 5.1 2.3
## [43,] 5.8 2.7 5.1 1.9
## [44,] 6.8 3.2 5.9 2.3
## [45,] 6.7 3.3 5.7 2.5
## [46,] 6.7 3.0 5.2 2.3
## [47,] 6.3 2.5 5.0 1.9
## [48,] 6.5 3.0 5.2 2.0
## [49,] 6.2 3.4 5.4 2.3
## [50,] 5.9 3.0 5.1 1.8

sac_datos = array(data,15) # One-dimensional array holding the first 15 values of data


sac_datos

## [1] 5.1 4.9 4.7 4.6 5.0 5.4 4.6 5.0 4.4 4.9 5.4 4.8 4.8 4.3 5.8

sacm_datos = head(data,15) # Keeps the first 15 rows of every species slice (a fixed subset, not a random draw)


sacm_datos

## , , Setosa
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## [7,] 4.6 3.4 1.4 0.3
## [8,] 5.0 3.4 1.5 0.2
## [9,] 4.4 2.9 1.4 0.2
## [10,] 4.9 3.1 1.5 0.1
## [11,] 5.4 3.7 1.5 0.2
## [12,] 4.8 3.4 1.6 0.2
## [13,] 4.8 3.0 1.4 0.1
## [14,] 4.3 3.0 1.1 0.1
## [15,] 5.8 4.0 1.2 0.2
##
## , , Versicolor
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 7.0 3.2 4.7 1.4
## [2,] 6.4 3.2 4.5 1.5
## [3,] 6.9 3.1 4.9 1.5
## [4,] 5.5 2.3 4.0 1.3
## [5,] 6.5 2.8 4.6 1.5
## [6,] 5.7 2.8 4.5 1.3
## [7,] 6.3 3.3 4.7 1.6
## [8,] 4.9 2.4 3.3 1.0
## [9,] 6.6 2.9 4.6 1.3
## [10,] 5.2 2.7 3.9 1.4
## [11,] 5.0 2.0 3.5 1.0
## [12,] 5.9 3.0 4.2 1.5
## [13,] 6.0 2.2 4.0 1.0
## [14,] 6.1 2.9 4.7 1.4
## [15,] 5.6 2.9 3.6 1.3
##
## , , Virginica
##
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 6.3 3.3 6.0 2.5
## [2,] 5.8 2.7 5.1 1.9
## [3,] 7.1 3.0 5.9 2.1
## [4,] 6.3 2.9 5.6 1.8
## [5,] 6.5 3.0 5.8 2.2
## [6,] 7.6 3.0 6.6 2.1
## [7,] 4.9 2.5 4.5 1.7
## [8,] 7.3 2.9 6.3 1.8
## [9,] 6.7 2.5 5.8 1.8
## [10,] 7.2 3.6 6.1 2.5
## [11,] 6.5 3.2 5.1 2.0
## [12,] 6.4 2.7 5.3 1.9
## [13,] 6.8 3.0 5.5 2.1
## [14,] 5.7 2.5 5.0 2.0
## [15,] 5.8 2.8 5.1 2.4

sacm_datos[,,1]

## Sepal L. Sepal W. Petal L. Petal W.


## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## [7,] 4.6 3.4 1.4 0.3
## [8,] 5.0 3.4 1.5 0.2
## [9,] 4.4 2.9 1.4 0.2
## [10,] 4.9 3.1 1.5 0.1
## [11,] 5.4 3.7 1.5 0.2
## [12,] 4.8 3.4 1.6 0.2
## [13,] 4.8 3.0 1.4 0.1
## [14,] 4.3 3.0 1.1 0.1
## [15,] 5.8 4.0 1.2 0.2

sacm_datos[,,2]

## Sepal L. Sepal W. Petal L. Petal W.


## [1,] 7.0 3.2 4.7 1.4
## [2,] 6.4 3.2 4.5 1.5
## [3,] 6.9 3.1 4.9 1.5
## [4,] 5.5 2.3 4.0 1.3
## [5,] 6.5 2.8 4.6 1.5
## [6,] 5.7 2.8 4.5 1.3
## [7,] 6.3 3.3 4.7 1.6
## [8,] 4.9 2.4 3.3 1.0
## [9,] 6.6 2.9 4.6 1.3
## [10,] 5.2 2.7 3.9 1.4
## [11,] 5.0 2.0 3.5 1.0
## [12,] 5.9 3.0 4.2 1.5
## [13,] 6.0 2.2 4.0 1.0
## [14,] 6.1 2.9 4.7 1.4
## [15,] 5.6 2.9 3.6 1.3

sacm_datos[,,3]

## Sepal L. Sepal W. Petal L. Petal W.


## [1,] 6.3 3.3 6.0 2.5
## [2,] 5.8 2.7 5.1 1.9
## [3,] 7.1 3.0 5.9 2.1
## [4,] 6.3 2.9 5.6 1.8
## [5,] 6.5 3.0 5.8 2.2
## [6,] 7.6 3.0 6.6 2.1
## [7,] 4.9 2.5 4.5 1.7
## [8,] 7.3 2.9 6.3 1.8
## [9,] 6.7 2.5 5.8 1.8
## [10,] 7.2 3.6 6.1 2.5
## [11,] 6.5 3.2 5.1 2.0
## [12,] 6.4 2.7 5.3 1.9
## [13,] 6.8 3.0 5.5 2.1
## [14,] 5.7 2.5 5.0 2.0
## [15,] 5.8 2.8 5.1 2.4

uni = rbind(sacm_datos[,,1], sacm_datos[,,2],sacm_datos[,,3])


uni
## Sepal L. Sepal W. Petal L. Petal W.
## [1,] 5.1 3.5 1.4 0.2
## [2,] 4.9 3.0 1.4 0.2
## [3,] 4.7 3.2 1.3 0.2
## [4,] 4.6 3.1 1.5 0.2
## [5,] 5.0 3.6 1.4 0.2
## [6,] 5.4 3.9 1.7 0.4
## [7,] 4.6 3.4 1.4 0.3
## [8,] 5.0 3.4 1.5 0.2
## [9,] 4.4 2.9 1.4 0.2
## [10,] 4.9 3.1 1.5 0.1
## [11,] 5.4 3.7 1.5 0.2
## [12,] 4.8 3.4 1.6 0.2
## [13,] 4.8 3.0 1.4 0.1
## [14,] 4.3 3.0 1.1 0.1
## [15,] 5.8 4.0 1.2 0.2
## [16,] 7.0 3.2 4.7 1.4
## [17,] 6.4 3.2 4.5 1.5
## [18,] 6.9 3.1 4.9 1.5
## [19,] 5.5 2.3 4.0 1.3
## [20,] 6.5 2.8 4.6 1.5
## [21,] 5.7 2.8 4.5 1.3
## [22,] 6.3 3.3 4.7 1.6
## [23,] 4.9 2.4 3.3 1.0
## [24,] 6.6 2.9 4.6 1.3
## [25,] 5.2 2.7 3.9 1.4
## [26,] 5.0 2.0 3.5 1.0
## [27,] 5.9 3.0 4.2 1.5
## [28,] 6.0 2.2 4.0 1.0
## [29,] 6.1 2.9 4.7 1.4
## [30,] 5.6 2.9 3.6 1.3
## [31,] 6.3 3.3 6.0 2.5
## [32,] 5.8 2.7 5.1 1.9
## [33,] 7.1 3.0 5.9 2.1
## [34,] 6.3 2.9 5.6 1.8
## [35,] 6.5 3.0 5.8 2.2
## [36,] 7.6 3.0 6.6 2.1
## [37,] 4.9 2.5 4.5 1.7
## [38,] 7.3 2.9 6.3 1.8
## [39,] 6.7 2.5 5.8 1.8
## [40,] 7.2 3.6 6.1 2.5
## [41,] 6.5 3.2 5.1 2.0
## [42,] 6.4 2.7 5.3 1.9
## [43,] 6.8 3.0 5.5 2.1
## [44,] 5.7 2.5 5.0 2.0
## [45,] 5.8 2.8 5.1 2.4

# Stack the three 15-row species subsets into a data frame and label each
# row with its group (1 = first slice, 2 = second, 3 = third).
# grupo is stored as a factor so that an ANOVA treats it as a categorical
# factor with three levels rather than as a numeric covariate.
mat_uni = data.frame(uni)
mat_uni$grupo = factor(rep(1:3, each = 15))
mat_uni
## Sepal.L. Sepal.W. Petal.L. Petal.W. grupo
## 1 5.1 3.5 1.4 0.2 1
## 2 4.9 3.0 1.4 0.2 1
## 3 4.7 3.2 1.3 0.2 1
## 4 4.6 3.1 1.5 0.2 1
## 5 5.0 3.6 1.4 0.2 1
## 6 5.4 3.9 1.7 0.4 1
## 7 4.6 3.4 1.4 0.3 1
## 8 5.0 3.4 1.5 0.2 1
## 9 4.4 2.9 1.4 0.2 1
## 10 4.9 3.1 1.5 0.1 1
## 11 5.4 3.7 1.5 0.2 1
## 12 4.8 3.4 1.6 0.2 1
## 13 4.8 3.0 1.4 0.1 1
## 14 4.3 3.0 1.1 0.1 1
## 15 5.8 4.0 1.2 0.2 1
## 16 7.0 3.2 4.7 1.4 2
## 17 6.4 3.2 4.5 1.5 2
## 18 6.9 3.1 4.9 1.5 2
## 19 5.5 2.3 4.0 1.3 2
## 20 6.5 2.8 4.6 1.5 2
## 21 5.7 2.8 4.5 1.3 2
## 22 6.3 3.3 4.7 1.6 2
## 23 4.9 2.4 3.3 1.0 2
## 24 6.6 2.9 4.6 1.3 2
## 25 5.2 2.7 3.9 1.4 2
## 26 5.0 2.0 3.5 1.0 2
## 27 5.9 3.0 4.2 1.5 2
## 28 6.0 2.2 4.0 1.0 2
## 29 6.1 2.9 4.7 1.4 2
## 30 5.6 2.9 3.6 1.3 2
## 31 6.3 3.3 6.0 2.5 3
## 32 5.8 2.7 5.1 1.9 3
## 33 7.1 3.0 5.9 2.1 3
## 34 6.3 2.9 5.6 1.8 3
## 35 6.5 3.0 5.8 2.2 3
## 36 7.6 3.0 6.6 2.1 3
## 37 4.9 2.5 4.5 1.7 3
## 38 7.3 2.9 6.3 1.8 3
## 39 6.7 2.5 5.8 1.8 3
## 40 7.2 3.6 6.1 2.5 3
## 41 6.5 3.2 5.1 2.0 3
## 42 6.4 2.7 5.3 1.9 3
## 43 6.8 3.0 5.5 2.1 3
## 44 5.7 2.5 5.0 2.0 3
## 45 5.8 2.8 5.1 2.4 3
Con el valor p no se rechaza la hipótesis nula de que las medias de los grupos son iguales
entre sí, es decir, que el factor (grupo) no afecta a la variable de respuesta “Sepal. L.”

You might also like