Estadistica Basica Rebeca

Estadística Básica
• Probabilidad
library(readxl)
Titanic<-read_excel("Pasajeros-Titanic.xlsx")
Titanic
Pasajero sobrevivio sexo edad clase

<chr> <chr> <chr> <dbl> <chr>
Allison, Miss. Helen Loraine no female 2.0000 1st
Allison, Mrs. Hudson J C (Bessi no female 25.0000 1st
Evans, Miss. Edith Corse no female 36.0000 1st
Isham, Miss. Ann Elizabeth no female 50.0000 1st
Straus, Mrs. Isidor (Rosalie Id no female 63.0000 1st
Carter, Mrs. Ernest Courtenay ( no female 44.0000 2nd
Chapman, Mrs. John Henry (Sara no female 29.0000 2nd
Corbett, Mrs. Walter H (Irene C no female 30.0000 2nd
Corey, Mrs. Percy C (Mary Phyll no female NA 2nd
Funk, Miss. Annie Clemmer no female 38.0000 2nd
1-10 of 1,309 rows Previous 1 2 3 4 5 6 ... 131 Next
dim(Titanic)
## [1] 1309 5
names(Titanic)
## [1] "Pasajero" "sobrevivio" "sexo" "edad" "clase"
class(Titanic)
## [1] "tbl_df" "tbl" "data.frame"

Titanic=as.data.frame(Titanic)
class(Titanic)
## [1] "data.frame"
class(Titanic$sexo)
## [1] "character"
options(knitr.table.format="latex")
library(pander)
t1<-table(Titanic$sobrevivio,Titanic$clase)
t1
##
## 1st 2nd 3rd
## no 123 158 528
## yes 200 119 181
t2<-addmargins(t1)
t2
##
## 1st 2nd 3rd Sum
## no 123 158 528 809
## yes 200 119 181 500
## Sum 323 277 709 1309
pander(t2)
1st 2nd 3rd Sum
no 123 158 528 809
yes 200 119 181 500
Sum 323 277 709 1309

# Bar chart of passenger class. A column of a data set is selected in R
# with the syntax dataset_name$variable.
y <- Titanic$clase
# table() builds the frequency table that barplot() draws.
x <- table(y)
# Axes and colours can be customised; one colour is supplied per class.
barplot(
  x,
  main = "Gráfico de Barras con una tabla de contigencia",
  col = c("grey", "royalblue", "purple")
)
library(lattice)
library(mosaic)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':

##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggformula
## Loading required package: ggplot2
## Loading required package: ggstance
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':

##
## geom_errorbarh, GeomErrorbarh
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## Registered S3 method overwritten by 'mosaic':

## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be
## affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following object is masked from 'package:ggplot2':

##
## stat
## The following objects are masked from 'package:dplyr':

##
## count, do, tally
## The following objects are masked from 'package:stats':

##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':

##
## max, mean, min, prod, range, sample, sum
# Diagrama de barras
bargraph(~clase, data = Titanic, main= "Gráfico de Barras")
# Sample of 26 observations used to illustrate grouping into intervals.
X <- c(
  2, 3, 4, 4.5, 4.5, 5.6, 5.7, 5.8, 6, 6.1, 6.5, 7, 7, 7, 7.5, 7.5, 7.5,
  8.3, 9, 10.2, 10.4, 11, 11.1, 11.5, 12, 13
)
# cut() splits the range of X into 7 equal-width intervals and table()
# counts the observations that fall in each one.
div <- table(cut(X, breaks = 7))
div
##
## (1.99,3.57] (3.57,5.14] (5.14,6.71] (6.71,8.29] (8.29,9.86] (9.86,11.4]
## 2 3 6 6 2 4
## (11.4,13]
## 3
class(div)
## [1] "table"
library(pander)
library(xtable)
mat<-data.frame(div)
names(mat)<-c("Intervalos","Frecuencias($n_i$)")
x<-xtable(mat)
pander(x)
Intervalos Frecuencias(ni )
(1.99,3.57] 2
(3.57,5.14] 3
(5.14,6.71] 6
(6.71,8.29] 6
(8.29,9.86] 2
(9.86,11.4] 4
(11.4,13] 3
# División en intervalos
table(cut(Titanic$edad, breaks = 6))
##
## (0.0869,13.5] (13.5,26.8] (26.8,40.1] (40.1,53.4] (53.4,66.7]
## 99 375 345 150 68
## (66.7,80.1]
## 9
library(mosaic)
histogram(~edad, data = Titanic)
# Histograms of passenger age drawn with four different `breaks` settings.
x <- Titanic$edad
# Arrange the plots in a 2 x 2 grid; `op` keeps the previous graphical
# settings so that they can be restored with par(op).
op <- par(mfrow = c(2, 2))
# Number of intervals chosen by Sturges' formula.
hist(x, breaks = "Sturges", col = "purple")
hist(x, breaks = 4, col = "green")
hist(x, breaks = 10, col = "yellow")
hist(x, breaks = 35, col = "grey")
library(mosaic)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':

##
## combine
p1<-freqpolygon(~edad, Titanic)
p2<-histogram(~edad, Titanic)
p3<-ladd(panel.freqpolygon(Titanic$edad))
grid.arrange(p1,p2,p3,ncol=2)
par(op)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:mosaic':

##
## do
## The following object is masked from 'package:ggplot2':

##
## last_plot
## The following object is masked from 'package:stats':

##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Histogram of age on the density scale with a kernel density estimate
# drawn over it.
# na.omit() drops the missing ages, which density() would otherwise reject.
X <- na.omit(Titanic$edad)

# `probability` is spelled out instead of relying on partial matching of
# the abbreviated `prob`.
hist(X, probability = TRUE)
# The author noted that this line raised an error for them.
# NOTE(review): the cause is not visible here; lines() adds to an existing
# plot, so it presumably has to run on the same device right after hist().
lines(density(X), lty = "longdash", lwd = 3, col = "purple")
dt<-data.frame(Titanic)
ggplot(dt,aes(x=clase))+geom_bar( aes(fill=sobrevivio), position="dodge")
library(HSAUR)
## Loading required package: tools
data("water", package = "HSAUR")

water
location town mortality hardness

<fctr> <chr> <int> <int>
1 South Bath 1247 105
2 North Birkenhead 1668 17
3 South Birmingham 1466 5
4 North Blackburn 1800 14
5 North Blackpool 1609 18
6 North Bolton 1558 10
7 North Bootle 1807 15

8 South Bournemouth 1299 78
9 North Bradford 1637 10
10 South Brighton 1359 84
1-10 of 61 rows Previous 1 2 3 4 5 6 7 Next
mort<-data.frame(water)
ggplot()+geom_point(data=mort, aes(x=hardness, y=mortality, colour=location))

head(mort,10)

1 South Bath 1247 105
2 North Birkenhead 1668 17

3 South Birmingham 1466 5
4 North Blackburn 1800 14
5 North Blackpool 1609 18
6 North Bolton 1558 10
7 North Bootle 1807 15
8 South Bournemouth 1299 78
9 North Bradford 1637 10
10 South Brighton 1359 84
1-10 of 10 rows
cov(mort$hardness,mort$mortality)
## [1] -4681.544
cor(mort$hardness,mort$mortality)
## [1] -0.6548486

cor(mort$hardness, mort$mortality)^2
## [1] 0.4288267
# Points on the upper half of the unit circle: Y is completely determined
# by X, but the relationship between them is not linear.
# `length.out` is spelled out instead of the partially matched `length`.
X <- seq(-1, 1, length.out = 100)
Y <- sqrt(1 - X^2)
xy <- data.frame(X, Y)
ggplot(data=xy,aes(x=X, y=Y)) + geom_point(color="darkorange", size=3)
cor(X,Y)
## [1] -3.162392e-16
library(plotly)
library(readxl)
library(plotly)
library(ggplot2)
# Scatter plot of the `record` column against `anyo` with a fitted
# least-squares line, rendered as an interactive plotly widget.
record_100_m <- read_excel("record-100-m.xlsx")
# Keep the first two columns of the sheet and give them short names.
dt <- data.frame(x = record_100_m[, 1], y = record_100_m[, 2])
names(dt) <- c("anyo", "record")
# Constant colour and size are set on the layers rather than inside aes():
# mapping a constant in aes() adds spurious "colour" and "size" legends.
# Every `+` ends its line so that the smoothing layer is parsed as part of
# `p` instead of as a separate statement.
p <- ggplot(dt, aes(x = anyo, y = record)) +
  geom_point(colour = "red", size = 2) +
  geom_smooth(method = "lm", se = FALSE, colour = "red")
ggplotly(p)
colour
size
10.50
10.25
record
10.00
9.75
1920 1940 1960 1980 2000

anyo
Probabilidad
library(pander)
# Draw 1000 values from {1, 2} with replacement, each equally likely.
# The author recorded that an earlier run of this line failed with:
#   Error in sample.int(length(x), size, replace, prob):
#   invalid 'replace' argument
# NOTE(review): the cause is not visible here; confirm that `replace` was
# given a plain TRUE/FALSE value when the error occurred.
x <- sample(1:2, 1000, replace = TRUE)
x
## [1] 2 2 1 1 1 2 2 1 2 1 1 1 2 1 1 1 1 1 2 1 2 1 2 1 2 1 2 2 2 2 2 1 1 1
## [35] 1 1 1 2 1 1 2 2 2 2 1 2 1 1 2 1 2 2 2 1 2 2 1 1 2 1 1 2 2 2 2 1 1 1
## [69] 1 1 1 2 2 1 2 1 2 2 2 2 1 2 1 1 1 1 2 2 1 1 2 2 2 2 1 1 1 1 2 2 1 2
## [103] 1 1 2 2 2 1 2 1 2 1 2 1 2 2 2 2 1 1 2 2 1 1 1 2 1 1 2 2 1 1 2 1 2 2
## [137] 1 2 1 1 2 2 1 1 2 1 2 1 1 2 1 2 1 2 1 1 2 2 1 2 2 2 1 2 1 1 2 2 1 1
## [171] 1 2 2 1 1 2 1 1 1 1 2 2 1 2 2 1 1 1 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 1
## [205] 2 1 1 2 2 1 2 2 2 1 2 1 1 1 1 1 2 1 2 2 1 1 1 2 2 2 1 1 1 2 2 2 2 2
## [239] 2 1 2 2 2 2 2 2 1 2 1 1 1 1 2 1 2 2 1 2 1 2 1 2 1 1 1 2 2 2 2 2 1 1
## [273] 2 2 1 1 2 1 2 2 2 1 1 2 1 1 1 2 1 2 2 2 2 1 2 2 1 2 2 2 1 1 2 1 2 1
## [307] 2 1 2 2 2 1 2 2 2 2 1 1 1 2 2 2 2 1 2 1 2 2 1 1 1 1 2 2 2 2 1 1 1 1
## [341] 1 2 1 2 1 1 1 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 1 1 2 2 2 2 1 1 1 1 2
## [375] 2 1 1 2 2 1 1 1 1 2 1 1 2 2 2 1 1 1 2 1 1 2 1 2 1 1 2 2 1 2 2 2 2 1
## [409] 1 2 1 1 2 2 1 2 1 2 2 2 2 2 1 1 2 2 1 1 1 1 2 2 2 1 2 1 1 1 2 1 2 2
## [443] 2 2 2 2 1 2 2 2 2 1 1 2 1 2 1 1 1 2 1 1 1 2 2 2 1 1 2 2 1 2 1 1 1 2
## [477] 2 2 2 1 2 1 1 2 2 2 1 1 2 2 1 2 2 1 2 2 1 2 2 2 2 1 2 2 1 1 1 2 2 2
## [511] 1 1 2 1 2 1 1 1 1 2 2 2 2 2 1 2 1 1 2 1 1 1 2 1 1 2 2 2 2 2 1 2 1 2
## [545] 1 2 2 2 2 2 2 1 1 2 2 1 1 1 2 1 2 1 1 1 1 1 2 1 2 1 2 1 1 2 2 2 1 2
## [579] 2 1 1 1 1 1 1 1 1 2 1 1 2 1 2 2 1 1 2 1 1 1 1 1 1 2 2 1 2 1 1 1 1 1
## [613] 2 1 2 2 1 2 2 2 1 1 2 1 2 1 1 1 1 2 2 2 1 2 1 2 2 2 1 2 1 1 1 2 2 2
## [647] 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 1 1 2 2 2 2 1 2 1 1 1 2 2 2 2 1 2 2
## [681] 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 2 2 2 1 2 1 1 1 2 1 1 1 1 1 2 1 1 1
## [715] 2 1 1 2 2 1 2 1 1 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 1 1 1 2 2
## [749] 2 2 2 2 1 2 1 1 1 1 1 1 1 1 1 2 1 2 2 1 1 2 1 2 2 1 2 1 2 1 2 2 1 2
## [783] 2 2 2 2 2 2 2 2 1 2 2 1 1 1 1 1 1 1 2 2 2 1 2 1 2 1 2 2 1 2 2 1 1 1
## [817] 1 2 2 2 2 2 2 2 1 2 1 2 2 2 2 1 2 2 1 2 1 1 1 2 2 1 2 1 1 1 2 2 1 2
## [851] 2 2 2 1 2 1 2 1 1 1 1 1 1 1 2 2 2 2 1 2 2 1 1 1 2 1 2 2 2 1 2 2 2 1
## [885] 2 1 1 1 2 2 2 1 1 1 2 2 2 1 1 2 1 2 1 1 2 1 2 2 2 2 1 2 1 2 2 1 2 1
## [919] 1 1 2 1 1 1 2 2 1 1 2 2 2 2 1 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 2 1 2 2
## [953] 1 2 2 2 1 1 2 1 2 1 2 2 2 1 1 1 1 2 1 2 1 1 1 2 2 2 1 2 2 2 2 2 1 1
## [987] 2 1 1 2 2 2 2 2 1 2 2 1 2 2
pander(table(x))
1 2
481 519
y=sample(1:6,1000, replace=TRUE)
y
## [1] 6 2 6 5 3 3 1 4 5 4 6 5 5 5 5 2 3 2 3 1 4 1 4 5 6 1 1 4 2 1 3 6 2 1
## [35] 6 4 3 4 3 5 3 3 4 4 6 4 1 1 2 3 6 5 4 6 1 3 4 5 6 3 1 1 5 4 6 2 2 2
## [69] 4 2 1 1 3 5 3 4 1 1 2 5 2 5 1 1 6 5 3 4 6 2 2 5 1 3 6 6 1 2 3 2 4 6
## [103] 1 3 6 1 6 5 5 1 4 5 3 3 2 2 5 2 1 4 6 1 3 4 6 4 4 6 1 2 4 6 1 2 4 2
## [137] 3 2 6 4 5 2 6 5 2 6 6 3 5 4 2 5 3 3 3 2 1 2 2 5 6 5 4 1 6 2 1 3 2 2
## [171] 5 4 4 5 1 3 4 5 2 5 3 3 4 2 6 4 5 5 3 3 2 2 2 6 6 3 5 1 2 1 5 5 1 4
## [205] 4 6 4 4 5 1 1 4 2 1 6 5 4 5 1 4 2 1 6 4 2 6 6 6 2 5 2 4 2 5 5 3 4 2
## [239] 4 1 2 2 4 3 6 6 3 6 4 6 4 4 1 4 4 1 3 1 5 4 1 5 5 3 3 1 2 6 1 4 6 5
## [273] 4 1 1 5 3 5 3 6 4 4 2 1 4 5 1 1 6 3 3 4 5 6 4 6 5 6 4 1 1 1 1 1 5 3
## [307] 3 6 5 1 5 1 3 6 2 4 3 5 4 2 1 3 3 4 5 6 3 1 3 2 5 6 1 3 2 4 1 4 6 5
## [341] 2 4 4 4 3 1 6 5 5 6 6 4 5 1 2 6 1 1 5 4 6 5 3 5 6 4 5 2 1 1 3 4 3 1
## [375] 1 3 3 5 4 1 5 6 2 4 6 6 2 1 2 2 1 4 2 3 6 2 5 3 5 5 3 4 4 1 5 6 6 6
## [409] 5 6 5 2 1 4 1 1 1 5 2 4 5 4 3 2 4 1 4 4 4 5 6 1 3 4 4 6 1 1 6 3 5 6
## [443] 5 3 6 2 6 1 2 6 3 3 1 2 4 3 2 4 6 4 5 3 1 3 3 6 6 2 4 2 3 1 1 2 3 6
## [477] 4 6 1 4 4 1 4 6 4 4 3 4 6 1 3 5 5 5 1 2 1 6 4 5 5 2 2 3 2 2 5 1 6 3
## [511] 1 6 6 1 3 1 2 5 3 6 2 2 6 5 2 1 2 3 3 3 6 5 1 1 5 3 2 6 4 3 3 6 5 2
## [545] 1 5 5 5 1 1 3 5 6 1 1 6 5 1 5 5 4 4 2 1 1 6 3 1 1 2 3 1 3 2 2 4 3 1
## [579] 3 4 1 6 1 2 5 6 5 1 2 1 3 4 2 2 1 6 6 4 5 3 3 5 3 2 6 5 4 4 6 2 3 3
## [613] 5 6 3 1 5 2 3 3 2 4 1 5 1 3 1 5 6 5 5 4 5 2 2 2 5 1 3 6 6 3 2 1 4 4
## [647] 2 5 3 5 2 6 4 4 3 5 5 3 4 5 2 4 5 1 2 3 6 1 2 4 4 2 4 4 4 5 5 5 1 1
## [681] 3 1 3 5 6 6 3 1 4 6 4 2 5 5 3 5 2 3 6 2 2 2 4 1 3 2 4 6 4 3 1 6 2 4
## [715] 1 2 1 3 5 1 5 4 1 6 6 3 2 2 3 2 1 3 6 4 6 5 6 5 4 1 2 3 1 2 2 5 1 1
## [749] 2 1 6 1 5 6 2 4 2 6 3 5 4 5 6 4 4 1 1 6 4 2 4 3 5 2 1 6 6 3 6 5 2 6
## [783] 4 2 5 6 1 1 6 1 4 2 2 5 2 5 2 6 5 6 4 1 6 2 2 4 5 6 5 6 5 3 2 2 3 4
## [817] 6 4 2 6 6 3 6 3 1 3 1 2 4 5 4 2 4 3 2 4 5 2 3 6 2 1 5 6 4 4 5 1 3 5
## [851] 2 2 3 5 4 4 4 1 4 2 3 3 6 2 2 3 2 1 5 1 1 2 4 4 4 3 3 4 6 5 3 2 3 6
## [885] 2 4 5 1 5 2 3 5 5 3 5 2 6 1 5 6 4 2 1 5 2 1 2 3 5 2 3 4 6 4 6 5 4 2
## [919] 1 3 4 2 6 6 2 4 4 5 2 2 5 3 1 5 2 1 5 3 6 4 6 2 4 6 6 6 5 3 1 3 5 4
## [953] 4 5 5 4 6 2 1 3 3 1 5 4 2 4 2 3 2 6 5 4 4 3 6 4 4 1 2 2 6 5 2 6 1 3
## [987] 6 1 4 6 2 4 5 5 4 2 2 2 5 2
pander(table(y))
1 2 3 4 5 6
169 175 152 175 170 159
# Por ejemplo, ahora vamos a “simular” el lanzamiento de 1000 monedas
# (el número 1 es cara, 2 es cruz), y los resultados los visualizamos en
# una tabla.
library(dygraphs)
mdeaths
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 2134 1863 1877 1877 1492 1249 1280 1131 1209 1492 1621 1846
## 1975 2103 2137 2153 1833 1403 1288 1186 1133 1053 1347 1545 2066
## 1976 2020 2750 2283 1479 1189 1160 1113 970 999 1208 1467 2059
## 1977 2240 1634 1722 1801 1246 1162 1087 1013 959 1179 1229 1655
## 1978 2019 2284 1942 1423 1340 1187 1098 1004 970 1140 1110 1812
## 1979 2263 1820 1846 1531 1215 1075 1056 975 940 1081 1294 1341
fdeaths
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 901 689 827 677 522 406 441 393 387 582 578 666
## 1975 830 752 785 664 467 438 421 412 343 440 531 771
## 1976 767 1141 896 532 447 420 376 330 357 445 546 764
## 1977 862 660 663 643 502 392 411 348 387 385 411 638
## 1978 796 853 737 546 530 446 431 362 387 430 425 679
## 1979 821 785 727 612 478 429 405 379 393 411 487 574
lungDeaths<-cbind(mdeaths,fdeaths)
dygraph(lungDeaths)
## Registered S3 method overwritten by 'xts':

## method from
## as.zoo.xts zoo
mdeaths fdeaths
2800
2600
2400
2200
2000
1800
1600
1400
1200
1000
800
600
400
200
Jan 1974 Jan 1975 Jan 1976 Jan 1977 Jan 1978 Jan 1979
library(pander)
x=sample(1:2,1000, replace=TRUE)
pander(table(x))
1 2
495 505
barplot(table(x))
# Ahora hacemos lo mismo simulando el lanzamiento de un dado.

pander(table(x))
1 2 3 4 5 6
1 2 3 4 5 6
175 164 179 158 163 161
y=table(x)
pander(y)
1 2 3 4 5 6
187 151 163 151 183 165
1-pbinom(3,10,0.5)
## [1] 0.828125

Estadistica Basica Rebeca

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Estadistica Basica Rebeca

Uploaded by

Copyright:

Available Formats

Estadistica Basica

Pasajero sobrevivio sexo edad clase

Allison, Miss. Helen Loraine no female 2.0000 1st

Allison, Mrs. Hudson J C (Bessi no female 25.0000 1st

Evans, Miss. Edith Corse no female 36.0000 1st

Isham, Miss. Ann Elizabeth no female 50.0000 1st

Straus, Mrs. Isidor (Rosalie Id no female 63.0000 1st

Carter, Mrs. Ernest Courtenay ( no female 44.0000 2nd

Chapman, Mrs. John Henry (Sara no female 29.0000 2nd

Corbett, Mrs. Walter H (Irene C no female 30.0000 2nd

Corey, Mrs. Percy C (Mary Phyll no female NA 2nd

Funk, Miss. Annie Clemmer no female 38.0000 2nd

1-10 of 1,309 rows Previous 1 2 3 4 5 6 ... 131 Next

## [1] "Pasajero" "sobrevivio" "sexo" "edad" "clase"

## [1] "tbl_df" "tbl" "data.frame"

1st 2nd 3rd Sum

no 123 158 528 809

yes 200 119 181 500

Sum 323 277 709 1309

## Loading required package: dplyr

## The following objects are masked from 'package:stats':

## Loading required package: ggformula

## Loading required package: ggplot2

## Loading required package: ggstance

## The following objects are masked from 'package:ggplot2':

## Loading required package: mosaicData

## Loading required package: Matrix

## Registered S3 method overwritten by 'mosaic':

## The following object is masked from 'package:ggplot2':

## The following objects are masked from 'package:dplyr':

## The following objects are masked from 'package:stats':

## The following objects are masked from 'package:base':

table(cut(Titanic$edad, breaks = 6))

## The following object is masked from 'package:dplyr':

## The following object is masked from 'package:mosaic':

## The following object is masked from 'package:ggplot2':

## The following object is masked from 'package:stats':

X=na.omit(Titanic$edad) #na.omit es para no considerar valores en blanco

## Loading required package: tools

data("water", package = "HSAUR")

location town mortality hardness

1 South Bath 1247 105

2 North Birkenhead 1668 17

3 South Birmingham 1466 5

4 North Blackburn 1800 14

5 North Blackpool 1609 18

6 North Bolton 1558 10

7 North Bootle 1807 15

8 South Bournemouth 1299 78

9 North Bradford 1637 10

10 South Brighton 1359 84

1-10 of 61 rows Previous 1 2 3 4 5 6 7 Next

data("water", package = "HSAUR")

location town mortality hardness

1 South Bath 1247 105

2 North Birkenhead 1668 17

3 South Birmingham 1466 5

4 North Blackburn 1800 14

5 North Blackpool 1609 18

6 North Bolton 1558 10

7 North Bootle 1807 15

8 South Bournemouth 1299 78

9 North Bradford 1637 10

10 South Brighton 1359 84