You are on page 1 of 15

Estadística Básica

CONTENIDO
Probabilidad........................................................................................................................................... 13

# Read the passenger list from the Excel workbook and display it.
library(readxl)
Titanic <- read_excel(path = "Pasajeros-Titanic.xlsx")
Titanic

## # A tibble: 1,309 x 5
## Pasajero sobrevivio sexo edad clase
## <chr> <chr> <chr> <dbl> <chr>
## 1 Allison, Miss. Helen Loraine no female 2 1st
## 2 Allison, Mrs. Hudson J C (Bessi no female 25 1st
## 3 Evans, Miss. Edith Corse no female 36 1st
## 4 Isham, Miss. Ann Elizabeth no female 50 1st
## 5 Straus, Mrs. Isidor (Rosalie Id no female 63 1st
## 6 Carter, Mrs. Ernest Courtenay ( no female 44 2nd
## 7 Chapman, Mrs. John Henry (Sara no female 29 2nd
## 8 Corbett, Mrs. Walter H (Irene C no female 30 2nd
## 9 Corey, Mrs. Percy C (Mary Phyll no female NA 2nd
## 10 Funk, Miss. Annie Clemmer no female 38 2nd
## # ... with 1,299 more rows

# Emit knitr tables as LaTeX, then cross-tabulate survival against class.
options(knitr.table.format = "latex")
library(pander)
t1 <- table(Titanic[["sobrevivio"]], Titanic[["clase"]])
t1

##
## 1st 2nd 3rd
## no 123 158 528
## yes 200 119 181

# Add row and column totals ("Sum") to the survival-by-class table.
t2 <- addmargins(A = t1)
t2

##
## 1st 2nd 3rd Sum
## no 123 158 528 809
## yes 200 119 181 500
## Sum 323 277 709 1309

# Render the table with totals through pander.
pander(t2)

  1st 2nd 3rd Sum


no 123 158 528 809
yes 200 119 181 500
Sum 323 277 709 1309
# Recall that in R a column of a data set is selected with dataset$column.
y <- Titanic$clase
# table() builds the frequency table of the passenger classes.
x <- table(y)
# Bar chart of the class frequencies.
barplot(x)

library(lattice)

library(mosaic)

## Loading required package: dplyr

##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag

## The following objects are masked from 'package:base':


##
## intersect, setdiff, setequal, union

## Loading required package: ggformula

## Loading required package: ggplot2

## Loading required package: ggstance

##
## Attaching package: 'ggstance'

## The following objects are masked from 'package:ggplot2':


##
## geom_errorbarh, GeomErrorbarh

##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")

## Loading required package: mosaicData

## Loading required package: Matrix

## Registered S3 method overwritten by 'mosaic':


## method from
## fortify.SpatialPolygonsDataFrame ggplot2

##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.

##
## Attaching package: 'mosaic'

## The following object is masked from 'package:Matrix':


##
## mean

## The following object is masked from 'package:ggplot2':


##
## stat
## The following objects are masked from 'package:dplyr':
##
## count, do, tally

## The following objects are masked from 'package:stats':


##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var

## The following objects are masked from 'package:base':


##
## max, mean, min, prod, range, sample, sum

# Bar chart of the number of passengers in each class.
bargraph(~clase, data = Titanic, main = "Gráfico de Barras")

# Sample of 26 measurements, grouped into 7 equal-width intervals.
X <- c(
  2, 3, 4, 4.5, 4.5, 5.6, 5.7, 5.8, 6, 6.1, 6.5, 7, 7, 7, 7.5, 7.5, 7.5,
  8.3, 9, 10.2, 10.4, 11, 11.1, 11.5, 12, 13
)
# cut() bins the values; table() counts how many fall in each interval.
div <- table(cut(X, breaks = 7))
div

##
## (1.99,3.57] (3.57,5.14] (5.14,6.71] (6.71,8.29] (8.29,9.86] (9.86,11.4]
## 2 3 6 6 2 4
## (11.4,13]
## 3
library(pander)
library(xtable)
# Turn the interval counts into a two-column data frame with display
# headers, wrap it with xtable() and render it via pander().
mat <- setNames(
  data.frame(div),
  c("Intervalos", "Frecuencias($n_i$)")
)
x <- xtable(mat)
pander(x)

Intervalos Frecuencias(ni )
(1.99,3.57] 2
(3.57,5.14] 3
(5.14,6.71] 6
(6.71,8.29] 6
(8.29,9.86] 2
(9.86,11.4] 4
(11.4,13] 3
# Split the ages into 6 equal-width intervals and count each one.

table(cut(Titanic[["edad"]], breaks = 6))

##
## (0.0869,13.5] (13.5,26.8] (26.8,40.1] (40.1,53.4] (53.4,66.7]
## 99 375 345 150 68
## (66.7,80.1]
## 9

library(mosaic)
# Lattice histogram of passenger age.
histogram(~edad, data = Titanic)
x <- Titanic$edad
# Lay out the following base-graphics plots on a 2 x 2 grid; `op` keeps the
# previous settings so they can be restored later with par(op).
op <- par(mfrow = c(2, 2))
# Number of intervals chosen by Sturges' formula.
hist(x, breaks = "Sturges")
# The same data with 4, 10 and 35 requested breaks for comparison.
hist(x, breaks = 4)
hist(x, breaks = 10)
hist(x, breaks = 35)
library(mosaic)
library(gridExtra)

##
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':


##
## combine

# Build three plot objects from passenger age and show them in two columns.
p1<-freqpolygon(~edad, Titanic)
p2<-histogram(~edad, Titanic)
# ladd() is given a frequency-polygon panel call; presumably it overlays the
# polygon on the most recent lattice plot — TODO confirm.
p3<-ladd(panel.freqpolygon(Titanic$edad))
grid.arrange(p1,p2,p3,ncol=2)
# Restore the graphics parameters that were saved in `op`.
par(op)

# na.omit() discards the missing ages before plotting.
X <- na.omit(Titanic$edad)


# Density-scaled histogram with a kernel density curve drawn over it.
hist(X, probability = TRUE)
## The author notes that the next call raised an error ("Me da error").
lines(density(X), col = "red", lwd = 2, lty = "dotted")
# Grouped bar chart of class, with one bar per survival outcome.
dt <- data.frame(Titanic)
ggplot(dt, aes(x = clase)) +
  geom_bar(aes(fill = sobrevivio), position = "dodge")

library(HSAUR)
## Loading required package: tools

# Load the `water` data set from the HSAUR package and print it in full.
data(list = "water", package = "HSAUR")


water

## location town mortality hardness


## 1 South Bath 1247 105
## 2 North Birkenhead 1668 17
## 3 South Birmingham 1466 5
## 4 North Blackburn 1800 14
## 5 North Blackpool 1609 18
## 6 North Bolton 1558 10
## 7 North Bootle 1807 15
## 8 South Bournemouth 1299 78
## 9 North Bradford 1637 10
## 10 South Brighton 1359 84
## 11 South Bristol 1392 73
## 12 North Burnley 1755 12
## 13 South Cardiff 1519 21
## 14 South Coventry 1307 78
## 15 South Croydon 1254 96
## 16 North Darlington 1491 20
## 17 North Derby 1555 39
## 18 North Doncaster 1428 39
## 19 South East Ham 1318 122
## 20 South Exeter 1260 21
## 21 North Gateshead 1723 44
## 22 North Grimsby 1379 94
## 23 North Halifax 1742 8
## 24 North Huddersfield 1574 9
## 25 North Hull 1569 91
## 26 South Ipswich 1096 138
## 27 North Leeds 1591 16
## 28 South Leicester 1402 37
## 29 North Liverpool 1772 15
## 30 North Manchester 1828 8
## 31 North Middlesbrough 1704 26
## 32 North Newcastle 1702 44
## 33 South Newport 1581 14
## 34 South Northampton 1309 59
## 35 South Norwich 1259 133
## 36 North Nottingham 1427 27
## 37 North Oldham 1724 6
## 38 South Oxford 1175 107
## 39 South Plymouth 1486 5
## 40 South Portsmouth 1456 90
## 41 North Preston 1696 6
## 42 South Reading 1236 101
## 43 North Rochdale 1711 13
## 44 North Rotherham 1444 14
## 45 North St Helens 1591 49
## 46 North Salford 1987 8
## 47 North Sheffield 1495 14
## 48 South Southampton 1369 68
## 49 South Southend 1257 50
## 50 North Southport 1587 75
## 51 North South Shields 1713 71
## 52 North Stockport 1557 13
## 53 North Stoke 1640 57
## 54 North Sunderland 1709 71
## 55 South Swansea 1625 13
## 56 North Wallasey 1625 20
## 57 South Walsall 1527 60
## 58 South West Bromwich 1627 53
## 59 South West Ham 1486 122
## 60 South Wolverhampton 1485 81
## 61 North York 1378 71

# Scatter plot of mortality against water hardness, coloured by location.
mort <- data.frame(water)
ggplot() +
  geom_point(
    data = mort,
    mapping = aes(x = hardness, y = mortality, colour = location)
  )

# Reload `water`, copy it into `mort` and preview the first ten rows.
data(list = "water", package = "HSAUR")


mort <- data.frame(water)
head(mort, n = 10)
## location town mortality hardness
## 1 South Bath 1247 105
## 2 North Birkenhead 1668 17
## 3 South Birmingham 1466 5
## 4 North Blackburn 1800 14
## 5 North Blackpool 1609 18
## 6 North Bolton 1558 10
## 7 North Bootle 1807 15
## 8 South Bournemouth 1299 78
## 9 North Bradford 1637 10
## 10 South Brighton 1359 84

# Covariance between water hardness and mortality.
cov(mort[["hardness"]], mort[["mortality"]])

## [1] -4681.544

# Correlation coefficient of the same two columns.
cor(mort[["hardness"]], mort[["mortality"]])

## [1] -0.6548486

# Squared correlation between hardness and mortality (data reloaded first).
data(list = "water", package = "HSAUR")


mort <- data.frame(water)
cor(mort[["hardness"]], mort[["mortality"]])^2

## [1] 0.4288267

# 100 points on the upper unit semicircle: Y is fully determined by X
# through Y = sqrt(1 - X^2).
X <- seq(-1, 1, length.out = 100)
Y <- sqrt(1 - X^2)
xy <- data.frame(X, Y)
# Plot the semicircle and compute the linear correlation of X and Y.
ggplot(data = xy, mapping = aes(x = X, y = Y)) +
  geom_point(color = "darkorange", size = 3)
cor(X, Y)

## [1] -3.162392e-16

library(plotly)

##
## Attaching package: 'plotly'

## The following object is masked from 'package:mosaic':


##
## do

## The following object is masked from 'package:ggplot2':


##
## last_plot

## The following object is masked from 'package:stats':


##
## filter

## The following object is masked from 'package:graphics':


##
## layout

#### no me falta la base record_100_m <-read_excel("record-100-m.xlsx")


###dt=data.frame(x=record_100_m[,1], y=record_100_m[,2])
###names(dt)<-c("año","record")
##p<- ggplot(data=dt, aes(x=año,y=record),color="red",size=3)+
##  geom_point(color="red",size=3)+ geom_smooth(method = "lm",se=FALSE)
##ggplotly(p)

Probabilidad
library(pander)
##x=sample(1:2,1000, replace = T) # Error in sample.int(length(x), size,
##   replace, prob) : invalid 'replace' argument
##pander(table(x))
##x=sample(1:6,1000, replace=T)
##pander(table(x))

library(dygraphs)
# Print the `mdeaths` monthly time series.
mdeaths

## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 2134 1863 1877 1877 1492 1249 1280 1131 1209 1492 1621 1846
## 1975 2103 2137 2153 1833 1403 1288 1186 1133 1053 1347 1545 2066
## 1976 2020 2750 2283 1479 1189 1160 1113 970 999 1208 1467 2059
## 1977 2240 1634 1722 1801 1246 1162 1087 1013 959 1179 1229 1655
## 1978 2019 2284 1942 1423 1340 1187 1098 1004 970 1140 1110 1812
## 1979 2263 1820 1846 1531 1215 1075 1056 975 940 1081 1294 1341

# Print the `fdeaths` monthly time series.
fdeaths

## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 901 689 827 677 522 406 441 393 387 582 578 666
## 1975 830 752 785 664 467 438 421 412 343 440 531 771
## 1976 767 1141 896 532 447 420 376 330 357 445 546 764
## 1977 862 660 663 643 502 392 411 348 387 385 411 638
## 1978 796 853 737 546 530 446 431 362 387 430 425 679
## 1979 821 785 727 612 478 429 405 379 393 411 487 574

# Combine the two monthly death series into one multivariate time series.
lungDeaths <- cbind(mdeaths, fdeaths)
# The plotting function exported by the dygraphs package is dygraph()
# (singular); the earlier call to dygraphs() failed with
# 'could not find function "dygraphs"'.
dygraph(lungDeaths)

# Por ejemplo, ahora vamos a “simular” el lanzamiento de 1000 monedas
# (el número 1 es cara, 2 es cruz), y los resultados los visualizamos en
# una tabla.

library(pander)

# Simulate 1000 coin tosses (1 = heads, 2 = tails).
# `replace = TRUE` is spelled out: `T` is an ordinary variable, and the
# original call with `replace = T` failed with
# "invalid 'replace' argument" (presumably `T` had been reassigned
# earlier in the session).
x <- sample(1:2, 1000, replace = TRUE)
# Frequency table of the outcomes, rendered through pander.
pander(table(x))

# Bar chart of the same frequencies.
barplot(table(x))

# Now do the same for 1000 rolls of a six-sided die.
# The two statements were fused on one line (a syntax error), and
# `replace = TRUE` replaces the reassignable shorthand `T`.
x <- sample(1:6, 1000, replace = TRUE)
pander(table(x))

#x=sample(1:6,1000, replace=T)
#y=table(x)
#pander(y)
# P(X > 3) for X ~ Binomial(size = 10, prob = 0.5), as 1 minus the CDF at 3.
1 - pbinom(q = 3, size = 10, prob = 0.5)

## [1] 0.828125

You might also like