You are on page 1of 21

Estadistica Basica

• Probabilidad

# Read the passenger workbook into `Titanic` and display it.
library(readxl)
Titanic <- read_excel(path = "Pasajeros-Titanic.xlsx")
Titanic

Pasajero sobrevivio sexo edad clase


<chr> <chr> <chr> <dbl> <chr>

Allison, Miss. Helen Loraine no female 2.0000 1st

Allison, Mrs. Hudson J C (Bessi no female 25.0000 1st

Evans, Miss. Edith Corse no female 36.0000 1st

Isham, Miss. Ann Elizabeth no female 50.0000 1st

Straus, Mrs. Isidor (Rosalie Id no female 63.0000 1st

Carter, Mrs. Ernest Courtenay ( no female 44.0000 2nd

Chapman, Mrs. John Henry (Sara no female 29.0000 2nd

Corbett, Mrs. Walter H (Irene C no female 30.0000 2nd

Corey, Mrs. Percy C (Mary Phyll no female NA 2nd

Funk, Miss. Annie Clemmer no female 38.0000 2nd

1-10 of 1,309 rows Previous 1 2 3 4 5 6 ... 131 Next

dim(Titanic)

## [1] 1309 5

names(Titanic)

## [1] "Pasajero" "sobrevivio" "sexo" "edad" "clase"

class(Titanic)

## [1] "tbl_df" "tbl" "data.frame"


# Convert the object returned by read_excel() into a plain data.frame,
# then check the resulting class.
Titanic <- as.data.frame(Titanic)
class(Titanic)

## [1] "data.frame"

class(Titanic$sexo)

## [1] "character"

# Ask knitr to emit tables in LaTeX format.
options(knitr.table.format = "latex")

library(pander)
# Two-way count table: survival status (rows) by passenger class (columns).
t1 <- table(Titanic[["sobrevivio"]], Titanic[["clase"]])
t1

##
## 1st 2nd 3rd
## no 123 158 528
## yes 200 119 181

# Append row and column totals ("Sum") to the two-way table.
t2 <- addmargins(t1)
t2

##
## 1st 2nd 3rd Sum
## no 123 158 528 809
## yes 200 119 181 500
## Sum 323 277 709 1309

pander(t2)

1st 2nd 3rd Sum

no 123 158 528 809

yes 200 119 181 500

Sum 323 277 709 1309


# Bar chart of how many passengers travelled in each class.
# In R, one column is picked out of a data set with dataset_name$column.
y <- Titanic$clase
# table() builds the frequency table of the values in y.
x <- table(y)
# One colour is supplied per bar; axes and colours can be formatted further.
barplot(
  x,
  main = "Gráfico de Barras con una tabla de contigencia",
  col = c("grey", "royalblue", "purple")
)

library(lattice)

library(mosaic)

## Loading required package: dplyr

##
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':


##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union

## Loading required package: ggformula

## Loading required package: ggplot2

## Loading required package: ggstance

##
## Attaching package: 'ggstance'

## The following objects are masked from 'package:ggplot2':


##
## geom_errorbarh, GeomErrorbarh

##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")

## Loading required package: mosaicData

## Loading required package: Matrix

## Registered S3 method overwritten by 'mosaic':


## method from
## fortify.SpatialPolygonsDataFrame ggplot2

##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.

##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean

## The following object is masked from 'package:ggplot2':


##
## stat

## The following objects are masked from 'package:dplyr':


##
## count, do, tally

## The following objects are masked from 'package:stats':


##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var

## The following objects are masked from 'package:base':


##
## max, mean, min, prod, range, sample, sum

# Bar chart of passenger class, specified through the formula interface.
bargraph(~clase, data = Titanic, main = "Gráfico de Barras")
# Example sample of 26 observations to be grouped into class intervals.
X <- c(
  2, 3, 4, 4.5, 4.5, 5.6, 5.7, 5.8, 6, 6.1, 6.5, 7, 7, 7, 7.5, 7.5, 7.5,
  8.3, 9, 10.2, 10.4, 11, 11.1, 11.5, 12, 13
)
# cut() with a single number splits the range of X into that many
# equal-width intervals; table() then counts the observations per interval.
div <- table(cut(X, breaks = 7))
div

##
## (1.99,3.57] (3.57,5.14] (5.14,6.71] (6.71,8.29] (8.29,9.86] (9.86,11.4]
## 2 3 6 6 2 4
## (11.4,13]
## 3

class(div)

## [1] "table"
library(pander)
library(xtable)
# Turn the interval counts into a two-column data frame and give the
# columns their display headers in a single step.
mat <- setNames(data.frame(div), c("Intervalos", "Frecuencias($n_i$)"))
# Wrap the data frame as an xtable object and render it with pander.
x <- xtable(mat)
pander(x)

Intervalos Frecuencias($n_i$)

(1.99,3.57] 2

(3.57,5.14] 3

(5.14,6.71] 6

(6.71,8.29] 6

(8.29,9.86] 2

(9.86,11.4] 4

(11.4,13] 3

# Split the passenger ages into intervals: cut() with breaks = 6 groups
# edad into six intervals and table() counts the passengers in each one.

table(cut(Titanic$edad, breaks = 6))

##
## (0.0869,13.5] (13.5,26.8] (26.8,40.1] (40.1,53.4] (53.4,66.7]
## 99 375 345 150 68
## (66.7,80.1]
## 9

library(mosaic)
# Histogram of passenger age through the formula interface.
histogram(~edad, data = Titanic)
x <- Titanic$edad
# Lay the following base-graphics plots out on a 2 x 2 grid. par() returns
# the previous settings, kept in `op` so they can be restored with par(op).
op <- par(mfrow = c(2, 2))
# Number of intervals chosen by Sturges' formula.
hist(x, breaks = "Sturges", col = "purple")
# A single number for `breaks` is a suggested interval count; the same data
# are drawn with progressively finer binning for comparison.
hist(x, breaks = 4, col = "green")
hist(x, breaks = 10, col = "yellow")
hist(x, breaks = 35, col = "grey")
library(mosaic)
library(gridExtra)

##
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':


##
## combine

# Build plots of passenger age (edad) and place them on one page.
# Frequency polygon of edad.
p1<-freqpolygon(~edad, Titanic)
# Histogram of edad.
p2<-histogram(~edad, Titanic)
# NOTE(review): ladd() presumably overlays a frequency polygon on the most
# recent lattice plot; confirm what it returns before relying on p3 below.
p3<-ladd(panel.freqpolygon(Titanic$edad))
# Arrange the three objects in a two-column layout.
grid.arrange(p1,p2,p3,ncol=2)
# Restore the graphics parameters previously saved in `op`.
par(op)

library(ggplot2)
library(plotly)

##
## Attaching package: 'plotly'

## The following object is masked from 'package:mosaic':


##
## do

## The following object is masked from 'package:ggplot2':


##
## last_plot

## The following object is masked from 'package:stats':


##
## filter
## The following object is masked from 'package:graphics':
##
## layout

# na.omit() drops the missing (NA) ages before plotting and density estimation.
X <- na.omit(Titanic$edad)

# Draw the histogram on the density scale so that the kernel density curve
# added below shares the same vertical axis.
hist(X, probability = TRUE)
# NOTE(review): the original author recorded "Me da error" ("it gives me an
# error") on this line. lines() can only add to a plot that is already open,
# so it presumably failed when run separately from hist() - TODO confirm.
lines(density(X), lty = "longdash", lwd = 3, col = "purple")

# Side-by-side bars: passenger counts per class, split by survival status.
dt <- data.frame(Titanic)
ggplot(dt, aes(x = clase)) +
  geom_bar(aes(fill = sobrevivio), position = "dodge")
library(HSAUR)

## Loading required package: tools

data("water", package = "HSAUR")


water

location town mortality hardness


<fctr> <chr> <int> <int>

1 South Bath 1247 105

2 North Birkenhead 1668 17

3 South Birmingham 1466 5

4 North Blackburn 1800 14

5 North Blackpool 1609 18

6 North Bolton 1558 10

7 North Bootle 1807 15


location town mortality hardness
<fctr> <chr> <int> <int>

8 South Bournemouth 1299 78

9 North Bradford 1637 10

10 South Brighton 1359 84

1-10 of 61 rows Previous 1 2 3 4 5 6 7 Next

# Scatter plot of mortality against water hardness, coloured by location.
mort <- data.frame(water)
ggplot() +
  geom_point(
    data = mort,
    aes(x = hardness, y = mortality, colour = location)
  )

data("water", package = "HSAUR")


mort<-data.frame(water)
head(mort,10)

location town mortality hardness


<fctr> <chr> <int> <int>

1 South Bath 1247 105

2 North Birkenhead 1668 17


location town mortality hardness
<fctr> <chr> <int> <int>

3 South Birmingham 1466 5

4 North Blackburn 1800 14

5 North Blackpool 1609 18

6 North Bolton 1558 10

7 North Bootle 1807 15

8 South Bournemouth 1299 78

9 North Bradford 1637 10

10 South Brighton 1359 84

1-10 of 10 rows

cov(mort$hardness,mort$mortality)

## [1] -4681.544

cor(mort$hardness,mort$mortality)

## [1] -0.6548486

data("water", package = "HSAUR")


# Squared correlation between water hardness and mortality.
mort <- data.frame(water)
cor(mort[["hardness"]], mort[["mortality"]])^2

## [1] 0.4288267

# 100 equally spaced points on [-1, 1]; `length.out` is spelled out in full
# instead of relying on partial matching of `length`.
X <- seq(-1, 1, length.out = 100)
# Upper half of the unit circle: Y is an exact (non-linear) function of X.
Y <- sqrt(1 - X^2)

# Plot the (X, Y) pairs, then compute their linear correlation coefficient.
xy <- data.frame(X, Y)
ggplot(data = xy, aes(x = X, y = Y)) +
  geom_point(color = "darkorange", size = 3)
cor(X, Y)

## [1] -3.162392e-16

library(plotly)

library(readxl)
library(plotly)
library(ggplot2)
# Read the workbook and keep its first two columns under the names
# `anyo` and `record`.
record_100_m <- read_excel("record-100-m.xlsx")
dt <- data.frame(x = record_100_m[, 1], y = record_100_m[, 2])
names(dt) <- c("anyo", "record")

# Points plus a fitted straight line (method = "lm") without its confidence
# band. Each `+` must END a line: a line that starts with `+` is parsed as a
# separate statement and the layer is never added to `p`.
# NOTE(review): color = "red" and size = 1 inside aes() are mapped as data
# rather than set as constants, which is what produces the "colour" and
# "size" legends; consider moving them out of aes().
p <- ggplot(dt, aes(x = anyo, y = record, color = "red", size = 1)) +
  geom_point(color = "red", size = 2) +
  geom_smooth(method = "lm", se = FALSE)
# Convert the ggplot object into an interactive plotly figure.
ggplotly(p)

colour
size
10.50

10.25
record

10.00

9.75

1920 1940 1960 1980 2000


anyo

Probabilidad
library(pander)
# Draw 1000 values from {1, 2} with replacement.
# NOTE(review): the original author recorded this message next to the call:
#   Error in sample.int(length(x), size, replace, prob): invalid 'replace'
#   argument
# `replace` must be a single TRUE/FALSE value - TODO confirm what triggered it.
x <- sample(1:2, 1000, replace = TRUE)
x
## [1] 2 2 1 1 1 2 2 1 2 1 1 1 2 1 1 1 1 1 2 1 2 1 2 1 2 1 2 2 2 2 2 1 1 1
## [35] 1 1 1 2 1 1 2 2 2 2 1 2 1 1 2 1 2 2 2 1 2 2 1 1 2 1 1 2 2 2 2 1 1 1
## [69] 1 1 1 2 2 1 2 1 2 2 2 2 1 2 1 1 1 1 2 2 1 1 2 2 2 2 1 1 1 1 2 2 1 2
## [103] 1 1 2 2 2 1 2 1 2 1 2 1 2 2 2 2 1 1 2 2 1 1 1 2 1 1 2 2 1 1 2 1 2 2
## [137] 1 2 1 1 2 2 1 1 2 1 2 1 1 2 1 2 1 2 1 1 2 2 1 2 2 2 1 2 1 1 2 2 1 1
## [171] 1 2 2 1 1 2 1 1 1 1 2 2 1 2 2 1 1 1 2 2 2 1 2 2 2 2 2 1 2 2 1 2 2 1
## [205] 2 1 1 2 2 1 2 2 2 1 2 1 1 1 1 1 2 1 2 2 1 1 1 2 2 2 1 1 1 2 2 2 2 2
## [239] 2 1 2 2 2 2 2 2 1 2 1 1 1 1 2 1 2 2 1 2 1 2 1 2 1 1 1 2 2 2 2 2 1 1
## [273] 2 2 1 1 2 1 2 2 2 1 1 2 1 1 1 2 1 2 2 2 2 1 2 2 1 2 2 2 1 1 2 1 2 1
## [307] 2 1 2 2 2 1 2 2 2 2 1 1 1 2 2 2 2 1 2 1 2 2 1 1 1 1 2 2 2 2 1 1 1 1
## [341] 1 2 1 2 1 1 1 1 2 1 2 2 1 2 1 2 2 1 1 2 2 2 2 1 1 2 2 2 2 1 1 1 1 2
## [375] 2 1 1 2 2 1 1 1 1 2 1 1 2 2 2 1 1 1 2 1 1 2 1 2 1 1 2 2 1 2 2 2 2 1
## [409] 1 2 1 1 2 2 1 2 1 2 2 2 2 2 1 1 2 2 1 1 1 1 2 2 2 1 2 1 1 1 2 1 2 2
## [443] 2 2 2 2 1 2 2 2 2 1 1 2 1 2 1 1 1 2 1 1 1 2 2 2 1 1 2 2 1 2 1 1 1 2
## [477] 2 2 2 1 2 1 1 2 2 2 1 1 2 2 1 2 2 1 2 2 1 2 2 2 2 1 2 2 1 1 1 2 2 2
## [511] 1 1 2 1 2 1 1 1 1 2 2 2 2 2 1 2 1 1 2 1 1 1 2 1 1 2 2 2 2 2 1 2 1 2
## [545] 1 2 2 2 2 2 2 1 1 2 2 1 1 1 2 1 2 1 1 1 1 1 2 1 2 1 2 1 1 2 2 2 1 2
## [579] 2 1 1 1 1 1 1 1 1 2 1 1 2 1 2 2 1 1 2 1 1 1 1 1 1 2 2 1 2 1 1 1 1 1
## [613] 2 1 2 2 1 2 2 2 1 1 2 1 2 1 1 1 1 2 2 2 1 2 1 2 2 2 1 2 1 1 1 2 2 2
## [647] 2 2 1 2 1 2 2 1 2 1 2 2 1 1 2 2 1 1 2 2 2 2 1 2 1 1 1 2 2 2 2 1 2 2
## [681] 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 2 2 2 1 2 1 1 1 2 1 1 1 1 1 2 1 1 1
## [715] 2 1 1 2 2 1 2 1 1 1 1 2 2 1 1 2 1 2 2 2 2 2 2 2 2 1 2 2 2 1 1 1 2 2
## [749] 2 2 2 2 1 2 1 1 1 1 1 1 1 1 1 2 1 2 2 1 1 2 1 2 2 1 2 1 2 1 2 2 1 2
## [783] 2 2 2 2 2 2 2 2 1 2 2 1 1 1 1 1 1 1 2 2 2 1 2 1 2 1 2 2 1 2 2 1 1 1
## [817] 1 2 2 2 2 2 2 2 1 2 1 2 2 2 2 1 2 2 1 2 1 1 1 2 2 1 2 1 1 1 2 2 1 2
## [851] 2 2 2 1 2 1 2 1 1 1 1 1 1 1 2 2 2 2 1 2 2 1 1 1 2 1 2 2 2 1 2 2 2 1
## [885] 2 1 1 1 2 2 2 1 1 1 2 2 2 1 1 2 1 2 1 1 2 1 2 2 2 2 1 2 1 2 2 1 2 1
## [919] 1 1 2 1 1 1 2 2 1 1 2 2 2 2 1 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 2 1 2 2
## [953] 1 2 2 2 1 1 2 1 2 1 2 2 2 1 1 1 1 2 1 2 1 1 1 2 2 2 1 2 2 2 2 2 1 1
## [987] 2 1 1 2 2 2 2 2 1 2 2 1 2 2

pander(table(x))

1 2

481 519

# Draw 1000 values from 1..6 with replacement and print them.
y <- sample(1:6, size = 1000, replace = TRUE)
y
## [1] 6 2 6 5 3 3 1 4 5 4 6 5 5 5 5 2 3 2 3 1 4 1 4 5 6 1 1 4 2 1 3 6 2 1
## [35] 6 4 3 4 3 5 3 3 4 4 6 4 1 1 2 3 6 5 4 6 1 3 4 5 6 3 1 1 5 4 6 2 2 2
## [69] 4 2 1 1 3 5 3 4 1 1 2 5 2 5 1 1 6 5 3 4 6 2 2 5 1 3 6 6 1 2 3 2 4 6
## [103] 1 3 6 1 6 5 5 1 4 5 3 3 2 2 5 2 1 4 6 1 3 4 6 4 4 6 1 2 4 6 1 2 4 2
## [137] 3 2 6 4 5 2 6 5 2 6 6 3 5 4 2 5 3 3 3 2 1 2 2 5 6 5 4 1 6 2 1 3 2 2
## [171] 5 4 4 5 1 3 4 5 2 5 3 3 4 2 6 4 5 5 3 3 2 2 2 6 6 3 5 1 2 1 5 5 1 4
## [205] 4 6 4 4 5 1 1 4 2 1 6 5 4 5 1 4 2 1 6 4 2 6 6 6 2 5 2 4 2 5 5 3 4 2
## [239] 4 1 2 2 4 3 6 6 3 6 4 6 4 4 1 4 4 1 3 1 5 4 1 5 5 3 3 1 2 6 1 4 6 5
## [273] 4 1 1 5 3 5 3 6 4 4 2 1 4 5 1 1 6 3 3 4 5 6 4 6 5 6 4 1 1 1 1 1 5 3
## [307] 3 6 5 1 5 1 3 6 2 4 3 5 4 2 1 3 3 4 5 6 3 1 3 2 5 6 1 3 2 4 1 4 6 5
## [341] 2 4 4 4 3 1 6 5 5 6 6 4 5 1 2 6 1 1 5 4 6 5 3 5 6 4 5 2 1 1 3 4 3 1
## [375] 1 3 3 5 4 1 5 6 2 4 6 6 2 1 2 2 1 4 2 3 6 2 5 3 5 5 3 4 4 1 5 6 6 6
## [409] 5 6 5 2 1 4 1 1 1 5 2 4 5 4 3 2 4 1 4 4 4 5 6 1 3 4 4 6 1 1 6 3 5 6
## [443] 5 3 6 2 6 1 2 6 3 3 1 2 4 3 2 4 6 4 5 3 1 3 3 6 6 2 4 2 3 1 1 2 3 6
## [477] 4 6 1 4 4 1 4 6 4 4 3 4 6 1 3 5 5 5 1 2 1 6 4 5 5 2 2 3 2 2 5 1 6 3
## [511] 1 6 6 1 3 1 2 5 3 6 2 2 6 5 2 1 2 3 3 3 6 5 1 1 5 3 2 6 4 3 3 6 5 2
## [545] 1 5 5 5 1 1 3 5 6 1 1 6 5 1 5 5 4 4 2 1 1 6 3 1 1 2 3 1 3 2 2 4 3 1
## [579] 3 4 1 6 1 2 5 6 5 1 2 1 3 4 2 2 1 6 6 4 5 3 3 5 3 2 6 5 4 4 6 2 3 3
## [613] 5 6 3 1 5 2 3 3 2 4 1 5 1 3 1 5 6 5 5 4 5 2 2 2 5 1 3 6 6 3 2 1 4 4
## [647] 2 5 3 5 2 6 4 4 3 5 5 3 4 5 2 4 5 1 2 3 6 1 2 4 4 2 4 4 4 5 5 5 1 1
## [681] 3 1 3 5 6 6 3 1 4 6 4 2 5 5 3 5 2 3 6 2 2 2 4 1 3 2 4 6 4 3 1 6 2 4
## [715] 1 2 1 3 5 1 5 4 1 6 6 3 2 2 3 2 1 3 6 4 6 5 6 5 4 1 2 3 1 2 2 5 1 1
## [749] 2 1 6 1 5 6 2 4 2 6 3 5 4 5 6 4 4 1 1 6 4 2 4 3 5 2 1 6 6 3 6 5 2 6
## [783] 4 2 5 6 1 1 6 1 4 2 2 5 2 5 2 6 5 6 4 1 6 2 2 4 5 6 5 6 5 3 2 2 3 4
## [817] 6 4 2 6 6 3 6 3 1 3 1 2 4 5 4 2 4 3 2 4 5 2 3 6 2 1 5 6 4 4 5 1 3 5
## [851] 2 2 3 5 4 4 4 1 4 2 3 3 6 2 2 3 2 1 5 1 1 2 4 4 4 3 3 4 6 5 3 2 3 6
## [885] 2 4 5 1 5 2 3 5 5 3 5 2 6 1 5 6 4 2 1 5 2 1 2 3 5 2 3 4 6 4 6 5 4 2
## [919] 1 3 4 2 6 6 2 4 4 5 2 2 5 3 1 5 2 1 5 3 6 4 6 2 4 6 6 6 5 3 1 3 5 4
## [953] 4 5 5 4 6 2 1 3 3 1 5 4 2 4 2 3 2 6 5 4 4 3 6 4 4 1 2 2 6 5 2 6 1 3
## [987] 6 1 4 6 2 4 5 5 4 2 2 2 5 2

pander(table(y))

1 2 3 4 5 6

169 175 152 175 170 159

# Por ejemplo, ahora vamos a “simular” el lanzamiento de 1000 monedas
# (el número 1 es cara, 2 es cruz), y los resultados los visualizamos en una tabla.

library(dygraphs)
mdeaths
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 2134 1863 1877 1877 1492 1249 1280 1131 1209 1492 1621 1846
## 1975 2103 2137 2153 1833 1403 1288 1186 1133 1053 1347 1545 2066
## 1976 2020 2750 2283 1479 1189 1160 1113 970 999 1208 1467 2059
## 1977 2240 1634 1722 1801 1246 1162 1087 1013 959 1179 1229 1655
## 1978 2019 2284 1942 1423 1340 1187 1098 1004 970 1140 1110 1812
## 1979 2263 1820 1846 1531 1215 1075 1056 975 940 1081 1294 1341

fdeaths

## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 901 689 827 677 522 406 441 393 387 582 578 666
## 1975 830 752 785 664 467 438 421 412 343 440 531 771
## 1976 767 1141 896 532 447 420 376 330 357 445 546 764
## 1977 862 660 663 643 502 392 411 348 387 385 411 638
## 1978 796 853 737 546 530 446 431 362 387 430 425 679
## 1979 821 785 727 612 478 429 405 379 393 411 487 574

# Bind the two monthly series column-wise and draw them on one
# interactive time-series chart.
lungDeaths <- cbind(mdeaths, fdeaths)
dygraph(lungDeaths)

## Registered S3 method overwritten by 'xts':


## method from
## as.zoo.xts zoo

mdeaths fdeaths
2800

2600

2400

2200

2000

1800

1600

1400

1200

1000

800

600
400
200
Jan 1974 Jan 1975 Jan 1976 Jan 1977 Jan 1978 Jan 1979

library(pander)

# Simulate 1000 coin tosses: each draw is 1 or 2, sampled with replacement.
x <- sample(1:2, size = 1000, replace = TRUE)
pander(table(x))

1 2

495 505

barplot(table(x))

# Now do the same, simulating the roll of a die: 1000 draws from 1..6
# with replacement.
x <- sample(1:6, size = 1000, replace = TRUE)
pander(table(x))

1 2 3 4 5 6
1 2 3 4 5 6

175 164 179 158 163 161

# Draw 1000 values from 1..6 with replacement, then count how often each
# value came up.
x <- sample(1:6, size = 1000, replace = TRUE)
y <- table(x)
pander(y)

1 2 3 4 5 6

187 151 163 151 183 165

# P(X > 3) for X ~ Binomial(size = 10, prob = 0.5). lower.tail = FALSE asks
# pbinom() for the upper tail directly instead of computing 1 - P(X <= 3).
pbinom(3, size = 10, prob = 0.5, lower.tail = FALSE)

## [1] 0.828125

You might also like