Estadística Básica: Contenido

Estadística Básica
CONTENIDO
Probabilidad........................................................................................................................................... 13
library(readxl)
# Import the passenger list from the Excel workbook and display it.
Titanic <- read_excel(path = "Pasajeros-Titanic.xlsx")
print(Titanic)
## # A tibble: 1,309 x 5
## Pasajero sobrevivio sexo edad clase
## <chr> <chr> <chr> <dbl> <chr>
## 1 Allison, Miss. Helen Loraine no female 2 1st
## 2 Allison, Mrs. Hudson J C (Bessi no female 25 1st
## 3 Evans, Miss. Edith Corse no female 36 1st
## 4 Isham, Miss. Ann Elizabeth no female 50 1st
## 5 Straus, Mrs. Isidor (Rosalie Id no female 63 1st
## 6 Carter, Mrs. Ernest Courtenay ( no female 44 2nd
## 7 Chapman, Mrs. John Henry (Sara no female 29 2nd
## 8 Corbett, Mrs. Walter H (Irene C no female 30 2nd
## 9 Corey, Mrs. Percy C (Mary Phyll no female NA 2nd
## 10 Funk, Miss. Annie Clemmer no female 38 2nd
## # ... with 1,299 more rows
# Set the knitr.table.format option to "latex".
options(knitr.table.format = "latex")
library(pander)
# Two-way frequency table: survival (rows) by passenger class (columns).
t1 <- table(Titanic[["sobrevivio"]], Titanic[["clase"]])
print(t1)
##
## 1st 2nd 3rd
## no 123 158 528
## yes 200 119 181
# Same table extended with row and column totals.
t2 <- addmargins(A = t1)
print(t2)
##
## 1st 2nd 3rd Sum
## no 123 158 528 809
## yes 200 119 181 500
## Sum 323 277 709 1309
# Render the table with margins through pander.
pander(t2)
## 1st 2nd 3rd Sum
##
## no 123 158 528 809
## yes 200 119 181 500
## Sum 323 277 709 1309
# Bar chart of the passenger class frequencies (base graphics).
# Reminder: in R a single variable of a data set is selected with
# dataset_name$variable.
y <- Titanic$clase
# table() builds the frequency table that barplot() then draws.
x <- table(y)
barplot(x)
library(lattice)
library(mosaic)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':

##
## intersect, setdiff, setequal, union
## Loading required package: ggformula
## Loading required package: ggplot2
## Loading required package: ggstance
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':

##
## geom_errorbarh, GeomErrorbarh
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## Registered S3 method overwritten by 'mosaic':

## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':

##
## mean
## The following object is masked from 'package:ggplot2':

##
## stat
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following objects are masked from 'package:stats':

##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':

##
## max, mean, min, prod, range, sample, sum
# Bar chart of the passenger class counts, using the formula interface.
bargraph(
  ~clase,
  data = Titanic,
  main = "Gráfico de Barras"
)
# Small worked example: 26 observations grouped into 7 equal-width classes.
X <- c(
  2, 3, 4, 4.5, 4.5, 5.6, 5.7, 5.8, 6, 6.1, 6.5, 7, 7, 7, 7.5, 7.5, 7.5,
  8.3, 9, 10.2, 10.4, 11, 11.1, 11.5, 12, 13
)
# cut() assigns each value to its interval; table() counts per interval.
div <- table(cut(X, breaks = 7))
print(div)
##
## (1.99,3.57] (3.57,5.14] (5.14,6.71] (6.71,8.29] (8.29,9.86] (9.86,11.4]
##           2           3           6           6           2           4
##   (11.4,13]
##           3
library(pander)
library(xtable)
# Turn the interval counts into a two-column data frame with readable
# headers, wrap it with xtable() and render it through pander().
mat <- data.frame(div)
colnames(mat) <- c("Intervalos", "Frecuencias($n_i$)")
x <- xtable(mat)
pander(x)
## Intervalos Frecuencias($n_i$)
## (1.99,3.57] 2
## (3.57,5.14] 3
## (5.14,6.71] 6
## (6.71,8.29] 6
## (8.29,9.86] 2
## (9.86,11.4] 4
## (11.4,13] 3
# Split the passenger ages into six intervals and count each one.
print(table(cut(x = Titanic$edad, breaks = 6)))
##
## (0.0869,13.5] (13.5,26.8] (26.8,40.1] (40.1,53.4] (53.4,66.7]
## 99 375 345 150 68
## (66.7,80.1]
## 9
library(mosaic)
# Histogram of passenger age through the formula interface.
histogram(~edad, data = Titanic)
x <- Titanic$edad
# Arrange the following base-graphics plots on a 2 x 2 grid; the previous
# settings are kept in `op` so they can be restored with par(op).
op <- par(mfrow = c(2, 2))
# Number of intervals chosen by Sturges' formula.
hist(x, breaks = "Sturges")
# Same data with 4, 10 and 35 requested intervals for comparison.
hist(x, breaks = 4)
hist(x, breaks = 10)
hist(x, breaks = 35)
library(mosaic)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':

##
## combine
# Build a frequency polygon and a histogram of passenger age, then lay the
# stored plot objects out in a two-column grid.
p1<-freqpolygon(~edad, Titanic)
p2<-histogram(~edad, Titanic)
# NOTE(review): ladd() presumably adds the panel layer to the most recently
# drawn lattice plot, so what p3 holds depends on earlier plotting - confirm.
p3<-ladd(panel.freqpolygon(Titanic$edad))
grid.arrange(p1,p2,p3,ncol=2)
# Restore the graphical parameters that were saved in `op`.
par(op)
# Drop missing ages (na.omit removes the blank values), then overlay a
# kernel density estimate on the density-scaled histogram.
X <- na.omit(Titanic$edad)

hist(X, probability = TRUE)
# The original author noted that this call raised an error for them.
lines(density(X), col = "red", lwd = 2, lty = "dotted")
# Side-by-side bars of passenger class, filled by survival status.
dt <- data.frame(Titanic)
ggplot(data = dt, mapping = aes(x = clase)) +
  geom_bar(mapping = aes(fill = sobrevivio), position = "dodge")
library(HSAUR)
## Loading required package: tools
# Load the `water` data set from the HSAUR package and print it.
data(list = "water", package = "HSAUR")

print(water)
## location town mortality hardness

## 1 South Bath 1247 105
## 2 North Birkenhead 1668 17
## 3 South Birmingham 1466 5
## 4 North Blackburn 1800 14
## 5 North Blackpool 1609 18
## 6 North Bolton 1558 10
## 7 North Bootle 1807 15
## 8 South Bournemouth 1299 78
## 9 North Bradford 1637 10
## 10 South Brighton 1359 84
## 11 South Bristol 1392 73
## 12 North Burnley 1755 12
## 13 South Cardiff 1519 21
## 14 South Coventry 1307 78
## 15 South Croydon 1254 96
## 16 North Darlington 1491 20
## 17 North Derby 1555 39
## 18 North Doncaster 1428 39
## 19 South East Ham 1318 122
## 20 South Exeter 1260 21
## 21 North Gateshead 1723 44
## 22 North Grimsby 1379 94
## 23 North Halifax 1742 8
## 24 North Huddersfield 1574 9
## 25 North Hull 1569 91
## 26 South Ipswich 1096 138
## 27 North Leeds 1591 16
## 28 South Leicester 1402 37
## 29 North Liverpool 1772 15
## 30 North Manchester 1828 8
## 31 North Middlesbrough 1704 26
## 32 North Newcastle 1702 44
## 33 South Newport 1581 14
## 34 South Northampton 1309 59
## 35 South Norwich 1259 133
## 36 North Nottingham 1427 27
## 37 North Oldham 1724 6
## 38 South Oxford 1175 107
## 39 South Plymouth 1486 5
## 40 South Portsmouth 1456 90
## 41 North Preston 1696 6
## 42 South Reading 1236 101
## 43 North Rochdale 1711 13
## 44 North Rotherham 1444 14
## 45 North St Helens 1591 49
## 46 North Salford 1987 8
## 47 North Sheffield 1495 14
## 48 South Southampton 1369 68
## 49 South Southend 1257 50
## 50 North Southport 1587 75
## 51 North South Shields 1713 71
## 52 North Stockport 1557 13
## 53 North Stoke 1640 57
## 54 North Sunderland 1709 71
## 55 South Swansea 1625 13
## 56 North Wallasey 1625 20
## 57 South Walsall 1527 60
## 58 South West Bromwich 1627 53
## 59 South West Ham 1486 122
## 60 South Wolverhampton 1485 81
## 61 North York 1378 71
# Scatter plot of mortality against water hardness, coloured by location,
# followed by the first ten rows of the data.
mort <- data.frame(water)
ggplot() +
  geom_point(
    data = mort,
    mapping = aes(x = hardness, y = mortality, colour = location)
  )

head(mort, n = 10)
## location town mortality hardness
## 1 South Bath 1247 105
## 2 North Birkenhead 1668 17
## 3 South Birmingham 1466 5
## 4 North Blackburn 1800 14
## 5 North Blackpool 1609 18
## 6 North Bolton 1558 10
## 7 North Bootle 1807 15
## 8 South Bournemouth 1299 78
## 9 North Bradford 1637 10
## 10 South Brighton 1359 84
# Covariance between water hardness and mortality.
cov(mort$hardness, mort$mortality)
## [1] -4681.544
# Correlation coefficient between the same two variables.
cor(mort$hardness, mort$mortality)
## [1] -0.6548486

# Squared correlation coefficient.
cor(mort$hardness, mort$mortality)^2
## [1] 0.4288267
# Points on the upper half of the unit circle: a perfect but non-linear
# relationship between X and Y.
X <- seq(from = -1, to = 1, length.out = 100)
Y <- sqrt(1 - X^2)
xy <- data.frame(X, Y)
# Scatter plot of the (X, Y) pairs stored in `xy`.
ggplot(data = xy, mapping = aes(x = X, y = Y)) +
  geom_point(size = 3, color = "darkorange")
# Correlation coefficient of X and Y.
cor(X, Y)
## [1] -3.162392e-16
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:mosaic':

##
## do
## The following object is masked from 'package:ggplot2':

##
## last_plot
## The following object is masked from 'package:stats':

##
## filter
## The following object is masked from 'package:graphics':

##
## layout
#### Me falta la base de datos: record_100_m <-read_excel("record-100-m.xlsx")

###dt=data.frame(x=record_100_m[,1], y=record_100_m[,2])
###names(dt)<-c("año","record")
##p<- ggplot(data=dt, aes(x=año,y=record),color="red",size=3)+ geom_point(color="red",size=3)+ geom_smooth(method = "lm",se=FALSE)
##ggplotly(p)
Probabilidad
library(pander)
##x=sample(1:2,1000, replace = T) # Error in sample.int(length(x), size, replace, prob) : invalid 'replace' argument
##pander(table(x))
##x=sample(1:6,1000, replace=T)
##pander(table(x))
library(dygraphs)
# Monthly series `mdeaths` as printed by R.
print(mdeaths)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 2134 1863 1877 1877 1492 1249 1280 1131 1209 1492 1621 1846
## 1975 2103 2137 2153 1833 1403 1288 1186 1133 1053 1347 1545 2066
## 1976 2020 2750 2283 1479 1189 1160 1113 970 999 1208 1467 2059
## 1977 2240 1634 1722 1801 1246 1162 1087 1013 959 1179 1229 1655
## 1978 2019 2284 1942 1423 1340 1187 1098 1004 970 1140 1110 1812
## 1979 2263 1820 1846 1531 1215 1075 1056 975 940 1081 1294 1341
# Monthly series `fdeaths` as printed by R.
print(fdeaths)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1974 901 689 827 677 522 406 441 393 387 582 578 666
## 1975 830 752 785 664 467 438 421 412 343 440 531 771
## 1976 767 1141 896 532 447 420 376 330 357 445 546 764
## 1977 862 660 663 643 502 392 411 348 387 385 411 638
## 1978 796 853 737 546 530 446 431 362 387 430 425 679
## 1979 821 785 727 612 478 429 405 379 393 411 487 574
# Bind both series column-wise into one two-column object.
lungDeaths <- cbind(mdeaths, fdeaths)
#dygraph(lungDeaths) #### Nota: la función del paquete dygraphs se llama dygraph(); dygraphs(lungDeaths) daba el error: could not find function "dygraphs"
# Por ejemplo, ahora vamos a “simular” el lanzamiento de 1000 monedas (el número 1 es cara, 2 es
# cruz), y los resultados los visualizamos en una tabla.
library(pander)
#x=sample(1:2,1000, replace=T) ## Error in sample.int(length(x), size, replace, prob) : invalid 'replace' argument
#pander(table(x))
#barplot(table(x))
# Now do the same, simulating 1000 rolls of a six-sided die.
# The two statements are on separate lines (they were fused on one line, which
# does not parse). TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned; presumably that is what triggered the
# "invalid 'replace' argument" error noted for the coin simulation - confirm.
x <- sample(1:6, 1000, replace = TRUE)
pander(table(x))
# Equivalent version that keeps the frequency table in a variable:
#x=sample(1:6,1000, replace=TRUE)
#y=table(x)
#pander(y)
# Upper-tail probability P(X > 3) for X ~ Binomial(size = 10, prob = 0.5).
1 - pbinom(q = 3, size = 10, prob = 0.5)
## [1] 0.828125

Estadística Básica: Contenido

Uploaded by

Document Information

Original Description:

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Estadística Básica: Contenido

Uploaded by

Copyright:

Available Formats

Estadística Básica

1st 2nd 3rd Sum

## Loading required package: dplyr

## The following objects are masked from 'package:base':

## Loading required package: ggformula

## Loading required package: ggplot2

## Loading required package: ggstance

## The following objects are masked from 'package:ggplot2':

## Loading required package: mosaicData

## Loading required package: Matrix

## Registered S3 method overwritten by 'mosaic':

## The following object is masked from 'package:Matrix':

## The following object is masked from 'package:ggplot2':

## The following objects are masked from 'package:stats':

## The following objects are masked from 'package:base':

table(cut(Titanic$edad, breaks = 6))

## The following object is masked from 'package:dplyr':

X=na.omit(Titanic$edad) #na.omit es para no considerar valores en blanco

data("water", package = "HSAUR")

## location town mortality hardness

data("water", package = "HSAUR")

data("water", package = "HSAUR")

## The following object is masked from 'package:mosaic':

## The following object is masked from 'package:ggplot2':

## The following object is masked from 'package:stats':

## The following object is masked from 'package:graphics':

#### no me falta la base record_100_m <-read_excel("record-100-m.xlsx")

# Por ejemplo, ahora vamos a

#x=sample(1:2,1000, replace=T) ## Error in sample.int(length(x), size,

# Ahora hacemos lo mismo simulando el lanzamiento de un dado.

You might also like