Professional Documents
Culture Documents
Análisis Exploratorio de Datos Con R
Análisis Exploratorio de Datos Con R
22 de abril de 2020
library(tidyverse)
library(dplyr)
library(ggplot2)
library(funModeling)
library(corrr)
library(Hmisc)
library(randomForest)
1
age resting_blood_pressure serum_cholestoral
200
150
100
50
0
30 40 50 60 70 90 120 150 180 210
100 200 300 400 500 600
100
50
0
100 150 200 0.00 0.25 0.50 0.75 1.00 0 2 4 6
Análisis cuantitativo
summary(heart_disease)
2
## Max. :1.0000 Max. :6.20 Max. :3.000 Max. :3.0000
## NA's :4
## heart_disease_severity exter_angina has_heart_disease
## Min. :0.0000 0:204 no :164
## 1st Qu.:0.0000 1: 99 yes:139
## Median :0.0000
## Mean :0.9373
## 3rd Qu.:2.0000
## Max. :4.0000
##
describe(heart_disease) # Análisis numérico y categórico (cuantitativo)
## heart_disease
##
## 16 Variables 303 Observations
## --------------------------------------------------------------------------------
## age
## n missing distinct Info Mean Gmd .05 .10
## 303 0 41 0.999 54.44 10.3 40 42
## .25 .50 .75 .90 .95
## 48 56 61 66 68
##
## lowest : 29 34 35 37 38, highest: 70 71 74 76 77
## --------------------------------------------------------------------------------
## gender
## n missing distinct
## 303 0 2
##
## Value female male
## Frequency 97 206
## Proportion 0.32 0.68
## --------------------------------------------------------------------------------
## chest_pain
## n missing distinct
## 303 0 4
##
## Value 1 2 3 4
## Frequency 23 50 86 144
## Proportion 0.076 0.165 0.284 0.475
## --------------------------------------------------------------------------------
## resting_blood_pressure
## n missing distinct Info Mean Gmd .05 .10
## 303 0 50 0.995 131.7 19.41 108 110
## .25 .50 .75 .90 .95
## 120 130 140 152 160
##
## lowest : 94 100 101 102 104, highest: 174 178 180 192 200
## --------------------------------------------------------------------------------
## serum_cholestoral
## n missing distinct Info Mean Gmd .05 .10
## 303 0 152 1 246.7 55.91 175.1 188.8
## .25 .50 .75 .90 .95
## 211.0 241.0 275.0 308.8 326.9
##
3
## lowest : 126 131 141 149 157, highest: 394 407 409 417 564
## --------------------------------------------------------------------------------
## fasting_blood_sugar
## n missing distinct
## 303 0 2
##
## Value 0 1
## Frequency 258 45
## Proportion 0.851 0.149
## --------------------------------------------------------------------------------
## resting_electro
## n missing distinct
## 303 0 3
##
## Value 0 1 2
## Frequency 151 4 148
## Proportion 0.498 0.013 0.488
## --------------------------------------------------------------------------------
## max_heart_rate
## n missing distinct Info Mean Gmd .05 .10
## 303 0 91 1 149.6 25.73 108.1 116.0
## .25 .50 .75 .90 .95
## 133.5 153.0 166.0 176.6 181.9
##
## lowest : 71 88 90 95 96, highest: 190 192 194 195 202
## --------------------------------------------------------------------------------
## exer_angina
## n missing distinct Info Sum Mean Gmd
## 303 0 2 0.66 99 0.3267 0.4414
##
## --------------------------------------------------------------------------------
## oldpeak
## n missing distinct Info Mean Gmd .05 .10
## 303 0 40 0.964 1.04 1.225 0.0 0.0
## .25 .50 .75 .90 .95
## 0.0 0.8 1.6 2.8 3.4
##
## lowest : 0.0 0.1 0.2 0.3 0.4, highest: 4.0 4.2 4.4 5.6 6.2
## --------------------------------------------------------------------------------
## slope
## n missing distinct Info Mean Gmd
## 303 0 3 0.798 1.601 0.6291
##
## Value 1 2 3
## Frequency 142 140 21
## Proportion 0.469 0.462 0.069
## --------------------------------------------------------------------------------
## num_vessels_flour
## n missing distinct Info Mean Gmd
## 299 4 4 0.783 0.6722 0.9249
##
## Value 0 1 2 3
## Frequency 176 65 38 20
## Proportion 0.589 0.217 0.127 0.067
4
## --------------------------------------------------------------------------------
## thal
## n missing distinct
## 301 2 3
##
## Value 3 6 7
## Frequency 166 18 117
## Proportion 0.551 0.060 0.389
## --------------------------------------------------------------------------------
## heart_disease_severity
## n missing distinct Info Mean Gmd
## 303 0 5 0.832 0.9373 1.25
##
## lowest : 0 1 2 3 4, highest: 0 1 2 3 4
##
## Value 0 1 2 3 4
## Frequency 164 55 36 35 13
## Proportion 0.541 0.182 0.119 0.116 0.043
## --------------------------------------------------------------------------------
## exter_angina
## n missing distinct
## 303 0 2
##
## Value 0 1
## Frequency 204 99
## Proportion 0.673 0.327
## --------------------------------------------------------------------------------
## has_heart_disease
## n missing distinct
## 303 0 2
##
## Value no yes
## Frequency 164 139
## Proportion 0.541 0.459
## --------------------------------------------------------------------------------
profiling_num(heart_disease) # Análisis para variables numéricas (cuantitativas)
5
## 7 1.0 2.0 2.0 3.0 3.00 0.5057957 2.363050 1.0 [1, 3]
## 8 0.0 0.0 1.0 3.0 3.00 1.1833771 3.234941 1.0 [0, 3]
## 9 0.0 0.0 2.0 3.0 4.00 1.0532483 2.843788 2.0 [0, 4]
## range_80
## 1 [42, 66]
## 2 [110, 152]
## 3 [188.8, 308.8]
## 4 [116, 176.6]
## 5 [0, 1]
## 6 [0, 2.8]
## 7 [1, 2]
## 8 [0, 2]
## 9 [0, 3]
female 97 (32.01%)
Frequency / (Percentage %)
6
4 144 (47.52%)
3 86 (28.38%)
chest_pain
2 50 (16.5%)
1 23 (7.59%)
Frequency / (Percentage %)
7
0 258 (85.15%)
fasting_blood_sugar
1 45 (14.85%)
Frequency / (Percentage %)
8
0 151 (49.83%)
resting_electro
2 148 (48.84%)
1 4 (1.32%)
Frequency / (Percentage %)
9
NA 2 (0.66%)
3 166 (54.79%)
thal
7 117 (38.61%)
6 18 (5.94%)
Frequency / (Percentage %)
10
0 204 (67.33%)
exter_angina
1 99 (32.67%)
Frequency / (Percentage %)
11
no 164 (54.13%)
has_heart_disease
Frequency / (Percentage %)
12
4 144 (47.52%)
3 86 (28.38%)
var
2 50 (16.5%)
1 23 (7.59%)
Frequency / (Percentage %)
13