Professional Documents
Culture Documents
08/04/2022
## -- Conflicts ------------------------------------------
tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
# Load the training data from train.csv in the working directory and
# preview its first six rows.
Bank_info <- read.csv(file = "train.csv")
head(x = Bank_info, n = 6L)
We will remove the first column, which isn’t relevant for class prediction.
# Drop the first column by position; every other column is kept.
# Uses `<-`, the conventional R assignment operator, instead of `=`.
Bank_info <- Bank_info[-1]
##
## 0 1
## 179902 20098
##
## 0 1
## 89.951 10.049
# Bar chart of how many rows fall into each value of `target`.
# Every `+` has to end the line it continues: a line that starts with `+`
# is parsed as a new expression rather than as a continuation, so the
# geom_bar() layer would not be added to the plot.
ggplot(Bank_info, aes(target)) +
  theme_bw() +
  geom_bar(stat = "count", fill = "olivedrab")
Next, check the Santander data table for missing values.
# Count the missing values in every column of Bank_info and keep only the
# columns that have at least one, sorted from most to fewest missing.
# Result: an ungrouped tibble with one row per affected column and the
# columns `key` (column name) and `num.missing` (NA count); it has zero rows
# when nothing is missing.
#
# NAs are counted per column first and only that one-row summary is reshaped,
# so the full data set is never stacked into long format. pivot_longer()
# replaces the superseded gather(), and no `T` shorthand is needed (`T` can
# be reassigned, unlike TRUE).
missing.values <- Bank_info %>%
  summarise(across(everything(), ~ sum(is.na(.x)))) %>%
  pivot_longer(everything(), names_to = "key", values_to = "num.missing") %>%
  filter(num.missing > 0) %>%
  arrange(desc(num.missing))
## `summarise()` has grouped output by 'key'. You can override using the
`.groups`
## argument.
# Bar chart of the number of missing values per variable, read from
# missing.values (`key` on the x axis, `num.missing` as the bar height).
# The title is written on a single line so that no line break ends up inside
# the string, and geom_col() is the direct equivalent of
# geom_bar(stat = "identity").
missing.values %>%
  ggplot() +
  geom_col(aes(x = key, y = num.missing)) +
  labs(
    x = "variable",
    y = "number of missing values",
    title = "Number of missing values"
  ) +
  # Rotate the x-axis labels by 45 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
There are no missing values in the Santander data.
Divide the data into training and test sets, using 80% of the total sample for training.
# Number of rows corresponding to 80% of Bank_info, rounded down.
smp_size <- floor(nrow(Bank_info) * 0.8)
# Print the summary of the fitted model Reg_model (created outside this
# excerpt; the output below reports it as a binomial glm of target on all
# other columns of `train`).
summary(Reg_model)
##
## Call:
## glm(formula = target ~ ., family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6668 -0.4006 -0.2331 -0.1243 3.8038
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 55.6396194 6.9241637 8.036 9.31e-16 ***
## var_0 0.0544089 0.0031346 17.358 < 2e-16 ***
## var_1 0.0414312 0.0023813 17.399 < 2e-16 ***
## var_2 0.0672844 0.0035981 18.700 < 2e-16 ***
## var_3 0.0174202 0.0047392 3.676 0.000237 ***
## var_4 0.0201422 0.0059424 3.390 0.000700 ***
## var_5 0.0144030 0.0012275 11.734 < 2e-16 ***
## var_6 0.2566053 0.0110511 23.220 < 2e-16 ***
## var_7 -0.0006790 0.0028274 -0.240 0.810221
## var_8 0.0187286 0.0029077 6.441 1.19e-10 ***
## var_9 -0.1103671 0.0077756 -14.194 < 2e-16 ***
## var_10 -0.0010182 0.0017614 -0.578 0.563230
## var_11 0.0125266 0.0016144 7.759 8.53e-15 ***
## var_12 -1.1363937 0.0500227 -22.718 < 2e-16 ***
## var_13 -0.0399844 0.0020643 -19.370 < 2e-16 ***
## var_14 -0.0049188 0.0043146 -1.140 0.254267
## var_15 0.1307794 0.0234965 5.566 2.61e-08 ***
## var_16 0.0086394 0.0037701 2.292 0.021931 *
## var_17 0.0002968 0.0014455 0.205 0.837302
## var_18 0.0176204 0.0012266 14.365 < 2e-16 ***
## var_19 0.0043403 0.0012067 3.597 0.000322 ***
## var_20 -0.0113755 0.0016468 -6.907 4.93e-12 ***
## var_21 -0.0237382 0.0011829 -20.068 < 2e-16 ***
## var_22 0.0710434 0.0033581 21.156 < 2e-16 ***
## var_23 -0.1760775 0.0183844 -9.578 < 2e-16 ***
## var_24 0.0269105 0.0025505 10.551 < 2e-16 ***
## var_25 0.1256277 0.0337436 3.723 0.000197 ***
## var_26 0.0334528 0.0016042 20.853 < 2e-16 ***
## var_27 -0.0020373 0.0063645 -0.320 0.748894
## var_28 -0.1096866 0.0123549 -8.878 < 2e-16 ***
## var_29 0.0090722 0.0036975 2.454 0.014143 *
## var_30 0.0008446 0.0012222 0.691 0.489541
## var_31 -0.0400091 0.0044972 -8.896 < 2e-16 ***
## var_32 0.0400870 0.0037296 10.748 < 2e-16 ***
## var_33 -0.0344267 0.0022472 -15.320 < 2e-16 ***
## var_34 -0.3373329 0.0178473 -18.901 < 2e-16 ***
## var_35 0.0236241 0.0018627 12.683 < 2e-16 ***
## var_36 -0.0413702 0.0031007 -13.342 < 2e-16 ***
## var_37 0.0120022 0.0042895 2.798 0.005142 **
## var_38 -0.0017004 0.0022682 -0.750 0.453454
## var_39 -0.0017704 0.0023832 -0.743 0.457544
## var_40 0.0204034 0.0011593 17.599 < 2e-16 ***
## var_41 -0.0008667 0.0016350 -0.530 0.596040
## var_42 -0.0410003 0.0139072 -2.948 0.003197 **
## var_43 -0.2854413 0.0312174 -9.144 < 2e-16 ***
## var_44 -0.0281428 0.0016023 -17.564 < 2e-16 ***
## var_45 -0.0031774 0.0004537 -7.003 2.51e-12 ***
## var_46 0.0051688 0.0033737 1.532 0.125508
## var_47 0.0033445 0.0009169 3.648 0.000265 ***
## var_48 0.0090753 0.0008511 10.663 < 2e-16 ***
## var_49 0.0124792 0.0012285 10.158 < 2e-16 ***
## var_50 -0.0646451 0.0139609 -4.630 3.65e-06 ***
## var_51 0.0089908 0.0011738 7.660 1.87e-14 ***
## var_52 0.0184593 0.0019385 9.522 < 2e-16 ***
## var_53 0.2809279 0.0125567 22.373 < 2e-16 ***
## var_54 -0.0069683 0.0011498 -6.061 1.36e-09 ***
## var_55 0.0106730 0.0016948 6.298 3.02e-10 ***
## var_56 -0.0310707 0.0027102 -11.464 < 2e-16 ***
## var_57 -0.0616999 0.0121641 -5.072 3.93e-07 ***
## var_58 -0.0191642 0.0022440 -8.540 < 2e-16 ***
## var_59 -0.0431573 0.0113096 -3.816 0.000136 ***
## var_60 0.0061646 0.0022848 2.698 0.006974 **
## var_61 0.0022670 0.0008332 2.721 0.006514 **
## var_62 0.0278926 0.0047545 5.867 4.45e-09 ***
## var_63 -0.0158502 0.0030982 -5.116 3.12e-07 ***
## var_64 -0.0335364 0.0064921 -5.166 2.39e-07 ***
## var_65 0.0097523 0.0025688 3.796 0.000147 ***
## var_66 0.0639324 0.0085923 7.441 1.00e-13 ***
## var_67 0.0203970 0.0013133 15.531 < 2e-16 ***
## var_68 -4.3399654 1.3446400 -3.228 0.001248 **
## var_69 0.0046745 0.0024336 1.921 0.054758 .
## var_70 0.0077135 0.0008112 9.509 < 2e-16 ***
## var_71 0.4107060 0.0362407 11.333 < 2e-16 ***
## var_72 -0.0119584 0.0024504 -4.880 1.06e-06 ***
## var_73 -0.0036995 0.0012980 -2.850 0.004369 **
## var_74 0.0043559 0.0006882 6.329 2.47e-10 ***
## var_75 -0.0192881 0.0015873 -12.152 < 2e-16 ***
## var_76 -0.0268274 0.0012058 -22.248 < 2e-16 ***
## var_77 -0.0148739 0.0025430 -5.849 4.94e-09 ***
## var_78 0.0791850 0.0048571 16.303 < 2e-16 ***
## var_79 0.0159159 0.0074012 2.150 0.031519 *
## var_80 -0.0246569 0.0012745 -19.347 < 2e-16 ***
## var_81 -0.1112098 0.0041056 -27.087 < 2e-16 ***
## var_82 0.0079618 0.0011418 6.973 3.10e-12 ***
## var_83 -0.0083210 0.0011621 -7.160 8.06e-13 ***
## var_84 0.0057926 0.0015572 3.720 0.000199 ***
## var_85 -0.0159435 0.0024810 -6.426 1.31e-10 ***
## var_86 -0.0171575 0.0012313 -13.934 < 2e-16 ***
## var_87 -0.0199997 0.0017107 -11.691 < 2e-16 ***
## var_88 -0.0254502 0.0038808 -6.558 5.45e-11 ***
## var_89 0.0361051 0.0026937 13.403 < 2e-16 ***
## var_90 0.0069268 0.0007368 9.402 < 2e-16 ***
## var_91 0.8071189 0.0631325 12.785 < 2e-16 ***
## var_92 -0.0341521 0.0023115 -14.775 < 2e-16 ***
## var_93 -0.1914629 0.0175659 -10.900 < 2e-16 ***
## var_94 0.0576689 0.0034736 16.602 < 2e-16 ***
## var_95 0.2022218 0.0154200 13.114 < 2e-16 ***
## var_96 0.0013377 0.0011342 1.179 0.238240
## var_97 0.0035790 0.0007637 4.686 2.78e-06 ***
## var_98 -0.0199367 0.0135098 -1.476 0.140018
## var_99 0.1019837 0.0051428 19.830 < 2e-16 ***
## var_100 0.0007374 0.0010575 0.697 0.485612
## var_101 -0.0069090 0.0019549 -3.534 0.000409 ***
## var_102 -0.0077584 0.0011166 -6.948 3.70e-12 ***
## var_103 -0.0361765 0.0521830 -0.693 0.488145
## var_104 -0.0461024 0.0049231 -9.364 < 2e-16 ***
## var_105 0.0973353 0.0112546 8.649 < 2e-16 ***
## var_106 0.0602575 0.0050992 11.817 < 2e-16 ***
## var_107 -0.0171556 0.0012767 -13.438 < 2e-16 ***
## var_108 -0.8249578 0.0559573 -14.743 < 2e-16 ***
## var_109 -0.0354546 0.0022118 -16.030 < 2e-16 ***
## var_110 0.0515278 0.0024959 20.645 < 2e-16 ***
## var_111 0.0850720 0.0088863 9.573 < 2e-16 ***
## var_112 0.0506396 0.0061150 8.281 < 2e-16 ***
## var_113 -0.0129788 0.0021677 -5.987 2.13e-09 ***
## var_114 -0.0862297 0.0097758 -8.821 < 2e-16 ***
## var_115 -0.0617007 0.0036691 -16.816 < 2e-16 ***
## var_116 -0.0488560 0.0058583 -8.340 < 2e-16 ***
## var_117 0.0007427 0.0007263 1.023 0.306481
## var_118 0.0162109 0.0011006 14.730 < 2e-16 ***
## var_119 0.0223076 0.0022940 9.724 < 2e-16 ***
## var_120 -0.0028523 0.0007979 -3.575 0.000351 ***
## var_121 -0.0725377 0.0056693 -12.795 < 2e-16 ***
## var_122 -0.0271909 0.0018665 -14.568 < 2e-16 ***
## var_123 -0.0203947 0.0015570 -13.099 < 2e-16 ***
## var_124 0.0062165 0.0035385 1.757 0.078949 .
## var_125 0.2743795 0.0302250 9.078 < 2e-16 ***
## var_126 0.0128150 0.0124638 1.028 0.303866
## var_127 -0.0416152 0.0030709 -13.552 < 2e-16 ***
## var_128 0.0284254 0.0029850 9.523 < 2e-16 ***
## var_129 -0.0039046 0.0023435 -1.666 0.095681 .
## var_130 0.1272180 0.0115507 11.014 < 2e-16 ***
## var_131 -0.2146884 0.0210969 -10.176 < 2e-16 ***
## var_132 -0.0577931 0.0066367 -8.708 < 2e-16 ***
## var_133 0.4547943 0.0254253 17.887 < 2e-16 ***
## var_134 0.0088710 0.0015624 5.678 1.36e-08 ***
## var_135 0.0107386 0.0012656 8.485 < 2e-16 ***
## var_136 -0.0014437 0.0009327 -1.548 0.121670
## var_137 0.0111844 0.0010884 10.276 < 2e-16 ***
## var_138 0.0121096 0.0021280 5.691 1.27e-08 ***
## var_139 -0.0304237 0.0012432 -24.471 < 2e-16 ***
## var_140 0.0123533 0.0019751 6.255 3.99e-10 ***
## var_141 -0.0150793 0.0014361 -10.500 < 2e-16 ***
## var_142 -0.0117629 0.0016959 -6.936 4.03e-12 ***
## var_143 -0.0142051 0.0032918 -4.315 1.59e-05 ***
## var_144 0.0709148 0.0104632 6.778 1.22e-11 ***
## var_145 0.0249316 0.0024738 10.078 < 2e-16 ***
## var_146 -0.0811948 0.0037765 -21.500 < 2e-16 ***
## var_147 0.0173344 0.0013002 13.332 < 2e-16 ***
## var_148 -0.8838599 0.0481230 -18.367 < 2e-16 ***
## var_149 -0.0149060 0.0009302 -16.025 < 2e-16 ***
## var_150 -0.0371611 0.0039238 -9.471 < 2e-16 ***
## var_151 0.0242775 0.0024228 10.021 < 2e-16 ***
## var_152 -0.0116044 0.0032153 -3.609 0.000307 ***
## var_153 -0.0053067 0.0048030 -1.105 0.269222
## var_154 -0.0287799 0.0019364 -14.863 < 2e-16 ***
## var_155 0.0214813 0.0016654 12.899 < 2e-16 ***
## var_156 -0.0687469 0.0101434 -6.777 1.22e-11 ***
## var_157 0.0190542 0.0017188 11.086 < 2e-16 ***
## var_158 -0.0015962 0.0012303 -1.297 0.194483
## var_159 0.0137250 0.0023444 5.854 4.79e-09 ***
## var_160 -0.0009777 0.0008900 -1.099 0.271975
## var_161 0.0845351 0.0443642 1.905 0.056718 .
## var_162 0.0710939 0.0067937 10.465 < 2e-16 ***
## var_163 0.0177823 0.0018249 9.744 < 2e-16 ***
## var_164 0.0253814 0.0017755 14.295 < 2e-16 ***
## var_165 -0.0340133 0.0019199 -17.716 < 2e-16 ***
## var_166 -0.4909968 0.0260157 -18.873 < 2e-16 ***
## var_167 0.0123663 0.0012371 9.997 < 2e-16 ***
## var_168 0.0147566 0.0030920 4.773 1.82e-06 ***
## var_169 -0.4238007 0.0262038 -16.173 < 2e-16 ***
## var_170 0.0369577 0.0021629 17.087 < 2e-16 ***
## var_171 0.0079766 0.0017961 4.441 8.95e-06 ***
## var_172 -0.0145277 0.0011109 -13.077 < 2e-16 ***
## var_173 0.0233963 0.0016251 14.396 < 2e-16 ***
## var_174 -0.0278788 0.0013427 -20.762 < 2e-16 ***
## var_175 0.0295419 0.0033190 8.901 < 2e-16 ***
## var_176 0.0040685 0.0012867 3.162 0.001567 **
## var_177 -0.0499421 0.0036915 -13.529 < 2e-16 ***
## var_178 -0.0058140 0.0011260 -5.163 2.43e-07 ***
## var_179 0.0579628 0.0033775 17.161 < 2e-16 ***
## var_180 0.0187958 0.0018314 10.263 < 2e-16 ***
## var_181 0.0351071 0.0070348 4.990 6.02e-07 ***
## var_182 -0.0038230 0.0010806 -3.538 0.000404 ***
## var_183 -0.0033740 0.0021606 -1.562 0.118384
## var_184 0.0182280 0.0010297 17.702 < 2e-16 ***
## var_185 0.0002055 0.0020540 0.100 0.920302
## var_186 -0.0319565 0.0030340 -10.533 < 2e-16 ***
## var_187 0.0046140 0.0008392 5.498 3.84e-08 ***
## var_188 -0.0289264 0.0024458 -11.827 < 2e-16 ***
## var_189 0.0198429 0.0099293 1.998 0.045671 *
## var_190 0.0404765 0.0021167 19.123 < 2e-16 ***
## var_191 0.0513358 0.0031557 16.268 < 2e-16 ***
## var_192 -0.0950868 0.0065820 -14.446 < 2e-16 ***
## var_193 -0.0128925 0.0024229 -5.321 1.03e-07 ***
## var_194 -0.0248415 0.0030861 -8.050 8.31e-16 ***
## var_195 0.0672201 0.0067075 10.022 < 2e-16 ***
## var_196 0.0136865 0.0017748 7.711 1.24e-14 ***
## var_197 -0.1294723 0.0104558 -12.383 < 2e-16 ***
## var_198 -0.0566818 0.0031650 -17.909 < 2e-16 ***
## var_199 0.0079010 0.0009334 8.464 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 104426 on 159999 degrees of freedom
## Residual deviance: 73988 on 159799 degrees of freedom
## AIC: 74390
##
## Number of Fisher Scoring iterations: 6
# Show the first five elements of predict_data.
# NOTE(review): the printed values look like predicted probabilities, while
# the table further down shows 0/1 labels, so a thresholding step presumably
# runs in between -- confirm against the full script.
predict_data[1:5]
## 1 9 10 17 19
## 0.027921411 0.031712000 0.046992581 0.003316523 0.088037414
# Cross-tabulate predict_data (rows) against the actual test$target values
# (columns).
table(predict_data,test$target)
##
## predict_data 0 1
## 0 35505 2960
## 1 488 1047
# Accuracy: the share of test rows where predict_data equals test$target.
mean(predict_data == test$target)
## [1] 0.9138
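The model correctly predicted the test data 91% of the time (36,552 of 40,000 observations). Because about 90% of the test observations belong to class 0, this is only slightly better than always predicting 0: the model identified just 1,047 of the 4,007 actual class-1 cases (about 26%).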