You are on page 1 of 5
Homework 5
Phuong Phan
9/26/2021

setwd("C:/Users/phuon/Desktop/GA TECH/Classes/hw5-Fall 21")
data <- read.table("uscrime.txt", stringsAsFactors = FALSE, header = TRUE)
model <- lm(Crime ~ ., data = data)
summary(model)
##
## Call:
## lm(formula = Crime ~ ., data = data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -395.74  -98.09   -6.69  112.99  512.67
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.984e+03  1.628e+03  -3.675 0.000893 ***
## M            8.783e+01  4.171e+01   2.106 0.043443 *
## So          -3.803e+00  1.488e+02  -0.026 0.979765
## Ed           1.883e+02  6.209e+01   3.033 0.004861 **
## Po1          1.928e+02  1.061e+02   1.817 0.078892 .
## Po2         -1.094e+02  1.175e+02  -0.931 0.358830
## LF          -6.638e+02  1.470e+03  -0.452 0.654654
## M.F          1.741e+01  2.035e+01   0.855 0.398995
## Pop         -7.330e-01  1.290e+00  -0.568 0.573845
## NW           4.204e+00  6.481e+00   0.649 0.521279
## U1          -5.827e+03  4.210e+03  -1.384 0.176238
## U2           1.678e+02  8.234e+01   2.038 0.050161 .
## Wealth       9.617e-02  1.037e-01   0.928 0.360754
## Ineq         7.067e+01  2.272e+01   3.111 0.003983 **
## Prob        -4.855e+03  2.272e+03  -2.137 0.040627 *
## Time        -3.479e+00  7.165e+00  -0.486 0.630708
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 209.1 on 31 degrees of freedom
## Multiple R-squared:  0.8031, Adjusted R-squared:  0.7078
## F-statistic: 8.429 on 15 and 31 DF,  p-value: 3.539e-07

# Create the test data point
data_test <- data.frame(M = 14.0, So = 0, Ed = 10.0, Po1 = 12.0,
                        Po2 = 15.5, LF = 0.640, M.F = 94.0, Pop = 150,
                        NW = 1.1, U1 = 0.120, U2 = 3.6, Wealth = 3200,
                        Ineq = 20.1, Prob = 0.04, Time = 39.0)
data_predict <- predict(model, data_test)
# Range of the original crime data
range(data$Crime)
## [1]  342 1993
# Predict the new data point based on the linear model
data_predict
##        1
## 155.4349
# I noticed the estimated number is out of the data range
# The data includes 47 observations and 15 factors.
# However, not all factors necessarily contribute to the model
# I will remove the factors whose p-values are much higher than 0.05,
# including So, Po2, LF, M.F, Pop, NW, U1, Wealth, Time
# Create the second model with factors M, Ed, Po1, U2, Ineq and Prob
model_2 <- lm(formula = Crime ~ M + Ed + Po1 + U2 + Ineq + Prob, data = data)
summary(model_2)
##
## Call:
## lm(formula = Crime ~ M + Ed + Po1 + U2 + Ineq + Prob, data = data)
##
## Residuals:
##     Min      1Q  Median      3Q     Max
## -470.68  -78.41  -19.68  133.12  556.23
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5040.50     899.84  -5.602 1.72e-06 ***
## M             105.02      33.30   3.154  0.00305 **
## Ed            196.47      44.75   4.390 8.07e-05 ***
## Po1           115.02      13.75   8.363 2.56e-10 ***
## U2             89.37      40.91   2.185  0.03483 *
## Ineq           67.65      13.94   4.855 1.88e-05 ***
## Prob        -3801.84    1528.10  -2.488  0.01711 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 200.7 on 40 degrees of freedom
## Multiple R-squared:  0.7659, Adjusted R-squared:  0.7307
## F-statistic: 21.81 on 6 and 40 DF,  p-value: 3.418e-11

predict(model_2, data_test)
##        1
## 1304.245
# New State Crime Prediction
# I do a 5-fold cross validation with
library("DAAG")
c <- cv.lm(data, model_2, m = 5)
## Loading required package: lattice
## Analysis of Variance Table
##
## Response: Crime
##           Df  Sum Sq Mean Sq F value  Pr(>F)
## M          1   55084   55084    1.37 0.24914
## Ed         1  725967  725967   18.02 0.00013 ***
## Po1        1 3173852 3173852   78.80 5.3e-11 ***
## U2         1  217386  217386    5.40 0.02534 *
## Ineq       1  848273  848273   21.06 4.3e-05 ***
## Prob       1  249308  249308    6.19 0.01711 *
## Residuals 40 1611057   40276
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in cv.lm(data, model_2, m = 5):
##
##  As there is >1 explanatory variable, cross-validation
##  predicted values for a fold are not a linear function
##  of corresponding overall predicted values.
##  Lines that are shown for the different folds are approximate

[Figure: scatter plot of Crime (y-axis) against "Predicted (fit to all data)" (x-axis, ticks at 500, 1000, 1500), titled "Small symbols show cross-validation predicted values", with a legend for Fold 1 through Fold 5.]

##
## fold 1
## Observations in test set: 9
##                  1   3    17  18   19   22   36    38     40
## Predicted   810.83 386 527.4 800 1221  728 1102 544.4 1140.8
## cvpred      785.36 345 492.2 701 1240  702 1127 544.7 1168.2
## Crime       791.00 578 539.0 929  750  439 1272 566.0 1151.0
## CV residual   5.64 233  46.8 228 -490 -263  145  21.3  -17.2
##
## Sum of squares = 439507    Mean square = 48834    n = 9
##
## fold 2
## Observations in test set: 10
##                  4     6  12    25     28  32     34  41   44   46
## Predicted   1897.2 730.3 673 579.1 1259.0 774  997.5 796 1178  748
## cvpred      1882.7 781.8 684 621.4 1238.3 788 1013.9 778 1159  808
## Crime       1969.0 682.0 849 523.0 1216.0 754  923.0 880 1030  508
## CV residual   86.3 -99.8 165 -98.4  -22.3 -34  -90.9 102 -129 -300
##
## Sum of squares = 181038    Mean square = 18104    n = 10
##
## fold 3
## Observations in test set: 10
##                  5    8   9   11    15   23   37  39   43   47
## Predicted   1269.8 1354 719 1118 828.3  938  992 787 1017  976
## cvpred      1266.8 1243 724  946 826.3  754 1077 717 1080 1038
## Crime       1234.0 1555 856 1674 798.0 1216  831 826  823  849
## CV residual  -32.8  312 132  728 -28.3  462 -246 109 -257 -189
##
## Sum of squares = 1033612    Mean square = 103361    n = 10
##
## fold 4
## Observations in test set: 9
##               7   13    14     20    24    27    30   35   45
## Predicted   733  739 713.6 1203.0 919.4 312.2 668.0  808  622
## cvpred      760  770 730.1 1247.9 953.7 297.2 638.9  851  691
## Crime       963  511 664.0 1225.0 968.0 342.0 696.0  653  455
## CV residual 203 -259 -66.1  -22.9  14.3  44.8  57.1 -198 -236
##
## Sum of squares = 213398    Mean square = 23711    n = 9
##
## fold 5
## Observations in test set: 9
##                2    10   16    21   26   29    31   33  42
## Predicted   1388 787.3 1004 783.3 1789 1495 440.4  874 369
## cvpred      1356 723.7 1047 819.7 1795 1664 456.6  858 261
## Crime       1635 705.0  946 742.0 1993 1043 373.0 1072 542
## CV residual  279 -18.7 -101 -77.7  198 -621 -83.6  214 281
##
## Sum of squares = 650990    Mean square = 72332    n = 9
##
## Overall (Sum over all 9 folds)
##    ms
## 53586

# Multiple R-squared: 0.766, Adjusted R-squared: 0.731
# With these R-squared numbers, the model quality may seem good

You might also like