Metrics
> # 2017
> trad17 <- read.csv("trad17.csv", stringsAsFactors = F)
> # 2018
> trad18 <- read.csv("trad18.csv", stringsAsFactors = F)
> # VISUALIZATIONS
> #install.packages("gridExtra")
> library(gridExtra)
> library(grid)
> pdf("Report.pdf")
> View(Adv18)
> # Fix an error in the Adv18 dataset: observation #534 has an error in PIE (-400 should be -4)
> Adv18$PIE[534] <- -4
> names(nba2017_1)
[1] "Year" "Rk.x" "Player" "Team" "Age"
[6] "GP" "W" "L" "Min" "Pts"
> names(nba2017_2)
[1] "Year" "Rk.x" "Player" "Team"
[53] "DEF_WS"
> names(nba2017_3)
[1] "Year" "Rk.x" "Player" "Team"
[65] "avg_speed_def"
> names(nba2017_4)
[1] "Year" "Rk.x" "Player" "Team"
> names(nba2018_1)
[1] "Year" "Rk.x" "Player" "Team" "Age"
> names(nba2018_2)
[1] "Year" "Rk.x" "Player" "Team"
[53] "DEF_WS"
> names(nba2018_3)
[1] "Year" "Rk.x" "Player" "Team"
[65] "avg_speed_def"
> names(nba2018_4)
[1] "Year" "Rk.x" "Player" "Team"
> names(nba2018_4)
[1] "Year" "Player" "Team" "Age"
> names(nba2017)
[1] "Year" "Player" "Team.x" "Age"
> nba2017$salary2017_18[277]<-2093040
> nba2017$salary2017_18[309]<-2422560
> # Function to remove rows that still have NAs in a specific column (here, the salary column)
> completeFun <- function(data, desiredCols) {
+ comp .... [TRUNCATED]
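The body of completeFun is truncated above. A minimal sketch of a function matching the comment (keep only rows that are complete in the desired columns), assuming base R's complete.cases():

completeFun <- function(data, desiredCols) {
  # Keep only rows with no NA in the requested column(s)
  completeVec <- complete.cases(data[, desiredCols])
  data[completeVec, ]
}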
> # Remove players with missing salary. Most of the remaining empty salary2017_18 values are due to the player not getting a deal.
> nba2017 <- co .... [TRUNCATED]
> str(nba2017)
'data.frame': 442 obs. of 73 variables:
$ Year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ...
$ Player : chr "Russell Westbrook" "James Harden" "Isaiah Thomas" "Anthony Davis" ...
$ Team.x : chr "OKC" "HOU" "BOS" "NOP" ...
$ Age : int 28 27 28 24 27 26 26 32 26 29 ...
$ GP.x : int 81 81 76 75 74 75 72 74 74 79 ...
$ W.x : int 46 54 51 31 47 38 30 51 54 65 ...
$ L.x : int 35 27 25 44 27 37 42 23 20 14 ...
$ Min.x : num 34.6 36.4 33.8 36.1 35.4 35.9 34.2 37.8 33.4 33.4 ...
$ Pts : num 31.6 29.1 28.9 28 27.3 27 27 26.4 25.5 25.3 ...
$ FGM : num 10.2 8.3 9 10.3 9.7 8.8 9 9.9 8.6 8.5 ...
$ FGA : num 24 18.9 19.4 20.3 20.9 19.8 19.9 18.2 17.7 18.3 ...
$ FG_per : num 42.5 44 46.3 50.5 46.7 44.4 45.2 54.8 48.5 46.8 ...
$ three_PM : num 2.5 3.2 3.2 0.5 0.4 2.9 1.8 1.7 2 4.1 ...
$ three_PA : num 7.2 9.3 8.5 1.8 1.7 7.7 5 4.6 5.2 10 ...
$ three_Per : num 34.3 34.7 37.9 29.9 26.6 37 36.1 36.3 38 41.1 ...
$ FTM : num 8.8 9.2 7.8 6.9 7.4 6.5 7.2 4.8 6.3 4.1 ...
$ FTA : num 10.4 10.9 8.5 8.6 8.7 7.3 9.3 7.2 7.2 4.6 ...
$ FT_per : num 84.5 84.7 90.9 80.2 84.2 89.5 77.2 67.4 88 89.8 ...
$ OREB : num 1.7 1.2 0.6 2.3 0.9 0.6 2.1 1.3 1.1 0.8 ...
$ DREB : num 9 7 2.1 9.5 4.3 4.3 8.9 7.3 4.7 3.7 ...
$ REB : num 10.7 8.1 2.7 11.8 5.2 4.9 11 8.6 5.8 4.5 ...
$ AST : num 10.4 11.2 5.9 2.1 3.9 5.9 4.6 8.7 3.5 6.6 ...
$ TOV : num 5.4 5.7 2.8 2.4 2.4 2.6 3.7 4.1 2.1 3 ...
$ STL : num 1.6 1.5 0.9 1.3 1.1 0.9 1.4 1.2 1.8 1.8 ...
$ BLK : num 0.4 0.5 0.2 2.2 0.2 0.3 1.3 0.6 0.7 0.2 ...
$ PF : num 2.3 2.7 2.2 2.2 1.8 2 3.9 1.8 1.6 2.3 ...
$ FP : num 60.6 55.8 41.5 53.3 40.7 42.6 51.4 51.3 43.3 43.6 ...
$ DD2 : int 62 64 5 49 5 11 46 42 9 9 ...
$ TD3 : int 42 22 0 0 0 0 2 13 0 0 ...
$ Plus_minus : num 3.1 5.2 3.6 0.7 2 1.1 -0.3 6.5 5.9 12.8 ...
$ Pts_Off_TO : num 4.7 4.4 3.8 3.5 3.5 3.5 3.6 4.1 4.3 4.4 ...
$ second_pts : num 3.6 2.2 1.9 3.8 1.9 1.6 3.1 2.5 2.2 2.2 ...
$ FBPS : num 6.7 4 4.5 2.2 2.5 2.5 1.3 5.5 3.5 6 ...
$ PITP : num 10.1 8.6 9.6 12.9 10.3 8.6 11.8 13.9 7.2 6.4 ...
$ OPP_PTs_off_TO : num 12.2 13.3 10.7 12 11.1 11.3 11.5 12.4 9.5 11.4 ...
$ OPP_sec_pts : num 9 10.1 10.6 8.8 10.1 9.6 8 10.1 8.6 9.5 ...
$ OPP_FBPS : num 8.1 11.1 8.9 9.6 8.9 9.9 10.8 10 8.8 10.1 ...
$ OPP_PITP : num 34.1 37.6 31.5 31.9 31.7 32.6 27.9 35.2 28.4 31.1 ...
$ BLKA : num 1.1 1.1 1.5 0.7 1.2 1.3 1.5 0.8 0.4 0.7 ...
$ PFD : num 6.7 6.2 6.3 6.9 5.5 5.6 8.2 5.9 5.4 3.5 ...
$ Def.Rtg : num 105 107 109 102 107 ...
$ DREB_per : num 27.9 21.2 7 26.9 13.4 13.4 30.5 20.9 16 11.3 ...
$ Per_DREB : num 35.2 27.6 9.5 34.9 17.8 17.6 39.6 27.3 20.6 15 ...
$ STL_per : num 27 24.3 18.1 22.2 18.9 17.4 24.5 23.1 32.6 24.1 ...
$ Per_BLK : num 10.3 15.4 5.8 49.9 5.2 7.2 41.9 19.3 18.2 4.5 ...
$ OPP_PTS_OFF_TOV : num 12.2 13.3 10.7 12 11.1 11.3 11.5 12.4 9.5 11.4 ...
$ OPP_PTS_2nd_Chance: num 9 10.1 10.6 8.8 10.1 9.6 8 10.1 8.6 9.5 ...
$ OPP_PTS_FB : num 8.1 11.1 8.9 9.6 8.9 9.9 10.8 10 8.8 10.1 ...
$ OPP_PTS_Paint : num 34.1 37.6 31.5 31.9 31.7 32.6 27.9 35.2 28.4 31.1 ...
$ DEF_WS : num 0.043 0.033 0.025 0.054 0.034 0.026 0.023 0.035 0.043 0.058 ...
$ Dist_FT : num 12533 11875 12281 12574 12655 ...
$ Dist_Miles : num 2.37 2.25 2.33 2.38 2.4 2.49 2.05 2.39 2.46 2.48 ...
$ Dist_Miles_off : num 1.28 1.18 1.28 1.24 1.34 1.41 1.07 1.26 1.38 1.32 ...
$ Dist_Miles_def : num 1.09 1.07 1.05 1.15 1.06 1.08 0.97 1.13 1.08 1.16 ...
$ avg_speed : num 4.13 3.72 4.14 3.96 4.06 4.17 3.6 3.8 4.43 4.46 ...
$ avg_speed_off : num 4.72 4.07 4.6 4.33 4.42 4.66 3.67 3.99 4.86 5.13 ...
$ avg_speed_def : num 3.61 3.4 3.68 3.64 3.69 3.67 3.52 3.62 3.97 3.88 ...
$ OffRTG : num 108 114 114 104 110 ...
$ DefRTG : num 105 107 109 102 107 ...
$ NetRTG : num 3.3 6.3 5 1.7 3.3 1.2 -2.2 7.7 8.6 17.2 ...
$ AST_per : num 54.3 50.5 30.5 11 20.4 27.7 26.2 38.8 18.2 28.7 ...
$ AST_TO : num 1.92 1.95 2.13 0.87 1.61 2.23 1.23 2.13 1.69 2.19 ...
$ AST_RATIO : num 23.4 27.6 18.5 7.3 12.6 18.6 14.3 25.6 13.3 22.2 ...
$ OREBper : num 5.3 3.5 1.9 6.7 3 1.8 7.2 4 3.8 2.7 ...
$ DREBper : num 27.9 21.2 7 26.9 13.4 13.4 30.5 20.9 16 11.3 ...
$ REBper : num 16.7 12.3 4.5 17 8.3 7.5 18.8 12.7 10 7.3 ...
$ TO_Ratio : num 12.2 14.1 8.7 8.4 7.8 8.3 11.6 12 7.9 10.1 ...
$ EFGper : num 47.6 52.5 54.6 51.8 47.7 51.6 49.8 59.4 54.1 58 ...
$ TS_per : num 55.4 61.3 62.5 58 55.2 58.6 56.2 61.9 61 62.4 ...
$ USG_per : num 40.8 34.1 33.7 32.6 34.2 30.9 36.4 29.7 31.2 29.2 ...
$ PACE : num 102.3 103 99.8 100.2 97.7 ...
$ PIE : num 23 19 16.1 19.2 15.5 15.9 17.8 18.3 17.4 15.1 ...
$ salary2017_18 : num 28530608 28299399 6261395 23775506 27739975 ...
[73] "salary2017_18"
> names(nba2017)
[1] "Year" "Player" "Team.x" "Age"
[73] "salary2017_18"
> # Partition the data using stratified sampling to ensure that train and test have similar ranges of salary
> if (!requireNamespace("caret", quietly .... [TRUNCATED]
> library(caret)
> set.seed(123)
> ind <- createDataPartition(nba2017_mod$salary2017_18, p=0.70, list = F)
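The split itself is not shown in the transcript; a minimal sketch, assuming nba2017_mod is the modeling data frame (numeric predictors plus salary2017_18):

train <- nba2017_mod[ind, ]   # 70% of rows, stratified on salary2017_18
test  <- nba2017_mod[-ind, ]  # remaining 30%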
> # create a vector of player and team names for the test data
> test_players <- nba2017[-ind, "Player"]
> # bind the player and team names to the test data
> test <- cbind(test_players, test)
> print(colnames(test))
[1] "Player" "Age" "GP.x" "W.x"
> head(test)
Player Age GP.x W.x L.x Min.x Pts FGM FGA FG_per three_PM three_PA
1 Russell Westbrook 28 81 46 35 34.6 31.6 10.2 24.0 42.5 2.5 7.2
8 LeBron James 32 74 51 23 37.8 26.4 9.9 18.2 54.8 1.7 4.6
11 Kyrie Irving 25 72 47 25 35.1 25.2 9.3 19.7 47.3 2.5 6.1
12 Karl-Anthony Towns 21 82 31 51 37.0 25.1 9.8 18.0 54.2 1.2 3.4
13 Kevin Durant 28 62 51 11 33.4 25.1 8.9 16.5 53.7 1.9 5.0
14 Jimmy Butler 27 76 40 36 37.0 23.9 7.5 16.5 45.5 1.2 3.3
three_Per FTM FTA FT_per OREB DREB REB AST TOV STL BLK PF FP DD2 TD3 Plus_minus
1 34.3 8.8 10.4 84.5 1.7 9.0 10.7 10.4 5.4 1.6 0.4 2.3 60.6 62 42 3.1
8 36.3 4.8 7.2 67.4 1.3 7.3 8.6 8.7 4.1 1.2 0.6 1.8 51.3 42 13 6.5
11 40.1 4.1 4.6 90.5 0.7 2.5 3.2 5.8 2.5 1.2 0.3 2.2 39.7 8 0 4.6
12 36.7 4.3 5.2 83.2 3.6 8.7 12.3 2.7 2.6 0.7 1.3 2.9 47.1 62 1 -0.3
13 37.5 5.4 6.2 87.5 0.6 7.6 8.3 4.8 2.2 1.1 1.6 1.9 48.0 23 1 11.5
14 36.7 7.7 8.9 86.5 1.7 4.5 6.2 5.5 2.1 1.9 0.4 1.5 44.4 15 2 2.7
Pts_Off_TO second_pts FBPS PITP OPP_PTs_off_TO OPP_sec_pts OPP_FBPS OPP_PITP BLKA PFD
1 4.7 3.6 6.7 10.1 12.2 9.0 8.1 34.1 1.1 6.7
8 4.1 2.5 5.5 13.9 12.4 10.1 10.0 35.2 0.8 5.9
11 2.7 1.8 2.5 8.3 11.1 10.1 10.4 33.6 1.0 3.4
12 3.4 4.7 1.5 14.1 13.1 10.3 11.0 35.7 1.1 4.7
13 4.6 2.4 5.7 10.0 10.7 9.6 9.3 31.3 0.4 4.6
14 4.0 2.6 4.0 7.8 11.2 9.2 10.6 34.2 0.8 6.5
Def.Rtg DREB_per Per_DREB STL_per Per_BLK OPP_PTS_OFF_TOV OPP_PTS_2nd_Chance
1 104.6 27.9 35.2 27.0 10.3 12.2 9.0
8 107.1 20.9 27.3 23.1 19.3 12.4 10.1
11 109.1 7.5 9.9 23.3 11.2 11.1 10.1
12 110.8 27.7 36.5 11.6 36.0 13.1 10.3
13 101.3 23.2 30.3 16.1 34.6 10.7 9.6
14 103.4 13.1 17.0 30.6 10.7 11.2 9.2
OPP_PTS_FB OPP_PTS_Paint DEF_WS Dist_FT Dist_Miles Dist_Miles_off Dist_Miles_def
1 8.1 34.1 0.043 12533.4 2.37 1.28 1.09
8 10.0 35.2 0.035 12611.3 2.39 1.26 1.13
11 10.4 33.6 0.024 12428.6 2.35 1.28 1.08
12 11.0 35.7 0.018 13353.1 2.53 1.35 1.18
13 9.3 31.3 0.056 11874.8 2.25 1.17 1.08
14 10.6 34.2 0.051 13728.7 2.60 1.41 1.19
avg_speed avg_speed_off avg_speed_def OffRTG DefRTG NetRTG AST_per AST_TO AST_RATIO
1 4.13 4.72 3.61 107.9 104.6 3.3 54.3 1.92 23.4
8 3.80 3.99 3.62 114.9 107.1 7.7 38.8 2.13 25.6
11 4.03 4.34 3.72 114.2 109.1 5.1 27.8 2.32 19.3
12 4.11 4.28 3.94 109.9 110.8 -0.9 12.7 1.04 10.5
13 4.05 4.55 3.62 117.2 101.3 16.0 21.8 2.17 18.4
14 4.23 4.47 3.96 106.4 103.4 3.0 24.3 2.62 19.6
OREBper DREBper REBper TO_Ratio EFGper TS_per USG_per PACE PIE salary2017_18 Team
1 5.3 27.9 16.7 12.2 47.6 55.4 40.8 102.31 23.0 28530608 OKC
8 4.0 20.9 12.7 12.0 59.4 61.9 29.7 98.38 18.3 33285709 CLE
11 2.3 7.5 5.0 8.3 53.5 58.0 30.2 99.12 13.5 18868625 CLE
12 11.4 27.7 19.5 10.1 57.6 61.8 27.4 97.10 17.1 6216840 MIN
13 2.3 23.2 13.7 8.5 59.4 65.1 27.6 103.71 18.6 25000000 GSW
14 5.0 13.1 9.0 7.5 49.2 58.6 26.5 97.78 16.4 19301070 CHI
> # Get the abbreviation for each available model type (e.g., "lm" = linear model)
> names(getModelInfo())
[1] "ada" "AdaBag" "AdaBoost.M1"
[4] "adaboost" "amdai" "ANFIS"
[7] "avNNet" "awnb" "awtan"
[10] "bag" "bagEarth" "bagEarthGCV"
[13] "bagFDA" "bagFDAGCV" "bam"
[16] "bartMachine" "bayesglm" "binda"
[19] "blackboost" "blasso" "blassoAveraged"
[22] "bridge" "brnn" "BstLm"
[25] "bstSm" "bstTree" "C5.0"
[28] "C5.0Cost" "C5.0Rules" "C5.0Tree"
[31] "cforest" "chaid" "CSimca"
[34] "ctree" "ctree2" "cubist"
[37] "dda" "deepboost" "DENFIS"
[40] "dnn" "dwdLinear" "dwdPoly"
[43] "dwdRadial" "earth" "elm"
[46] "enet" "evtree" "extraTrees"
[49] "fda" "FH.GBML" "FIR.DM"
[52] "foba" "FRBCS.CHI" "FRBCS.W"
[55] "FS.HGD" "gam" "gamboost"
[58] "gamLoess" "gamSpline" "gaussprLinear"
[61] "gaussprPoly" "gaussprRadial" "gbm_h2o"
[64] "gbm" "gcvEarth" "GFS.FR.MOGUL"
[67] "GFS.LT.RS" "GFS.THRIFT" "glm.nb"
[70] "glm" "glmboost" "glmnet_h2o"
[73] "glmnet" "glmStepAIC" "gpls"
[76] "hda" "hdda" "hdrda"
[79] "HYFIS" "icr" "J48"
[82] "JRip" "kernelpls" "kknn"
[85] "knn" "krlsPoly" "krlsRadial"
[88] "lars" "lars2" "lasso"
[91] "lda" "lda2" "leapBackward"
[94] "leapForward" "leapSeq" "Linda"
[97] "lm" "lmStepAIC" "LMT"
[100] "loclda" "logicBag" "LogitBoost"
[103] "logreg" "lssvmLinear" "lssvmPoly"
[106] "lssvmRadial" "lvq" "M5"
[109] "M5Rules" "manb" "mda"
[112] "Mlda" "mlp" "mlpKerasDecay"
[115] "mlpKerasDecayCost" "mlpKerasDropout" "mlpKerasDropoutCost"
[118] "mlpML" "mlpSGD" "mlpWeightDecay"
[121] "mlpWeightDecayML" "monmlp" "msaenet"
[124] "multinom" "mxnet" "mxnetAdam"
[127] "naive_bayes" "nb" "nbDiscrete"
[130] "nbSearch" "neuralnet" "nnet"
[133] "nnls" "nodeHarvest" "null"
[136] "OneR" "ordinalNet" "ordinalRF"
[139] "ORFlog" "ORFpls" "ORFridge"
[142] "ORFsvm" "ownn" "pam"
[145] "parRF" "PART" "partDSA"
[148] "pcaNNet" "pcr" "pda"
[151] "pda2" "penalized" "PenalizedLDA"
[154] "plr" "pls" "plsRglm"
[157] "polr" "ppr" "pre"
[160] "PRIM" "protoclass" "qda"
[163] "QdaCov" "qrf" "qrnn"
[166] "randomGLM" "ranger" "rbf"
[169] "rbfDDA" "Rborist" "rda"
[172] "regLogistic" "relaxo" "rf"
[175] "rFerns" "RFlda" "rfRules"
[178] "ridge" "rlda" "rlm"
[181] "rmda" "rocc" "rotationForest"
[184] "rotationForestCp" "rpart" "rpart1SE"
[187] "rpart2" "rpartCost" "rpartScore"
[190] "rqlasso" "rqnc" "RRF"
[193] "RRFglobal" "rrlda" "RSimca"
[196] "rvmLinear" "rvmPoly" "rvmRadial"
[199] "SBC" "sda" "sdwd"
[202] "simpls" "SLAVE" "slda"
[205] "smda" "snn" "sparseLDA"
[208] "spikeslab" "spls" "stepLDA"
[211] "stepQDA" "superpc" "svmBoundrangeString"
[214] "svmExpoString" "svmLinear" "svmLinear2"
[217] "svmLinear3" "svmLinearWeights" "svmLinearWeights2"
[220] "svmPoly" "svmRadial" "svmRadialCost"
[223] "svmRadialSigma" "svmRadialWeights" "svmSpectrumString"
[226] "tan" "tanSearch" "treebag"
[229] "vbmpRadial" "vglmAdjCat" "vglmContRatio"
[232] "vglmCumulative" "widekernelpls" "WM"
[235] "wsrf" "xgbDART" "xgbLinear"
[238] "xgbTree" "xyf"
> # Results
> lm
Linear Regression
310 samples
69 predictor
No pre-processing
Resampling: Cross-Validated (5 fold, repeated 5 times)
Summary of sample sizes: 248, 248, 246, 250, 248, 248, ...
Resampling results:
Call:
lm(formula = .outcome ~ ., data = dat)
Residuals:
Min 1Q Median 3Q Max
-15232772 -2471478 -307534 2214296 20368317
> # Scale-Location
> sqrt_std_resid <- sqrt(abs(residuals / sd(residuals)))
> # 1st plot: if the linearity assumption is met you should see no pattern here and the line should be flat; you should also see no pattern if the .... [TRUNCATED]
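For reference, the diagnostics the comments describe can also be drawn straight from the fitted model; a minimal sketch, assuming lm is the caret object above:

par(mfrow = c(1, 3))
plot(lm$finalModel, which = 1:3)  # residuals vs fitted, normal Q-Q, scale-location
par(mfrow = c(1, 1))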
> # Surprisingly, BLKA > Opp 2nd chance pts > GP > Opp pts in the paint > W were the top 5 correlated variables.
> # Reset margins back to default
> .... [TRUNCATED]
> ##################################################################################
> # Ridge regression
> # Ridge shrinks the coefficients but keeps .... [TRUNCATED]
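The fit that produced ridge was truncated; a sketch of the likely call, assuming caret's "ridge" method (elasticnet backend, consistent with the modelInfo printed below) and a small lambda grid containing the winning value 0.1:

ridge <- train(salary2017_18 ~ ., data = train, method = "ridge",
               trControl = ctrl,
               tuneGrid = expand.grid(lambda = c(0, 0.01, 0.1)))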
> ridge
Ridge Regression
310 samples
69 predictor
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was lambda = 0.1.
> ridge$modelInfo
$label
[1] "Ridge Regression"
$library
[1] "elasticnet"
$type
[1] "Regression"
$parameters
parameter class label
1 lambda numeric Weight Decay
$grid
function (x, y, len = NULL, search = "grid")
{
if (search == "grid") {
out <- expand.grid(lambda = c(0, 10^seq(-1, -4, length = len -
1)))
}
else {
out <- data.frame(lambda = 10^runif(len, min = -5, 1))
}
out
}
$loop
NULL
$fit
function (x, y, wts, param, lev, last, classProbs, ...)
{
elasticnet::enet(as.matrix(x), y, lambda = param$lambda)
}
<bytecode: 0x000002157908d7a8>
$predict
function (modelFit, newdata, submodels = NULL)
{
elasticnet::predict.enet(modelFit, newdata, s = 1, mode = "fraction")$fit
}
<bytecode: 0x00000215796c2f60>
$predictors
function (x, s = NULL, ...)
{
if (is.null(s)) {
if (!is.null(x$tuneValue)) {
s <- x$tuneValue$.fraction
}
else stop("must supply a vaue of s")
out <- elasticnet::predict.enet(x, s = s, type = "coefficients",
mode = "fraction")$coefficients
}
else {
out <- elasticnet::predict.enet(x, s = s)$coefficients
}
names(out)[out != 0]
}
$tags
[1] "Linear Regression" "L2 Regularization"
$prob
NULL
$sort
function (x)
x[order(-x$lambda), ]
> # Variable Importance for Ridge Model
> plot(varImp(ridge, scale = F), main = "Variable Importance - Ridge Model")
> # FP (fantasy pts) > Min > Pts > FGM > Dist_Miles were the top 5 correlated variables
>
> # There is another way to run ridge using glmnet and altho .... [TRUNCATED]
> set.seed(12345)
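The fitting call was elided from the transcript; a sketch, assuming ridge is obtained in glmnet by fixing alpha = 0 and tuning only lambda:

glmnet_ridge <- train(salary2017_18 ~ ., data = train, method = "glmnet",
                      trControl = ctrl,
                      tuneGrid = expand.grid(alpha = 0,
                                             lambda = seq(0.01, 1, length = 10)))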
310 samples
69 predictor
No pre-processing
Resampling: Cross-Validated (5 fold, repeated 5 times)
Summary of sample sizes: 247, 249, 248, 248, 248, 248, ...
Resampling results across tuning parameters:
> plot(glmnet_ridge)
> # Create a plot with three lines for RMSE, Rsquared, and MAE
> plot(lambda, RMSE, type = "l", col = "blue", ylim = c(min(RMSE, Rsquared, MAE), max(R .... [TRUNCATED]
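One way to finish the truncated plotting code, assuming the tuning metrics are pulled from glmnet_ridge$results (note Rsquared is on a 0-1 scale, so it hugs the bottom axis when plotted against salary-scale errors):

res_grid <- glmnet_ridge$results
plot(res_grid$lambda, res_grid$RMSE, type = "l", col = "blue",
     ylim = range(res_grid$RMSE, res_grid$Rsquared, res_grid$MAE),
     xlab = "lambda", ylab = "value")
lines(res_grid$lambda, res_grid$Rsquared, col = "darkgreen")
lines(res_grid$lambda, res_grid$MAE, col = "red")
legend("topright", c("RMSE", "Rsquared", "MAE"),
       col = c("blue", "darkgreen", "red"), lty = 1)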
> # DEF_win shares > BLKA > Dist_miles_def > avg_speed_off > 3pt made were the top 5 correlated variables
>
> # While both ridge and glmnet produce s .... [TRUNCATED]
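The fit that produced lasso was elided; a sketch, assuming caret's "lasso" method, which tunes the L1 fraction reported in the output below:

lasso <- train(salary2017_18 ~ ., data = train, method = "lasso",
               trControl = ctrl,
               tuneGrid = expand.grid(fraction = seq(0.1, 0.9, by = 0.1)))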
> lasso
The lasso
310 samples
69 predictor
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was fraction = 0.1.
> # Similarly to Ridge, FP (fantasy pts) > Min > Pts > FGM > Dist_Miles were the top 5 correlated variables
>
>
> #################################### .... [TRUNCATED]
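The elastic net fit was elided; a sketch, assuming method = "glmnet" with both alpha and lambda tuned (the output below reports alpha = 0 and lambda = 0.2 as the winners):

en <- train(salary2017_18 ~ ., data = train, method = "glmnet",
            trControl = ctrl,
            tuneGrid = expand.grid(alpha = seq(0, 1, by = 0.25),
                                   lambda = seq(0.1, 0.3, by = 0.05)))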
> en
glmnet
310 samples
69 predictor
No pre-processing
Resampling: Cross-Validated (5 fold, repeated 5 times)
Summary of sample sizes: 248, 248, 248, 249, 247, 248, ...
Resampling results across tuning parameters:
RMSE was used to select the optimal model using the smallest value.
The final values used for the model were alpha = 0 and lambda = 0.2.
> plot(en)
> plot(varImp(en, scale = F), main = "Variable Importance - Elastic Net Model")
> # Just like glmnet ridge, DEF_win shares > BLKA > Dist_miles_def > avg_speed_off > 3pt made were the top 5 correlated variables
>
> ################ .... [TRUNCATED]
> set.seed(54321)
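The MARS fit was elided; a sketch, assuming caret's "earth" method with its two tuning parameters, degree and nprune (the final model below settles on nprune = 7):

MARS1 <- train(salary2017_18 ~ ., data = train, method = "earth",
               trControl = ctrl,
               tuneGrid = expand.grid(degree = 1:2, nprune = 2:15))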
> MARS1
Multivariate Adaptive Regression Spline
310 samples
69 predictor
No pre-processing
Resampling: Cross-Validated (5 fold, repeated 5 times)
Summary of sample sizes: 248, 248, 249, 248, 247, 247, ...
Resampling results:
RMSE Rsquared MAE
5716119 0.5098306 4095691
> MARS1$finalModel
Selected 7 of 37 terms, and 7 of 69 predictors (nprune=7)
Termination condition: RSq changed by less than 0.001 at 37 terms
Importance: FP, DEF_WS, W.x-unused, Age, three_PA, OREB, Dist_Miles_off, FGM, ...
Number of terms at each degree of interaction: 1 4 2
GCV 2.199971e+13 RSS 6.134086e+15 GRSq 0.6318521 RSq 0.6667271
> # Min, Age, FP, BLKA, TD3 (triple doubles) were the only variables used.
>
> # Multivariate Adaptive Regression Splines
> # tuning parameter degr .... [TRUNCATED]
> MARS
Multivariate Adaptive Regression Splines
310 samples
69 predictor
No pre-processing
Resampling: Cross-Validated (5 fold, repeated 5 times)
Summary of sample sizes: 248, 248, 249, 248, 247, 249, ...
Resampling results:
> MARS$finalModel
Selected 13 of 21 terms, and 10 of 69 predictors
Termination condition: RSq changed by less than 0.001 at 21 terms
Importance: FP, DEF_WS, Age, OREB, Dist_FT, three_PA, OPP_FBPS, W.x, Pts, BLK, ...
Number of terms at each degree of interaction: 1 12 (additive model)
GCV 2.038615e+13 RSS 5.341501e+15 GRSq 0.6588536 RSq 0.7097892
> # FP, Dist_Miles_def, Age, TD3, BLKA, STL, OPP_FBPS, BLK, DEF_WS were the only variables used
> # MARS looks like it does the best at incorporating .... [TRUNCATED]
Call:
summary.resamples(object = res)
MAE
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
LinearModel 3317404 3948132 4185130 4212961 4628162 5098863 0
Ridge 3081720 3640888 3912416 3858794 4118357 4528260 0
GlmnetRidge 3363802 3545343 3760396 3853595 4113259 4834166 0
Lasso 3147714 3710169 4003428 3904404 4104815 4580604 0
ElasticNet 2718386 3609310 3860008 3812247 3975460 4793636 0
MARS 3324116 3831165 4097809 4095691 4294680 5080470 0
MARSs 3176418 3605303 3879350 3872835 4047672 4683669 0
RMSE
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
LinearModel 4546094 5062952 5636885 5573061 5957401 6560565 0
Ridge 3993764 4819406 5076350 5048515 5330219 6133344 0
GlmnetRidge 4138136 4629204 4955967 5014903 5409471 6035031 0
Lasso 4101223 4768681 5257961 5160723 5522938 6290290 0
ElasticNet 3540897 4662299 5055350 4984094 5265990 6123995 0
MARS 4514745 5286385 5638189 5716119 6006468 7049854 0
MARSs 4114448 4993291 5250407 5294033 5790850 6381070 0
Rsquared
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
LinearModel 0.3504425 0.4776367 0.5263815 0.5166265 0.5593456 0.6182352 0
Ridge 0.4656987 0.5299787 0.5822513 0.5943249 0.6640283 0.7206675 0
GlmnetRidge 0.4459301 0.5475673 0.5782917 0.5838632 0.6411426 0.7502092 0
Lasso 0.3838528 0.4906664 0.5793424 0.5659929 0.6541656 0.7362773 0
ElasticNet 0.4360748 0.5217468 0.6004407 0.5852860 0.6439920 0.7503014 0
MARS 0.2517839 0.4342799 0.5320606 0.5098306 0.5765996 0.6478697 0
MARSs 0.3004270 0.4965708 0.5278863 0.5488899 0.6162878 0.7242066 0
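The construction of res was elided; the table above is what caret's resamples() produces for a named list of fitted models, so the call was presumably along these lines (assuming MARS in the table maps to MARS1 above and MARSs to the second, tuned MARS fit):

res <- resamples(list(LinearModel = lm, Ridge = ridge, GlmnetRidge = glmnet_ridge,
                      Lasso = lasso, ElasticNet = en, MARS = MARS1, MARSs = MARS))
summary(res)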
> # Based on mean Rsquared it looks like the best model was en (Elastic Net Regression)
> # However, let's use all the models to predict the test datase .... [TRUNCATED]
[1] "Year" "Player" "Team" "Age"
> names(nba2017)
[1] "Year" "Player" "Team.x" "Age"
[73] "salary2017_18"
> nba2018 <- nba2018[-223,]
> str(nba2018)
'data.frame': 539 obs. of 72 variables:
$ Year : int 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
$ Player : chr "James Harden" "Anthony Davis" "LeBron James" "Damian Lillard" ...
$ Team : chr "HOU" "NOP" "CLE" "POR" ...
$ Age : int 28 25 33 27 23 30 29 29 27 21 ...
$ GP.x : int 72 75 82 73 75 51 68 80 48 54 ...
$ W.x : int 59 45 50 44 39 41 49 47 27 16 ...
$ L.x : int 13 30 32 29 36 10 19 33 21 38 ...
$ Min.x : num 35.4 36.4 36.9 36.6 36.8 32 34.2 36.4 36.2 34.5 ...
$ Pts : num 30.4 28.1 27.5 26.9 26.9 26.4 26.4 25.4 25.2 24.9 ...
$ FGM : num 9 10.4 10.5 8.5 9.9 8.4 9.3 9.5 8.5 8.4 ...
$ FGA : num 20.1 19.5 19.3 19.4 18.7 16.9 18 21.1 18 19.5 ...
$ FG_per : num 44.9 53.4 54.2 43.9 52.9 49.5 51.6 44.9 47 43.2 ...
$ three_PM : num 3.7 0.7 1.8 3.1 0.6 4.2 2.5 1.2 2.2 2.7 ...
$ three_PA : num 10 2.2 5 8.6 1.9 9.8 6.1 4.1 6.1 7.1 ...
$ three_Per : num 36.7 34 36.7 36.1 30.7 42.3 41.9 29.8 35.4 38.3 ...
$ FTM : num 8.7 6.6 4.7 6.8 6.5 5.5 5.3 5.2 6.1 5.4 ...
$ FTA : num 10.1 8 6.5 7.4 8.5 5.9 5.9 7.1 8.2 6.1 ...
$ FT_per : num 85.8 82.8 73.1 91.6 76 92.1 88.9 73.7 74.6 87.8 ...
$ OREB : num 0.6 2.5 1.2 0.8 2.1 0.7 0.5 1.9 2.2 0.5 ...
$ DREB : num 4.8 8.6 7.5 3.6 8 4.4 6.4 8.2 10.7 4 ...
$ REB : num 5.4 11.1 8.6 4.5 10 5.1 6.8 10.1 12.9 4.5 ...
$ AST : num 8.8 2.3 9.1 6.6 4.8 6.1 5.4 10.3 5.4 4.7 ...
$ TOV : num 4.4 2.2 4.2 2.8 3 3 3 4.8 5 3.6 ...
$ STL : num 1.8 1.5 1.4 1.1 1.5 1.6 0.7 1.8 1.6 0.9 ...
$ BLK : num 0.7 2.6 0.9 0.4 1.4 0.2 1.8 0.3 1.6 0.3 ...
$ PF : num 2.3 2.1 1.7 1.6 3.1 2.2 2 2.5 3.8 3.1 ...
$ FP : num 53 55.1 54.1 43.6 51.7 43.8 47 54.3 53.3 37.2 ...
$ DD2 : int 31 50 52 11 42 5 15 59 38 4 ...
$ TD3 : int 4 1 18 0 1 0 2 25 3 0 ...
$ Plus_minus : num 7.3 3.9 1.3 4.7 2 9.5 5.1 4.8 1.7 -6.6 ...
$ Pts_Off_TO : num 4.7 4.2 4.1 3.4 4.1 4.3 3.7 5.2 3.5 2.6 ...
$ second_pts : num 2 4.5 2.2 2 3 1.6 1.9 3 3.3 1.8 ...
$ FBPS : num 2 3.6 4.2 2.2 4.7 4.3 4.3 5.5 1.9 3.4 ...
$ PITP : num 9.3 15.4 14.4 7.7 15.7 5.4 7.7 11.3 11.3 6.7 ...
$ OPP_PTs_off_TO : num 11.9 12.2 12.7 11.3 11.3 12.3 13.5 11.2 13.1 14.2 ...
$ OPP_sec_pts : num 8.8 10.6 9.7 8.9 9.5 9 10.1 9.3 9 9.2 ...
$ OPP_FBPS : num 9.2 10.9 9.1 8.7 7 9.9 10.8 9 10.3 11.6 ...
$ OPP_PITP : num 35.7 34.8 36.2 32.7 34.4 32.5 34.5 33.5 33.2 33.9 ...
$ BLKA : num 1.4 0.6 0.8 1.2 1.1 0.4 0.6 1.1 1.3 1.1 ...
$ PFD : num 7 7.4 5.4 5.4 6.8 4.1 4.8 5.8 7.8 5.4 ...
$ Def.Rtg : num 105 103 111 104 105 ...
$ DREB_per : num 15.3 24.1 22.5 10.6 24.9 14.1 19.2 25.4 32.5 12.6 ...
$ Per_DREB : num 19.3 31.6 29.1 13.3 32.4 18.5 25.6 32.3 41.3 16.4 ...
$ STL_per : num 26.8 24.3 26 19.2 21.7 27.9 13.3 25.8 28.3 17.3 ...
$ Per_BLK : num 19.4 53.5 28.9 8.9 32 3.3 33.7 6.9 40.2 7.7 ...
$ OPP_PTS_OFF_TOV : num 11.9 12.2 12.7 11.3 11.3 12.3 13.5 11.2 13.1 14.2 ...
$ OPP_PTS_2nd_Chance: num 8.8 10.6 9.7 8.9 9.5 9 10.1 9.3 9 9.2 ...
$ OPP_PTS_FB : num 9.2 10.9 9.1 8.7 7 9.9 10.8 9 10.3 11.6 ...
$ OPP_PTS_Paint : num 35.7 34.8 36.2 32.7 34.4 32.5 34.5 33.5 33.2 33.9 ...
$ DEF_WS : num 0.043 0.05 0.017 0.05 0.042 0.037 0.033 0.046 0.036 0.013 ...
$ Dist_FT : num 11475 12628 12545 13318 13292 ...
$ Dist_Miles : num 2.17 2.39 2.38 2.52 2.52 2.42 2.3 2.51 2.17 2.46 ...
$ Dist_Miles_off : num 1.17 1.24 1.25 1.41 1.33 1.28 1.21 1.34 1.11 1.36 ...
$ Dist_Miles_def : num 1 1.15 1.12 1.11 1.19 1.13 1.09 1.16 1.05 1.11 ...
$ avg_speed : num 3.69 3.96 3.88 4.16 4.15 4.55 4.06 4.14 3.6 4.3 ...
$ avg_speed_off : num 3.97 4.31 4.1 4.53 4.61 5.24 4.55 4.67 3.77 4.73 ...
$ avg_speed_def : num 3.41 3.64 3.66 3.77 3.74 3.96 3.63 3.66 3.44 3.86 ...
$ OffRTG : num 114 109 113 108 109 ...
$ DefRTG : num 105 103 111 103 105 ...
$ NetRTG : num 9.9 5.4 1.7 4.8 3.5 14.7 8.8 5.7 1.8 -10 ...
$ AST_per : num 44.9 10.4 43.2 30.1 23.7 27.2 24.3 46.4 24.1 23.3 ...
$ AST_TO : num 2 1.07 2.15 2.33 1.62 2.03 1.77 2.15 1.06 1.3 ...
$ AST_RATIO : num 23.2 8.4 25.7 20.6 15.9 21.2 18.6 26.1 16.7 15.4 ...
$ OREBper : num 1.8 7.7 3.7 2.4 6.7 2.7 1.6 5.4 7.1 1.3 ...
$ DREBper : num 15.4 24.1 22.5 10.7 24.9 14.1 19.2 25.5 32.5 12.7 ...
$ REBper : num 8.8 16.3 13.3 6.6 15.9 8.9 11.1 15.1 20.2 7 ...
$ TO_Ratio : num 11.6 7.9 11.9 8.8 9.8 10.5 10.5 12.1 15.8 11.8 ...
$ EFGper : num 54.1 55.2 59 51.9 54.5 61.8 58.6 47.7 53 50.1 ...
$ TS_per : num 61.9 61.2 62.1 59.4 59.8 67.5 64 52.4 58.3 56.1 ...
$ USG_per : num 36.1 29.7 31.6 30.1 31.2 29.7 29.9 33.2 32.4 30.9 ...
$ PACE : num 99.8 103.3 100.1 100.4 98.4 ...
$ PIE : num 19.4 18.8 19.1 16.5 18.6 16.5 16.8 18.6 16.7 12.3 ...
> # Merge actual salary with the nba2018 dataset and compute the difference between actual and predicted
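The merge itself is not shown; a minimal sketch of the idea, where salary18 and its salary column are hypothetical names for the actual 2018-19 salary table:

nba2018_salary <- merge(nba2018, salary18, by = "Player")
nba2018_salary$PV_EN   <- predict(en, newdata = nba2018_salary)        # predicted salary, elastic net
nba2018_salary$Diff_EN <- nba2018_salary$salary - nba2018_salary$PV_EN # positive = overpaid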
> names(nba2018)
[1] "Year" "Player" "Team" "Age"
> names(nba2018_salaryFinal)
[1] "Year" "Player" "Team.x" "Age"
> # Write the final data set to file so that you have a copy
> write.csv(nba2018_salary, "nba2018_salary.csv")
> # If you want to save the final model as an object that can be recalled later, do the following:
> saveRDS(en, "final_model.rds")
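fm is printed below without its assignment being shown; presumably it was reloaded from disk with readRDS:

fm <- readRDS("final_model.rds")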
> print(fm)
glmnet
310 samples
69 predictor
No pre-processing
Resampling: Cross-Validated (5 fold, repeated 5 times)
Summary of sample sizes: 248, 248, 248, 249, 247, 248, ...
Resampling results across tuning parameters:
RMSE was used to select the optimal model using the smallest value.
The final values used for the model were alpha = 0 and lambda = 0.2.
> sqrt(mean((train$salary2017_18-predict1)^2))
[1] 4426223
> sqrt(mean((test$salary2017_18-predict2)^2))
[1] 4882551
> # Determine the team with the biggest difference between predicted and actual salary. Basically, which team is over/underpaying the most?
> # First .... [TRUNCATED]
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[15] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[29] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[43] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[57] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[99] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[113] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
[127] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[141] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
[155] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
[183] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
[197] FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
[211] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[225] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[239] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[267] FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[281] FALSE FALSE FALSE TRUE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[295] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[323] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[337] FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
[351] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[365] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[379] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[393] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE
[407] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE TRUE FALSE TRUE
[421] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE
[435] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
[449] FALSE TRUE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
[463] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE
[477] FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
[491] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE
[505] FALSE FALSE FALSE FALSE
> duplicated(nba2018_salary_minDup$Player)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[15] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[29] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[43] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[57] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[99] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[113] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[127] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[141] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[155] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[183] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[197] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[225] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[239] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[267] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[281] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[295] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[323] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[337] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[351] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[365] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[379] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[393] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[407] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[421] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[435] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[449] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[463] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> names(nba2018_salary_minDup)
[1] "Year" "Player" "Team.x" "PV_EN" "PV_lm"
[16] "Diff_MARS1"
> # Use the highest Rsquared model for prediction (Elastic Net)
> predictions <- predict(en, newdata = test)
> print(colnames(test))
[1] "Player" "Age" "GP.x" "W.x"
[73] "diff"
> # Find team performance (sum of diff values for each team)
> team_performance <- aggregate(diff ~ Team, data = test, sum)
> # Identify teams that overpaid (positive sum of diff values)
> overpaid_teams <- team_performance[team_performance$diff > 0, ]
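To answer the question posed above, the aggregated differences can simply be sorted; a minimal sketch:

team_performance <- team_performance[order(-team_performance$diff), ]
head(team_performance, 1)  # biggest overpayer
tail(team_performance, 1)  # biggest underpayer (most negative diff)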
> dev.off()
null device
1
> sink()