Professional Documents
Culture Documents
Target Gift N
Flag Obs N Mean Std Dev Minimum Maximum
--------------------------------------------------------------------------------------------
0 4843 0 . . . .
NOTE: There were 1780 observations read from the data set SHAN.GIFT.
WHERE GiftAvgCard36=.;
NOTE: The data set WORK.CHK has 1780 observations and 28 variables.
NOTE: DATA statement used (Total process time):
real time 0.05 seconds
cpu time 0.01 seconds
/*missing values for age*/
/* Copy into WORK.CHK only the rows of SHAN.GIFT whose DemAge is the
   ordinary numeric missing value (.), so they can be counted/inspected. */
data chk;
    set shan.gift (where=(DemAge = .));
run;
NOTE: There were 2407 observations read from the data set SHAN.GIFT.
WHERE DemAge=.;
NOTE: The data set WORK.CHK has 2407 observations and 28 variables.
NOTE: DATA statement used (Total process time):
real time 0.09 seconds
cpu time 0.02 seconds
/*missing value treatment*/
/* Build SHAN.GIFT1 from SHAN.GIFT:
     - flagDemAge / flagGiftAvgCard36 are 1 when the source value was
       missing (.), otherwise 0;
     - the missing value itself is then replaced by 0.
   The flags are created in the same order as before so the variable
   order of the output data set is unchanged. */
data shan.gift1;
    set shan.gift;

    /* A comparison evaluates to 1 (true) or 0 (false). */
    flagDemAge = (DemAge = .);
    if flagDemAge then DemAge = 0;

    flagGiftAvgCard36 = (GiftAvgCard36 = .);
    if flagGiftAvgCard36 then GiftAvgCard36 = 0;
run;
NOTE: This SAS session is using a registry in WORK. All changes will be lost at the end of
this session.
23 data shan.gift1;
24 set shan.gift;
25 if DemAge=. then do;
26 DemAge=0;
27 flagDemAge=1;
28 end;
29 else flagDemage=0;
30 if GiftAvgCard36=. then do;
31 GiftAvgCard36=0;
32 flagGiftAvgCard36=1;
33 end;
34 else flagGiftAvgCard36=0;
35 run;
NOTE: There were 9686 observations read from the data set SHAN.GIFT.
NOTE: The data set SHAN.GIFT1 has 9686 observations and 30 variables.
NOTE: DATA statement used (Total process time):
/*Find the significance of character variables on the dependent variable*/
E 95 132 227
0.98 1.36 2.34
41.85 58.15
1.96 2.73
0 1
61.06 38.94
8.32 5.31
L 17 17 34
0.18 0.18 0.35
50.00 50.00
0.35 0.35
Cramer's V 0.1099
A E F L N S
A E F L N S
Cramer's V 1.0000
A E F L N S
Cramer's V 1.0000
Sample Size = 9686
/*output of chisq*/
The SAS System
0 1
01 54 67 121
0.56 0.69 1.25
44.63 55.37
1.12 1.38
02 92 99 191
0.95 1.02 1.97
48.17 51.83
1.90 2.04
03 68 85 153
0.70 0.88 1.58
44.44 55.56
1.40 1.76
04 21 30 51
0.22 0.31 0.53
41.18 58.82
0.43 0.62
05 48 47 95
0.50 0.49 0.98
50.53 49.47
0.99 0.97
Table of DemCluster by TargetB
0 1
06 31 22 53
0.32 0.23 0.55
58.49 41.51
0.64 0.45
07 34 44 78
0.35 0.45 0.81
43.59 56.41
0.70 0.91
08 102 80 182
1.05 0.83 1.88
56.04 43.96
2.11 1.65
09 36 34 70
0.37 0.35 0.72
51.43 48.57
0.74 0.70
10 106 69 175
1.09 0.71 1.81
60.57 39.43
2.19 1.42
Table of DemCluster by TargetB
0 1
15 56 52 108
0.58 0.54 1.12
51.85 48.15
1.16 1.07
Table of DemCluster by TargetB
0 1
16 97 104 201
1.00 1.07 2.08
48.26 51.74
2.00 2.15
17 92 86 178
0.95 0.89 1.84
51.69 48.31
1.90 1.78
19 25 25 50
0.26 0.26 0.52
50.00 50.00
0.52 0.52
20 78 93 171
0.81 0.96 1.77
45.61 54.39
1.61 1.92
Table of DemCluster by TargetB
0 1
21 91 74 165
0.94 0.76 1.70
55.15 44.85
1.88 1.53
22 60 65 125
0.62 0.67 1.29
48.00 52.00
1.24 1.34
23 60 71 131
0.62 0.73 1.35
45.80 54.20
1.24 1.47
25 71 64 135
0.73 0.66 1.39
52.59 47.41
1.47 1.32
Table of DemCluster by TargetB
0 1
26 49 51 100
0.51 0.53 1.03
49.00 51.00
1.01 1.05
28 85 109 194
0.88 1.13 2.00
43.81 56.19
1.76 2.25
29 33 40 73
0.34 0.41 0.75
45.21 54.79
0.68 0.83
0 1
31 63 62 125
0.65 0.64 1.29
50.40 49.60
1.30 1.28
32 45 27 72
0.46 0.28 0.74
62.50 37.50
0.93 0.56
33 26 26 52
0.27 0.27 0.54
50.00 50.00
0.54 0.54
34 64 68 132
0.66 0.70 1.36
48.48 51.52
1.32 1.40
0 1
37 56 43 99
0.58 0.44 1.02
56.57 43.43
1.16 0.89
38 53 65 118
0.55 0.67 1.22
44.92 55.08
1.09 1.34
0 1
41 113 84 197
1.17 0.87 2.03
57.36 42.64
2.33 1.73
42 67 73 140
0.69 0.75 1.45
47.86 52.14
1.38 1.51
44 111 74 185
1.15 0.76 1.91
60.00 40.00
2.29 1.53
0 1
46 92 104 196
0.95 1.07 2.02
46.94 53.06
1.90 2.15
47 52 34 86
0.54 0.35 0.89
60.47 39.53
1.07 0.70
48 48 48 96
0.50 0.50 0.99
50.00 50.00
0.99 0.99
50 35 35 70
0.36 0.36 0.72
50.00 50.00
0.72 0.72
Table of DemCluster by TargetB
0 1
52 19 13 32
0.20 0.13 0.33
59.38 40.63
0.39 0.27
53 70 88 158
0.72 0.91 1.63
44.30 55.70
1.45 1.82
Cramer's V 0.0966
/* Add four 0/1 indicator variables that bucket DemCluster codes into
   groups. Every code appears in at most one list, so at most one
   indicator is 1 per row; codes that appear in none of the lists
   (for example '04') leave all four indicators at 0.
   The step rewrites SHAN.GIFT1 in place. */
data shan.gift1;
    set shan.gift1;

    /* A membership test evaluates to 1 (true) or 0 (false). */
    ind_demclus_1 = (DemCluster in ('32', '10', '47', '44', '52', '06', '30'));

    ind_demclus_2 = (DemCluster in ('41', '37', '08', '21', '43', '49', '51',
                                    '45', '36'));

    ind_demclus_3 = (DemCluster in ('25', '15', '17', '09', '05', '12', '31',
                                    '19', '33', '48', '50', '27', '26', '14',
                                    '39', '34', '16', '02'));

    ind_demclus_4 = (DemCluster in ('22', '42', '18', '35', '46', '11', '24',
                                    '23', '20', '40', '29', '38', '13', '01',
                                    '03', '53', '28', '07'));
run;
/*demgender variable significance*/
0 1
Col Pct Table of DemGender by TargetB
0 1
Cramer's V 0.0027
0 1
Cramer's V -0.0081
/* log file*/
NOTE: There were 9686 observations read from the data set SHAN.GIFT1.
NOTE: The data set WORK.TEST has 6793 observations and 36 variables.
NOTE: The data set WORK.VAL has 2893 observations and 36 variables.
NOTE: DATA statement used (Total process time):
real time 0.07 seconds
cpu time 0.01 seconds
/* Knowing bi-variate strength of the independent variables in explaining the dependent
variable*/
/* Stepwise logistic regression of targetB on all candidate predictors,
   limited to a single step (maxstep=1) with DETAILS requested.
   The EffectNotInModel ODS table is captured in WORK.LOG_DATA. */
proc logistic data=test;
    model targetB =
        /* demographics */
        DemAge DemMedHomeValue DemMedIncome DemPctVeterans
        /* gift amounts */
        GiftAvg36 GiftAvgAll GiftAvgCard36 GiftAvgLast
        /* gift counts */
        GiftCnt36 GiftCntAll GiftCntCard36 GiftCntCardAll
        /* gift timing */
        GiftTimeFirst GiftTimeLast
        /* promotion counts */
        PromCnt12 PromCnt36 PromCntAll
        PromCntCard12 PromCntCard36 PromCntCardAll
        /* status and derived flags / indicators */
        StatusCatStarAll
        flagDemAge flagGiftAvgCard36
        ind_demclus_1 ind_demclus_2 ind_demclus_3 ind_demclus_4
        ind_stcat96nk_A_L_N ind_stcat96nk_E_or_S
        / selection=stepwise maxstep=1 details;
    ods output EffectNotInModel=log_data;
run;
/* Multi collinearity treatment – step 01
Note – we also dropped some insignificant variables based on bi-variate strength*/
/* Proc contents of test data just to see what extra fields were added */
/* Score WORK.PREDICTED in place:
     P_0_D    - P_0 scaled by 1000 and rounded to the nearest 0.1
     log_odds - linear predictor built from hard-coded coefficients
     prob     - inverse-logit of log_odds */
data predicted;
    set predicted;

    P_0_D = round(1000 * P_0, 0.1);

    /* Intercept first, then one term per predictor, summed in the
       original order. */
    log_odds = 0.2751
             + 9.425E-7 * DemMedHomeValue
             - 0.00915  * GiftAvg36
             + 0.0847   * GiftCnt36
             + 0.0273   * GiftCntCardAll
             - 0.0362   * GiftTimeLast
             - 0.3611   * ind_demclus_1
             - 0.2279   * ind_demclus_2
             + 0.1434   * ind_demclus_4;

    prob = exp(log_odds) / (1 + exp(log_odds));
run;
/* Rewrite WORK.PREDICTED in place, setting P_0_D to P_0 scaled by 1000
   and rounded to the nearest 0.1. */
data predicted;
    set predicted;
    P_0_D = round(1000 * P_0, 0.1);
run;
/* Per-P_final summary of WORK.PRACTICE: lowest and highest P_0_D,
   sum of targetB (responder) and count of non-missing targetB
   (population), listed in ascending P_final order. */
proc sql;
    select
        P_final,
        min(P_0_D)       as Min_score,
        max(P_0_D)       as Max_score,
        sum(1 * targetB) as responder,
        count(targetB)   as population
    from practice
    group by P_final
    order by P_final;
quit;