/* You are on page 1 of 4 */

/********************************

* EPID 602 SAMPLE CODE *


* DUE DATE: APRIL 19, 2019 *
* NAME: POOJA PATEL *
********************************/
/* Assign the FINAL libref to the folder that holds the permanent data sets. */
libname final 'M:\EPID602\Final';

/* List the variables of final.mets81 ordered by their position in the data set. */
proc contents data=final.mets81 varnum;
run;

/* Q1a: derive one 0/1 indicator per metabolic-syndrome component. */

/* Abdominal obesity from waist circumference (CINT) with a DP2-specific cutoff:
   DP2=0 is positive above 102, DP2=1 is positive above 88.
   A missing CINT gives obese=. ; rows whose DP2 is neither 0 nor 1 are not
   assigned here.
   NOTE(review): DP2 is used as the sex variable later in this program; which
   code means male/female is not shown here - confirm against the codebook. */
data final.mets81;
    set final.mets81;

    if CINT = . then obese = .;
    else if DP2 = 0 then do;
        if CINT > 102 then obese = 1;
        else obese = 0;
    end;
    else if DP2 = 1 then do;
        if CINT > 88 then obese = 1;
        else obese = 0;
    end;
run;

/* High triglycerides: TG_2005 of 150 or more is positive (1); a value strictly
   between 0 and 150 is negative (0); missing stays missing.
   Values that match no WHEN clause (for example TG_2005 <= 0) are not assigned. */
data final.mets81;
    set final.mets81;

    select;
        when (TG_2005 = .)        trigly = .;
        when (TG_2005 >= 150)     trigly = 1;
        when (0 < TG_2005 < 150)  trigly = 0;
        otherwise;  /* leave trigly untouched */
    end;
run;

/* Low HDL cholesterol with a DP2-specific cutoff: below 40 when DP2=0,
   below 50 when DP2=1. A missing HDL_2005 gives hdl=. ; rows whose DP2 is
   neither 0 nor 1 are not assigned here. */
data final.mets81;
    set final.mets81;

    /* A SAS comparison evaluates to 1 (true) or 0 (false), which is exactly
       the indicator coding wanted. */
    if HDL_2005 = . then hdl = .;
    else if DP2 = 0 then hdl = (HDL_2005 < 40);
    else if DP2 = 1 then hdl = (HDL_2005 < 50);
run;

/* Elevated blood pressure: positive (1) when SYS >= 130 or DIAS >= 85.
   Fix: the original coded every other row as 0, so a participant with a
   missing reading was silently counted as having normal blood pressure.
   Now bp=0 is assigned only when both readings are present and below their
   cutoffs; if no reading is elevated and at least one is missing, bp is
   missing, consistent with the other component indicators. */
data final.mets81;
    set final.mets81;

    /* SAS orders missing below every number, so a missing reading can never
       satisfy a >= comparison; one elevated reading is enough for bp=1. */
    if SYS >= 130 or DIAS >= 85 then bp = 1;
    /* Not elevated on what was measured, but something is unmeasured:
       status cannot be determined. */
    else if nmiss(SYS, DIAS) > 0 then bp = .;
    else bp = 0;
run;

/* Elevated glucose: AL7 of 100 or more is positive (1), anything lower is
   negative (0); a missing AL7 gives glucose=. */
data final.mets81;
    set final.mets81;

    if AL7 = . then glucose = .;
    else glucose = (AL7 >= 100);  /* comparison result is already 1/0 */
run;

/* Q1b: count how many of the five component indicators are positive.
   The SUM function skips missing arguments, so metsum is the total over the
   components that are non-missing (and is missing only if all five are). */
data final.mets81;
    set final.mets81;
    metsum = sum(of obese trigly hdl bp glucose);
run;

/* Eyeball the first 25 rows to check the indicators against their total. */
proc print data=final.mets81 (obs=25);
    var obese trigly hdl bp glucose metsum;
run;

/* Q1c: create the indicator variable METSYN; a participant has the metabolic
   syndrome (metsyn=1) when three or more of the five conditions are present,
   otherwise metsyn=0.
   Fix: SAS orders missing below every number, so the original test
   "metsum<3" was true for a missing metsum and would code such a row as
   metsyn=0. A missing count now yields a missing metsyn, and the three
   outcomes are one mutually exclusive IF/ELSE chain.
   NOTE(review): metsum is built with SUM(), which skips missing components,
   so a participant with some components missing is still classified from the
   ones available - confirm that this is the intended handling. */
data final.mets81;
    set final.mets81;

    if missing(metsum) then metsyn = .;
    else if metsum >= 3 then metsyn = 1;
    else metsyn = 0;
run;

/* Check the first 25 rows: components, their count, and the syndrome flag. */
proc print data=final.mets81 (obs=25);
    var obese trigly hdl bp glucose metsum metsyn;
run;

/* Q1d: prevalence of metabolic syndrome - one-way frequency table of metsyn. */
proc freq data=final.mets81;
    tables metsyn;
run;

/* Q2: Table 1, descriptive statistics. */

/* Continuous variables: inspect each distribution (summary statistics plus
   the plots requested by the PLOT option) before choosing a test. */
proc univariate data=final.mets81 plot;
    var CD1 income TMETS2009 healthy_diet;
run;

/* Age (CD1), continuous and treated as normal: two-sample t-test by metsyn.
   Reading the output: if the folded F-test p-value is above .05 report the
   pooled p-value; if it is below .05 report the Satterthwaite p-value. */
proc ttest data=final.mets81;
    var CD1;
    class metsyn;
run;

/* Income, continuous and not normal: Wilcoxon rank-sum test by metsyn.
   Report the two-sided p-value from the normal approximation in the
   Wilcoxon output. */
proc npar1way data=final.mets81 wilcoxon;
    var income;
    class metsyn;
run;

/* Default summary statistics of income within each metsyn group. */
proc means data=final.mets81;
    var income;
    class metsyn;
run;

/* Physical activity (TMETS2009), continuous and not normal:
   Wilcoxon rank-sum test by metsyn. */
proc npar1way data=final.mets81 wilcoxon;
    var TMETS2009;
    class metsyn;
run;

/* Confidence limits for the mean of TMETS2009 within each metsyn group. */
proc means data=final.mets81 clm;
    var TMETS2009;
    class metsyn;
run;

/* Healthy dietary score, continuous and treated as normal:
   two-sample t-test by metsyn. */
proc ttest data=final.mets81;
    var healthy_diet;
    class metsyn;
run;

/* BINARY VARIABLES */
/* Chi-square test of each binary predictor against METSYN. The grouped
   TABLES request expands to one two-way table per listed variable, in this
   order: sex (DP2), rural, obese, trigly, hdl, bp, glucose. */
proc freq data=final.mets81;
    tables (DP2 rural obese trigly hdl bp glucose) * METSYN / chisq;
run;

/* Q3: two unconditional (unmatched) logistic regression models.
   Outcome:  metsyn (binary)
   Exposure: healthy dietary score (continuous)
   The DESCENDING response option reverses the response ordering so that
   metsyn=1 is the modeled event. */

/* Unadjusted (crude) model. */
proc logistic data=final.mets81;
    model metsyn (descending) = healthy_diet;
run;

/* Model adjusted for age (CD1), sex (DP2) and income. */
proc logistic data=final.mets81;
    model metsyn (descending) = healthy_diet CD1 DP2 income;
run;

/* Q3c: crude model per 10-unit increase in the healthy dietary score.
   Dividing the score by 10 makes one unit of the new variable equal to
   ten units of the original. */
data final.mets81;
    set final.mets81;
    healthy_dietary10 = healthy_diet / 10;
run;

proc logistic data=final.mets81;
    model metsyn (descending) = healthy_dietary10;
run;
/* Q4: two linear regression models.
   Outcome:  waist circumference (CINT, continuous)
   Exposure: dietary score (continuous)
   The models are fit in separate PROC REG steps so that each one uses every
   observation complete for its own variables. */

/* Unadjusted (crude) model. */
proc reg data=final.mets81;
    model CINT = healthy_diet;
run;

/* Model adjusted for age (CD1), sex (DP2) and income. */
proc reg data=final.mets81;
    model CINT = healthy_diet CD1 DP2 income;
run;

/* Q5: log-binomial regression model.
   (Since this is a cross-sectional study, it is more appropriate to report
   prevalence ratios instead of odds ratios.)
   Outcome:  metsyn
   Exposure: healthy dietary score
   Fix: healthy_diet is a continuous effect with a single parameter, so the
   ESTIMATE statement takes exactly one coefficient. The original supplied
   two ("1 -1"), which is the form for contrasting two levels of a CLASS
   effect and does not apply here. With the EXP option the estimate is
   reported exponentiated, i.e. the prevalence ratio per 1-unit increase. */
proc genmod data=final.mets81 descending;
    model METSYN = healthy_diet / dist=binomial link=log;
    estimate 'beta' healthy_diet 1 / exp;
run;

/* Q5b: log-binomial regression model per 10-unit increase in the healthy
   dietary score (healthy_dietary10).
   Fix: healthy_dietary10 is a continuous effect with a single parameter, so
   the ESTIMATE statement takes exactly one coefficient. The original supplied
   two ("1 -1"), which is the form for contrasting two levels of a CLASS
   effect and does not apply here. With the EXP option the estimate is
   reported exponentiated, i.e. the prevalence ratio per 10-unit increase. */
proc genmod data=final.mets81 descending;
    model METSYN = healthy_dietary10 / dist=binomial link=log;
    estimate 'beta' healthy_dietary10 1 / exp;
run;

/* Q6: two logistic regression models that account for the sampling design:
   participants were selected from 30 clusters, identified by the variable
   named cluster. metsyn=1 is the modeled event. */

/* Unadjusted (crude) model. */
proc surveylogistic data=final.mets81;
    cluster cluster;
    model METSYN (event='1') = healthy_diet;
run;

/* Model adjusted for age (CD1), sex (DP2) and income. */
proc surveylogistic data=final.mets81;
    cluster cluster;
    model METSYN (event='1') = healthy_diet CD1 DP2 income;
run;

/*****************THE END***************************/

/* You might also like */