/****************************************************************************************************************************************
Imputation SAS code for Exemplar 6

NOTE that you need to run the program ex6formats.sas before you can open the data files here
*** Do this now ****

This program uses the IVEWARE macros that can be installed from
http://www.isr.umich.edu/src/smp/ive/

ALSO to run the IVEWARE software you need to close all advanced editor windows and use
only the old program editor.

CHANGE the libname to where your data sets are stored

proc contents data=ex6.ex6det;run;
****************************************************************************************************************************************/
libname ex6 'C:\Documents and Settings\gillian raab\My Documents\aprojects\peaslaptop\ex6datafiles\data' ;

/*-------------------------------------------------------------------
first a small analysis using only offending prevalence
-----------------------------------------------------------------------*/
data prev;
    set ex6.ex6;
    keep dprev: caseid gender;
run;

/*--------------------------- get observed means---------------------*/
proc means data=ex6.ex6;   /* means for observed values */
    var dp: dvo: dva: ;
    class gender;
run;

/*--------------------------- get observed means complete cases---------------------*/
proc means data=ex6.ex6;   /* means restricted to cases with all six prevalence values observed */
    var dp: ;
    where dprev1^=. and dprev2^=. and dprev3^=. and dprev4^=. and dprev5^=. and dprev6^=.;
    class gender;
run;

/*---------------------------------------------------------------
now a repeated measures analysis that uses all the data and allows
for correlations between time periods.
First the data needs to be restructured into a temporary data set
to have one case per follow up period
----------------------------------------------------------------*/
data formixed;   /* restructure wide -> long for proc mixed */
    set prev;
    array dp[6] Dprev1-Dprev6;
    do time=1 to 6;
        dprev= dp[time];
        output;
    end;
    keep time dprev caseid gender;
run;

proc mixed data=formixed method=ml covtest;
    class time gender ;
    model Dprev = gender*time time / s;
    /* 6TH SUBJECT HAS COMPLETE DATA so use this for an estimate of
       the variance covariance matrix */
    repeated /type=un subject=caseid R=6 Rcorr=6;
    lsmeans gender*time;
run;

/*------------------------------------------------------------
now MICE imputation using IVEWARE with just the 7 binary variables
------------------------------------------------------------------*/
%impute(name=ivesetup2, dir="C:" ,setup=new);
title Multiple imputation prevalence only;
datain prev;
dataout ex6.previmp ALL;
default categorical;
transfer caseid;
iterations 10;
multiples 4;
print all;
seed 2001;
run;

/*------------------------ look at results--------------------------------*/
data ex6.PREVIMP;   /* save it just in case, renaming the imputation index */
    set ex6.PREVimp;
    rename _mult_=_imputation_;
run;

proc sort data=ex6.previmp;
    by _imputation_;
run;

/*-----------------------------------------------------------------------------------
POST IMPUTATION PROCEDURES
The imputation variable needs to be called _imputation_ (renamed above).
These are always 2 stage procedures
-------------------------------------------------------------------------------------*/
PROC CORR DATA=ex6.PREVIMP COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=1;   /* boys */
run;

PROC MIANALYZE data=outcov ;
    VAR dprev1-dprev6 ;
RUN;

PROC CORR DATA=ex6.PREVIMP COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=2;   /* girls */
run;

PROC MIANALYZE data=outcov ;
    VAR dprev1-dprev6 ;
RUN;

/*---------------------now assuming normal distribution-------------------*/
/*---------------------first with no limits-------------------------------*/
proc mi data=prev OUT=IMPUTEDmi1 nimpute=10;   /* imputation, prevalence only, no limits */
    VAR dprev1-dprev6 ;
    mcmc nbiter =200 niter=100 timeplot acfplot;
RUN;

/*------------------------ look at results in explorer now--------------------------*/
proc means data=imputedmi1 ;   /* get univariate means, all respondents */
    var dprev1-dprev6 ;
    class gender;
run;

/*------------------------------------------------
and run the post imputation procedure to get means and their SEs
----------------------------------------------------*/
PROC CORR DATA=imputedmi1 COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=1;   /* boys */
run;

PROC MIANALYZE data=outcov ;
    title 'Post imputation means imputed from scores boys';
    VAR dprev1-dprev6 ;
RUN;

/*----------------- now make the imputed values back to 0/1 using 0.5 as break point--------------*/
data imputedmi1b;
    set imputedmi1;
    array dp dprev1-dprev6;
    do i= 1 to 6;
        if dp[i]<0.5 then dp[i]=0;
        else dp[i]=1;
    end;
run;

/*------------------ table of results-----------------------*/
proc means data=imputedmi1b ;   /* get univariate means, all respondents */
    var dprev1-dprev6 ;
    class gender;
run ;

/*--------------------- now with limits-------------------------------*/
/* NOTE: this overwrites IMPUTEDmi1 from the unbounded run above. */
proc mi data=prev OUT=IMPUTEDmi1 nimpute=10
        minimum= 0 0 0 0 0 0
        maximum= 1 1 1 1 1 1
        round= 1 1 1 1 1 1;   /* imputation, prevalence only, WITH limits and rounding */
    VAR dprev1-dprev6 ;
    mcmc nbiter =200 niter=100 timeplot acfplot;
RUN;

/*------------------------ look at results in explorer now--------------------------*/
/*------------------ table of results-----------------------*/
proc means data=imputedmi1 ;   /* get univariate means, all respondents */
    var dprev1-dprev6 ;
    class gender;
run;

/*-------------------------------------------------------------------------
now the larger data set imputed with IVEWARE
------------------------------------------------------------------------------*/
/*-------------------------------------------------------------------
Need to run ex6formats.sas before the next step.
Preliminary trials showed errors. This only worked when maximum predictors
was restricted and numbers of iterations of logistic models restricted.
Upper limits were set on the vol and var variables because the results gave
some almost infinite (>1e16) values for the imputed data in a few cases
without this.
Plan to do something else (go for a run? read a good book? sleep?) while
this runs. This will take a while.
------------------------------------------------------------------*/
%impute(name=ivesetup2, dir="C:" ,setup=new);
title Multiple imputation all variables;
datain ex6.ex6;
dataout ivescore ALL;
default categorical;
count dvar1 dvar2 dvar3 dvar4 dvar5 dvar6;
continuous dvol1 dvol2 dvol3 dvol4 dvol5 dvol6;
bounds dvol1 (>0 <400) dvol2 (>0 <400) dvol3 (>0 <400) dvol4 (>0 <400) dvol5 (>0 <400) dvol6 (>0 <400) DVAR1(<16) DVAR2(<17) DVAR3(<19) DVAR4(<19) DVAR5(<18) DVAR6(<19) ;
restrict dvol1 (dprev1=1) dvol2(dprev2=1) dvol3 (dprev3=1) dvol4 (dprev4=1) dvol5 (dprev5=1) dvol6 (dprev6=1) dvar1 (dprev1=1) dvar2(dprev2=1) dvar3 (dprev3=1) dvar4 (dprev4=1) dvar5 (dprev5=1) dvar6 (dprev6=1) ;
transfer caseid;
maxlogi 50;
iterations 5;
multiples 5;
seed 23571;
run;

/*---------------------------------------------------
CHANGE NAME OF _mult_ TO _imputation_ IN ORDER TO USE
proc mianalyze AND CARRY OUT POST-IMPUTATION ROUTINES
-----------------------------------------------------*/
data ex6.ivesc;   /* save it just in case */
    set ivescore;
    /* The rename must sit inside the DATA step: placed after RUN it is an
       invalid global statement and _imputation_ would never be created,
       breaking every BY _IMPUTATION_ step below. */
    rename _mult_=_imputation_;
run;

proc sort data=ex6.ivesc;
    by _imputation_;
run;

/*-------------- CHECK FOR ANY EXTREME VALUES--------------*/
PROC means DATA=EX6.ivesc ;
    VAR dvol1-dvol6 dvar1-dvar6 ;
    BY _IMPUTATION_;
RUN;

/*------------ before using this other variables should be checked----*/
PROC CORR DATA=EX6.ivesc COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=2;   /* girls */
run;

PROC MIANALYZE data=outcov ;
    title 'Post imputation means imputed from scores girls';
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
RUN;

PROC CORR DATA=EX6.ivesc COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=1;   /* boys */
run;

PROC MIANALYZE data=outcov ;
    title 'Post imputation means imputed from scores boys';
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
RUN;

/*---------------------logistic regression with 3 predictors-----------*/
proc logistic data=ex6.ivesc outest=outreg covout descending noprint;
    by _imputation_;
    class gender szindep sector/param=ref;
    model dprev6= gender szindep sector ;
run;

/* use this to find names of contrasts */
proc contents data= outreg short;
run;

PROC MIANALYZE data=outreg edf=4325 ;
    var GENDERFemale SZINDEPManual_high_depr sectorBehavioural sectorIndependent sectorSpecial ;
run;

/*--We can compare with the complete data analysis here---------*/
/* param=ref is required so the coefficients use the same reference-cell
   coding as the imputed-data models above; the default effect coding
   would give estimates that are not directly comparable. */
proc logistic data=ex6.ex6 descending ;
    class gender szindep sector/param=ref;
    model dprev6= gender szindep sector ;
run;

/*---------------------now assuming normal distribution-------------------*/
/*---------------------first with no limits-------------------------------*/
proc contents data=ex6.ex6 short position;
run;

proc mi data=ex6.ex6 OUT=IMPUTEDmi nimpute=10;   /* imputation of all variables, no limits */
    title 'imputing means as if normal';
    VAR SASMKY80 SBSMKY80 SCSMKY80 SDSMKY80 SESMKY80 SFSMKY80
        RAALCY80 RBALCY80 RCALCY80 RDALCY80 REALCY80 RFALCY80
        GENDER ETHGP HZRESPRE SZINDEP SZABS04 HZEVREFI YZLEAVE sector
        dvar1 dvol1 dvar2 dvol2 dvar3 dvol3 dvar4 dvol4 dvar5 dvol5 dvar6 dvol6
        drgcode1 drgcode2 drgcode3 drgcode4 drgcode5 drgcode6 ;
    mcmc nbiter =200 niter=100 timeplot acfplot;
RUN;

/*------------------------ look at results in explorer now--------------------------*/
proc means data=imputedmi ;   /* checking range of values */
    var dp: dv: ;
run;

proc freq data=imputedmi;   /* check it */
    table dvol3*dvar3 / norow nocol nopercent;
run;

/*-----------------recalculate prevalence AND MAKE VOL VAR CONSISTENT--------------*/
data ex6.NORMsc;
    set imputedmi;
    array dp dprev1-dprev6;
    array dvar dvar1-dvar6;
    array dvol dvol1-dvol6;
    do i= 1 to 6;
        /* imputed values are continuous: round to whole numbers first */
        dvar[i]=round(dvAR[i]);
        dVOL[i]=round(dVOL[i]) ;
        /* prevalence is re-derived from the rounded variety score */
        dp[i]=0;
        if dvar[i]>0 then dp[i]=1;
        /* truncate negative imputations at zero */
        if dVAR[i]<0 then dVAR[i]=0;
        if dVOL[i]<0 then dVOL[i]=0;
        /* keep volume consistent with variety: none -> 0, some -> at least 1 */
        IF DVAR[I]=0 THEN DVOL[I]=0;
        IF DVar[I]>0 AND DVOL[I]=0 THEN DVOL[I]=1;
    end;
run;

/*----------------------- this would need to be done for all others too-------------*/
proc freq data=ex6.NORMsc;   /* check it */
    table dvol3*dvar3 / norow nocol nopercent;
run;

/*------------------ table of results-----------------------*/
proc means data=ex6.NORMsc ;   /* get univariate means by gender */
    var dp: dvo: dva:;
    class gender;
run;

PROC CORR DATA=EX6.NORMsc COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=1;   /* boys */
run;

PROC MIANALYZE data=outcov ;
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
RUN;

PROC CORR DATA=EX6.NORMsc COV OUT=OUTCOV(TYPE=COV) NOCORR noprint ;
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
    BY _IMPUTATION_ ;
    WHERE GENDER=2;   /* girls */
run;

PROC MIANALYZE data=outcov ;
    VAR dprev1-dprev6 dvol1-dvol6 dvar1-dvar6 ;
RUN;

/*---------------------logistic regression with 3 predictors-----------*/
proc logistic data=ex6.NORMsc outest=outreg covout descending noprint;
    by _imputation_;
    class gender szindep sector /param=ref;
    model dprev6= gender szindep sector ;
run;

/* use this to find names of contrasts */
proc contents data= outreg short;
run;

PROC MIANALYZE data=outreg edf=4325 ;
    var GENDERFemale SZINDEPManual_high_depr sectorBehavioural sectorIndependent sectorSpecial ;
run;

/*--We can compare with the complete data analysis here---------*/
proc logistic data=ex6.ex6 descending ;
    class gender szindep sector/ param=ref;
    model dprev6= gender szindep sector ;
run;