/*--------------------------------------------------- ex5wt.sas G Raab /Helen Storkey Nov/Dec 04 Some SAS graphs are plotted by the code here but are not shown in the output file First set up libname to tell sas where data can be found Use yout own directory name --------------------------------------------------------*/ libname exemp5 'C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\ex5datafiles\data'; proc means data=exemp5.ex5wt min max mean sum; * get population and respondent totals as a check; var npop ayra_num ; run; /*------------------------------- non-response modelling----------------------- commented out lines show earlier stages of model building -------------------------------------------------------------------------------*/ proc logistic data=exemp5.ex5wt; class agegrp gender; *model ayra_num/npop = agegrp gender agegrp*gender sacc sinc semp shlth sed shse; ***************** only sacc sinc shse drop others one by one; *model ayra_num/npop = agegrp gender agegrp*gender sacc sinc semp shlth shse; *model ayra_num/npop = agegrp gender agegrp*gender sacc sinc shlth shse; *model ayra_num/npop = agegrp gender agegrp*gender sacc sinc shse; **************** now try interactions of sinc (most important predictor) with age and gender; *model ayra_num/npop = agegrp gender agegrp*gender*sinc sacc sinc shse; *model ayra_num/npop = agegrp gender agegrp*gender agegrp*sinc gender*sinc sacc sinc shse; *model ayra_num/npop = agegrp gender agegrp*gender agegrp*sinc sacc sinc shse; ****************** housing can now be dropped to give final model; model ayra_num/npop = agegrp gender agegrp*gender agegrp*sinc sacc sinc ; run; /*-------------------------------------------------------------- now check linearity of important term sinc by setting it as a grouped variable and also putting in a quadratic term ------------------------------------------------------------------*/ data new; set exemp5.ex5wt; sincl=sinc; sinc2=sinc**2; run; proc univariate data=aapop;* get its percentiles in data zone distribution; output out=temp pctlpre=P_ pctlpts=10 to 100 by 10; var sinc ; run; * this divides sinc into 5 groups with same no of data zones in each; proc format; value sinc low-<5.8=1 5.8-<12.4=2 12.4-<18.3=3 18.3-<26.1=4 26.1-high=5; proc logistic data=new; class agegrp gender sinc; format sinc sinc.; model ayra_num/npop = agegrp gender agegrp*gender agegrp*sincl sacc sinc sincl; * grouped model did not improve on linear one; run; proc logistic data=new; class agegrp gender ; format sinc sinc.; model ayra_num/npop = agegrp gender agegrp*gender agegrp*sincl sacc sinc sinc2; * neither did quadratic term; run; /*---------------------------------------------------------------------- now make data set to add in to examine predictions at all possible combinations of agegroup sex and a point in each of the 5 groups for sinc sacc is held at its mean value because it is of minor importance ------------------------------------------------------------------------*/ data predict (drop=i j);* set predictions at p10..p30...tp p90 (mid points of 5 groups); do agegrp=1 to 13; do i= 1 to 2; if i=1 then gender='M'; if i=2 then gender='F'; do j=1 to 5; if j=1 then sinc=32.3; if j=2 then sinc=21.8; if j=3 then sinc=15.7; if j=4 then sinc=9; if j=5 then sinc=3.3; sacc=0; shse=15; output; end; end; end; run; data new; set exemp5.ex5wt predict; run; proc logistic data=new; * rerun to get predictions from final model above; class agegrp gender ; *rerunning model on data set with prediction cases; model ayra_num/npop = agegrp gender agegrp*gender agegrp*sinc sacc sinc ; output out=pred p=phat; run; data pred;* just keep the ones for prediction with missing outcome; set pred; if npop=.; keep agegrp gender sinc phat ; format gender 1.0 agegrp 2.0; run; * now make plots of predicted values; proc sort data=pred; by gender;run; proc gplot data=pred; goptions reset=(axis, legend, pattern, symbol, title, footnote) norotate hpos=0 vpos=0 htext= ftext= ctext= target= gaccess= gsfmode= ; goptions device=WIN ctext=blue graphrc interpol=join colors=(black red green blue cyan gold brown purple olive orange grey violet); axis1 order=(0 to 0.035 by 0.001) color=blue width=2.0 ; axis2 color=blue order=(0 to 13 by 1) width=2.0 ;* excludes outside this range; symbol1 value=dot colour=; plot phat*agegrp=sinc/haxis=axis2 vaxis=axis1; by gender; run; run; /*--------------- now try quadratic function of age instead of groups---------------*/ proc logistic data=new; class gender ; format sinc sinc.; model ayra_num/npop = agegrp gender agegrp*gender agegrp*sinc agegrp*agegrp agegrp*agegrp*gender agegrp*agegrp*sinc sacc sinc ; output out=pred p=phat; run; data pred2; set pred; if npop=.; * just the ones for prediction; keep agegrp gender sinc phat ; format gender 1.0 agegrp 2.0; run; *----------------- and plot these too; proc sort data=pred2; by gender; run; proc gplot data=pred2; goptions reset=(axis, legend, pattern, symbol, title, footnote) norotate hpos=0 vpos=0 htext= ftext= ctext= target= gaccess= gsfmode= ; goptions device=WIN ctext=blue graphrc interpol=join colors=(black red green blue cyan gold brown purple olive orange grey violet); axis1 order=(0 to 0.035 by 0.001) color=blue width=2.0 ; axis2 color=blue order=(0 to 13 by 1) width=2.0 ;* excludes outside this range; symbol1 value=dot colour=; plot phat*agegrp=sinc/haxis=axis2 vaxis=axis1; by gender; run; run; *----------------------- these look OK will go with this; /*---------------------- now calculate weights only need those with any in sample ready to merge with data file-----------------------------*/ data predx; set pred; if ayra_num>0 and npop^=.; * just keep those in sample; gweight=1/phat; *grossing weight; avweight=gweight/67.76*4165/4147; * rescale to mean 1; keep dz ayra_num npop dz agegrp gender avweight gweight; run; proc means data=predx mean max min sum;* get mean weight for sample; weight ayra_num ; var avweight gweight ; run; /*------------------------------------------------- next step would be to merge weights back into the data file ------------------------------------------------*/