/*---------------------------------------------------
 ex5.sas

 proc contents data=exemp5.ex5; run;
 proc means data=exemp5.ex5; var weight; run;

 G Raab Nov/Dec 04
--------------------------------------------------------*/

/* First set up the libname - change to the directory where your data are held. */
libname exemp5 'C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\ex5datafiles\data';

/*----------------------------------------------------------
 Now open and run the program ex5_formats.sas.
 If you don't, the programs below will fail because they
 don't have access to the correct formats.
-----------------------------------------------------------*/

/* All printed results from here to the final "ods html close" go to this file. */
ods html file='c:/ex52.htm';

/* First look at the unweighted proportions ... */
proc freq data=exemp5.ex5;
  table q85a q85b genhelf;
  format q85a q85b druguse. genhelf genhelf.;
run;

/* ... and at the weighted means with design-based standard errors. */
proc surveymeans data=exemp5.ex5;
  var sinc sacc genhelf;
  weight weight;
run;

/*-----------------------------------------------------------
 Rescaling weights for this sub-sample to mean 1.00
 to allow calculation of design effects in SAS for
 continuous variables.
 The comments below explain how to do this from the SAS results.

 The mean weight is computed from the data rather than typed in
 (it was previously hard-coded as 1.20457), so the rescaled
 weights have mean 1 at full precision and the step keeps working
 if the data or the sub-sample change.
----------------------------------------------------------*/
proc means data=exemp5.ex5 noprint;   /* noprint: nothing extra is written to the HTML output */
  var weight;
  output out=_wtmean mean=meanwt;     /* one-row data set holding the mean weight */
run;

data foranal;
  /* Read the single mean-weight row once; variables read by SET are
     retained, so meanwt is available on every observation. */
  if _n_ = 1 then set _wtmean(keep=meanwt);
  set exemp5.ex5;
  weight = weight / meanwt;
  wtsq   = weight**2;
  drop meanwt;
run;

proc means data=foranal;
  /*-----------------------------------------------------------------------
   Mean of wtsq: checks what the design effect would be if the weights
   had no relationship to the variable.
   The answer is the mean of wtsq here, since the rescaled weights have
   mean 1.000.
  ------------------------------------------------------------------------*/
  var weight wtsq;
run;

proc means std data=foranal;
  var sinc sacc genhelf;
  weight weight;
  output stderr=sincse saccse genhelfse out=sessrs;
run;
/*---------------------------------------------------------------------------------------
 This stores estimated standard errors under s.r.s. for the variables of interest.
 It only gives the right answer if the weights have mean exactly 1.000
 for this subgroup of the main data (see the rescaling step above).
-----------------------------------------------------------------------------------------*/

/* Now get standard errors for the probability-weighted means. */
proc surveymeans stderr data=foranal;
  weight weight;
  var sinc sacc genhelf;
  ods output statistics=ses;
run;

/*--------------------------------------------------------------------------------
 Now merge them to calculate design factors from the ratio of the complex se
 to the s.r.s. se, and square them to get the design effects.

 NOTE(review): this step expects the ODS data set "ses" to contain the
 variables sinc_stderr, sacc_stderr and genhelf_stderr. The layout of the
 Statistics table may differ between SAS releases - confirm on your version.
------------------------------------------------------------------*/
data temp;                       /* results in this file */
  merge sessrs ses;
  /* design factors: complex se / s.r.s. se */
  dfsinc    = sinc_stderr    / sincse;
  dfsacc    = sacc_stderr    / saccse;
  dfgenhelf = genhelf_stderr / genhelfse;
  /* design effects: squared design factors */
  desinc    = dfsinc**2;
  desacc    = dfsacc**2;
  degenhelf = dfgenhelf**2;
  keep df: de: ;
run;

proc print data=temp;            /* can be printed out */
run;

/*----------------------------------------------------------------------
 Now prediction of general health from other variables.
 Model building is done with surveyreg;
 intermediate models fitted are commented out.
----------------------------------------------------------------*/
proc surveyreg data=foranal;
  class living q85a q85b;
  format living living. q85a q85b q85a.;
  weight weight;
  /* model genhelf=age sinc sacc living;   start with these demographics - only sinc matters */
  /* model genhelf=sinc q85a q85b;         now add drug use - both matter */
  model genhelf = sinc q85a q85b / solution;   /* check coefficients for class variables of drug use */
run;

/*-------------------------------------------------------------------
 These suggest that the difference is between
   (never and once or twice users)   health best
   (daily, weekly or monthly)        health worst
   (more than one month ago)         in the middle
 So the next code rescores this so that 1 is monthly or more,
 0 is never / once or twice, and 0.5 is more than a month ago.
--------------------------------------------------------------------------*/
data foranal;                    /* recode the scores for cannabis and amphetamines */
  set foranal;

  /* cannabis: any code outside 1-6 leaves canscore unset */
  if      q85a in (1, 2)    then canscore = 0;
  else if q85a = 6          then canscore = 0.5;
  else if q85a in (3, 4, 5) then canscore = 1;

  /* amphetamines: same scoring as for cannabis */
  if      q85b in (1, 2)    then ampscore = 0;
  else if q85b = 6          then ampscore = 0.5;
  else if q85b in (3, 4, 5) then ampscore = 1;
run;

proc surveyreg data=foranal;
  /* class living q85a q85b qual; */
  /* NOTE(review): qual is not created anywhere in this script - presumably it
     comes from exemp5.ex5; confirm that the variable and the qual. format exist. */
  format q85a q85b q85a. qual qual.;
  weight weight;
  model genhelf = canscore / solution;   /* alternative: canscore ampscore / solution */
  /* as expected still strong */
run;

/*------------------------------------------------------------------
 Now compare this model with weighted and unweighted ordinary
 regressions.
--------------------------------------------------------------------*/
proc glm data=foranal;           /* here coefficients the same but p-values different */
  /* NOTE(review): gweight is not created in this script; the survey procedures
     above use "weight". Confirm that gweight exists in exemp5.ex5 and is the
     intended weight for this comparison. */
  weight gweight;
  model genhelf = canscore / solution;   /* alternative: add ampscore */
run;

proc glm data=foranal;           /* now unweighted - both different */
  /* NOTE(review): this model also includes sinc and ampscore, unlike the
     canscore-only weighted fits above, so it is not a like-for-like
     comparison - confirm this is intended. */
  model genhelf = sinc canscore ampscore / solution;
run;

ods html close;