/*-----------------------------------------------
 Macro to do survey analyses with s.e.s of percentiles of a
 distribution within groups.

 Gillian Raab May 04
 Use at your own risk - but please let me know of any obvious errors.

 This version does the percentiles (0.05, 0.1, 0.25, 0.5, 0.75, 0.90, 0.95);
 the list could be expanded.

 Works by calculating the s.e. of the weighted percentage estimate
 and mapping this to the distribution.

 Macro variables
   var     - variable of interest
   data    - data set
   group   - variable for grouping
   n       - number of groups
   format  - format statement applied to the grouping variable
             (recommend using a character format, not sure if the
             program works without)
   weight, cluster, strata - survey features as in surveymeans
             (strata and cluster statements are only generated
             when a value is supplied)
   final   - sas name of output file

 Result is an output file with percentiles of the distribution
 for each group, along with
   Low    - value of variable at 1 s.e. below the estimate
   High   - value of variable at 1 s.e. above the estimate
   se     - standard error estimated by half the distance
            between high and low

 To use for just one group, set n=1 and group=one
-------------------------------------------*/
%macro pctilegrps(var=annetinc, data=, group=, n=2, weight=, strata=, cluster=, format=, final=);

  /* Stop before generating any code if a parameter that is substituted
     into mandatory statements is empty. */
  %if %length(&var)=0 or %length(&data)=0 or %length(&group)=0 or %length(&weight)=0 %then %do;
    %put ERROR: pctilegrps needs non-empty var, data, group and weight parameters.;
    %goto finish;
  %end;

  /* Save only the variables needed for the analysis and add the
     constant ONE, used later as a merge key (and as a dummy group). */
  data forpct (keep=&var &weight vweight &group one &cluster &strata);
    set &data;
    &format;
    one=1;
    vweight=&weight;
  run;

  /* Get the sum of weights within each group and save it to MYSTAT. */
  proc surveymeans data=forpct mean std nobs sum;
    %if %length(&strata) %then %do;
    strata &strata;
    %end;
    var &group;
    class &group;
    weight &weight;
    %if %length(&cluster) %then %do;
    cluster &cluster;
    %end;
    ods output statistics=mystat;
  run;

  /* Get weight totals in groups.
     The VARS array must be declared before SUMWT so that _numeric_
     picks up only the columns read from MYSTAT.
     NOTE(review): assumes the ODS table supplies n*5 numeric columns and
     that the 4th of each set of 5 is the weighted sum - confirm against
     the ODS layout of the SAS release in use. */
  data mystat2 (keep=sumwt: one);
    set mystat;
    array vars[&n,5] _numeric_;
    array sumwt[&n];
    do i=1 to &n;
      sumwt[i]=vars[i,4];
    end;
    one=1; /* dummy for merging */
  run;

  /* Sort data by the analysis variable within groups. */
  proc sort data=forpct;
    by &group &var;
  run;

  /* Merge the weight totals back onto the sorted data. */
  data forpct;
    merge forpct mystat2;
    by one;
  run;

  /* Calculate percentile indicators: pct[i] becomes 1 once the cumulative
     weight proportion within the group exceeds p[i]. The arrays are declared
     in this order so that pct1--pct7 are adjacent for the range list below. */
  data pctiles (drop=i);
    set forpct;
    by &group &var;
    array p[7];
    array pct[7];
    array sumwt[&n];
    p1=.05; p2=.1; p3=.25; p4=.5; p5=.75; p6=.9; p7=.95;
    if _n_=1 then do;
      ngp=0;
    end;
    if first.&group then do;
      /* new group: restart the running weight and the indicators */
      cumwt=0;
      ngp=ngp+1;
      do i = 1 to 7;
        pct[i]=0;
      end;
    end;
    cumwt=&weight+cumwt;
    prop=cumwt/sumwt[ngp];
    do i = 1 to 7;
      if prop>p[i] then do;
        pct[i]=1;
      end;
    end;
    retain cumwt ngp pct:;
  run;

  /* Get the indicator means and their s.e.s by domain and save to MYSTATX. */
  proc surveymeans data=pctiles;
    %if %length(&strata) %then %do;
    strata &strata;
    %end;
    var pct1--pct7;
    domain &group;
    %if %length(&cluster) %then %do;
    cluster &cluster;
    %end;
    weight &weight;
    ods output domain=mystatx;
  run;

  /* Drop the grouping variable so it is not counted by _numeric_ below. */
  data mystatx;
    set mystatx;
    drop &group;
  run;

  /* Get s.e.s for the proportions.
     NOTE(review): assumes 7*5 numeric columns per row with the 3rd of each
     set of 5 being the standard error, and that row order matches the group
     order used for NGP - confirm against the ODS layout in use. */
  data mystat2 (keep= se: ngp);
    set mystatx;
    array vars[7,5] _numeric_;
    array se[7];
    do i=1 to 7;
      se[i]=vars[i,3];
    end;
    ngp=_n_;
  run;

  /* Merge the s.e.s back onto the data by group index. */
  data forp;
    merge pctiles mystat2;
    by ngp;
  run;

  proc sort data=forp;
    by ngp &var;
  run;

  /* Now pick up the value of the variable at -1 s.e., at the estimate and
     at +1 s.e. of each proportion. done[i,j] marks that the j-th crossing
     for percentile i has already been output for the current group. */
  data pctl2 (keep=pctile level highlow ngp);
    set forp;
    by &group &var;
    array p[7];
    array sumwt[&n];
    array se[7];
    array done[7,3];
    p1=.05; p2=.1; p3=.25; p4=.5; p5=.75; p6=.9; p7=.95;
    if first.&group then do;
      cumwt=0;
      do i = 1 to 7;
        do j=1 to 3;
          done[i,j]=.;
        end;
      end;
    end;
    cumwt=&weight+cumwt;
    prop=cumwt/sumwt[ngp];
    do i = 1 to 7;
      if prop>p[i]-se[i] and done[i,1]=. then do;
        done[i,1]=1;
        highlow=-1;
        pctile=&var;
        level=p[i];
        output;
      end;
      if prop>p[i] and done[i,2]=. then do;
        done[i,2]=1;
        highlow=0;
        pctile=&var;
        level=p[i];
        output;
      end;
      if prop>p[i]+se[i] and done[i,3]=. then do;
        done[i,3]=1;
        highlow=1;
        pctile=&var;
        level=p[i];
        output;
      end;
    end;
    retain cumwt done:;
  run;

  proc sort data=pctl2;
    by ngp level highlow;
  run;

  /* Prepare the output file: one row per group and level, written when the
     upper (highlow=1) record is reached. A level whose upper crossing was
     never reached in PCTL2 therefore produces no output row. */
  data &final;
    set pctl2;
    by ngp level highlow;
    if highlow=-1 then low=pctile;
    if highlow=0 then pct=pctile;
    if highlow=1 then do;
      high=pctile;
      se=-(low-high)/2;
      output;
    end;
    keep ngp pct se low high level;
    retain low pct high;
  run;

  /* Remove the intermediate data sets; QUIT ends the interactive procedure. */
  proc datasets nolist;
    delete forp forpct mystat mystat2 mystatx pctiles pctl2;
  run;
  quit;

  /* Print the result data set (previously a hard-coded data set name was
     printed). With no final name given, print the most recently created
     data set, which is the one written by the step above. */
  %if %length(&final) %then %do;
  proc print data=&final;
  run;
  %end;
  %else %do;
  proc print;
  run;
  %end;

%finish:
%mend pctilegrps;