/*---------------------------------------------------- ex2_prep.sas written by G Raab 04/04 data.randall is combined random adult data from 4 years of Scottish Household Survey -----------------------------------------------------*/ libname data 'C:\Documents and Settings\gillian raab\My Documents\aprojects\charlotte2\data'; libname exemp2 "C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\ex2datafiles\data"; options fmtsearch=(formats formats ); data rainternet; * get survey years 2001 and 2002 only and keep variables needed; set data.randall ; if dyear in (2002,2001) ; keep rc5 rc6: rc7e rc7g hhtype groupinc ind_wt mosaic00 rclass council intuse quarter rseg rsoc shs_6cla dyear tenure uniqid ha5 ha6 ha7 intuse ; if rc5^=. then intuse=1; else intuse=0; * THIS BETTER THAN HHOLD q; if shs_6cla=-1 then shs_6cla=.; if annetinc=. or annetinc<0 then groupinc=1; else if annetinc<10000 then groupinc=2; else if annetinc<20000 then groupinc=3; else if annetinc<30000 then groupinc=4; else if annetinc<50000 then groupinc=5; else groupinc=6; run; proc sort data=data.psu; by uniqid;run; data rainternet2; * merge with file of PSUs and add noise to weights; ; merge rainternet (in=ok) data.psu ; by uniqid; if ok; seed=2314; IND_WT=ind_WT*(0.1*rannor(seed)+1); run; /*--------------------------------------------------------------- now code to scramble UNIQID values proc contents data=ids; proc contents data=rainternet;run; ---------------------------------------------------------------------*/ data ids; *now code to scramble UNIQID values ; set rainternet; seed=4253451; rand=ranuni(seed); nid=uniqid; keep rand nid; run; proc sort data=ids; by rand; run; data rainternet3;* set variable PSU to cluster or id for each type of area; merge rainternet2 ids; uniqid=nid; drop nid; if mosaic00='Co' then mosaic00='C'; * corrections to mosaic codes used to identify strata; if mosaic00=' ' then mosaic00='Z'; * blanks replaced by Z; /*-------------------------------------------------------------------------------- identify LAs where sampling is clustered -------------------------------------------------------------------------------------*/ if council in (100,180,200,220,230,260,280,350,395) then clust=0; else clust=1; /*------------------------------------------------------------------------ psus are UNIQid in unclustered areas ----------------------------------------------------------------------------*/ if clust=0 then psu=put(uniqid,12.0); run; proc sort data=rainternet3; by council descending psu ;run; data rainternet4; set rainternet3; /*---------------------------------------------------------------------------- replace blank PSUs in clustered areas with previous in council list proc sort; by oldpsu council; run; ----------------------------------------------------------------------------------*/ retain oldpsu cold; if _n_=1 then oldpsu='xxxxxxxxxxxx'; if clust=1 then do; if psu='' then do; put oldpsu= cold= psu= council=;psu=oldpsu; end; else do; oldpsu=psu;cold=council; end; end; format cold council.; drop oldpsu cold; run; /*---------------------------------------------------------------------------- * now get commonest mosaic code for each psu ; --------------------------------------------------------------------------------*/ proc sort data=rainternet4; by council psu mosaic00; run; data temp (keep=psu mos);* get commonest Mosaic for PSU this used to stratify; set rainternet4; by council psu mosaic00; attrib mos label='mosaic for ED'; if _n_=1 then next=1; if first.psu then do; largestn=0 ;total=0; end; retain largestn mos n total next; if first.mosaic00 then n=0; n=n+1;total=total+1; if last.mosaic00 then do; if n>largestn and mosaic00^='' then do; mos=mosaic00; largestn=n; end; end; if last.psu then output; run; proc sort data=rainternet4; by psu; run; proc sort data=temp; by psu; run; data exemp2.ex2_nc; * get strata merge back with original data and save as file without corrections; merge temp(in=intemp) rainternet4 (in=inra); by psu; if clust=1 then stratum=put(council,3.0)||mos; else stratum=put(council,3.0)|| mosaic00; if inra; keep uniqid psu stratum intuse shs_6cla rc5 age sex rc7g rc7e empsta groupinc; run; data rainternet5; * code from now on sorts out data problems; merge temp(in=intemp) rainternet4 (in=inra); by psu; if inra; /* ------------------------------------------------------------------- corrections when split PSUs accross LAs ---------------------------------------------------------------------------*/ if psu in('089001A3', '089019D4') then council=370; if psu in ( '266001I4' , '266019D3') then council=190; /*------------------------------------------------------------------------ now sorting lonely PSUs ------------------------------------------------------------------------------*/ if mos in (' ','Z') then mos='K'; * pool odds and end categories; if mosaic00 in (' ','Z','X') then mosaic00='K'; if council in (150,170,190,120,210,290,310,320,360,370) and mos in ('I') then mos='A'; if council in (110,120,130,290,355,400 ) and mos in ('F') then mos='E'; if council in (100,130,310 ) and mos in ('H') then mos='E'; if council in (200,210 ,290,310,320,370,400 ) and mos in ('K') then mos='A'; if council in (120,210,235,270,290,320,330,380,390,400) and mos in ('X') then mos='A'; if council in (130 ) and mos in ('B') then mos='I'; if council in (130,340,360 ) and mos in ('D') then mos='A'; if council in (330 ) and mos in ('I','B') then mos='A'; if council in (110,360 ) and mos in ('G') then mos='A'; if clust=1 then stratum=put(council,3.0)||mos; * redo strata; else stratum=put(council,3.0)|| mosaic00; if council=100 and stratum='100H' then stratum='100E'; * one last lonely PSUs in non clustered areas; run; /*----------------------------------------------- identify lonely PSUs - should be none left now! ------------------------------------------------------*/ proc sort data=rainternet5; by council stratum psu ; run; data check; set rainternet5;* keep one unit per PSU; by council stratum psu; if first.psu; keep uniqid council stratum psu; run; proc freq data=check ; table stratum; run; data check ; set check; by council stratum psu; * lonely ones only should be empty; if last.stratum and first.stratum; run; proc freq data=check ; table stratum; run; /*-------------------------------------------------------------------------------- now check that PSUs don't cross strata and that output file of PSUs should be OK now -----------------------------------------------------------------------------------*/ proc sort data=rainternet5; by stratum psu; run; data strata (keep=uniqid council psu stratum); set rainternet5; by stratum psu ; if last.stratum and ^last.psu then put 'Warning PSU crossing startum' uniqid= council= psu= stratum=; if last.psu then output; run; /*----------------------------------------------------------------------- final tidy dropping vars not needed --------------------------------------------------------------------*/ data exemp2.ex2; set rainternet5; run; proc gplot; plot ind_wt*ind_wtx; run; libname exemp2 'C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\web\exemp2/data'; data exemp2.ex2;* copy main data set to peas directory; set rainternet0; age=ha5; sex=ha6;emp_sta=ha7; keep uniqid age sex council RC5 emp_sta stratum ind_wt intuse rc5 psu groupinc shs_6cla clust rc7e rc7g; /* --------------------------------------------------------------------------------- remove formats and read in seperatly to make it easier for other systems program ex2_formats has this ------------------------------------------------------------------------------------*/ format emp_sta sex council rc: shs_6CLA 3.0 annetinc 12.0; attrib intuse label='Whether person uses the --internet'; drop ha5 ha6 ha7; run; proc contents data=exemp2.ex2 ; run; * now make an export file to go to Stata and R with no variable labels on it (not sure why this does not work); * variable labels will be added in each of the other programs; libname xp xport 'C:\Documents and Settings\gillian raab\My Documents\aprojects\ex2datafiles/data/ex2.xpt'; * export file for Stata; proc copy in=exemp2 out=xp; select ex2; run; proc copy out=work in=xp; select ex2_nc; run;