/*---------------------------------------------------- ex2_nc_prep.sas written by G Raab 04/04 data.randall is combined random adult data from 4 years of Scottish Household Survey -----------------------------------------------------*/ libname data 'C:\Documents and Settings\gillian raab\My Documents\aprojects\charlotte2\data'; libname formats 'C:\Documents and Settings\gillian raab\My Documents\aprojects\charlotte2\data\formats'; options fmtsearch=(formats formats ); data rainternet; * get survey years 2001 and 2002 only and keep variables needed; set data.randall ; if dyear in (2002,2001) ; keep rc5 rc6: rc7e rc7g hhtype groupinc ind_wt mosaic00 rclass council intuse intuse quarter rseg rsoc shs_6cla dyear tenure uniqid ha5 ha6 ha7 intuse ; if rc5^=. then intuse=1; else intuse=0; * THIS BETTER THAN HHOLD q; if shs_6cla=-1 then shs_6cla=.; if annetinc=. or annetinc<0 then groupinc=1; else if annetinc<10000 then groupinc=2; else if annetinc<20000 then groupinc=3; else if annetinc<30000 then groupinc=4; else if annetinc<50000 then groupinc=5; else groupinc=6; run; proc sort data=data.psu; by uniqid;run; data rainternet2; * merge with file of PSUs and identify ; merge rainternet (in=ok) data.psu ; by uniqid; if ok; run; /*--------------------------------------------------------------- now code to scramble UNIQID values proc contents data=ids; proc contents data=rainternet;run; ---------------------------------------------------------------------*/ data ids; set rainternet; seed=today();rand=ranuni(seed); nid=uniqid;keep rand nid; run; proc sort data=ids; by rand; run; data rainternet3; merge rainternet2 ids; uniqid=nid; drop nid; /*-------------------------------------------------------------------------------- identify LAs where sampling is clustered -------------------------------------------------------------------------------------*/ if mosaic00='Co' then mosaic00='C'; * corrections to mosaic codes; if mosaic00=' ' then mosaic00='Z'; * blanks replaced by Z; /* ------------------------------------------------------------------- corrections when split PSUs accross LAs ---------------------------------------------------------------------------*/ /*if psu in('089001A3', '089019D4') then council=370; if psu in ( '266001I4' , '266019D3') then council=190;*/ if council in (100,180,200,220,230,260,280,350,395) then clust=0; else clust=1; /*------------------------------------------------------------------------ psus are UNIQid in unclustered areas ----------------------------------------------------------------------------*/ if clust=0 then psu=put(uniqid,12.0); proc sort data=rainternet3; by council descending psu ;run; data rainternet4; set rainternet3; /*---------------------------------------------------------------------------- replace blank PSUs in clustered areas with previous in council list proc sort; by oldpsu council; run; ----------------------------------------------------------------------------------*/ retain oldpsu cold; if _n_=1 then oldpsu='xxxxxxxxxxxx'; if clust=1 then do; if psu='' then do; put oldpsu= cold= psu= council=;psu=oldpsu; end; else do; oldpsu=psu;cold=council; end; end; drop oldpsu cold; run; /*---------------------------------------------------------------------------- * now get commonest mosaic code for each psu ; --------------------------------------------------------------------------------*/ proc sort data=rainternet4; by council psu mosaic00; run; data temp (keep=psu mos);* get commonest Mosaic for PSU; set rainternet4; by council psu mosaic00; attrib mos label='mosaic for ED'; if _n_=1 then next=1; if first.psu then do; largestn=0 ;total=0; end; retain largestn mos n total next; if first.mosaic00 then n=0; n=n+1;total=total+1; if last.mosaic00 then do; if n>largestn and mosaic00^='' then do; mos=mosaic00; largestn=n; end; end; if last.psu then output; run; proc sort data=rainternet4; by psu; run; proc sort data=temp; by psu; run; data rainternet5; * and merge back with original data; merge temp(in=intemp) rainternet4 (in=inra); by psu; if inra; /* if mos in (' ','Z') then mos='K'; * pool odds and end categories; if mosaic00 in (' ','Z','X') then mosaic00='K'; if council in (150,170,190,120,210,290,310,320,360,370) and mos in ('I') then mos='A'; if council in (110,120,130,290,355,400 ) and mos in ('F') then mos='E'; if council in (100,130,310 ) and mos in ('H') then mos='E'; if council in (200,210 ,290,310,320,370,400 ) and mos in ('K') then mos='A'; if council in (120,210,235,270,290,320,330,380,390,400) and mos in ('X') then mos='A'; if council in (130 ) and mos in ('B') then mos='I'; if council in (130,340,360 ) and mos in ('D') then mos='A'; if council in (330 ) and mos in ('I','B') then mos='A'; if council in (110,360 ) and mos in ('G') then mos='A'; */ if clust=1 then stratum=put(council,3.0)||mos; else stratum=put(council,3.0)|| mosaic00; *if council=100 and stratum='100H' then stratum='100E'; * one last lonely PSUs in non clustered areas; run; /*----------------------------------------------------------------------- final tidy dropping vars not needed and adding noise to weights and making weights still sum to sample size --------------------------------------------------------------------*/ data rainternet0; set rainternet5; seed=today(); IND_WT=ind_WT*(0.1*rannor(seed)+1)*28685/28642.33; run; proc means; var ind_wt;run; libname exemp2 'C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\web\exemp2/data'; data exemp2.ex2_nc;* copy main data set to peas directory; set rainternet0; age=ha5; sex=ha6;emp_sta=ha7; keep uniqid age sex council RC5 emp_sta stratum ind_wt intuse rc5 psu groupinc shs_6cla clust rc7e rc7g; /* --------------------------------------------------------------------------------- remove formats and read in seperatly to make it easier for other systems program ex2_formats has this ------------------------------------------------------------------------------------*/ format emp_sta sex council rc: shs_6CLA 3.0 annetinc 12.0; attrib intuse label='Whether person uses the --internet'; drop ha5 ha6 ha7; run; proc contents data=exemp2.ex2_nc ; run; * now make an export file to go to Stata with no variable labels on it (not sure why this does not work); * variable labels will be added in each of the other programs; libname xp xport 'C:\Documents and Settings\gillian raab\My Documents\aprojects\peas\web\exemp2/data/ex2_nc.xpt'; * export file for Stata; proc copy in=exemp2 out=xp; select ex2_nc; run; proc copy out=exemp2 in=xp; select ex2_nc; run;