############################################### # HOW TO USE THIS FILE # To submit one line at a time just press Ctrl+R and the line # where the pointer is will be sent to R # # To submit more lines highlight them and press Ctrl+R # ######### ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS # # data stored in c:/GR/aprojects/peas/ex1datafiles/data/.Rdata # this file is c:/GR/aprojects/peas/ex1datafiles/program_code/ex1.R # ############################################## # # data are in a data frame frs # to get the names of the variables # names(frs) # # first find the simple mean of the income variable - no weights # mean(frs$HHINC) # # and then the weighted mean - go to the html help file if you want to check how this works # weighted.mean(frs$HHINC,frs$GROSS2) ########################################################################### # The next line makes the plot shown on the web page and also # examines the distribution of the weights ######################################################################## par(omi=c(.1,.1,0,.1))# sets outer margins boxplot(split(frs$GROSS2,frs$ADULTH),col='red',cex.axis=0.7,cex.lab=0.7,las=2,ylab='Grossing-up weight') # # now it is time to try using the survey package # # If you have not already done so you will need to # download the survey package # (>>pakages menu >> install package from cran>> select a mirror near you # and pick the survey pakage from the list) # then to give R access to the survey functions do # library(survey) ############################################################ # # The first step is to create a SURVEY DESIGN OBJECT that # will contain both the data and the information about the design ########################################## frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) summary(frs.des) ############################################ # see the html help file for the function svydesign for more explanation # the ~ sign refers to it being one of the columns of the data # # the R object frs.des now contains all the information about the design # including the data # # But first let us examine the effect of weighting and clustering # on the precision of the mean weekly income # # First get the standard error and design effect for the mean income # allowing for weighting and clustering ################################################################### svymean(~HHINC,design=frs.des,deff=T) ####################################################################### # now reset the design so that only weighting is allowed # for. To do this the PSU must be set to the individual case identifier (SERNUM) ########################################################################### frs.des <- svydesign(id=~SERNUM, weights=~GROSS2,data=frs) svymean(~HHINC,design=frs.des,deff=T) ########################################################## # next we get ready to post-stratify this survey to match the population totals on # council tax band (CTBAND) and tenure (TENTYPE) # read in totals # by using the R function xtabs # to create tables of totals by CTBAND and TENTYPE ########################################################### tab.ctband <- xtabs(GROSS2~CTBAND,data=frs) tab.ctband[1:9]<-c( 24.83 24.62 15.45 11.91 11.96 5.95 3.94 0.45 0.89)*sum(frs$GROSS2)/100 unclass(tab.ctband) tab.tenure <- xtabs(GROSS2~TENURE,data=frs) tab.tenure[1:4]<-c( 62.63 , 21.59, 5.58 , 10.20)*sum(frs$GROSS2)/100 unclass(tab.ctband) # # now we rake the original survey to match these totals # frs.des <- svydesign(id=~PSU, weights=~WEIGHTA,data=frs) frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure)) # # # svymean(~HHINC,design=frs.des,deff=T) ########################################################### # R uses a method called calibration weighting # to get the standard errors for a raked survey # see the help on raking # It is not available for any other program # featured on this site, but SUDAAN does it # An alternative is to use replication methods ################################################################### # There are TWO kinds of survey design # # 1. Using simple methods , as used above # usually for surveys WITHOUT POST-STRATIFICATION # 2. Using replicate weights, usually for surveys with POST-STARTIFICATION # # the next stage is to convert the simple design into one with replicate weights # we use the default options which produce jacknife samples # see html help for as.svrepdesign # Go back to the original design and convert it to a # replicate one and then post-stratify it ####################################################### frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des <- as.svrepdesign(frs.des) frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure)) svymean(~HHINC,design=frs.des,deff=T) ##################################################################### # notice that we have overwritten the previous R object with the new one # this is to save space as each one contains the data and the work space # can get too big for comfort ################################################################# ## ######################################################################################### # NOW GETTING THE PERCENTILES OF THE INCOME DISTRIBUTION # my.svrepquantile(~HHINC,design=frs.des,quantile=c(0.05,0.1,.25,.5,.75,.9,.95)) # # NEED TO USE THIS JUST NOW (source the file myRfunctions.R # go to R package main page for link to it # because the survey package one is not working right # for replicate designs # #svyquantile(~HHINC,design=frs.des,quantile=c(0.05,0.1,.25,.5,.75,.9,.95),ci=T) # #################################################################################### # # INCOME FOR LONE PARENTS # # the next command will give a table of the data by household type # xtabs(~ADULTH+DEPCHLDH,data=frs) frs$LONEP<-0 frs$LONEP[frs$ADULTH==1 & frs$DEPCHLDH>0]<-1 xtabs(~LONEP+DEPCHLDH,data=frs) sum(frs$LONEP) # # you will see that there are just 334 single parent households # we want the mean household income and its standard error # To do this we need to subset the survey (linkto theory) # In R this means creating a new subset design object # Need to reset the design so that the new variable LONEP # is there # frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des.lonep<-subset.survey.design(frs.des,LONEP==1) # # and then apply all the survey functions to this new design # svymean(~HHINC,frs.des.lonep,deff=T) # # as expected single parent incomes are lower than the rest of the population # # Now with poststratification # frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des <- as.svrepdesign(frs.des) frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure)) frs.des.lonep<-subset.survey.design(frs.des,LONEP==1) svymean(~HHINC,frs.des.lonep,deff=T)