# HOW TO USE THIS FILE
# This is an HTML version of the R script file ex1.R
# It is intended to show you the code and to allow links.
# NOT to use interactively to run the code
# Pasting the  into R might work, but it might not
# to view the ouptut from these commands in html go to R results page
red stuff (R commands)

#########  ANY TEXT AFTER THE # CHARACTER (here shown black)
#             ARE TREATED AS COMMENTS BY R

Links in this page
Mean income with different design assumptions
Raking to match Scottish totals 
Jacknife estimation for mean
Subgroup lone parents
 Percentiles

#
# data are in a data frame frs
# to get the names of the variables
#
names(frs)
#
# first find the simple mean of the income variable - no weights
#
mean(frs$HHINC)
#
# and then the weighted mean - go to the html help file if you want to check how this works
#
weighted.mean(frs$HHINC,frs$GROSS2)
###########################################################################
# The next line makes the plot shown on the web page and also
# examines the distribution of the weights
########################################################################
par(omi=c(.1,.1,0,.1))# sets outer margins
boxplot(split(frs$GROSS2,frs$ADULTH),col='red',cex.axis=0.7,cex.lab=0.7,las=2,ylab='Grossing-up weight')
#
# now it is time to try using the survey package
# to give R access to the survey functions do
#

 back to top uparrow
library(survey)
############################################################
#
# The first step is to create a SURVEY DESIGN OBJECT that
# will contain both the data and the information about the design
##########################################

frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs)

############################################
# see the html help file for the function svydesign for more explanation
# the ~ sign refers to it being one of the columns of the data
#
#  First get the standard error and design effect for the mean income
#   allowing for weighting  and clustering
###################################################################

svymean(~HHINC,design=frs.des,deff=T)

#######################################################################
# now  reset the design so that only weighting is allowed
# for. To do this the PSU must be set to the individual case identifier (SERNUM)
###########################################################################

frs.des <- svydesign(id=~SERNUM, weights=~GROSS2,data=frs)
svymean(~HHINC,design=frs.des,deff=T)
 back to top uparrow


##########################################################
# next we  get ready to post-stratify this survey to match the population totals on
# council tax band (CTBAND) and tenure (TENTYPE)
# read in totals
# by using the R function xtabs
# to create tables of totals by CTBAND and TENTYPE
# starting with percentages and multiplying by sum of weights
###########################################################

tab.ctband <- xtabs(GROSS2~CTBAND,data=frs)
tab.ctband[1:9]<-c( 24.83 24.62 15.45 11.91 11.96 5.95 3.94 0.45 0.89)*sum(frs$GROSS2)/100
unclass(tab.ctband)

tab.tenure <- xtabs(GROSS2~TENURE,data=frs)
tab.tenure[1:4]<-c(  62.63 , 21.59, 5.58 , 10.20)*sum(frs$GROSS2)/100
unclass(tab.ctband)
#
# now we rake the original survey to match these totals
#
frs.des <- svydesign(id=~PSU, weights=~WEIGHTA,data=frs)
frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure))

#
#
#
svymean(~HHINC,design=frs.des,deff=T)
###########################################################
# R uses a method called calibration weighting
# to get the standard errors for a raked survey
# see the help on raking
# It is not available for any other program
# featured on this site, but SUDAAN does it
# An alternative is to use fepication methods
################################################################### ################################################################### # There are TWO kinds of survey design # # 1. Using simple methods , as used above # usually for surveys WITHOUT POST-STRATIFICATION # 2. Using replicate weights, usually for surveys with POST-STARTIFICATION # # the next stage is to convert the simple design into one with replicate weights # we use the default options which produce jacknife samples # see html help for as.svrepdesign # Go back to the original design and convert it to a # replicate one and then post-stratify it ####################################################### frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des <- as.svrepdesign(frs.des) frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure)) back to top uparrow ##################################################################### # Now poststratifying # notice that we overwrite the previous R object with the new one # this is to save space as each one contains the data and the work space # can get too big for comfort # # svymean recognises that the design is post-stratified and # uses a jackknife method ################################################################# frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des <- as.svrepdesign(frs.des) frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure)) svymean(~HHINC,design=frs.des,deff=T) back to top uparrow ########################################################################################### # NOW GETTING THE PERCENTILES OF THE INCOME DISTRIBUTION # my.svrepquantile(~HHINC,design=frs.des,quantile=c(0.05,0.1,.25,.5,.75,.9,.95)) # # NEED TO USE THIS JUST NOW (source the file myRfunctions.R # go to Exemplar 1 main page for link to it # because the survey package one is not working right # for replicate designs # #svyquantile(~HHINC,design=frs.des,quantile=c(0.05,0.1,.25,.5,.75,.9,.95),ci=T) # back to top uparrow #################################################################################### # # INCOME FOR LONE PARENTS # # the next command will give a table of the data by household type # xtabs(~ADULTH+DEPCHLDH,data=frs) frs$LONEP<-0 frs$LONEP[frs$ADULTH==1 & frs$DEPCHLDH>0]<-1 xtabs(~LONEP+DEPCHLDH,data=frs) sum(frs$LONEP) # # you will see that there are just 451 single parent households # we want the mean household income and its standard error # To do this we need to subset the survey # In R this means creating a new subset design object # Need to reset the design so that the new variable LONEP # is there # frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des.lonep<-subset.survey.design(frs.des,LONEP==1) # # and then apply all the survey functions to this new design # svymean(~HHINC,frs.des.lonep,deff=T) # # as expected single parent incomes are lower than the rest of the population # # Now with poststratification frs.des <- svydesign(id=~PSU, weights=~GROSS2,data=frs) frs.des <- as.svrepdesign(frs.des) frs.des<-rake(frs.des,list(~CTBAND,~TENURE),list(tab.ctband,tab.tenure)) frs.des.lonep<-subset.survey.design(frs.des,LONEP==1) svymean(~HHINC,frs.des.lonep,deff=T)