############################################## # # HOW TO USE THIS FILE # A text version of this file (ex5.R) is also available which can just be sourced # into the R program # This file is intended for you to use interactively by pasting into R # Comments are in black - red bits are commands that can be pasted into R # # ######### ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS # # data stored in c:/GR/aprojects/peas/web/exemp3/data/ex3.Rdata # this file is c:/GR/aprojects/peas/web/exemp3/program_code/ex3_R.html # ############################################## # # data are in a data frame ex5 # to get the names of the variables # names(ex5) # # the general health data is in a factor called ex5$GENHELF # to get a table of this variable do # table(ex5$GENHELF) # # now it is time to try using the survey package # to give R access to the survey functions do # library(survey) # # The first step is to create a SURVEY DESIGN OBJECT that # will contain both the data and the information about the design # # This design has WEIGHTS (WEIGHT), but no clustering or startification # since it is not clustered the PSU is just the individual respondent # To represent this to the design, we set the id variable to 1 # ex5.des<- svydesign(id=~1, weights=~WEIGHT,data=ex5) # # see the html help file for the function svydesign for more explanation # the ~ sign refers to it being one of the columns of the data # the R object now contains all the information about the design # including the data # To get details of the design summary(ex5.des) # # now we can use the svymean commands to get first the weighted proportions # for categorical variables svymean(~GENHELF,ex5.des,deff=T) svymean(~Q85A,ex5.des,deff=T) # # this fails for missing values so need to get subset of design without missing values # svymean(~Q85A,subset(ex5.des,!is.na(Q85A)),deff=T) # # and expressing this as percentages # round(print(svymean(~Q85A,subset(ex5.des,!is.na(Q85A))))*100,1) # now the means of some continuous variables # svymean(~as.numeric(GENHELF),ex5.des,deff=T) # this treats it as a score not a category svymean(~SINC,ex5.des,deff=T) svymean(~SACC,ex5.des,deff=T) # # Now we recode the cannabis and amphetamine scores # and add them into the design CANSCORE<-as.numeric(ex5$Q85A) CANSCORE[CANSCORE<=2]<-0 CANSCORE[CANSCORE %in% c(3,4,5)]<-1 CANSCORE[CANSCORE==6]<-0.5 ex5.des<-update(ex5.des,CANSCORE=CANSCORE) rm(CANSCORE) # this removes vector in main workspace # # AMPSCORE<-as.numeric(ex5$Q85A) AMPSCORE[AMPSCORE<=2]<-0 AMPSCORE[AMPSCORE %in% c(3,4,5)]<-1 AMPSCORE[AMPSCORE==6]<-0.5 ex5.des<-update(ex5.des,AMPSCORE=AMPSCORE) rm(AMPSCORE) # this removes vector in main workspace # # now run some regressions # summary(svyglm(as.numeric(GENHELF)~CANSCORE,ex5.des)) summary(svyglm(as.numeric(GENHELF)~AMPSCORE,ex5.des)) summary(svyglm(as.numeric(GENHELF)~CANSCORE+AMPSCORE,ex5.des)) # # Now make a table to understand results # can.table<-svytable(~GENHELF+Q85A,ex5.des) can.tprint<-round(sweep(can.table,2,apply(can.table,2,sum),"/")*100) # calculating column percents can.tprint<-rbind(can.tprint,table(ex5$Q85A)) # add a row below for base numbers # # at any stage type tyhe name of an object to see what it is # dimnames(can.tprint)[[1]][6]<-'BASE' can.tprint