##############################################

#

# HOW TO USE THIS FILE

# A text version of this file (ex5.R) is also available which can just be sourced

# into the R program

# This file is intended for you to use interactively by pasting into R

# Comments are in black - red bits are commands that can be pasted into R

#

#########  ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS

#

# data stored in c:/GR/aprojects/peas/web/exemp3/data/ex3.Rdata

# this file is c:/GR/aprojects/peas/web/exemp3/program_code/ex3_R.html

#

##############################################

#

# data are in a data frame ex5

# to get the names of the variables

#

names(ex5)

#

# the general health data is in a factor called ex5$GENHELF

# to get a table of this variable do

#

table(ex5$GENHELF)

#

#  now it is time to try using the survey package

#  to give R access to the survey functions do

#

library(survey)

#

# The first step is to create a SURVEY DESIGN OBJECT that

# will contain both the data and the information about the design

#

# This design has WEIGHTS (WEIGHT),  but no clustering or startification

#  since it is not clustered the PSU is just the individual respondent

# To represent this to the design, we set the id variable to 1

#

 ex5.des<- svydesign(id=~1, weights=~WEIGHT,data=ex5)

#

# see the html help file for the function svydesign for more explanation

# the ~ sign refers to it being one of the columns of the data

# the R object  now contains all the information about the design

# including the data

# To get details of the design

summary(ex5.des)

#

# now we can use the svymean commands to get first the weighted proportions

# for categorical variables

svymean(~GENHELF,ex5.des,deff=T)

svymean(~Q85A,ex5.des,deff=T)

#

# this fails for missing values so need to get subset of design without missing values

#

svymean(~Q85A,subset(ex5.des,!is.na(Q85A)),deff=T)

#

# and expressing this as percentages

#

round(print(svymean(~Q85A,subset(ex5.des,!is.na(Q85A))))*100,1)

 

# now the means of some continuous variables

#

svymean(~as.numeric(GENHELF),ex5.des,deff=T)  # this treats it as a score not a category

svymean(~SINC,ex5.des,deff=T)

svymean(~SACC,ex5.des,deff=T)

#

#  Now we recode the cannabis and amphetamine scores

#  and add them into the design

CANSCORE<-as.numeric(ex5$Q85A)
CANSCORE[CANSCORE<=2]<-0
CANSCORE[CANSCORE %in% c(3,4,5)]<-1
CANSCORE[CANSCORE==6]<-0.5

ex5.des<-update(ex5.des,CANSCORE=CANSCORE)

rm(CANSCORE)             # this removes vector in main workspace

#

#

AMPSCORE<-as.numeric(ex5$Q85A)
AMPSCORE[AMPSCORE<=2]<-0
AMPSCORE[AMPSCORE %in% c(3,4,5)]<-1
AMPSCORE[AMPSCORE==6]<-0.5

ex5.des<-update(ex5.des,AMPSCORE=AMPSCORE)

rm(AMPSCORE)             # this removes vector in main workspace

#

# now run some regressions

#

summary(svyglm(as.numeric(GENHELF)~CANSCORE,ex5.des))

summary(svyglm(as.numeric(GENHELF)~AMPSCORE,ex5.des))

summary(svyglm(as.numeric(GENHELF)~CANSCORE+AMPSCORE,ex5.des))

 

#

#  Now make a table to understand results

#

can.table<-svytable(~GENHELF+Q85A,ex5.des)
can.tprint<-round(sweep(can.table,2,apply(can.table,2,sum),"/")*100)
# calculating column percents
can.tprint<-rbind(can.tprint,table(ex5$Q85A))                    # add a row below for base numbers
#

# at any stage type tyhe name of an object to see what it is

#
dimnames(can.tprint)[[1]][6]<-'BASE'
can.tprint