##############################################
#
# HOW TO USE THIS FILE
# A text version of this file (ex5.R) is also available which can just be sourced
# into the R program
# This file is intended for you to use interactively by pasting into R
# Comments are in black - red bits are commands that can be pasted into R
# 
#
#########  ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS
#
# data stored in c:/GR/aprojects/peas/web/exemp3/data/ex3.Rdata
# this file is c:/GR/aprojects/peas/web/exemp3/program_code/ex3_R.html
#
##############################################
#
# data are in a data frame ex5
# to get the names of the variables
#
names(ex5)
#
# the general health data is in a factor called ex5$GENHELF
# to get a table of this variable do
#
table(ex5$GENHELF)
#
#  now it is time to try using the survey package
#  to give R access to the survey functions do
#
library(survey)
#
# The first step is to create a SURVEY DESIGN OBJECT that 
# will contain both the data and the information about the design
#
# This design has WEIGHTS (WEIGHT),  but no clustering or startification
#  since it is not clustered the PSU is just the individual respondent
# To represent this to the design, we set the id variable to 1
#
 ex5.des<- svydesign(id=~1, weights=~WEIGHT,data=ex5)
#
# see the html help file for the function svydesign for more explanation
# the ~ sign refers to it being one of the columns of the data
# the R object  now contains all the information about the design
# including the data
# To get details of the design
summary(ex5.des)
#
# now we can use the svymean commands to get first the weighted proportions
# for categorical variables
svymean(~GENHELF,ex5.des,deff=T)
svymean(~Q85A,ex5.des,deff=T)
#
# this fails for missing values so need to get subset of design without missing values
#
svymean(~Q85A,subset(ex5.des,!is.na(Q85A)),deff=T)
#
# and expressing this as percentages
#
round(print(svymean(~Q85A,subset(ex5.des,!is.na(Q85A))))*100,1)
 
# now the means of some continuous variables
#
svymean(~as.numeric(GENHELF),ex5.des,deff=T)  # this treats it as a score not a category
svymean(~SINC,ex5.des,deff=T)
svymean(~SACC,ex5.des,deff=T)
#
#  Now we recode the cannabis and amphetamine scores
#  and add them into the design
CANSCORE<-as.numeric(ex5$Q85A)
CANSCORE[CANSCORE<=2]<-0
CANSCORE[CANSCORE %in% c(3,4,5)]<-1
CANSCORE[CANSCORE==6]<-0.5
ex5.des<-update(ex5.des,CANSCORE=CANSCORE)
rm(CANSCORE)             # this removes vector in main workspace
#
#
AMPSCORE<-as.numeric(ex5$Q85A)
AMPSCORE[AMPSCORE<=2]<-0
AMPSCORE[AMPSCORE %in% c(3,4,5)]<-1
AMPSCORE[AMPSCORE==6]<-0.5
ex5.des<-update(ex5.des,AMPSCORE=AMPSCORE)
rm(AMPSCORE)             # this removes vector in main workspace
#
# now run some regressions
#
summary(svyglm(as.numeric(GENHELF)~CANSCORE,ex5.des))
summary(svyglm(as.numeric(GENHELF)~AMPSCORE,ex5.des))
summary(svyglm(as.numeric(GENHELF)~CANSCORE+AMPSCORE,ex5.des))
 
#
#  Now make a table to understand results
#
can.table<-svytable(~GENHELF+Q85A,ex5.des)
can.tprint<-round(sweep(can.table,2,apply(can.table,2,sum),"/")*100) # calculating column percents
can.tprint<-rbind(can.tprint,table(ex5$Q85A))                    # add a row below for base numbers
#
# at any stage type tyhe name of an object to see what it is
#
dimnames(can.tprint)[[1]][6]<-'BASE'
can.tprint