###############################################
# HOW TO USE THIS FILE
# To submit one line at a time just press Ctrl+R and the line
# where the pointer is will be sent to R
#
# To submit more lines highlight them and press Ctrl+R
#
# It like to tile the two windows (windows menu)
#
#########  ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS
#
# data stored in c:/GR/aprojects/peas/web/exemp2/data/.Rdata
# this file is c:/GR/aprojects/peas/web/exemp2/program_code/ex2.R
#
##############################################
#
# data are in a data frame shs
# to get the names of the variables
#
names(shs)
#
# and to see a histogram of the weights and the range of weights 
# (pretty large) and the ratio of the most extereme weights
#
 hist(shs$IND_WT,col='red')
 range(shs$IND_WT)
 max(shs$IND_WT)/min(shs$IND_WT)
#
# now it is time to try using the survey package
#
# If you have not already done so you will need to 
# download the survey package 
# (>>pakages menu >> install package from cran>> select a mirror near you
#               and pick the survey pakage from the list)
# then to give R access to the survey functions do
#
library(survey)
#
#
# The first step is to create a SURVEY DESIGN OBJECT that 
# will contain both the data and the information about the design
#
# note that in the latest version of R survey 2.9.1 it is necessary
# to use the grossing-up weight to get the correct design effects
# Go to # html help #packages #survey #svymean to see where 
# this is explained 
#
# version 3 will have an option to overcome this
#
##########################################
 
shs.des <- svydesign(id=~PSU, weights=~GROSSWT,strata=~STRATUM,data=shs)

print(summary(shs.des))
#
# You will have seen a summary of the strata and for each stratum
# the number of PSUs and observations per stratum
# in the LAs where there was no clustering the no. of clusters is
# the same as the number of observations
# NOTE THAT SOME STRATA HAVE BEEN COMBINED TO PREVENT LONELY PSUS
#
# #############  linkxx RESULTS FOR TABLE 2.3 xxendlink IN EXEMPLAR HOME PAGE##############
# now use the command svymean to get the mean of
# the 0/1 variable for internet use 
# for the whole sample and for subgroups
#
svymean(~INTUSE,shs.des,deff=T)
#
# by sex
svyby(~INTUSE,~SEX,shs.des,svymean,keep.var=T,deff=T)
#
# The error messages are because splitting the data by sex has
# produced a few more lonely PSUS. See the html help for
# survey.lonely.psu by following links from html help menu to #packages #survey 
# There are some options to adjust estimates for this 
# this code will run them
#
options("survey.adjust.domain.lonely"=T)
options("survey.lonely.psu"='adjust')
svyby(~INTUSE,~SEX,shs.des,svymean,keep.var=T,deff=T)
#
# this increases the design effect by only a tiny amount 
# so not worth fussing with here
#
# Now proportions different using for different hours per week
#
# Need to usesubset  to get rid of the cases where this
# is missing for non-internet users
#
svymean(~RC5,subset(shs.des,INTUSE==1),deff=T)
#
#################### linkxx  THIS CODE FOR TABLE 2.4 xxendlink ################
# testing efect of different designs
#
shs.des <- svydesign(id=~UNIQID, weights=~GROSSWT,data=shs)
svymean(~INTUSE,shs.des,deff=T)
shs.des <- svydesign(id=~PSU, weights=~GROSSWT,data=shs)
svymean(~INTUSE,shs.des,deff=T)
shs.des <- svydesign(id=~UNIQID, weights=~GROSSWT,strata=~STRATUM,data=shs)
svymean(~INTUSE,shs.des,deff=T)
shs.des <- svydesign(id=~PSU, weights=~GROSSWT,strata=~STRATUM,data=shs)
svymean(~INTUSE,shs.des,deff=T)
#
#
################### linkxx now tables xxendlink and chi squared tests for tables 2.5 2.6 2.7################
#  now some chisquare tests
#  the R help (internet help menu #packages #surveys #svychisq)
#   gives details of other options
#
svychisq(~INTUSE+SEX,shs.des)
chisq.test(table(shs$INTUSE,shs$SEX),correct=F)191.89*
#
# table gives sum of weights by default
#
svytable(~INTUSE+SEX,shs.des)


svytable(~SEX+GROC,shs.des)
#
# table gives sum of weights
# more work would be needed to produce a nice table from this
#
svychisq(~GROC+SEX,shs.des)
chisq.test(table(shs$GROC,shs$SEX),correct=F)
svychisq(~RC5+EMP_STA,subset(shs.des,INTUSE==1))
chisq.test(table(shs$RC5,shs$EMP_STA),correct=T)
#
######################## linkxx by council area xxendlink############
#
svyby(~INTUSE,~COUNCIL,shs.des,svymean,keep.var=T,deff=T)
#
# now get mean internet use and CI and DEFF for Just Orkney
#
svymean(~INTUSE,subset(shs.des,COUNCIL=='Orkney Islands'),deff=T)
# 
#
# now logistic regression analyses for survey data ren=moving missing categories
#
################# linkxx table 2.7 xxendlink #####################

summary(svyglm(INTUSE~GROUPINC,subset(shs.des,!is.na(GROUPINC)),family='binomial'))
#
# add rurality
#
summary(svyglm(INTUSE~GROUPINC+SHS_6CLA,subset(shs.des,!is.na(GROUPINC)),family='binomial'))
#
################### linkxx figure 1.1 xxlink ##########################################
#
fit<-svyglm(INTUSE~GROUPINC,design=shs.des)
# now some more complicated code to fit the curves for age and sex # # shown on the first page of this exemplar
# using the splines library in R
#
library(splines)
#
# replace one missing value in age
#
shs$AGE[is.na(shs$AGE)]<-40
splinexs<-bs(shs$AGE,knots=c(25,45,65),degree=3)
SP1<-splinexs[,1]
SP2<-splinexs[,2]
SP3<-splinexs[,3]
SP4<-splinexs[,4]
SP5<-splinexs[,5]
SP6<-splinexs[,6]
#
# add extra vars to survey design
#
shs.des<-update(shs.des,SP1=SP1,SP2=SP2,SP3=SP3,SP4=SP4,SP5=SP5,SP6=SP6,AGE=AGE)
#
# fit spline model
#
fit<-svyglm(INTUSE~SP1+SP2+SP3+SP5+SP6
      +(as.integer(SEX)-1)+ (as.integer(SEX)-1)*SP1+
     (as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3+
        (as.integer(SEX)-1)*SP4+(as.integer(SEX)-1)*SP5+ 
           (as.integer(SEX)-1)*SP6                                            
            , design=shs.des, family='binomial')


#
# there is some code at the end of this page that does the 
# plotting of the figure
#
#
# now the age and sex plot shown on Exemplar 1
#
plot(c(15,83),c(0,65),type='n',xlab='age',ylab='% internet users',)
#
# fit for men and women calculated from predicted vals
#
ilogit<-function(x){1/(1+exp(-x))}
Mfit<- ilogit(predict(fit))[shs$SEX=='male'][match(c(16:80),shs$AGE[shs$SEX=='male'])]*100
Wfit<- ilogit(predict(fit))[shs$SEX=='female'][match(c(16:80),shs$AGE[shs$SEX=='female'])]*100
#
# plot rates and add the fitted lines to the plot
#
agemeans<-svyby(~INTUSE,~SEX+AGE,shs.des,svymean)
usebyageM<-agemeans[1:65,3]*100
usebyageW<-agemeans[1:65+65,3]*100
plot(c(15,83),c(0,65),type='n',xlab='age',ylab='% internet users',)
points(c(16:79,82),usebyageM)
points(c(16:79,82),usebyageW,col=2,pch=16)
lines(c(16:79,82),Mfit,lwd=2)
lines(c(16:79,82),Wfit,lwd=2,col=2)

##########################################################
# now look at additional effect of other variables
#
summary(update(fit,~.+SHS_6CLA))
summary(update(fit,~.+EMP_STA))

#
# now let us look at the effect of local authority on
# internet use when adjusted for age and sex
#
# unadjusted effect
#
fit1<-svyglm(INTUSE~COUNCIL,design=shs.des)
#
#adjusted age sex
#
fit2<-svyglm(INTUSE~COUNCIL+SP1+SP2+SP3+(as.integer(SEX)-1)+(as.integer(SEX)-1)*SP1+(as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3, design=shs.des, family='binomial')
#
# adjusted age sex income
#
fit3<-svyglm(INTUSE~COUNCIL+SP1+SP2+SP3+(as.integer(SEX)-1)+(as.integer(SEX)-1)*SP1+(as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3+GROUPINC, design=shs.des, family='binomial')
#
# this next bit compares ranking of adjusted and unadjusted
#  coefficients. It seems that this large variation between local 
# authorities is not explained by age sex or household income
#
comp<-cbind(rank(-c(0,fit1$coefficients[2:32])), 
rank(-fit2$coefficients[1:32]+fit2$coefficients[1]), 
rank(-fit3$coefficients[1:32]+fit3$coefficients[1]))
dimnames(comp)[[1]][1]<-'Aberdeen City'
dimnames(comp)[[2]]<-c('Unadjusted','Age sex','Age sex wealth')
print('Ranking of internet use')
print(comp)