############################################### # HOW TO USE THIS FILE # To submit one line at a time just press Ctrl+R and the line # where the pointer is will be sent to R # # To submit more lines highlight them and press Ctrl+R # # It like to tile the two windows (windows menu) # ######### ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS # # data stored in c:/GR/aprojects/peas/web/exemp2/data/.Rdata # this file is c:/GR/aprojects/peas/web/exemp2/program_code/ex2.R # ############################################## # # data are in a data frame shs # to get the names of the variables # names(shs) # # and to see a histogram of the weights and the range of weights # (pretty large) and the ratio of the most extereme weights # hist(shs$IND_WT,col='red') range(shs$IND_WT) max(shs$IND_WT)/min(shs$IND_WT) # # now it is time to try using the survey package # # If you have not already done so you will need to # download the survey package # (>>pakages menu >> install package from cran>> select a mirror near you # and pick the survey pakage from the list) # then to give R access to the survey functions do # library(survey) # # # The first step is to create a SURVEY DESIGN OBJECT that # will contain both the data and the information about the design # # note that in the latest version of R survey 2.9.1 it is necessary # to use the grossing-up weight to get the correct design effects # Go to # html help #packages #survey #svymean to see where # this is explained # # version 3 will have an option to overcome this # ########################################## shs.des <- svydesign(id=~PSU, weights=~GROSSWT,strata=~STRATUM,data=shs) print(summary(shs.des)) # # You will have seen a summary of the strata and for each stratum # the number of PSUs and observations per stratum # in the LAs where there was no clustering the no. of clusters is # the same as the number of observations # NOTE THAT SOME STRATA HAVE BEEN COMBINED TO PREVENT LONELY PSUS # # ############# linkxx RESULTS FOR TABLE 2.3 xxendlink IN EXEMPLAR HOME PAGE############## # now use the command svymean to get the mean of # the 0/1 variable for internet use # for the whole sample and for subgroups # svymean(~INTUSE,shs.des,deff=T) # # by sex svyby(~INTUSE,~SEX,shs.des,svymean,keep.var=T,deff=T) # # The error messages are because splitting the data by sex has # produced a few more lonely PSUS. See the html help for # survey.lonely.psu by following links from html help menu to #packages #survey # There are some options to adjust estimates for this # this code will run them # options("survey.adjust.domain.lonely"=T) options("survey.lonely.psu"='adjust') svyby(~INTUSE,~SEX,shs.des,svymean,keep.var=T,deff=T) # # this increases the design effect by only a tiny amount # so not worth fussing with here # # Now proportions different using for different hours per week # # Need to usesubset to get rid of the cases where this # is missing for non-internet users # svymean(~RC5,subset(shs.des,INTUSE==1),deff=T) # #################### linkxx THIS CODE FOR TABLE 2.4 xxendlink ################ # testing efect of different designs # shs.des <- svydesign(id=~UNIQID, weights=~GROSSWT,data=shs) svymean(~INTUSE,shs.des,deff=T) shs.des <- svydesign(id=~PSU, weights=~GROSSWT,data=shs) svymean(~INTUSE,shs.des,deff=T) shs.des <- svydesign(id=~UNIQID, weights=~GROSSWT,strata=~STRATUM,data=shs) svymean(~INTUSE,shs.des,deff=T) shs.des <- svydesign(id=~PSU, weights=~GROSSWT,strata=~STRATUM,data=shs) svymean(~INTUSE,shs.des,deff=T) # # ################### linkxx now tables xxendlink and chi squared tests for tables 2.5 2.6 2.7################ # now some chisquare tests # the R help (internet help menu #packages #surveys #svychisq) # gives details of other options # svychisq(~INTUSE+SEX,shs.des) chisq.test(table(shs$INTUSE,shs$SEX),correct=F)191.89* # # table gives sum of weights by default # svytable(~INTUSE+SEX,shs.des) svytable(~SEX+GROC,shs.des) # # table gives sum of weights # more work would be needed to produce a nice table from this # svychisq(~GROC+SEX,shs.des) chisq.test(table(shs$GROC,shs$SEX),correct=F) svychisq(~RC5+EMP_STA,subset(shs.des,INTUSE==1)) chisq.test(table(shs$RC5,shs$EMP_STA),correct=T) # ######################## linkxx by council area xxendlink############ # svyby(~INTUSE,~COUNCIL,shs.des,svymean,keep.var=T,deff=T) # # now get mean internet use and CI and DEFF for Just Orkney # svymean(~INTUSE,subset(shs.des,COUNCIL=='Orkney Islands'),deff=T) # # # now logistic regression analyses for survey data ren=moving missing categories # ################# linkxx table 2.7 xxendlink ##################### summary(svyglm(INTUSE~GROUPINC,subset(shs.des,!is.na(GROUPINC)),family='binomial')) # # add rurality # summary(svyglm(INTUSE~GROUPINC+SHS_6CLA,subset(shs.des,!is.na(GROUPINC)),family='binomial')) # ################### linkxx figure 1.1 xxlink ########################################## # fit<-svyglm(INTUSE~GROUPINC,design=shs.des) # now some more complicated code to fit the curves for age and sex # # shown on the first page of this exemplar # using the splines library in R # library(splines) # # replace one missing value in age # shs$AGE[is.na(shs$AGE)]<-40 splinexs<-bs(shs$AGE,knots=c(25,45,65),degree=3) SP1<-splinexs[,1] SP2<-splinexs[,2] SP3<-splinexs[,3] SP4<-splinexs[,4] SP5<-splinexs[,5] SP6<-splinexs[,6] # # add extra vars to survey design # shs.des<-update(shs.des,SP1=SP1,SP2=SP2,SP3=SP3,SP4=SP4,SP5=SP5,SP6=SP6,AGE=AGE) # # fit spline model # fit<-svyglm(INTUSE~SP1+SP2+SP3+SP5+SP6 +(as.integer(SEX)-1)+ (as.integer(SEX)-1)*SP1+ (as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3+ (as.integer(SEX)-1)*SP4+(as.integer(SEX)-1)*SP5+ (as.integer(SEX)-1)*SP6 , design=shs.des, family='binomial') # # there is some code at the end of this page that does the # plotting of the figure # # # now the age and sex plot shown on Exemplar 1 # plot(c(15,83),c(0,65),type='n',xlab='age',ylab='% internet users',) # # fit for men and women calculated from predicted vals # ilogit<-function(x){1/(1+exp(-x))} Mfit<- ilogit(predict(fit))[shs$SEX=='male'][match(c(16:80),shs$AGE[shs$SEX=='male'])]*100 Wfit<- ilogit(predict(fit))[shs$SEX=='female'][match(c(16:80),shs$AGE[shs$SEX=='female'])]*100 # # plot rates and add the fitted lines to the plot # agemeans<-svyby(~INTUSE,~SEX+AGE,shs.des,svymean) usebyageM<-agemeans[1:65,3]*100 usebyageW<-agemeans[1:65+65,3]*100 plot(c(15,83),c(0,65),type='n',xlab='age',ylab='% internet users',) points(c(16:79,82),usebyageM) points(c(16:79,82),usebyageW,col=2,pch=16) lines(c(16:79,82),Mfit,lwd=2) lines(c(16:79,82),Wfit,lwd=2,col=2) ########################################################## # now look at additional effect of other variables # summary(update(fit,~.+SHS_6CLA)) summary(update(fit,~.+EMP_STA)) # # now let us look at the effect of local authority on # internet use when adjusted for age and sex # # unadjusted effect # fit1<-svyglm(INTUSE~COUNCIL,design=shs.des) # #adjusted age sex # fit2<-svyglm(INTUSE~COUNCIL+SP1+SP2+SP3+(as.integer(SEX)-1)+(as.integer(SEX)-1)*SP1+(as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3, design=shs.des, family='binomial') # # adjusted age sex income # fit3<-svyglm(INTUSE~COUNCIL+SP1+SP2+SP3+(as.integer(SEX)-1)+(as.integer(SEX)-1)*SP1+(as.integer(SEX)-1)*SP2+(as.integer(SEX)-1)*SP3+GROUPINC, design=shs.des, family='binomial') # # this next bit compares ranking of adjusted and unadjusted # coefficients. It seems that this large variation between local # authorities is not explained by age sex or household income # comp<-cbind(rank(-c(0,fit1$coefficients[2:32])), rank(-fit2$coefficients[1:32]+fit2$coefficients[1]), rank(-fit3$coefficients[1:32]+fit3$coefficients[1])) dimnames(comp)[[1]][1]<-'Aberdeen City' dimnames(comp)[[2]]<-c('Unadjusted','Age sex','Age sex wealth') print('Ranking of internet use') print(comp)