##############################################

#

# HOW TO USE THIS FILE

# A text version of this file (ex3.R) is also available which can just be sourced

# into the R program

# This file is intended for you to use interactively by pasting into R

# Comments are in black - red bits are commands that can be pasted into R

#

#

######### ANY TEXT AFTER THE # CHARACTER ARE TREATED AS COMMENTS

#

# data stored in c:/GR/aprojects/peas/web/exemp3/data/ex4.Rdata

# this file is c:/GR/aprojects/peas/web/exemp3/program_code/ex4_R.html

#

##############################################

#

# data are in a data frame health

# to get the names of the variables

#

names(wers)

#

# the ethnic group percentages are in a column labelled ETHNIC

# to get a histogram of this variable do

#

hist(wers$ETHNIC)

#

# now it is time to try using the survey package

# to give R access to the survey functions do - see mini guide if this fails

#

library(survey)

#

# The first step is to create a SURVEY DESIGN OBJECT that

# will contain both the data and the information about the design

#

# This design has WEIGHTS (EST_WT),

# and it is stratified by SIZEBAND and ?????

# Since it is not clustered we set the sampling unit ids

# to be the serial number SERNO, different for each workplace

#

wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~EST_WT,data=wers)

#

# we can look at the survey weighted mean of the percentages

# of females, disabled people and those from ethnic minorities

# as follows though in each case we need to select the subset of the design

# without missing values (!is.na means not missing)

#

svymean(~FEMALE,subset(wers.des,!is.na(FEMALE)),deff=T)

svymean(~DISAB,subset(wers.des,!is.na(DISAB)),deff=T)

svymean(~ETHNIC,subset(wers.des,!is.na(ETHNIC)),deff=T)

#

# in order to compare means for groups in R for employers

# with equal opps policies and those without

# we need to run a linear model with the glm command

# or its survey equivalent svyglm

#

summary(svyglm(FEMALE~EO,wers.des))

summary(glm(FEMALE~EO,data=wers))

#

# both analyses show many much higher rates of female employees when

# workplace has an equal opportunities policy

#

# But we find a very large difference between the weighted and unweighted differences

# in % female 11% unweighted versus 18% weighted

# this is confirmed by computing the subgroup means with the commands below

# that also annoyingly need lots of code to sort out missing values

#

mean(wers$FEMALE[wers$EO==0 & !is.na(wers$EO)& !is.na(wers$FEMALE) ])

mean(wers$FEMALE[wers$EO==1 & !is.na(wers$EO)& !is.na(wers$FEMALE) ])

weighted.mean(wers$FEMALE[wers$EO==0 & !is.na(wers$EO)& !is.na(wers$FEMALE) ],wers$EST_WT[wers$EO==0 & !is.na(wers$EO)& !is.na(wers$FEMALE) ])

weighted.mean(wers$FEMALE[wers$EO==1 & !is.na(wers$EO)& !is.na(wers$FEMALE) ],wers$EST_WT[wers$EO==1 & !is.na(wers$EO)& !is.na(wers$FEMALE) ])

#

# now looking at disabled

#

summary(svyglm(DISAB~EO,wers.des))

summary(glm(DISAB~EO,data=wers))

#

# and ethnic groups

# Very small differences here, low rates overall and no evidence of differences

#

summary(svyglm(ETHNIC~EO,wers.des))

summary(glm(ETHNIC~EO,data=wers))

#

# like females a bigger effect in the adjusted analysis

# but note larger standard error in the survey adjusted analysis

# but clea evidence of more ethnic minority employees when policy in place

#

# Now looking at the question the other way round

# what predictes an equal opportiunities policy

#

# R does not do stepwise methods (some of us think this is good)

# so we need to decide our own strategy starting with putting everything in

#

summary(svyglm(EO~ETHNIC+FEMALE+DISAB+NEMPSIZE,wers.des,family='binomial'))

#

# clearly size of establishment is the biggest predictor

# closely followed by percent female

# ethnic minorities also contributing a bit

# but disabled employees having no effect

#

# next step drops the disabled variable and looks at female and

# size in groups to check linearity

#

wers.des<-update(wers.des,GPSIZE=factor(NEMPSIZE))

wers.des<-update( wers.des,GPFEMALE=factor(ceiling(FEMALE/20)*(FEMALE>0)+(1*FEMALE==0),

labels=c('0-<20','20-<40','40-<60','60-<80','80-100') ) )

summary(svyglm(EO~ETHNIC+GPFEMALE+GPSIZE,wers.des,family='binomial'))

#

# we can see that linearlity looks not to bad for female, but there

# is a tep function up at groups 7 and 8 for size of establishment

# this suggests a model with an extra step function for size

# of workplace

#

wers.des<-update( wers.des,BIG=factor(NEMPSIZE>6,labels=c('size 6 or under','sizes 7 and 8') ))

#

#

summary(svyglm(EO~ETHNIC+FEMALE+NEMPSIZE+BIG,wers.des,family='binomial'))

#

# this shows the expected effects with the large

# size having the biggest effect

# It is now worth checking for any interaction between BIG and FEMALE

#

summary(svyglm(EO~ETHNIC+FEMALE+NEMPSIZE+BIG*FEMALE*BIG,wers.des,family='binomial'))

#

# we can see that the effect of % of female employees is less marked in the bigger workplaces

# note also that the efect of ethnic group still stands out clearly in these

# analyses with the odds of EO increasing by around 1.8 (exp(0.3*20)) when the

# percentage ethnic goes from 0 to 20%.

#

# This final bit of code makes a plot of %EO by FEMALE and SIZE to understand findings.

#

eo<-wers.des$variables$EO

gpfemale<-wers.des$variables$GPFEMALE[!is.na(eo)]

big<-wers.des$variables$BIG[!is.na(eo)]

eo<-eo[!is.na(eo)]

fits<-tapply(eo,list(big,gpfemale),mean)*100

barplot(fits,beside=T,main='%EO policy by %female and size of workplace',

sub='%female groups red=small workplace yellow= large workplace')

lines(c(0,100),c(100,100))

lines(c(0,100),c(0,0))