###############################################
# HOW TO USE THIS FILE 
# This is an HTML file to show you what R output looks like 
# Comments relate to interpretation of output 
# for comments on how torun code see the html R source code
red stuff is (R commands)
blue is output
# Black is comment
#

#
Links in this page
Equal opportunities policies weighted and unweighted - Table 4.3
E O policy by workplace size Table 4.4
Other factors by equal opportunities policy Table 4.5
Logistic regression Table 4.6
Logistic regression Table 4.7
Finite population corrections Table 4.8


Back to topuparrow

## ################ table 4.3 ###############################
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
svymean(~EO,wers.des)
    mean SE
EO NA NA
# sadly this just gives a missing value since EO has 7 missing values # but the answer is to remove the missing data from the survey design # see the help for is.na and for subset.surveydesign svymean(~EO,subset(wers.des,!is.na(EO)),deff=T) mean SE DEff
EO 0.672785 0.018102 3.2766
# # we can compare this with the unweighted mean had the design been different # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) Warning message:
No weights or probabilities supplied, assuming equal probability in: svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
svymean(~EO,subset(wers.des,!is.na(EO)),deff=T)
mean SE DEff
EO 0.8113553 0.0076675 Inf
# # Design effects don't come out because no sampling weights # were given and they need to add to the population total Back to topuparrow ###################Table 4.4################################## # wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=GROSSWT,data=wers) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) NEMPSIZE statistic.EO SE
10-24 10-24 0.6911197 0.02755723
25-49 25-49 0.7171717 0.02182522
50-99 50-99 0.7743590 0.02034539
100-199 100-199 0.8501292 0.01784374
200-499 200-499 0.8903509 0.01442407
500+ 500+ 0.9189189 0.01548492
There were 50 or more warnings (use warnings() to see the first 50)
# # the warning messages refer to lonely PSUs (see theory section on checking data) # This means that the standard errors masy be slightly underestimated # Back to topuparrow #######################Table 4.5######################################## # # Now we get means of percent female by EO # svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean) EO statistic.FEMALE SE
0 0 40.96654 1.2826394
1 1 51.76528 0.5253835
NA NA 60.65429 10.1791482
There were 23 warnings (use warnings() to see them)
# # again the easisest way to get the standard error of # the difference is with a with svyglm (see help) # summary(svyglm(FEMALE~EO,wers.des) )
Call:
svyglm.survey.design(formula = FEMALE ~ EO, design = wers.des)

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 40.967 1.283 31.939 < 2e-16 ***
EO 10.799 1.464 7.376 2.31e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 803.8749) Number of Fisher Scoring iterations: 2 > #
# obviously workplaces with an EO policy have a higher % women
# we can now do similar analyses for % ethnic minority employees
#
svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
EO statistic.ETHNIC SE
0 0 3.810382 0.462366
1 1 5.616825 0.266537
NA NA 2.861205 1.424828
There were 19 warnings (use warnings() to see them)

summary(svyglm(ETHNIC~EO,wers.des))

Call:
svyglm.survey.design(formula = ETHNIC ~ EO, design = wers.des)
Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.8104 0.4624 8.241 3e-16 ***
EO 1.8064 0.5348 3.378 0.000745 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 107.4674) Number of Fisher Scoring iterations: 2


#
# obviously workplaces with an EO policy have a higher % women
# we can now do similar analyses for % ethnic minority employees
#
 svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
   EO statistic.ETHNIC       SE
0 0 3.810382 0.462366
1 1 5.616825 0.266537
NA NA 2.861205 1.424828
There were 19 warnings (use warnings() to see them)
summary(svyglm(ETHNIC~EO,wers.des))
Call:
svyglm.survey.design(formula = ETHNIC ~ EO, design = wers.des)

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.8104 0.4624 8.241 3e-16 ***
EO 1.8064 0.5348 3.378 0.000745 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 107.4674)


#
# like females a bigger effect in the weighted analysis 
# but clear evidence of more ethnic minority employees when policy in place
#
# now find weighted percent in each  categories of disabled worker %s
# since DISABGRP is a factor R gives proportions
#
 svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP)))
                    mean     SE
DISABGRPnone 0.56271 0.0099
DISABGRPunder 3% 0.35704 0.0094
DISABGRP3%+ 0.08025 0.0059
# # now means of EO by disabled groups # svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean) # # svyglm compares percent in each disabled group category # between eo and non-eo workplaces # summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO)))) Call:
svyglm.survey.design(formula = (DISABGRP == "none") ~ EO, design = subset(wers.des,
!is.na(wers$DISABGRP) & !is.na(EO)))

Survey design:
subset(wers.des, !is.na(wers$DISABGRP) & !is.na(EO))

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.69289 0.02274 30.467 < 2e-16 ***
EO -0.16135 0.02602 -6.201 6.74e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.2422422)


 summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),]))
Call:
svyglm.survey.design(formula = (DISABGRP == "under 3%") ~ EO,
design = wers.des[!is.na(wers$DISABGRP), ])

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.22589 0.02052 11.009 < 2e-16 ***
EO 0.16221 0.02390 6.788 1.48e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.224932)

Number of Fisher Scoring iterations: 2


 summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),]))
Call:
svyglm.survey.design(formula = (DISABGRP == "3%+") ~ EO, design = wers.des[!is.na(wers$DISABGRP),
])

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0812183 0.0136809 5.937 3.4e-09 ***
EO -0.0008611 0.0152388 -0.057 0.955
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.07382358)

Number of Fisher Scoring iterations: 2


##############################################################
# now the same things for unweighted data 
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
#
# detailed results not shown
#
svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean)
summary(svyglm(FEMALE~EO,wers.des) )
svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
summary(svyglm(ETHNIC~EO,wers.des))
svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP)))
svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean)
 summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO))))
 summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),]))
 summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),]))
Back to topuparrow
##########################################################
# Table 4.6 
# now the svyglm command and the glm commands to
# fit logistic regressions
#  first restore design
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial'))

#
# and unweighted
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial'))
Back to topuparrow 
##################################################################
# table 4.7
# now explore models that include workplace size  
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial'))
#
# and without weighting
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial'))
Back to topuparrow
###############################################################
#
#  table 4.8 with finite population corrections
#
wers.des <- svydesign(id=~SERNO,fpc=~SAMPFRAC, strata=~STRATA,weight=~GROSSWT,data=wers)
svymean(~EO,subset(wers.des,!is.na(EO)))
svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE)
#
# and without
wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=~GROSSWT,data=wers)
svymean(~EO,subset(wers.des,!is.na(EO)))
svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE)
#
#############################################################
# now some more modelling stuff to make plots
#

#################################################################