###############################################
# HOW TO USE THIS FILE 
# This is an HTML file to show you what R output looks like 
# Comments relate to interpretation of output 
# for comments on how torun code see the html R source code
red stuff is (R commands)
blue is output
# Black is comment

#

#
Links in this page
Equal opportunities policies weighted and unweighted - Table 4.3
E O policy by workplace size Table 4.4
Other factors by equal opportunities policy Table 4.5
Logistic regression Table 4.6
Logistic regression Table 4.7
Finite population corrections Table 4.8


Back to topuparrow

## ################ table 4.3 ###############################
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
svymean(~EO,wers.des)
    mean SEEO   NA NA

# sadly this just gives a missing value since EO has 7 missing values
# but the answer is to remove the missing data from the survey design
# see the help for is.na and for subset.surveydesign 
svymean(~EO,subset(wers.des,!is.na(EO)),deff=T)
       mean       SE   DEffE
   O 0.672785 0.018102 3.2766
#
# we can compare this with the unweighted mean had the design been different
#
wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
Warning message:No weights or probabilities supplied, assuming equal probability 
in: svydesign(id = ~SERNO, strata = ~STRATA, data = wers) 
svymean(~EO,subset(wers.des,!is.na(EO)),deff=T)
        mean        SE DEffE
   O 0.8113553 0.0076675  Inf

#
# Design effects don't come out because no sampling weights 
# were given and they need to add to the population total 

Back to topuparrow
###################Table 4.4##################################
#
wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=GROSSWT,data=wers)
svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE)
 
        NEMPSIZE statistic.EO         SE
10-24      10-24    0.6911197 0.02755723
25-49      25-49    0.7171717 0.02182522
50-99      50-99    0.7743590 0.02034539
100-199  100-199    0.8501292 0.01784374
200-499  200-499    0.8903509 0.01442407
500+        500+    0.9189189 0.01548492
There were 50 or more warnings (use warnings() to see the first 50)

#
# the warning messages refer to lonely PSUs (see theory section on checking data)
# This means that the standard errors masy be slightly underestimated
#
Back to topuparrow
#######################Table 4.5########################################
#
# Now we get means of percent female by EO
#
svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean)
 
   EO statistic.FEMALE         SE
0   0         40.96654  1.2826394
1   1         51.76528  0.5253835
NA NA         60.65429 10.1791482
There were 23 warnings (use warnings() to see them)

#
# again the easisest way to get the standard error of
# the difference is with a with svyglm (see help)
#
summary(svyglm(FEMALE~EO,wers.des) )

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   40.967      1.283  31.939  < 2e-16 ***
EO            10.799      1.464   7.376 2.31e-13 ***

  # obviously workplaces with an EO policy have a higher % women
  # we can now do similar analyses for % ethnic minority employees
  #
  svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
    EO statistic.ETHNIC SE
    0 0 3.810382 0.462366
    1 1 5.616825 0.266537
    NA NA 2.861205 1.424828
    There were 19 warnings (use warnings() to see them)
    summary(svyglm(ETHNIC~EO,wers.des))
  
  Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   40.967      1.283  31.939  < 2e-16 ***
EO            10.799      1.464   7.376 2.31e-13 ***

# obviously workplaces with an EO policy have a higher % women
# we can now do similar analyses for % ethnic minority employees
#
 svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)

   EO statistic.ETHNIC       SE
0   0         3.810382 0.462366
1   1         5.616825 0.266537
NA NA         2.861205 1.424828
There were 19 warnings (use warnings() to see them)
 
 summary(svyglm(ETHNIC~EO,wers.des))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   3.8104     0.4624   8.241    3e-16 ***
EO            1.8064     0.5348   3.378 0.000745 ***

#
# like females a bigger effect in the weighted analysis 
# but clear evidence of more ethnic minority employees when policy in place
#
# now find weighted percent in each  categories of disabled worker %s
# since DISABGRP is a factor R gives proportions
#
 svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP)))

                    mean     SE
DISABGRPnone     0.56271 0.0099
DISABGRPunder 3% 0.35704 0.0094
DISABGRP3%+      0.08025 0.0059
#
# now means of EO by disabled groups
#
svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean)

         DISABGRP statistic.EO         SE
none         none    0.7658662 0.01172310
under 3% under 3%    0.8798920 0.01171431
3%+           3%+    0.8083832 0.03005366
There were 23 warnings (use warnings() to see them)
#
# svyglm compares percent in each disabled group category 
# between eo and non-eo workplaces
#
 summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO))))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.69289    0.02274  30.467  < 2e-16 ***
EO          -0.16135    0.02602  -6.201 6.74e-10 ***

 summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),]))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.22589    0.02052  11.009  < 2e-16 ***
EO           0.16221    0.02390   6.788 1.48e-11 ***

 summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),]))

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.0812183  0.0136809   5.937  3.4e-09 ***
EO          -0.0008611  0.0152388  -0.057    0.955 

##############################################################
# now the same things for
 unweighted data 
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
#
# detailed results not shown
#
svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean)
summary(svyglm(FEMALE~EO,wers.des) )
svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
summary(svyglm(ETHNIC~EO,wers.des))
svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP)))
svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean)
 summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO))))
 summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),]))
 summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),]))
Back to topuparrow
##########################################################
# Table 4.6 
# now the svyglm command and the glm commands to
# fit logistic regressions
#  first restore design
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial'))
 
 Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP, 
    design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, weights = ~GROSSWT, 
    data = wers)

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -0.412330   0.191134  -2.157  0.03110 *  
ETHNIC            0.028791   0.011844   2.431  0.01515 *  
FEMALE            0.018575   0.003247   5.721 1.22e-08 ***
DISABGRPunder 3%  0.680734   0.211643   3.216  0.00132 ** 
DISABGRP3%+      -0.189232   0.375388  -0.504  0.61425    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.970625)

Number of Fisher Scoring iterations: 5

Warning message:
non-integer #successes in a binomial glm! in: eval(expr, envir, enclos) 
 
#
# and unweighted
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial'))
 
 Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP, 
    design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.444213   0.108854   4.081 4.66e-05 ***
ETHNIC           0.017137   0.008531   2.009   0.0447 *  
FEMALE           0.013992   0.001876   7.458 1.30e-13 ***
DISABGRPunder 3% 0.847344   0.135626   6.248 5.07e-10 ***
DISABGRP3%+      0.108964   0.208531   0.523   0.6014    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.9219255)

Number of Fisher Scoring iterations: 5
 
Back to topuparrow 
##################################################################
# table 4.7
# now explore models that include workplace size  
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial'))
 
 Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP + 
    NEMPSIZE, design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, weights = ~GROSSWT, 
    data = wers)

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -0.616667   0.236489  -2.608  0.00919 ** 
ETHNIC            0.029325   0.012202   2.403  0.01634 *  
FEMALE            0.019776   0.003333   5.934 3.47e-09 ***
DISABGRPunder 3%  0.004699   0.224611   0.021  0.98331    
DISABGRP3%+      -0.222865   0.376990  -0.591  0.55447    
NEMPSIZE25-49     0.071526   0.220591   0.324  0.74578    
NEMPSIZE50-99     0.553989   0.218764   2.532  0.01141 *  
NEMPSIZE100-199   1.152938   0.250897   4.595 4.59e-06 ***
NEMPSIZE200-499   1.653990   0.291210   5.680 1.55e-08 ***
NEMPSIZE500+      1.991005   0.349684   5.694 1.43e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.976199)

Number of Fisher Scoring iterations: 5

# # and without weighting # wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers) summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial')) Call: svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP + NEMPSIZE, design = wers.des, family = "binomial") Survey design: svydesign(id = ~SERNO, strata = ~STRATA, data = wers) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -0.207243 0.175511 -1.181 0.23782 ETHNIC 0.015954 0.009046 1.764 0.07795 . FEMALE 0.017123 0.001953 8.768 < 2e-16 *** DISABGRPunder 3% 0.177042 0.161640 1.095 0.27352 DISABGRP3%+ 0.004065 0.213042 0.019 0.98478 NEMPSIZE25-49 0.122007 0.179442 0.680 0.49663 NEMPSIZE50-99 0.545961 0.189638 2.879 0.00403 ** NEMPSIZE100-199 1.096627 0.217922 5.032 5.28e-07 *** NEMPSIZE200-499 1.575925 0.233285 6.755 1.86e-11 *** NEMPSIZE500+ 1.734561 0.305587 5.676 1.58e-08 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for binomial family taken to be 0.9131494) Number of Fisher Scoring iterations: 5 Back to topuparrow ############################################################### # # table 4.8 with finite population corrections # wers.des <- svydesign(id=~SERNO,fpc=~SAMPFRAC, strata=~STRATA,weight=~GROSSWT,data=wers) svymean(~EO,subset(wers.des,!is.na(EO))) svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) x NEMPSIZE statistic.EO SE 10-24 10-24 0.6343108 0.03275105 25-49 25-49 0.6480435 0.03483782 50-99 50-99 0.7187470 0.02627534 100-199 100-199 0.8124946 0.02334485 200-499 200-499 0.8679404 0.02012385 500+ 500+ 0.9129004 0.01756700 # # and without wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=~GROSSWT,data=wers) svymean(~EO,subset(wers.des,!is.na(EO))) mean SE EO 0.67279 0.0181 svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE) NEMPSIZE statistic.EO SE 10-24 10-24 0.6343108 0.03280815 25-49 25-49 0.6480435 0.03489757 50-99 50-99 0.7187470 0.02641132 100-199 100-199 0.8124946 0.02358097 200-499 200-499 0.8679404 0.02045991 500+ 500+ 0.9129004 0.01834597 There were 50 or more warnings (use warnings() to see the first 50) # ############################################################# # now some more modelling stuff to make plots # #################################################################