Exemplar 4 R output

###############################################
# HOW TO USE THIS FILE 
# This is an HTML file to show you what R output looks like 
# Comments relate to interpretation of output 
# for comments on how torun code see the html R source code

red stuff is (R commands)

blue is output

# Black is comment

#

#
Links in this page
Equal opportunities policies weighted and unweighted - Table 4.3
E O policy by workplace size Table 4.4
Other factors by equal opportunities policy Table 4.5
Logistic regression Table 4.6
Logistic regression Table 4.7
Finite population corrections Table 4.8


Back to top

## ################ table 4.3 ###############################
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
svymean(~EO,wers.des)
    mean SEEO   NA NA

# sadly this just gives a missing value since EO has 7 missing values
# but the answer is to remove the missing data from the survey design
# see the help for is.na and for subset.surveydesign 
svymean(~EO,subset(wers.des,!is.na(EO)),deff=T)
       mean       SE   DEffE
   O 0.672785 0.018102 3.2766
#
# we can compare this with the unweighted mean had the design been different
#
wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
Warning message:No weights or probabilities supplied, assuming equal probability 
in: svydesign(id = ~SERNO, strata = ~STRATA, data = wers) 
svymean(~EO,subset(wers.des,!is.na(EO)),deff=T)
        mean        SE DEffE
   O 0.8113553 0.0076675  Inf

#
# Design effects don't come out because no sampling weights 
# were given and they need to add to the population total 

Back to top
###################Table 4.4##################################
#
wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=GROSSWT,data=wers)
svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE)
 
        NEMPSIZE statistic.EO         SE
10-24      10-24    0.6911197 0.02755723
25-49      25-49    0.7171717 0.02182522
50-99      50-99    0.7743590 0.02034539
100-199  100-199    0.8501292 0.01784374
200-499  200-499    0.8903509 0.01442407
500+        500+    0.9189189 0.01548492
There were 50 or more warnings (use warnings() to see the first 50)

#
# the warning messages refer to lonely PSUs (see theory section on checking data)
# This means that the standard errors masy be slightly underestimated
#
Back to top
#######################Table 4.5########################################
#
# Now we get means of percent female by EO
#
svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean)
 
   EO statistic.FEMALE         SE
0   0         40.96654  1.2826394
1   1         51.76528  0.5253835
NA NA         60.65429 10.1791482
There were 23 warnings (use warnings() to see them)

#
# again the easisest way to get the standard error of
# the difference is with a with svyglm (see help)
#
summary(svyglm(FEMALE~EO,wers.des) )

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   40.967      1.283  31.939  < 2e-16 ***
EO            10.799      1.464   7.376 2.31e-13 ***

  # obviously workplaces with an EO policy have a higher % women
  # we can now do similar analyses for % ethnic minority employees
  #
  svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
    EO statistic.ETHNIC SE
    0 0 3.810382 0.462366
    1 1 5.616825 0.266537
    NA NA 2.861205 1.424828
    There were 19 warnings (use warnings() to see them)
    summary(svyglm(ETHNIC~EO,wers.des))
  
  Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   40.967      1.283  31.939  < 2e-16 ***
EO            10.799      1.464   7.376 2.31e-13 ***

# obviously workplaces with an EO policy have a higher % women
# we can now do similar analyses for % ethnic minority employees
#
 svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)

   EO statistic.ETHNIC       SE
0   0         3.810382 0.462366
1   1         5.616825 0.266537
NA NA         2.861205 1.424828
There were 19 warnings (use warnings() to see them)
 
 summary(svyglm(ETHNIC~EO,wers.des))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   3.8104     0.4624   8.241    3e-16 ***
EO            1.8064     0.5348   3.378 0.000745 ***

#
# like females a bigger effect in the weighted analysis 
# but clear evidence of more ethnic minority employees when policy in place
#
# now find weighted percent in each  categories of disabled worker %s
# since DISABGRP is a factor R gives proportions
#
 svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP)))

                    mean     SE
DISABGRPnone     0.56271 0.0099
DISABGRPunder 3% 0.35704 0.0094
DISABGRP3%+      0.08025 0.0059
#
# now means of EO by disabled groups
#
svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean)

         DISABGRP statistic.EO         SE
none         none    0.7658662 0.01172310
under 3% under 3%    0.8798920 0.01171431
3%+           3%+    0.8083832 0.03005366
There were 23 warnings (use warnings() to see them)
#
# svyglm compares percent in each disabled group category 
# between eo and non-eo workplaces
#
 summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO))))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.69289    0.02274  30.467  < 2e-16 ***
EO          -0.16135    0.02602  -6.201 6.74e-10 ***

 summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),]))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.22589    0.02052  11.009  < 2e-16 ***
EO           0.16221    0.02390   6.788 1.48e-11 ***

 summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),]))

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.0812183  0.0136809   5.937  3.4e-09 ***
EO          -0.0008611  0.0152388  -0.057    0.955 

##############################################################
# now the same things for
 unweighted data 
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
#
# detailed results not shown
#
svyby(~FEMALE,~EO,subset(wers.des,!is.na(FEMALE)),svymean)
summary(svyglm(FEMALE~EO,wers.des) )
svyby(~ETHNIC,~EO,subset(wers.des,!is.na(ETHNIC)),svymean)
summary(svyglm(ETHNIC~EO,wers.des))
svymean(~DISABGRP,subset(wers.des,!is.na(DISABGRP)))
svyby(~EO,~DISABGRP,subset(wers.des,!is.na(DISABGRP) & !is.na(EO)),svymean)
 summary(svyglm((DISABGRP=='none')~EO,subset(wers.des,!is.na(wers$DISABGRP) & !is.na(EO))))
 summary(svyglm((DISABGRP=='under 3%')~EO,wers.des[!is.na(wers$DISABGRP),]))
 summary(svyglm((DISABGRP=='3%+')~EO,wers.des[!is.na(wers$DISABGRP),]))
Back to top
##########################################################
# Table 4.6 
# now the svyglm command and the glm commands to
# fit logistic regressions
#  first restore design
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial'))
 
 Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP, 
    design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, weights = ~GROSSWT, 
    data = wers)

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -0.412330   0.191134  -2.157  0.03110 *  
ETHNIC            0.028791   0.011844   2.431  0.01515 *  
FEMALE            0.018575   0.003247   5.721 1.22e-08 ***
DISABGRPunder 3%  0.680734   0.211643   3.216  0.00132 ** 
DISABGRP3%+      -0.189232   0.375388  -0.504  0.61425    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.970625)

Number of Fisher Scoring iterations: 5

Warning message:
non-integer #successes in a binomial glm! in: eval(expr, envir, enclos) 
 
#
# and unweighted
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP,wers.des,family='binomial'))
 
 Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP, 
    design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.444213   0.108854   4.081 4.66e-05 ***
ETHNIC           0.017137   0.008531   2.009   0.0447 *  
FEMALE           0.013992   0.001876   7.458 1.30e-13 ***
DISABGRPunder 3% 0.847344   0.135626   6.248 5.07e-10 ***
DISABGRP3%+      0.108964   0.208531   0.523   0.6014    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.9219255)

Number of Fisher Scoring iterations: 5
 
Back to top 
##################################################################
# table 4.7
# now explore models that include workplace size  
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,weights=~GROSSWT,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial'))
 
 Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP + 
    NEMPSIZE, design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, weights = ~GROSSWT, 
    data = wers)

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -0.616667   0.236489  -2.608  0.00919 ** 
ETHNIC            0.029325   0.012202   2.403  0.01634 *  
FEMALE            0.019776   0.003333   5.934 3.47e-09 ***
DISABGRPunder 3%  0.004699   0.224611   0.021  0.98331    
DISABGRP3%+      -0.222865   0.376990  -0.591  0.55447    
NEMPSIZE25-49     0.071526   0.220591   0.324  0.74578    
NEMPSIZE50-99     0.553989   0.218764   2.532  0.01141 *  
NEMPSIZE100-199   1.152938   0.250897   4.595 4.59e-06 ***
NEMPSIZE200-499   1.653990   0.291210   5.680 1.55e-08 ***
NEMPSIZE500+      1.991005   0.349684   5.694 1.43e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.976199)

Number of Fisher Scoring iterations: 5

#
# and without weighting
#
 wers.des <- svydesign(id=~SERNO, strata=~STRATA,data=wers)
 summary(svyglm(EO~ETHNIC+FEMALE+DISABGRP+NEMPSIZE,wers.des,family='binomial'))
 
Call:
svyglm.survey.design(formula = EO ~ ETHNIC + FEMALE + DISABGRP + 
    NEMPSIZE, design = wers.des, family = "binomial")

Survey design:
svydesign(id = ~SERNO, strata = ~STRATA, data = wers)

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      -0.207243   0.175511  -1.181  0.23782    
ETHNIC            0.015954   0.009046   1.764  0.07795 .  
FEMALE            0.017123   0.001953   8.768  < 2e-16 ***
DISABGRPunder 3%  0.177042   0.161640   1.095  0.27352    
DISABGRP3%+       0.004065   0.213042   0.019  0.98478    
NEMPSIZE25-49     0.122007   0.179442   0.680  0.49663    
NEMPSIZE50-99     0.545961   0.189638   2.879  0.00403 ** 
NEMPSIZE100-199   1.096627   0.217922   5.032 5.28e-07 ***
NEMPSIZE200-499   1.575925   0.233285   6.755 1.86e-11 ***
NEMPSIZE500+      1.734561   0.305587   5.676 1.58e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 

(Dispersion parameter for binomial family taken to be 0.9131494)

Number of Fisher Scoring iterations: 5

 
Back to top
###############################################################
#
#  table 4.8 with finite population corrections
#
wers.des <- svydesign(id=~SERNO,fpc=~SAMPFRAC, strata=~STRATA,weight=~GROSSWT,data=wers)
svymean(~EO,subset(wers.des,!is.na(EO)))


svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE)
x
        NEMPSIZE statistic.EO         SE
10-24      10-24    0.6343108 0.03275105
25-49      25-49    0.6480435 0.03483782
50-99      50-99    0.7187470 0.02627534
100-199  100-199    0.8124946 0.02334485
200-499  200-499    0.8679404 0.02012385
500+        500+    0.9129004 0.01756700

#
# and without
wers.des <- svydesign(id=~SERNO, strata=~STRATA,weight=~GROSSWT,data=wers)
svymean(~EO,subset(wers.des,!is.na(EO)))

      mean     SE
EO 0.67279 0.0181

svyby(~EO,~NEMPSIZE,subset(wers.des,!is.na(EO)),svymean,keep.var=TRUE)

        NEMPSIZE statistic.EO         SE
10-24      10-24    0.6343108 0.03280815
25-49      25-49    0.6480435 0.03489757
50-99      50-99    0.7187470 0.02641132
100-199  100-199    0.8124946 0.02358097
200-499  200-499    0.8679404 0.02045991
500+        500+    0.9129004 0.01834597
There were 50 or more warnings (use warnings() to see the first 50)

#
#############################################################
# now some more modelling stuff to make plots
#

#################################################################