• Two-way Table
    • Data
    • Two-way Independent Model
    • Two-way Saturated Model
    • Extending Two-way Saturated Model
  • Three-way
  • Inference

Two-way Table

Data

library(dplyr)
library(kableExtra)
library(ggplot2)

heaven <- read.table("http://www.stat.ufl.edu/~aa/cat/data/HappyHeaven.dat", 
                     header = TRUE)

hdf <- heaven[rep(1:nrow(heaven), times = heaven$count), ]
with(hdf, table(happy, heaven))
##         heaven
## happy     no yes
##   not     32 190
##   pretty 113 611
##   very    51 326

Two-way Independent Model

glm(count ~ happy + heaven, family = poisson, data = heaven) %>% 
  summary()
## 
## Call:
## glm(formula = count ~ happy + heaven, family = poisson, data = heaven)
## 
## Coefficients:
##             Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)   3.4931     0.0941   37.13 < 0.0000000000000002 ***
## happypretty   1.1821     0.0767   15.41 < 0.0000000000000002 ***
## happyvery     0.5296     0.0846    6.26        0.00000000039 ***
## heavenyes     1.7492     0.0774   22.60 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1019.87238  on 5  degrees of freedom
## Residual deviance:    0.89111  on 2  degrees of freedom
## AIC: 49.5
## 
## Number of Fisher Scoring iterations: 3

Two-way Saturated Model

glm(count ~ happy + heaven + happy:heaven, family = poisson, data = heaven) %>% 
  summary()
## 
## Call:
## glm(formula = count ~ happy + heaven + happy:heaven, family = poisson, 
##     data = heaven)
## 
## Coefficients:
##                       Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)             3.4657     0.1768   19.61 < 0.0000000000000002 ***
## happypretty             1.2617     0.2002    6.30         0.0000000003 ***
## happyvery               0.4661     0.2255    2.07                0.039 *  
## heavenyes               1.7813     0.1911    9.32 < 0.0000000000000002 ***
## happypretty:heavenyes  -0.0936     0.2168   -0.43                0.666    
## happyvery:heavenyes     0.0738     0.2433    0.30                0.762    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1019.87238285508135504642  on 5  degrees of freedom
## Residual deviance:    0.00000000000000022204  on 0  degrees of freedom
## AIC: 52.61
## 
## Number of Fisher Scoring iterations: 2

Extending Two-way Saturated Model

he <- data.frame(happy = c("not", "pretty", "very"), 
                 heaven = "maybe", 
                 count = c(170, 550, 400))
heaven2 <- rbind(heaven, he)

hdf <- heaven2[rep(1:nrow(heaven2), times = heaven2$count), ]
with(hdf, table(happy, heaven))
##         heaven
## happy    maybe  no yes
##   not      170  32 190
##   pretty   550 113 611
##   very     400  51 326
glm(count ~ happy + heaven + happy:heaven, family = poisson, data = heaven2) %>% 
  summary()
## 
## Call:
## glm(formula = count ~ happy + heaven + happy:heaven, family = poisson, 
##     data = heaven2)
## 
## Coefficients:
##                       Estimate Std. Error z value            Pr(>|z|)    
## (Intercept)            5.13580    0.07670   66.96 <0.0000000000000002 ***
## happypretty            1.17412    0.08775   13.38 <0.0000000000000002 ***
## happyvery              0.85567    0.09156    9.35 <0.0000000000000002 ***
## heavenno              -1.67006    0.19270   -8.67 <0.0000000000000002 ***
## heavenyes              0.11123    0.10557    1.05               0.292    
## happypretty:heavenno   0.08753    0.21863    0.40               0.689    
## happyvery:heavenno    -0.38958    0.24339   -1.60               0.109    
## happypretty:heavenyes -0.00605    0.12083   -0.05               0.960    
## happyvery:heavenyes   -0.31579    0.12928   -2.44               0.015 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1397.711780557911197320  on 8  degrees of freedom
## Residual deviance:   -0.000000000000011102  on 0  degrees of freedom
## AIC: 81.57
## 
## Number of Fisher Scoring iterations: 2

Three-way

coffee <- read.csv("https://collinn.github.io/data/coffee.csv")
cdf <- coffee[rep(1:nrow(coffee), times = coffee$Freq), ]

## Partial tables show no effect of smoking given cancer
with(cdf, table(coffee, cancer, smoking))
## , , smoking = no
## 
##       cancer
## coffee  no yes
##    no  135  15
##    yes 108  12
## 
## , , smoking = yes
## 
##       cancer
## coffee  no yes
##    no    1   4
##    yes   4  16
## Model 0  assumes three-way independence
(m0 <- glm(Freq ~ coffee + cancer + smoking, 
    family = poisson, data = coffee)) %>% summary()
## 
## Call:
## glm(formula = Freq ~ coffee + cancer + smoking, family = poisson, 
##     data = coffee)
## 
## Coefficients:
##             Estimate Std. Error z value            Pr(>|z|)    
## (Intercept)   4.7813     0.0861   55.55 <0.0000000000000002 ***
## coffeeyes    -0.1018     0.1166   -0.87                0.38    
## canceryes    -1.6633     0.1591  -10.46 <0.0000000000000002 ***
## smokingyes   -2.3795     0.2091  -11.38 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 459.10  on 7  degrees of freedom
## Residual deviance:  70.39  on 4  degrees of freedom
## AIC: 113.7
## 
## Number of Fisher Scoring iterations: 6
## Model 1 assumes all related
(m1 <- glm(Freq ~ coffee + cancer + smoking + coffee:smoking + 
      smoking:cancer + coffee:cancer, 
    family = poisson, data = coffee)) %>% summary()
## 
## Call:
## glm(formula = Freq ~ coffee + cancer + smoking + coffee:smoking + 
##     smoking:cancer + coffee:cancer, family = poisson, data = coffee)
## 
## Coefficients:
##                                 Estimate          Std. Error z value
## (Intercept)           4.9052747784384323  0.0858817527152413   57.12
## coffeeyes            -0.2231435513142164  0.1284757718493133   -1.74
## canceryes            -2.1972245773362498  0.2662722061256526   -8.25
## smokingyes           -4.9052747784376960  0.6484554276202241   -7.56
## coffeeyes:smokingyes  1.6094379124336859  0.5820609694768787    2.77
## canceryes:smokingyes  3.5835189384557204  0.5569481055997451    6.43
## coffeeyes:canceryes   0.0000000000000773  0.3880752350545657    0.00
##                                  Pr(>|z|)    
## (Intercept)          < 0.0000000000000002 ***
## coffeeyes                          0.0824 .  
## canceryes            < 0.0000000000000002 ***
## smokingyes              0.000000000000039 ***
## coffeeyes:smokingyes               0.0057 ** 
## canceryes:smokingyes    0.000000000124120 ***
## coffeeyes:canceryes                1.0000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 459.102399105386496103  on 7  degrees of freedom
## Residual deviance:  -0.000000000000009992  on 1  degrees of freedom
## AIC: 49.31
## 
## Number of Fisher Scoring iterations: 3
## Model 2 removes association between coffee and cancer
(m2 <- glm(Freq ~ coffee + cancer + smoking + coffee:smoking + smoking:cancer, 
    family = poisson, data = coffee)) %>% summary()
## 
## Call:
## glm(formula = Freq ~ coffee + cancer + smoking + coffee:smoking + 
##     smoking:cancer, family = poisson, data = coffee)
## 
## Coefficients:
##                      Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)            4.9053     0.0841   58.30 < 0.0000000000000002 ***
## coffeeyes             -0.2231     0.1225   -1.82               0.0685 .  
## canceryes             -2.1972     0.2029  -10.83 < 0.0000000000000002 ***
## smokingyes            -4.9053     0.6059   -8.10  0.00000000000000057 ***
## coffeeyes:smokingyes   1.6094     0.5148    3.13               0.0018 ** 
## canceryes:smokingyes   3.5835     0.5396    6.64  0.00000000003110370 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 459.102399105386496103  on 7  degrees of freedom
## Residual deviance:   0.000000000000013767  on 2  degrees of freedom
## AIC: 47.31
## 
## Number of Fisher Scoring iterations: 3
anova(m2, m1)
## Analysis of Deviance Table
## 
## Model 1: Freq ~ coffee + cancer + smoking + coffee:smoking + smoking:cancer
## Model 2: Freq ~ coffee + cancer + smoking + coffee:smoking + smoking:cancer + 
##     coffee:cancer
##   Resid. Df           Resid. Dev Df           Deviance Pr(>Chi)
## 1         2  0.00000000000001377                               
## 2         1 -0.00000000000000999  1 0.0000000000000238        1

Inference

Essentially, we are testing independence (mutual or conditional) by comparing