library(dplyr)
library(kableExtra)
library(ggplot2)
heaven <- read.table("http://www.stat.ufl.edu/~aa/cat/data/HappyHeaven.dat",
header = TRUE)
hdf <- heaven[rep(1:nrow(heaven), times = heaven$count), ]
with(hdf, table(happy, heaven))
## heaven
## happy no yes
## not 32 190
## pretty 113 611
## very 51 326
glm(count ~ happy + heaven, family = poisson, data = heaven) %>%
summary()
##
## Call:
## glm(formula = count ~ happy + heaven, family = poisson, data = heaven)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4931 0.0941 37.13 < 0.0000000000000002 ***
## happypretty 1.1821 0.0767 15.41 < 0.0000000000000002 ***
## happyvery 0.5296 0.0846 6.26 0.00000000039 ***
## heavenyes 1.7492 0.0774 22.60 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1019.87238 on 5 degrees of freedom
## Residual deviance: 0.89111 on 2 degrees of freedom
## AIC: 49.5
##
## Number of Fisher Scoring iterations: 3
glm(count ~ happy + heaven + happy:heaven, family = poisson, data = heaven) %>%
summary()
##
## Call:
## glm(formula = count ~ happy + heaven + happy:heaven, family = poisson,
## data = heaven)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4657 0.1768 19.61 < 0.0000000000000002 ***
## happypretty 1.2617 0.2002 6.30 0.0000000003 ***
## happyvery 0.4661 0.2255 2.07 0.039 *
## heavenyes 1.7813 0.1911 9.32 < 0.0000000000000002 ***
## happypretty:heavenyes -0.0936 0.2168 -0.43 0.666
## happyvery:heavenyes 0.0738 0.2433 0.30 0.762
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1019.87238285508135504642 on 5 degrees of freedom
## Residual deviance: 0.00000000000000022204 on 0 degrees of freedom
## AIC: 52.61
##
## Number of Fisher Scoring iterations: 2
he <- data.frame(happy = c("not", "pretty", "very"),
heaven = "maybe",
count = c(170, 550, 400))
heaven2 <- rbind(heaven, he)
hdf <- heaven2[rep(1:nrow(heaven2), times = heaven2$count), ]
with(hdf, table(happy, heaven))
## heaven
## happy maybe no yes
## not 170 32 190
## pretty 550 113 611
## very 400 51 326
glm(count ~ happy + heaven + happy:heaven, family = poisson, data = heaven2) %>%
summary()
##
## Call:
## glm(formula = count ~ happy + heaven + happy:heaven, family = poisson,
## data = heaven2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.13580 0.07670 66.96 <0.0000000000000002 ***
## happypretty 1.17412 0.08775 13.38 <0.0000000000000002 ***
## happyvery 0.85567 0.09156 9.35 <0.0000000000000002 ***
## heavenno -1.67006 0.19270 -8.67 <0.0000000000000002 ***
## heavenyes 0.11123 0.10557 1.05 0.292
## happypretty:heavenno 0.08753 0.21863 0.40 0.689
## happyvery:heavenno -0.38958 0.24339 -1.60 0.109
## happypretty:heavenyes -0.00605 0.12083 -0.05 0.960
## happyvery:heavenyes -0.31579 0.12928 -2.44 0.015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1397.711780557911197320 on 8 degrees of freedom
## Residual deviance: -0.000000000000011102 on 0 degrees of freedom
## AIC: 81.57
##
## Number of Fisher Scoring iterations: 2
coffee <- read.csv("https://collinn.github.io/data/coffee.csv")
cdf <- coffee[rep(1:nrow(coffee), times = coffee$Freq), ]
## Partial tables show no effect of smoking given cancer
with(cdf, table(coffee, cancer, smoking))
## , , smoking = no
##
## cancer
## coffee no yes
## no 135 15
## yes 108 12
##
## , , smoking = yes
##
## cancer
## coffee no yes
## no 1 4
## yes 4 16
## Model 0 assumes three-way independence
(m0 <- glm(Freq ~ coffee + cancer + smoking,
family = poisson, data = coffee)) %>% summary()
##
## Call:
## glm(formula = Freq ~ coffee + cancer + smoking, family = poisson,
## data = coffee)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.7813 0.0861 55.55 <0.0000000000000002 ***
## coffeeyes -0.1018 0.1166 -0.87 0.38
## canceryes -1.6633 0.1591 -10.46 <0.0000000000000002 ***
## smokingyes -2.3795 0.2091 -11.38 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 459.10 on 7 degrees of freedom
## Residual deviance: 70.39 on 4 degrees of freedom
## AIC: 113.7
##
## Number of Fisher Scoring iterations: 6
## Model 1 assumes all related
(m1 <- glm(Freq ~ coffee + cancer + smoking + coffee:smoking +
smoking:cancer + coffee:cancer,
family = poisson, data = coffee)) %>% summary()
##
## Call:
## glm(formula = Freq ~ coffee + cancer + smoking + coffee:smoking +
## smoking:cancer + coffee:cancer, family = poisson, data = coffee)
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) 4.9052747784384323 0.0858817527152413 57.12
## coffeeyes -0.2231435513142164 0.1284757718493133 -1.74
## canceryes -2.1972245773362498 0.2662722061256526 -8.25
## smokingyes -4.9052747784376960 0.6484554276202241 -7.56
## coffeeyes:smokingyes 1.6094379124336859 0.5820609694768787 2.77
## canceryes:smokingyes 3.5835189384557204 0.5569481055997451 6.43
## coffeeyes:canceryes 0.0000000000000773 0.3880752350545657 0.00
## Pr(>|z|)
## (Intercept) < 0.0000000000000002 ***
## coffeeyes 0.0824 .
## canceryes < 0.0000000000000002 ***
## smokingyes 0.000000000000039 ***
## coffeeyes:smokingyes 0.0057 **
## canceryes:smokingyes 0.000000000124120 ***
## coffeeyes:canceryes 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 459.102399105386496103 on 7 degrees of freedom
## Residual deviance: -0.000000000000009992 on 1 degrees of freedom
## AIC: 49.31
##
## Number of Fisher Scoring iterations: 3
## Model 2 removes association between coffee and cancer
(m2 <- glm(Freq ~ coffee + cancer + smoking + coffee:smoking + smoking:cancer,
family = poisson, data = coffee)) %>% summary()
##
## Call:
## glm(formula = Freq ~ coffee + cancer + smoking + coffee:smoking +
## smoking:cancer, family = poisson, data = coffee)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.9053 0.0841 58.30 < 0.0000000000000002 ***
## coffeeyes -0.2231 0.1225 -1.82 0.0685 .
## canceryes -2.1972 0.2029 -10.83 < 0.0000000000000002 ***
## smokingyes -4.9053 0.6059 -8.10 0.00000000000000057 ***
## coffeeyes:smokingyes 1.6094 0.5148 3.13 0.0018 **
## canceryes:smokingyes 3.5835 0.5396 6.64 0.00000000003110370 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 459.102399105386496103 on 7 degrees of freedom
## Residual deviance: 0.000000000000013767 on 2 degrees of freedom
## AIC: 47.31
##
## Number of Fisher Scoring iterations: 3
anova(m2, m1)
## Analysis of Deviance Table
##
## Model 1: Freq ~ coffee + cancer + smoking + coffee:smoking + smoking:cancer
## Model 2: Freq ~ coffee + cancer + smoking + coffee:smoking + smoking:cancer +
## coffee:cancer
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2 0.00000000000001377
## 2 1 -0.00000000000000999 1 0.0000000000000238 1
Essentially, we are testing independence (mutual or conditional) by comparing