Cheatsheet

Data

library(ggplot2)
library(dplyr)

## Example data sets that ship with installed packages
## (mpg is provided by ggplot2, which is attached above)
data(list = c("mtcars", "mpg"))

## Course data sets, downloaded as CSV straight from the web
college <- read.csv(
  file = "https://remiller1450.github.io/data/Colleges2019.csv"
)
dogs <- read.csv(file = "https://collinn.github.io/data/dogs.csv")
## NOTE(review): this file has a .txt extension but is parsed as CSV here;
## confirm it really is comma-delimited
anorexia <- read.csv(file = "https://collinn.github.io/data/anorexia.txt")

Difference in proportions (2x2 table)

## Titanic (from base R's datasets package) is a 4-way table of counts;
## as.data.frame() turns it into one row per cell with the count in Freq
titanic <- as.data.frame(Titanic)
## Repeat each row index Freq times so there is one row per passenger
## (cells with Freq == 0 drop out). seq_len() stays correct even for a
## 0-row data frame, where 1:nrow() would give c(1, 0)
titanic <- titanic[rep(seq_len(nrow(titanic)), times = titanic$Freq), ]
titanic$Freq <- NULL

# Table
(tab <- with(titanic, table(Sex, Survived)))

##         Survived
## Sex        No  Yes
##   Male   1364  367
##   Female  126  344

## Put in success and total
## (x holds the first column of the table, Survived = No, and n the row
## totals, so this is the same comparison as prop.test(tab) below)
prop.test(x = c(1364, 126), n = c(1731, 470))

## Put in 2x2 table
## (rows are the groups; the first column is counted as "success")
prop.test(tab)

t-tests

## Formula interface: response ~ grouping variable, looked up in `data`
t.test(Enrollment ~ Private, data = college)

## Default interface: hand over the two samples as separate vectors
t.test(x = mpg$hwy, y = mpg$cty)

## Paired test: same two-vector call plus paired = TRUE
t.test(x = anorexia$Prewt, y = anorexia$Postwt, paired = TRUE)

\(\chi^2\) tests

Goodness of fit tests

## Goodness of fit with no `p` supplied: every category is given the same
## probability (A-E question distribution)
obs <- c(A = 74, B = 90, C = 76, D = 87, E = 73)
chisq.test(x = obs)

## Goodness of fit of observed counts against the hypothesised proportions
## passed in `p` (jury pool/census)
jury <- c(780, 117, 114, 384, 58)
chisq.test(
  x = jury,
  p = c(0.54, 0.18, 0.12, 0.15, 0.01)
)

Test of independence

## Cross-tabulate the two categorical columns, then hand the resulting
## two-way table to chisq.test()
tab <- table(college[c("Private", "Region")])
chisq.test(x = tab)

ANOVA

## Fit the model; aov() is specified with a formula plus the data frame
av <- aov(formula = speed ~ breed, data = dogs)

## The ANOVA table comes from calling summary() on the fit
summary(object = av)

Regression

## Same formula + data pattern as aov(); summary() prints the fit
fit <- lm(formula = speed ~ size, data = dogs)
summary(object = fit)

## Additional predictors are joined with + on the right-hand side
fit <- lm(
  formula = mpg ~ disp + hp + am,
  data = mtcars
)
summary(object = fit)