Data
library(ggplot2)
library(dplyr)
## Example datasets bundled with installed packages.
## mtcars ships with base R; mpg comes from ggplot2, which is attached above.
data("mtcars")
data("mpg")

## Course datasets fetched over the network (requires internet access).
college <- read.csv(
  file = "https://remiller1450.github.io/data/Colleges2019.csv"
)
dogs <- read.csv(
  file = "https://collinn.github.io/data/dogs.csv"
)
## NOTE(review): read with read.csv() despite the .txt extension, so the file
## is presumably comma-separated -- confirm against the raw file.
anorexia <- read.csv(
  file = "https://collinn.github.io/data/anorexia.txt"
)
Difference in proportions (2x2 table)
## Expand the built-in Titanic contingency table so that each row of the
## data frame is one passenger (a row with Freq = k is repeated k times).
titanic <- as.data.frame(Titanic)
## seq_len() is used instead of 1:nrow(): 1:0 would yield c(1, 0) on an
## empty data frame, whereas seq_len(0) is a proper empty sequence.
titanic <- titanic[rep(seq_len(nrow(titanic)), times = titanic$Freq), ]
## The frequency column is no longer meaningful once rows are expanded.
titanic$Freq <- NULL
## Table (outer parentheses print the result as well as assigning it)
(tab <- with(titanic, table(Sex, Survived)))
## Survived
## Sex No Yes
## Male 1364 367
## Female 126 344
## Put in "success" counts and group totals.
## The counts here are the "No" (did not survive) column: 1364 of 1731 males
## and 126 of 470 females. This matches prop.test(tab) below, which treats
## the FIRST column of a two-column table as the successes.
prop.test(x = c(1364, 126), n = c(1731, 470))
## Put in 2x2 table (columns are read as successes, then failures)
prop.test(tab)
t-tests
## Two-sample test, formula interface: response ~ grouping variable
t.test(formula = Enrollment ~ Private, data = college)

## Two-sample test, one numeric vector per group
## NOTE(review): hwy and cty appear to be measured on the same cars, so a
## paired test may be the better analysis; kept unpaired to show the syntax.
t.test(x = mpg$hwy, y = mpg$cty)

## Paired test: both vectors must line up observation by observation
t.test(
  x = anorexia$Prewt,
  y = anorexia$Postwt,
  paired = TRUE
)
\(\chi^2\) tests
Goodness of fit tests
## Null hypothesis: every category is equally likely
## (distribution of correct answers A-E across exam questions)
obs <- setNames(c(74, 90, 76, 87, 73), LETTERS[1:5])
chisq.test(x = obs)

## Null hypothesis: observed counts follow specific proportions
## (jury pool counts compared against census proportions; p must sum to 1)
jury <- c(780, 117, 114, 384, 58)
chisq.test(
  x = jury,
  p = c(0.54, 0.18, 0.12, 0.15, 0.01)
)
Test of independence
## chisq.test() accepts any two-way contingency table; build one by
## cross-tabulating the two categorical columns of interest.
tab <- table(Private = college$Private, Region = college$Region)
chisq.test(x = tab)
ANOVA
## aov() is fit through the formula interface: response ~ grouping factor
av <- aov(formula = speed ~ breed, data = dogs)

## The fitted object prints little on its own; summary() gives the ANOVA table
summary(av)
Regression
## lm() uses the same formula interface as aov(): response ~ predictor
fit <- lm(formula = speed ~ size, data = dogs)
summary(fit)

## Additional predictors are joined with `+` on the right-hand side.
## Note that this overwrites the previous `fit` object.
fit <- lm(
  formula = mpg ~ disp + hp + am,
  data = mtcars
)
summary(fit)