library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)
First, let’s take a look at the “comprehending ambiguous prose” study:
#### Load the data
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")
#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod1 <- aov(Comprehension ~ Condition, data=ambiguous)
#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
##
## Response: Comprehension
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 2 35.053 17.5263 10.012 0.0002002 ***
## Residuals 54 94.526 1.7505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Box plots
bwplot(~Comprehension|Condition, data=ambiguous, lwd=3,
ylab="Condition", xlab="Comprehension",
main="Ambiguous Prose",
layout=(c(1,3)))
#### To check for normality, we could look at plots of the data
densityplot(~Comprehension|Condition, data=ambiguous, lwd=3,
main="Comprehension by Condition",
xlab="Comprehension score",
layout=c(1,3))
#### or a Q-Q plot for each condition
after <- ambiguous %>%
filter(Condition=="After")
qqnorm(after$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)
before <- ambiguous %>%
filter(Condition=="Before")
qqnorm(before$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)
none <- ambiguous %>%
filter(Condition=="None")
qqnorm(none$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)
## Normality test
shapiro.test(ambiguous$Comprehension)
##
## Shapiro-Wilk normality test
##
## data: ambiguous$Comprehension
## W = 0.9433, p-value = 0.009909
## Bartlett's test (test for equal variances)
bartlett.test(Comprehension~Condition, data=ambiguous)
##
## Bartlett test of homogeneity of variances
##
## data: Comprehension by Condition
## Bartlett's K-squared = 0.2025, df = 2, p-value = 0.9037
## Post-hoc tests
## Tukey's HSD
TukeyHSD(mod1, conf.level=.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Comprehension ~ Condition, data = ambiguous)
##
## $Condition
## diff lwr upr p adj
## Before-After 1.7368421 0.7023389 2.7713453 0.0004836
## None-After 0.1578947 -0.8766084 1.1923979 0.9282350
## None-Before -1.5789474 -2.6134505 -0.5444442 0.0015494
## We can plot this confidence interval
plot(TukeyHSD(mod1))
## We could also use t-tests with the Bonferroni adjustment
## to compare pairs of means
with(ambiguous, pairwise.t.test(Comprehension, Condition, p.adjust.method="bonferroni"))
##
## Pairwise comparisons using t tests with pooled SD
##
## data: Comprehension and Condition
##
## After Before
## Before 0.0005 -
## None 1.0000 0.0016
##
## P value adjustment method: bonferroni
#### Load the data
diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv")
#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod2 <- aov(WeightLoss ~ Diet, data=diet)
#### Summarize the model in ANOVA format
anova(mod2)
## Analysis of Variance Table
##
## Response: WeightLoss
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 3 77.6 25.866 0.5361 0.6587
## Residuals 89 4293.7 48.244
### Box plots
bwplot(~WeightLoss|Diet, data=diet, lwd=3,
ylab="Weight Loss", xlab="Diet",
main="Diet and Weightloss",
layout=(c(1,4)))
#### To check for normality, we could look at plots of the data
densityplot(~WeightLoss|Diet, data=diet, lwd=3,
main="Weight loss by diet",
xlab="Pounds lost",
layout=c(1,4))
#### or a Q-Q plot for each condition
atkins <- diet %>%
filter(Diet=="Atkins")
qqnorm(atkins$WeightLoss, ylab = "Atkins"); qqline(atkins$WeightLoss, ylab = "Atkins", lwd=3)
ornish <- diet %>%
filter(Diet=="Ornish")
qqnorm(ornish$WeightLoss, ylab = "Ornish"); qqline(ornish$WeightLoss, ylab = "Ornish", lwd=3)
wtwtchrs <- diet %>%
filter(Diet=="WtWtchrs")
qqnorm(wtwtchrs$WeightLoss, ylab = "WtWtchrs"); qqline(wtwtchrs$WeightLoss, ylab = "WtWtchrs", lwd=3)
zone <- diet %>%
filter(Diet=="Zone")
qqnorm(zone$WeightLoss, ylab = "Zone"); qqline(zone$WeightLoss, ylab = "Zone", lwd=3)
## Normality test
shapiro.test(diet$WeightLoss)
##
## Shapiro-Wilk normality test
##
## data: diet$WeightLoss
## W = 0.9558, p-value = 0.003198
## Bartlett's test (test for equal variances)
bartlett.test(WeightLoss ~ Diet, data=diet)
##
## Bartlett test of homogeneity of variances
##
## data: WeightLoss by Diet
## Bartlett's K-squared = 7.235, df = 3, p-value = 0.06477
## This command creates a series of plots...
plot(mod2)
## Post-hoc tests
## Tukey's HSD
TukeyHSD(mod2, conf.level=.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = WeightLoss ~ Diet, data = diet)
##
## $Diet
## diff lwr upr p adj
## Ornish-Atkins 2.6409524 -3.041011 8.322916 0.6178040
## WtWtchrs-Atkins 0.6732601 -4.662346 6.008866 0.9874777
## Zone-Atkins 0.9655678 -4.370039 6.301174 0.9646520
## WtWtchrs-Ornish -1.9676923 -7.376586 3.441201 0.7765563
## Zone-Ornish -1.6753846 -7.084278 3.733509 0.8491203
## Zone-WtWtchrs 0.2923077 -4.751511 5.336127 0.9987459
## We can plot this confidence interval
plot(TukeyHSD(mod2))
## We could also use t-tests with the Bonferroni adjustment
## to compare pairs of means
with(diet, pairwise.t.test(WeightLoss, Diet, p.adjust.method="bonferroni"))
##
## Pairwise comparisons using t tests with pooled SD
##
## data: WeightLoss and Diet
##
## Atkins Ornish WtWtchrs
## Ornish 1 - -
## WtWtchrs 1 1 -
## Zone 1 1 1
##
## P value adjustment method: bonferroni