Ambiguous prose
#### Load the data from the web
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")
head(ambiguous)
## Condition Comprehension
## 1 After 6
## 2 After 5
## 3 After 4
## 4 After 2
## 5 After 1
## 6 After 3
table(ambiguous$Condition, ambiguous$Comprehension)
##
## 1 2 3 4 5 6 7
## After 2 4 6 3 3 1 0
## Before 0 1 2 3 5 7 1
## None 1 4 5 6 2 1 0
ambiguous %>%
group_by(Condition) %>%
summarize(comprehension = mean(Comprehension), sd = sd(Comprehension), median=median(Comprehension), n = n())
## Source: local data frame [3 x 5]
##
## Condition comprehension sd median n
## 1 After 3.210526 1.397575 3 19
## 2 Before 4.947368 1.311220 5 19
## 3 None 3.368421 1.256562 3 19
ambiguous %>%
summarize(comprehension = mean(Comprehension), sd = sd(Comprehension), median=median(Comprehension), n = n())
## comprehension sd median n
## 1 3.842105 1.521154 4 57
dotPlot( ~ Comprehension | Condition, data = ambiguous, nint = 17,
endpoints = c(-2, 10), layout = c(1,3), aspect = .35, cex=.7,
xlab = "Comprehension score (0-7)")
## Take some random shuffles and calculate means
mean(Comprehension ~ shuffle(Condition), data=ambiguous)
## After Before None
## 4.105263 3.421053 4.000000
##### MAD approach
xyplot(jitter(Comprehension) ~ Condition, data = ambiguous, alpha = 0.6, cex = 1.2, par.settings=simpleTheme(col="black"))
# Create our test statistic (mean absolute difference in means)
test.stat <- MAD(mean(Comprehension ~ Condition, data = ambiguous))
# Randomly shuffle the "weight" groups 100,000 times and calculate our test statistic
rtest.stats = do(100000) * MAD(mean(Comprehension ~ shuffle(Condition), data=ambiguous))
histogram(~result, data = rtest.stats, col="grey", xlim=c(-.2, 1.75),
xlab = "MAD values (assuming null hypothesis is true)", width=.07,
main=paste("mean =", mean(rtest.stats$result, na.rm=T), "; std. dev =", sd(rtest.stats$result, na.rm=T)))
ladd(panel.abline(v=test.stat))
tally(~ (result >= test.stat), format="prop", data=rtest.stats)
##
## TRUE FALSE
## 0.00086 0.99914
#### Load the data from the web
recall <- read.csv("http://www.bradthiessen.com/html5/data/recall.csv")
head(recall)
## Condition Recall
## 1 After 7
## 2 After 5
## 3 After 5
## 4 After 5
## 5 After 2
## 6 After 8
mean(recall$Recall ~ recall$Condition)
## After Before None
## 5.368421 8.263158 6.631579
sd(recall$Recall ~ recall$Condition)
## After Before None
## 1.460994 1.820931 2.005839
mean(recall$Recall)
## [1] 6.754386
sd(recall$Recall)
## [1] 2.115257
table(recall$Recall, recall$Condition)
##
## After Before None
## 2 1 0 0
## 3 1 0 2
## 4 0 1 0
## 5 11 1 4
## 6 2 1 3
## 7 2 2 3
## 8 2 4 3
## 9 0 5 3
## 10 0 4 1
## 11 0 1 0
dotPlot( ~ Recall | Condition, data = recall, nint = 1000, cex=.3,
endpoints = c(-1, 12), layout = c(1,3), aspect = .35,
xlab = "Comprehension score (0-11)")
#### Load the data
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")
#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod1 <- aov(Comprehension ~ Condition, data=ambiguous)
#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
##
## Response: Comprehension
## Df Sum Sq Mean Sq F value Pr(>F)
## Condition 2 35.053 17.5263 10.012 0.0002002 ***
## Residuals 54 94.526 1.7505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#### Load the diet data
diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv")
diet %>%
group_by(Diet) %>%
summarize(n = n(), mean=mean(WeightLoss), sd=sd(WeightLoss))
## Source: local data frame [4 x 4]
##
## Diet n mean sd
## 1 Atkins 21 3.919048 6.045215
## 2 Ornish 20 6.560000 9.291218
## 3 WtWtchrs 26 4.592308 5.392656
## 4 Zone 26 4.884615 6.915472
mean(diet$WeightLoss)
## [1] 4.945161
sd(diet$WeightLoss)
## [1] 6.893058
##### MAD approach
# Create our test statistic (mean absolute difference in means)
## (4/6) to make the average over the 6 comparisons; not the 4 groups
test.stat <- MAD(mean(WeightLoss ~ Diet, data = diet)) * (4/6)
# Randomly shuffle the "diet" groups 100,000 times and calculate our test statistic
rtest.stats = do(100000) * MAD(mean(WeightLoss ~ shuffle(Diet), data=diet)) * (4/6)
histogram(~result, data = rtest.stats, col="grey", xlim=c(-.2, 7),
xlab = "MAD values (assuming null hypothesis is true)", width=.07,
main=paste("mean =", mean(rtest.stats$result, na.rm=T), "; std. dev =", sd(rtest.stats$result, na.rm=T)))
ladd(panel.abline(v=test.stat))
tally(~ (result >= test.stat), format="prop", data=rtest.stats)
##
## TRUE FALSE
## 0.61972 0.38028
#### Run an ANOVA
mod1 <- aov(WeightLoss ~ Diet, data=diet)
#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
##
## Response: WeightLoss
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 3 77.6 25.866 0.5361 0.6587
## Residuals 89 4293.7 48.244
# Permutation approach
mod2 <- lm(WeightLoss ~ Diet, data=diet)
anova(mod2)
## Analysis of Variance Table
##
## Response: WeightLoss
## Df Sum Sq Mean Sq F value Pr(>F)
## Diet 3 77.6 25.866 0.5361 0.6587
## Residuals 89 4293.7 48.244
teststat.obs = summary(mod2)$fstatistic[1]
rtest.stats = do(10000) * summary(
lm(WeightLoss ~ shuffle(Diet), data=diet))$fstatistic[1]
histogram(~ value, xlim=c(-.2,7), groups=value >= test.stat, data=rtest.stats, width=.1)
ladd(panel.abline(v=test.stat))
tally(~ (value >= teststat.obs), format="prop", data=rtest.stats)
##
## TRUE FALSE
## 0.6563 0.3437