Load packages
library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)
AxS ANOVA: Optimism Example
## Load data
optimism <- read.csv("http://www.bradthiessen.com/html5/data/optimism.csv")
## Look at variable types
str(optimism)
## 'data.frame': 54 obs. of 3 variables:
## $ subject : int 1 2 3 4 5 6 7 8 9 10 ...
## $ age : int 16 16 16 16 16 16 16 16 16 16 ...
## $ optimism: int 35 32 33 32 31 29 29 27 27 28 ...
## Change subject and age to factor variables
optimism$age <-as.factor(optimism$age)
optimism$subject <-as.factor(optimism$subject)
## Peek at data
head(optimism)
## subject age optimism
## 1 1 16 35
## 2 2 16 32
## 3 3 16 33
## 4 4 16 32
## 5 5 16 31
## 6 6 16 29
## Summary of data
favstats(optimism ~ age, data=optimism)
## .group min Q1 median Q3 max mean sd n missing
## 1 16 13 24.00 27 30.50 35 25.88889 6.578833 18 0
## 2 18 13 25.25 29 31.75 39 26.94444 7.487026 18 0
## 3 20 12 19.00 24 27.75 32 23.38889 5.922429 18 0
## Some summary plots
densityplot(~optimism|age, data=optimism, lwd=2,
main="Optimism by age",
layout=c(1,3))
## Let's also run a test to check for homogeneity of variances
bartlett.test(optimism~age, data=optimism)
##
## Bartlett test of homogeneity of variances
##
## data: optimism by age
## Bartlett's K-squared = 0.9167, df = 2, p-value = 0.6323
## Regular one-way ANOVA
aov.out <- lm(optimism ~ age, data=optimism)
anova(aov.out)
## Analysis of Variance Table
##
## Response: optimism
## Df Sum Sq Mean Sq F value Pr(>F)
## age 2 120.04 60.019 1.3396 0.271
## Residuals 51 2285.00 44.804
## No significant differences among age means
## Check assumptions
plot(aov.out)
##############################################
# Subject means
mean(optimism ~ subject, data=optimism)
## 1 2 3 4 5 6 7 8
## 35.33333 32.66667 31.00000 31.00000 30.00000 29.33333 29.00000 27.66667
## 9 10 11 12 13 14 15 16
## 27.33333 26.33333 25.66667 25.33333 24.00000 22.66667 17.00000 16.33333
## 17 18
## 13.66667 13.00000
sd(optimism$optimism)
## [1] 6.736324
## AxS ANOVA
# Take a look at profile plots (optimism by subject at each age)
interaction.plot(x.factor=optimism$age, trace.factor=optimism$subject,
response=optimism$optimism,
fun=mean, type="b", legend=F)
# Same plot using ggplot2
ggplot(data = optimism, aes(x = age, y = optimism, group = subject)) +
ggtitle("Optimism by age") +
ylab("optimism") + geom_line()
## Run the AxS ANOVA
## Specifying the error as subjects nested within age categories
axs <- aov(optimism ~ age + Error(subject/age), data=optimism)
summary(axs)
##
## Error: subject
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 17 2182 128.3
##
## Error: subject:age
## Df Sum Sq Mean Sq F value Pr(>F)
## age 2 120.0 60.02 19.75 2.03e-06 ***
## Residuals 34 103.3 3.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AxS ANOVA: Grocery Example
## Load data
groceries <- read.csv("http://www.bradthiessen.com/html5/data/groceries.csv")
## Look at variable types
str(groceries)
## 'data.frame': 40 obs. of 3 variables:
## $ price: num 1.17 1.77 1.49 0.65 1.58 3.13 2.09 0.62 5.89 4.46 ...
## $ store: Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 1 1 1 1 1 ...
## $ item : Factor w/ 10 levels "aspirin","bread",..: 7 9 8 4 2 3 5 10 6 1 ...
## We want store and item to be factor variables
## Peek at data
head(groceries)
## price store item
## 1 1.17 A lettuce
## 2 1.77 A potatoes
## 3 1.49 A milk
## 4 0.65 A eggs
## 5 1.58 A bread
## 6 3.13 A cereal
## Summary of data
favstats(price ~ store, data=groceries)
## .group min Q1 median Q3 max mean sd n missing
## 1 A 0.62 1.2500 1.675 2.8700 5.89 2.285 1.717933 10 0
## 2 B 0.65 1.6925 1.830 2.8575 5.99 2.465 1.707430 10 0
## 3 C 0.65 1.4150 1.940 2.7650 5.99 2.436 1.765296 10 0
## 4 D 0.69 1.3650 1.940 2.9400 6.99 2.626 1.987758 10 0
mean(price ~ item, data=groceries)
## aspirin bread cereal eggs
## 4.8600 1.7650 3.0900 0.8550
## ground.beef laundry.detergent lettuce milk
## 2.1375 6.2150 1.3825 1.6400
## potatoes tomato.soup
## 1.9325 0.6525
sd(price ~ item, data=groceries)
## aspirin bread cereal eggs
## 0.29518356 0.15242484 0.07118052 0.21809784
## ground.beef laundry.detergent lettuce milk
## 0.25500000 0.51881275 0.27097048 0.12909944
## potatoes tomato.soup
## 0.10843585 0.02872281
## Some summary plots
densityplot(~price|store, data=groceries, lwd=2,
main="Price by store",
layout=c(1,4))
## Let's also run a test to check for homogeneity of variances
bartlett.test(price~store, data=groceries)
##
## Bartlett test of homogeneity of variances
##
## data: price by store
## Bartlett's K-squared = 0.2701, df = 3, p-value = 0.9655
## Regular one-way ANOVA
aov.out <- lm(price ~ store, data=groceries)
anova(aov.out)
## Analysis of Variance Table
##
## Response: price
## Df Sum Sq Mean Sq F value Pr(>F)
## store 3 0.586 0.1953 0.0604 0.9803
## Residuals 36 116.407 3.2335
## No significant differences among age means
## Check assumptions
plot(aov.out)
##############################################
## AxS ANOVA
# Take a look at profile plots (optimism by subject at each age)
interaction.plot(x.factor=groceries$store, trace.factor=groceries$item,
response=groceries$price,
fun=mean, type="l", legend=F)
# Same plot using ggplot2
ggplot(data = groceries, aes(x = store, y = price, group = item)) +
ggtitle("Price by store") +
ylab("Price") + geom_line()
## Run the AxS ANOVA
## Specifying the error as subjects nested within age categories
axs <- aov(price ~ store + Error(item/store), data=groceries)
summary(axs)
##
## Error: item
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 9 115.2 12.8
##
## Error: item:store
## Df Sum Sq Mean Sq F value Pr(>F)
## store 3 0.5859 0.19529 4.344 0.0127 *
## Residuals 27 1.2137 0.04495
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1