require(ggplot2)
require(ggvis)
require(dplyr)
require(mosaic)
## Load the data
aleks <- read.csv(("~/Desktop/aleks.csv"))
options(digits=1)
## Check variable types and data length
aleks$name <- as.character(aleks$name)
aleks$cohort <- as.factor(aleks$cohort)
aleks$w1ctime <- as.integer(aleks$w1ctime)
aleks$w2ctime <- as.integer(aleks$w2ctime)
aleks$w4time <- as.integer(aleks$w4time)
aleks$w5time <- as.integer(aleks$w5time)
aleks$w6time <- as.integer(aleks$w6time)
aleks$extendtime <- as.integer(aleks$extendtime)
str(aleks)
## 'data.frame': 209 obs. of 42 variables:
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ cohort : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ name : chr "Arndt, Courtney" "Bailey, Meredith" "Bennett, Allyson E." "Benoit, Kayla A." ...
## $ actmath : int 21 21 16 18 16 16 20 19 15 20 ...
## $ initialpercent : num 0.45 0.31 0.57 0.44 0.14 0.55 0.23 0.47 0.03 0.29 ...
## $ initialtopics : int 69 47 87 67 21 84 35 71 5 44 ...
## $ w1time : int 13 2 3 2 32 262 1 0 43 53 ...
## $ w1topics : int 0 0 0 0 0 7 4 0 27 0 ...
## $ w2time : int 141 0 27 0 0 139 134 85 482 0 ...
## $ w2topics : int 0 0 0 0 0 7 4 0 27 0 ...
## $ w3time : int 260 0 0 0 244 254 472 37 388 89 ...
## $ w3topics : int 20 0 0 0 10 10 52 2 31 4 ...
## $ w4time : int 500 692 54 0 66 316 428 25 445 208 ...
## $ w4topics : int 16 69 1 0 2 20 24 4 35 20 ...
## $ w5time : int 52 236 252 189 91 414 243 180 304 88 ...
## $ w5topics : int 2 25 10 5 3 17 6 12 22 8 ...
## $ w6time : int 961 972 599 1421 980 897 1241 946 747 288 ...
## $ w6topics : int 26 61 24 98 73 33 45 65 67 30 ...
## $ extendtime : int 0 293 385 51 815 0 0 0 155 275 ...
## $ extendtopics : int 0 9 14 7 56 0 0 0 23 16 ...
## $ totaltime : int 1928 2195 1320 1663 2228 2283 2519 1273 2564 999 ...
## $ totalmastered : int 133 211 136 177 165 178 170 154 237 122 ...
## $ finalmastered : int 152 152 152 152 152 152 152 152 152 152 ...
## $ finalpercent : num 1 1 1 1 1 1 1 1 1 1 ...
## $ minutespertopic: num 12.69 14.44 8.68 10.94 14.66 ...
## $ topicsperhour : num 1.99 4.48 2.23 3.97 3.88 ...
## $ w1ctime : int 13 2 3 2 32 262 1 0 43 53 ...
## $ w2ctime : int 155 2 30 2 32 401 135 85 526 53 ...
## $ w3ctime : int 415 2 30 2 275 655 607 122 913 142 ...
## $ w4ctime : int 915 694 84 2 342 971 1035 147 1358 350 ...
## $ w5ctime : int 967 930 336 191 433 1386 1278 327 1662 437 ...
## $ w6ctime : int 1928 1902 935 1612 1413 2283 2519 1273 2409 725 ...
## $ extendctime : int 1928 2195 1320 1663 2228 2283 2519 1273 2564 999 ...
## $ time : num 32.1 36.6 22 27.7 37.1 38 42 21.2 42.7 16.7 ...
## $ start : Factor w/ 14 levels "06/03/2014","06/06/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ w1date : Factor w/ 14 levels "06/10/2014","06/13/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ w2date : Factor w/ 14 levels "06/17/2014","06/20/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ w3date : Factor w/ 14 levels "06/24/2014","06/27/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ w4date : Factor w/ 14 levels "07/01/2014","07/04/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ w5date : Factor w/ 14 levels "07/08/2014","07/11/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ w6date : Factor w/ 14 levels "07/15/2014","07/18/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ extend : Factor w/ 14 levels "07/18/2014","07/21/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## Generate hours
aleks$totalhours <- aleks$totaltime / 60
## Generate ID and group variables (based on initial performance)
aleks$i.group[aleks$initialtopics < 22] <- "0-15% initial"
aleks$i.group[aleks$initialtopics >= 22 & aleks$initialtopics < 46] <- "15-30%"
aleks$i.group[aleks$initialtopics >= 46 & aleks$initialtopics < 76] <- "30-50%"
aleks$i.group[aleks$initialtopics >= 76 & aleks$initialtopics < 114] <- "50-75%"
aleks$i.group[aleks$initialtopics >= 114] <- "75%+ initial"
## Convert to long format
# Create column with initial/final groups
aleks.long <- rbind(data.frame(time="initialpercent", topics=aleks$initialtopics, id=aleks$id, group=aleks$i.group), data.frame(time="finalpercent", topics=aleks$finalmastered, id=aleks$id, group=aleks$i.group))
# Filter out non-participants from long data
aleks.long.participate <- subset(aleks.long, topics>0)
# Filter out non-participants from wide data
aleks.participate <- subset(aleks, finalmastered>0)
## Create jitter value for rugs
aleks$jit <- rnorm(nrow(aleks), -1,.10)
aleks.participate$jit <- rnorm(nrow(aleks.participate), -1,.10)
Participation rate = 198 of 209 students (94.7%) mastered at least one topic.
Average initial mastery = 34.3% (or 52 of 152 topics) for 198 students who participated (out of 209 total students)
Average final mastery = 91.3% (or 139 of 152 topics)
Difference = 57% (or 87 of 152 topics) in 27.4 hours
158 of students who participated (79.8%) mastered all 152 topics.
166 of students who participated (83.8%) mastered at least 90% of the topics.
Median initial mastery = 30% (or 46 of 152 topics)
Median final mastery = 100% (or 152 of 152 topics)
Note: Unless otherwise noted, all charts include only the 198 students who participated.
## Histogram for initial topics mastered
i.m <- ggplot(aleks.participate, aes(x=initialtopics)) +
geom_histogram(binwidth=15, colour = "steelblue", fill = "white", size=1, alpha=0.7) +
geom_point(aes(y = -2), position = position_jitter(height = 0.2), size=1) +
ggtitle("Initial Topics Mastered") +
xlim(-15, 165) +
xlab("Topics Mastered (of 152 total)") +
ylab("number of students")
i.m
## ggvis Histogram for initial topics mastered (filter out students who did nothing)
aleks.participate %>%
filter(!is.na(initialtopics) & !finalmastered==0) %>%
ggvis(~initialtopics, fill := "lightgrey", stroke:="steelblue", fillOpacity:=0.8) %>%
layer_histograms(binwidth = 15) %>%
layer_points(y=~jit, size=0.1, stroke:=FALSE, fill:="grey", fillOpacity:=0.3) %>%
add_axis("x", title = "Initial topics mastered (of 152 total)") %>%
scale_numeric("x", domain=c(0, 152), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "number of students")
## Histogram for final topics mastered
f.m <- ggplot(aleks.participate, aes(x=finalmastered)) +
geom_histogram(binwidth=15, colour = "steelblue", fill = "white", size=1) +
geom_point(aes(y = -2), position = position_jitter(height = 1.5), size=1) +
ggtitle("Final Topics Mastered") +
xlim(-5, 165) +
xlab("Topics Mastered (of 152 total)") +
ylab("number of students")
f.m
## ggvis Histogram for final topics mastered
aleks.participate %>%
filter(!is.na(finalmastered) & !finalmastered==0) %>% ## Eliminate NA responses
ggvis(~finalmastered, fill := "lightgrey", stroke:="steelblue", fillOpacity:=.8) %>%
layer_histograms(binwidth = 15) %>%
layer_points(y=~jit, size=0.1, stroke:=FALSE, fill:="grey") %>% ## Add rug
add_axis("x", title = "Final topics mastered (ignoring students who did nothing") %>%
scale_numeric("x", domain=c(0, 152), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "count")
## Density plots for initial and final
ggplot(aleks.long.participate, aes(x=topics, fill=time)) + geom_density(alpha=.3) +
ggtitle("Initial and Final Topics Mastered") +
xlim(-10, 152) +
xlab("Topics Mastered (of 152 total)") +
ylab("density")
## ggvis Density plots for initial and final
aleks.long.participate %>% filter(!is.na(topics) & !topics==0) %>%
ggvis(~topics, fill=~time) %>%
group_by(time) %>%
layer_densities() %>%
add_axis("x", title = "Topics mastered: initial vs final") %>%
scale_numeric("x", domain=c(0, 145), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "density")
# Initial and Final topics in panel plot
sp <- ggplot(aleks.long.participate, aes(x=topics)) +
geom_histogram(binwidth=15, colour = "steelblue", fill = "white", size=1) +
geom_point(aes(y = -2), position = position_jitter(height = 0.2), size=1) +
ggtitle("Topics Mastered") +
xlim(-10, 165) +
xlab("Topics Mastered (of 152 total)") +
ylab("number of students")
sp + facet_grid(time ~ .)
## ggvis does not support panels
## Histogram for hours spent
hours <- ggplot(aleks.participate, aes(x=totalhours)) +
geom_histogram(binwidth=7, colour = "steelblue", fill = "white", size=1) +
geom_point(aes(y = -0.5), position = position_jitter(height = 0.2), size=1) +
ggtitle("Time Spent in Aleks") +
xlim(-10, 140) +
xlab("Hours for each student") +
ylab("number of students")
hours
## ggvis Histogram for hours spent
aleks.participate %>% filter(!is.na(totalhours) & !finalmastered==0) %>%
ggvis(~totalhours, fill := "lightgrey", stroke:="steelblue", fillOpacity:=.8) %>%
layer_histograms(binwidth = 7) %>%
layer_points(y=~jit, size=0.1, stroke:=FALSE, fill:="grey", fillOpacity:=0.6) %>% ## Add rug
add_axis("x", title = "Hours in ALEKS") %>%
scale_numeric("x", domain=c(0, 140), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "count")
## Histogram for topics mastered per hour
topicsperhour <- ggplot(aleks.participate, aes(x=topicsperhour)) +
geom_histogram(binwidth=1, colour = "steelblue", fill = "white", size=1) +
geom_point(aes(y = -2), position = position_jitter(height = 0.2), size=1) +
ggtitle("Topics Mastered per Hour") +
xlim(-1, 12) +
xlab("Topics per hour") +
ylab("number of students")
topicsperhour
## ggvis Histogram for topics mastered per hour
aleks.participate %>% filter(!is.na(topicsperhour)) %>%
ggvis(~topicsperhour, fill := "lightgrey", stroke:="steelblue", fillOpacity:=.8) %>%
layer_histograms(binwidth = 1) %>%
layer_points(y=~jit-.5, size=0.1, stroke:=FALSE, fill:="grey", fillOpacity:=0.4) %>% ## Add rug
add_axis("x", title = "Topics mastered per hour") %>%
scale_numeric("x", domain=c(0, 12), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "count")
# Scatterplot for topics mastered
qplot(x = initialtopics, y = finalmastered, data = aleks, size = totalhours, colour = I("steelblue"), geom="point", xlim=c(0,152), ylim=c(0,152), xlab="Initial topics mastered", ylab="Final topics mastered")
## ggvis Scatterplot for topics mastered
aleks.participate %>%
filter(!is.na(initialtopics)) %>%
filter(!is.na(finalmastered)) %>%
filter(!is.na(totalhours)) %>%
ggvis(x=~initialtopics, y=~finalmastered, size=~totalhours, fill := "steelblue", stroke:="steelblue", fillOpacity:=.2) %>%
layer_points() %>%
add_axis("x", title = "Initial topics mastered") %>%
scale_numeric("x", domain=c(0, 152), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Final topics mastered") %>%
scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)
# Scatterplot for topics learned and time spent
aleks$gain <- aleks$finalmastered - aleks$initialtopics
aleks.participate$gain <- aleks.participate$finalmastered - aleks.participate$initialtopics
qplot(x = totalhours, y = gain, data = aleks.participate, size = initialtopics, colour = I("steelblue"), geom="point", xlim=c(0,130), ylim=c(0,152), xlab="Hours", ylab="Change in topics mastered")
## ggvis Scatterplot for topics learned and time spent
aleks.participate %>%
filter(!is.na(initialtopics)) %>%
filter(!is.na(finalmastered)) %>%
filter(!is.na(totalhours)) %>%
mutate(gain = finalmastered-initialtopics, na.rm=TRUE) %>%
ggvis(x=~totalhours, y=~gain, size=~initialtopics, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
layer_points() %>%
add_axis("x", title = "Hours") %>%
scale_numeric("x", domain=c(0, 130), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Change in topics mastered") %>%
scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)
## Adding loess curve
aleks.participate %>%
filter(!is.na(initialtopics)) %>%
filter(!is.na(finalmastered)) %>%
filter(!is.na(totalhours)) %>%
mutate(gain = finalmastered-initialtopics, na.rm=TRUE) %>%
ggvis(x=~totalhours, y=~gain, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
layer_points() %>%
layer_smooths() %>%
add_axis("x", title = "Hours") %>%
scale_numeric("x", domain=c(0, 130), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Change in topics mastered") %>%
scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)
## ACT vs. initial topics mastered
aleks.participate %>%
filter(!is.na(initialtopics)) %>%
filter(!is.na(finalmastered)) %>%
filter(!is.na(actmath)) %>%
ggvis(x=~actmath, y=~initialtopics, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
layer_points() %>%
layer_smooths() %>%
add_axis("x", title = "ACT Math score") %>%
scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Initial topics mastered") %>%
scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)
## ACT vs. minutes per topic mastered
aleks.participate %>%
filter(!is.na(minutespertopic)) %>%
filter(!is.na(actmath)) %>%
ggvis(x=~actmath, y=~minutespertopic, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
layer_points() %>%
layer_smooths() %>%
add_axis("x", title = "ACT Math score") %>%
scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Minutes per Topic") %>%
scale_numeric("y", domain=c(0, 60), nice=FALSE, clamp=TRUE)
## ACT vs. topics per hour
aleks.participate %>%
filter(!is.na(topicsperhour)) %>%
filter(!is.na(actmath)) %>%
ggvis(x=~actmath, y=~topicsperhour, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
layer_points() %>%
layer_smooths() %>%
add_axis("x", title = "ACT Math score") %>%
scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Topics per Hour") %>%
scale_numeric("y", domain=c(0, 15), nice=FALSE, clamp=TRUE)
## ACT vs. final topics mastered
aleks.participate %>%
filter(!is.na(initialtopics)) %>%
filter(!is.na(finalmastered)) %>%
filter(!is.na(actmath)) %>%
ggvis(x=~actmath, y=~finalmastered, fill := "steelblue", stroke:="steelblue", fillOpacity:=.1) %>%
layer_points() %>%
layer_smooths() %>%
add_axis("x", title = "ACT Math score") %>%
scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE) %>%
add_axis("y", title = "Final topics mastered") %>%
scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)
# Profile plot using ggplot2
p <- ggplot(data = aleks.long.participate, aes(x = time, y = topics, group = id)) +
ggtitle("Topics Mastered: Initial vs Final") +
ylab("number of students")
p + geom_line()
# Only include students who finished at less than 90%
aleks.long.participate.didnotfinish = aleks.long.participate %>% filter(topics<137)
p <- ggplot(data = aleks.long.participate.didnotfinish, aes(x = time, y = topics, group = id)) +
ggtitle("Topics Mastered: Initial vs Final") +
ylab("number of students")
p + geom_line()
## ggvis Profile plot
aleks.long.participate %>%
ggvis(x=~time, y=~topics, stroke=~group, strokeOpacity:=0.5) %>%
group_by(id) %>%
layer_paths()
# Only include students who finished at less than 90%
aleks.long.participate.didnotfinish %>%
ggvis(x=~time, y=~topics, strokeOpacity:=0.5) %>%
group_by(id) %>%
layer_paths()
## Create panels based on initial topics mastered
p <- ggplot(data = aleks.long.participate, aes(x = time, y = topics, group = id))
p + geom_line() + facet_grid(. ~ group)
==========
I want to create a profile plot based on progress per week.
## Convert to long format
aleks2 <- aleks.participate
aleks2.long <- rbind(
data.frame(time="initial", cohort=aleks2$cohort, minutes=0,
topics=aleks2$initialtopics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w1", cohort=aleks2$cohort, minutes=aleks2$w1time,
topics=aleks2$w1topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w2", cohort=aleks2$cohort, minutes=aleks2$w2time,
topics=aleks2$w2topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w3", cohort=aleks2$cohort, minutes=aleks2$w3time,
topics=aleks2$w3topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w4", cohort=aleks2$cohort, minutes=aleks2$w4time,
topics=aleks2$w4topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w5", cohort=aleks2$cohort, minutes=aleks2$w5time,
topics=aleks2$w5topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w6", cohort=aleks2$cohort, minutes=aleks2$w6time,
topics=aleks2$w6topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="extend", cohort=aleks2$cohort, minutes=aleks2$extendtime,
topics=aleks2$extendtopics, group=aleks2$i.group, id=aleks2$id))
## Get rid of initial topics mastered for the next plot
aleks4.long <- rbind(
data.frame(time="w1", cohort=aleks2$cohort, minutes=aleks2$w1time,
topics=aleks2$w1topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w2", cohort=aleks2$cohort, minutes=aleks2$w2time,
topics=aleks2$w2topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w3", cohort=aleks2$cohort, minutes=aleks2$w3time,
topics=aleks2$w3topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w4", cohort=aleks2$cohort, minutes=aleks2$w4time,
topics=aleks2$w4topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w5", cohort=aleks2$cohort, minutes=aleks2$w5time,
topics=aleks2$w5topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w6", cohort=aleks2$cohort, minutes=aleks2$w6time,
topics=aleks2$w6topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="extend", cohort=aleks2$cohort, minutes=aleks2$extendtime,
topics=aleks2$extendtopics, group=aleks2$i.group, id=aleks2$id))
## Create the plot
p <- ggplot(data = aleks4.long, aes(x = time, y = topics, group = id))
p + geom_line() + facet_grid(. ~ group)
# Cohort #1 topics mastered
aleks2.long.c1 <- subset(aleks4.long, cohort == 1)
p <- ggplot(data = aleks2.long.c1, aes(x = time, y = topics, group = id))
p + geom_line()
### Look at cumulative time spent
### (maybe using proportion of total time spent per week)
# Generate percent of time per week
aleks2$w1ctp <- aleks2$w1ctime / aleks2$totaltime
aleks2$w2ctp <- aleks2$w2ctime / aleks2$totaltime
aleks2$w3ctp <- aleks2$w3ctime / aleks2$totaltime
aleks2$w4ctp <- aleks2$w4ctime / aleks2$totaltime
aleks2$w5ctp <- aleks2$w5ctime / aleks2$totaltime
aleks2$w6ctp <- aleks2$w6ctime / aleks2$totaltime
aleks2$extendctp <- aleks2$extendctime / aleks2$totaltime
## Convert to long format and use cumulative time
aleks3.long <- rbind(
data.frame(time="initial", cohort=aleks2$cohort, minutes=0, proptime=0,
topics=aleks2$initialtopics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w1", cohort=aleks2$cohort, minutes=aleks2$w1ctime, proptime=aleks2$w1ctp,
topics=aleks2$w1topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w2", cohort=aleks2$cohort, minutes=aleks2$w2ctime, proptime=aleks2$w2ctp,
topics=aleks2$w2topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w3", cohort=aleks2$cohort, minutes=aleks2$w3ctime, proptime=aleks2$w3ctp,
topics=aleks2$w3topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w4", cohort=aleks2$cohort, minutes=aleks2$w4ctime, proptime=aleks2$w4ctp,
topics=aleks2$w4topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w5", cohort=aleks2$cohort, minutes=aleks2$w5ctime, proptime=aleks2$w5ctp,
topics=aleks2$w5topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="w6", cohort=aleks2$cohort, minutes=aleks2$w6ctime, proptime=aleks2$w6ctp,
topics=aleks2$w6topics, group=aleks2$i.group, id=aleks2$id),
data.frame(time="extend", cohort=aleks2$cohort, minutes=aleks2$extendctime, proptime=aleks2$extendctp,
topics=aleks2$extendtopics, group=aleks2$i.group, id=aleks2$id))
## Create the plot
p <- ggplot(data = aleks3.long, aes(x = time, y = minutes, group = id))
p + geom_line() + facet_grid(. ~ group)
## Create plot for average student
averagetime <- aleks3.long %>%
filter(!is.na(group)) %>%
group_by(time) %>%
mutate(hours = minutes/60) %>%
summarize(hours = mean(hours))
ggplot(data=averagetime, aes(x=time, y=hours, group=1)) + geom_line()
## Create plot for average student by initial mastery
averagetime <- aleks3.long %>%
filter(!is.na(group)) %>%
group_by(time, group) %>%
mutate(hours = minutes/60) %>%
summarize(hours = mean(hours))
ggplot(data=averagetime, aes(x=time, y=hours, group=group)) + geom_line() + facet_grid(. ~ group)
## Average proportion of time
averageproptime <- aleks3.long %>%
filter(!is.na(proptime) & !is.na(group)) %>%
group_by(time) %>%
summarize(avgproptime = mean(proptime))
ggplot(data=averageproptime, aes(x=time, y=avgproptime, group=1)) + geom_line()
## Create plot for average proportion of time by initial mastery
averageproptime <- aleks3.long %>%
filter(!is.na(proptime) & !is.na(group)) %>%
group_by(time, group) %>%
summarize(avgproptime = mean(proptime))
ggplot(data=averageproptime, aes(x=time, y=avgproptime, group=group)) + geom_line() + facet_grid(. ~ group)
## Proportion of students who completed each week
alekspercentcomplete <- read.csv(("~/Desktop/alekspercentcomplete.csv"))
ggplot(data=alekspercentcomplete, aes(x=Week, y=Finished, group=Cohort, color=Cohort)) + geom_line()
## Overall proportion
alekspercentcomplete2 <- alekspercentcomplete %>% filter(Cohort=="all")
ggplot(data=alekspercentcomplete2, aes(x=Week, y=Finished, group=Cohort))+
geom_line() +
ggtitle("Cumulative Proportion of Students Completing") +
xlab("Week") +
ylab("proportion of students")
# Only students who DID finish
aleks2.f <- subset(aleks2, finalmastered == 152)
mean(~time, data=aleks2.f)
## [1] 29
mean(~topicsperhour, data=aleks2.f)
## [1] 4
# Only students who did NOT finish - long format
aleks2.nf <- subset(aleks2, finalmastered < 152)
aleks2.nf$w1ctp <- aleks2.nf$w1ctime / aleks2.nf$totaltime
aleks2.nf$w2ctp <- aleks2.nf$w2ctime / aleks2.nf$totaltime
aleks2.nf$w3ctp <- aleks2.nf$w3ctime / aleks2.nf$totaltime
aleks2.nf$w4ctp <- aleks2.nf$w4ctime / aleks2.nf$totaltime
aleks2.nf$w5ctp <- aleks2.nf$w5ctime / aleks2.nf$totaltime
aleks2.nf$w6ctp <- aleks2.nf$w6ctime / aleks2.nf$totaltime
aleks2.nf$extendctp <- aleks2.nf$extendctime / aleks2.nf$totaltime
aleks3.long.nf <- rbind(
data.frame(time="initial", cohort=aleks2.nf$cohort, minutes=0, proptime=0,
topics=aleks2.nf$initialtopics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="w1", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w1ctime, proptime=aleks2.nf$w1ctp,
topics=aleks2.nf$w1topics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="w2", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w2ctime, proptime=aleks2.nf$w2ctp,
topics=aleks2.nf$w2topics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="w3", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w3ctime, proptime=aleks2.nf$w3ctp,
topics=aleks2.nf$w3topics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="w4", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w4ctime, proptime=aleks2.nf$w4ctp,
topics=aleks2.nf$w4topics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="w5", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w5ctime, proptime=aleks2.nf$w5ctp,
topics=aleks2.nf$w5topics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="w6", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w6ctime, proptime=aleks2.nf$w6ctp,
topics=aleks2.nf$w6topics, group=aleks2.nf$i.group, id=aleks2.nf$id),
data.frame(time="extend", cohort=aleks2.nf$cohort, minutes=aleks2.nf$extendctime, proptime=aleks2.nf$extendctp, topics=aleks2.nf$extendtopics, group=aleks2.nf$i.group, id=aleks2.nf$id))
# Cohort #1 cumulative time spent
aleks2.long.c1 <- subset(aleks3.long, cohort == 1)
p <- ggplot(data = aleks2.long.c1, aes(x = time, y = minutes, group = id))
p + geom_line()
## Using ggvis
aleks3.long %>%
filter(cohort==1 & !is.na(group)) %>%
ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Cohort 2
aleks3.long %>%
filter(cohort==2 & !is.na(group)) %>%
ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Cohort 3
aleks3.long %>%
filter(cohort==3 & !is.na(group)) %>%
ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Cohort 4
aleks3.long %>%
filter(cohort==4 & !is.na(group)) %>%
ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Cohort 5
aleks3.long %>%
filter(cohort==5 & !is.na(group)) %>%
ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Cohort 6
aleks3.long %>%
filter(cohort==6 & !is.na(group)) %>%
ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
# Cumulative proportion of time spent
aleks2.long.c1 <- subset(aleks3.long, cohort == 1)
p <- ggplot(data = aleks2.long.c1, aes(x = time, y = proptime, group = id))
p + geom_line()
## Using ggvis
aleks3.long %>%
filter(cohort==1 & !is.na(group)) %>%
ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
aleks3.long %>%
filter(cohort==2 & !is.na(group)) %>%
ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
aleks3.long %>%
filter(cohort==3 & !is.na(group)) %>%
ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
aleks3.long %>%
filter(cohort==4 & !is.na(group)) %>%
ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
aleks3.long %>%
filter(cohort==5 & !is.na(group)) %>%
ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
aleks3.long %>%
filter(cohort==6 & !is.na(group)) %>%
ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
# Cumulative proportion of time spent for students who did NOT finish
aleks2.long.nf <- subset(aleks3.long.nf)
p <- ggplot(data = aleks2.long.nf, aes(x = time, y = proptime, group = id))
p + geom_line()
## Using ggvis
aleks2.long.nf %>%
ggvis(x=~time, y=~proptime, stroke=~cohort, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Showing total minutes (not proportion of time)
aleks2.long.nf %>%
ggvis(x=~time, y=~minutes, stroke=~cohort, strokeOpacity:=0.75) %>%
group_by(id) %>%
layer_paths()
## Create panels based on initial topics mastered
p <- ggplot(data = aleks3.long.nf, aes(x = time, y = proptime, group = id, color=group))
p + geom_line() + facet_grid(. ~ group)
## Create a separate plot for each individual student
library(lattice)
xyplot(topics ~ time | id, data=aleks3.long.nf, as.table=T, type="l")
xyplot(proptime ~ time | id, type = c("p", "l"), data=aleks2.long.c1, as.table=T)