Load packages
require(ggplot2)
require(ggvis)
require(dplyr)
require(mosaic)



Load/Clean data
## Load the data
aleks <- read.csv(("~/Desktop/aleks.csv"))
options(digits=1)

## Check variable types and data length
aleks$name <- as.character(aleks$name)
aleks$cohort <- as.factor(aleks$cohort)
aleks$w1ctime <- as.integer(aleks$w1ctime)
aleks$w2ctime <- as.integer(aleks$w2ctime)
aleks$w4time <- as.integer(aleks$w4time)
aleks$w5time <- as.integer(aleks$w5time)
aleks$w6time <- as.integer(aleks$w6time)
aleks$extendtime <- as.integer(aleks$extendtime)
str(aleks)
## 'data.frame':    209 obs. of  42 variables:
##  $ id             : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ cohort         : Factor w/ 6 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ name           : chr  "Arndt, Courtney" "Bailey, Meredith" "Bennett, Allyson E." "Benoit, Kayla A." ...
##  $ actmath        : int  21 21 16 18 16 16 20 19 15 20 ...
##  $ initialpercent : num  0.45 0.31 0.57 0.44 0.14 0.55 0.23 0.47 0.03 0.29 ...
##  $ initialtopics  : int  69 47 87 67 21 84 35 71 5 44 ...
##  $ w1time         : int  13 2 3 2 32 262 1 0 43 53 ...
##  $ w1topics       : int  0 0 0 0 0 7 4 0 27 0 ...
##  $ w2time         : int  141 0 27 0 0 139 134 85 482 0 ...
##  $ w2topics       : int  0 0 0 0 0 7 4 0 27 0 ...
##  $ w3time         : int  260 0 0 0 244 254 472 37 388 89 ...
##  $ w3topics       : int  20 0 0 0 10 10 52 2 31 4 ...
##  $ w4time         : int  500 692 54 0 66 316 428 25 445 208 ...
##  $ w4topics       : int  16 69 1 0 2 20 24 4 35 20 ...
##  $ w5time         : int  52 236 252 189 91 414 243 180 304 88 ...
##  $ w5topics       : int  2 25 10 5 3 17 6 12 22 8 ...
##  $ w6time         : int  961 972 599 1421 980 897 1241 946 747 288 ...
##  $ w6topics       : int  26 61 24 98 73 33 45 65 67 30 ...
##  $ extendtime     : int  0 293 385 51 815 0 0 0 155 275 ...
##  $ extendtopics   : int  0 9 14 7 56 0 0 0 23 16 ...
##  $ totaltime      : int  1928 2195 1320 1663 2228 2283 2519 1273 2564 999 ...
##  $ totalmastered  : int  133 211 136 177 165 178 170 154 237 122 ...
##  $ finalmastered  : int  152 152 152 152 152 152 152 152 152 152 ...
##  $ finalpercent   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ minutespertopic: num  12.69 14.44 8.68 10.94 14.66 ...
##  $ topicsperhour  : num  1.99 4.48 2.23 3.97 3.88 ...
##  $ w1ctime        : int  13 2 3 2 32 262 1 0 43 53 ...
##  $ w2ctime        : int  155 2 30 2 32 401 135 85 526 53 ...
##  $ w3ctime        : int  415 2 30 2 275 655 607 122 913 142 ...
##  $ w4ctime        : int  915 694 84 2 342 971 1035 147 1358 350 ...
##  $ w5ctime        : int  967 930 336 191 433 1386 1278 327 1662 437 ...
##  $ w6ctime        : int  1928 1902 935 1612 1413 2283 2519 1273 2409 725 ...
##  $ extendctime    : int  1928 2195 1320 1663 2228 2283 2519 1273 2564 999 ...
##  $ time           : num  32.1 36.6 22 27.7 37.1 38 42 21.2 42.7 16.7 ...
##  $ start          : Factor w/ 14 levels "06/03/2014","06/06/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ w1date         : Factor w/ 14 levels "06/10/2014","06/13/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ w2date         : Factor w/ 14 levels "06/17/2014","06/20/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ w3date         : Factor w/ 14 levels "06/24/2014","06/27/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ w4date         : Factor w/ 14 levels "07/01/2014","07/04/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ w5date         : Factor w/ 14 levels "07/08/2014","07/11/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ w6date         : Factor w/ 14 levels "07/15/2014","07/18/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ extend         : Factor w/ 14 levels "07/18/2014","07/21/2014",..: 1 1 1 1 1 1 1 1 1 1 ...
## Generate hours
aleks$totalhours <- aleks$totaltime / 60

## Generate ID and group variables (based on initial performance)
aleks$i.group[aleks$initialtopics < 22] <- "0-15% initial"
aleks$i.group[aleks$initialtopics >= 22 & aleks$initialtopics < 46] <- "15-30%"
aleks$i.group[aleks$initialtopics >= 46 & aleks$initialtopics < 76] <- "30-50%"
aleks$i.group[aleks$initialtopics >= 76 & aleks$initialtopics < 114] <- "50-75%"
aleks$i.group[aleks$initialtopics >= 114] <- "75%+ initial"

## Convert to long format
# Create column with initial/final groups
aleks.long <- rbind(data.frame(time="initialpercent", topics=aleks$initialtopics, id=aleks$id, group=aleks$i.group), data.frame(time="finalpercent", topics=aleks$finalmastered, id=aleks$id, group=aleks$i.group))

# Filter out non-participants from long data
aleks.long.participate <- subset(aleks.long, topics>0)

# Filter out non-participants from wide data
aleks.participate <- subset(aleks, finalmastered>0)

## Create jitter value for rugs
aleks$jit <- rnorm(nrow(aleks), -1,.10)
aleks.participate$jit <- rnorm(nrow(aleks.participate), -1,.10)



Summary & Visualizations
Results

Participation rate = 198 of 209 students (94.7%) mastered at least one topic.

Average initial mastery = 34.3% (or 52 of 152 topics) for 198 students who participated (out of 209 total students)

Average final mastery = 91.3% (or 139 of 152 topics)

Difference = 57% (or 87 of 152 topics) in 27.4 hours

158 of students who participated (79.8%) mastered all 152 topics.

166 of students who participated (83.8%) mastered at least 90% of the topics.


Median initial mastery = 30% (or 46 of 152 topics)

Median final mastery = 100% (or 152 of 152 topics)


Visualizations

Note: Unless otherwise noted, all charts include only the 198 students who participated.

## Histogram for initial topics mastered
i.m <- ggplot(aleks.participate, aes(x=initialtopics)) + 
  geom_histogram(binwidth=15, colour = "steelblue", fill = "white", size=1, alpha=0.7) + 
  geom_point(aes(y = -2), position = position_jitter(height = 0.2), size=1) +
  ggtitle("Initial Topics Mastered") +
  xlim(-15, 165) +
  xlab("Topics Mastered (of 152 total)") +
  ylab("number of students") 
i.m

plot of chunk unnamed-chunk-3

## ggvis Histogram for initial topics mastered (filter out students who did nothing)
aleks.participate %>% 
  filter(!is.na(initialtopics) & !finalmastered==0) %>%
  ggvis(~initialtopics, fill := "lightgrey", stroke:="steelblue", fillOpacity:=0.8) %>%
  layer_histograms(binwidth = 15) %>%
  layer_points(y=~jit, size=0.1, stroke:=FALSE, fill:="grey", fillOpacity:=0.3) %>%
  add_axis("x", title = "Initial topics mastered (of 152 total)") %>%
    scale_numeric("x", domain=c(0, 152), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "number of students")

## Histogram for final topics mastered
f.m <- ggplot(aleks.participate, aes(x=finalmastered)) + 
  geom_histogram(binwidth=15, colour = "steelblue", fill = "white", size=1) + 
  geom_point(aes(y = -2), position = position_jitter(height = 1.5), size=1) +
  ggtitle("Final Topics Mastered") +
  xlim(-5, 165) +
  xlab("Topics Mastered (of 152 total)") +
  ylab("number of students") 
f.m

plot of chunk unnamed-chunk-3

## ggvis Histogram for final topics mastered
aleks.participate %>% 
  filter(!is.na(finalmastered) & !finalmastered==0) %>%   ## Eliminate NA responses
  ggvis(~finalmastered, fill := "lightgrey", stroke:="steelblue", fillOpacity:=.8) %>%
  layer_histograms(binwidth = 15) %>%
  layer_points(y=~jit, size=0.1, stroke:=FALSE, fill:="grey") %>%  ## Add rug
  add_axis("x", title = "Final topics mastered (ignoring students who did nothing") %>%
    scale_numeric("x", domain=c(0, 152), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "count")

## Density plots for initial and final
ggplot(aleks.long.participate, aes(x=topics, fill=time)) + geom_density(alpha=.3) +
  ggtitle("Initial and Final Topics Mastered") +
  xlim(-10, 152) +
  xlab("Topics Mastered (of 152 total)") +
  ylab("density") 

plot of chunk unnamed-chunk-3

## ggvis Density plots for initial and final
aleks.long.participate %>% filter(!is.na(topics) & !topics==0) %>%
  ggvis(~topics, fill=~time) %>%
  group_by(time) %>%
  layer_densities() %>%
  add_axis("x", title = "Topics mastered: initial vs final") %>%
  scale_numeric("x", domain=c(0, 145), nice=FALSE, clamp=TRUE) %>%
  add_axis("y", title = "density")

# Initial and Final topics in panel plot
sp <- ggplot(aleks.long.participate, aes(x=topics)) + 
  geom_histogram(binwidth=15, colour = "steelblue", fill = "white", size=1) + 
  geom_point(aes(y = -2), position = position_jitter(height = 0.2), size=1) +
  ggtitle("Topics Mastered") +
  xlim(-10, 165) +
  xlab("Topics Mastered (of 152 total)") +
  ylab("number of students") 
sp + facet_grid(time ~ .)

plot of chunk unnamed-chunk-3

## ggvis does not support panels

## Histogram for hours spent
hours <- ggplot(aleks.participate, aes(x=totalhours)) + 
  geom_histogram(binwidth=7, colour = "steelblue", fill = "white", size=1) + 
  geom_point(aes(y = -0.5), position = position_jitter(height = 0.2), size=1) +
  ggtitle("Time Spent in Aleks") +
  xlim(-10, 140) +
  xlab("Hours for each student") +
  ylab("number of students") 
hours

plot of chunk unnamed-chunk-3

## ggvis Histogram for hours spent
aleks.participate %>% filter(!is.na(totalhours) & !finalmastered==0) %>%
  ggvis(~totalhours, fill := "lightgrey", stroke:="steelblue", fillOpacity:=.8) %>%
  layer_histograms(binwidth = 7) %>%
  layer_points(y=~jit, size=0.1, stroke:=FALSE, fill:="grey", fillOpacity:=0.6) %>%  ## Add rug
  add_axis("x", title = "Hours in ALEKS") %>%
  scale_numeric("x", domain=c(0, 140), nice=FALSE, clamp=TRUE) %>%
  add_axis("y", title = "count")

## Histogram for topics mastered per hour
topicsperhour <- ggplot(aleks.participate, aes(x=topicsperhour)) + 
  geom_histogram(binwidth=1, colour = "steelblue", fill = "white", size=1) + 
  geom_point(aes(y = -2), position = position_jitter(height = 0.2), size=1) +
  ggtitle("Topics Mastered per Hour") +
  xlim(-1, 12) +
  xlab("Topics per hour") +
  ylab("number of students") 
topicsperhour

plot of chunk unnamed-chunk-3

## ggvis Histogram for topics mastered per hour
aleks.participate %>% filter(!is.na(topicsperhour)) %>%
  ggvis(~topicsperhour, fill := "lightgrey", stroke:="steelblue", fillOpacity:=.8) %>%
  layer_histograms(binwidth = 1) %>%
  layer_points(y=~jit-.5, size=0.1, stroke:=FALSE, fill:="grey", fillOpacity:=0.4) %>%  ## Add rug
  add_axis("x", title = "Topics mastered per hour") %>%
  scale_numeric("x", domain=c(0, 12), nice=FALSE, clamp=TRUE) %>%
  add_axis("y", title = "count")

# Scatterplot for topics mastered
qplot(x = initialtopics, y = finalmastered, data = aleks, size = totalhours, colour = I("steelblue"), geom="point", xlim=c(0,152), ylim=c(0,152), xlab="Initial topics mastered", ylab="Final topics mastered")

plot of chunk unnamed-chunk-3

## ggvis Scatterplot for topics mastered
aleks.participate %>% 
  filter(!is.na(initialtopics)) %>%
  filter(!is.na(finalmastered)) %>%
  filter(!is.na(totalhours)) %>%
  ggvis(x=~initialtopics, y=~finalmastered, size=~totalhours, fill := "steelblue", stroke:="steelblue", fillOpacity:=.2) %>%
  layer_points() %>%
  add_axis("x", title = "Initial topics mastered") %>%
  scale_numeric("x", domain=c(0, 152), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Final topics mastered") %>%
  scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE) 

# Scatterplot for topics learned and time spent
aleks$gain <- aleks$finalmastered - aleks$initialtopics
aleks.participate$gain <- aleks.participate$finalmastered - aleks.participate$initialtopics
qplot(x = totalhours, y = gain, data = aleks.participate, size = initialtopics, colour = I("steelblue"), geom="point", xlim=c(0,130), ylim=c(0,152), xlab="Hours", ylab="Change in topics mastered")

plot of chunk unnamed-chunk-3

## ggvis Scatterplot for topics learned and time spent
aleks.participate %>% 
  filter(!is.na(initialtopics)) %>%
  filter(!is.na(finalmastered)) %>%
  filter(!is.na(totalhours)) %>%
  mutate(gain = finalmastered-initialtopics, na.rm=TRUE) %>%
  ggvis(x=~totalhours, y=~gain, size=~initialtopics, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
  layer_points() %>%
  add_axis("x", title = "Hours") %>%
  scale_numeric("x", domain=c(0, 130), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Change in topics mastered") %>%
  scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)

## Adding loess curve
aleks.participate %>% 
  filter(!is.na(initialtopics)) %>%
  filter(!is.na(finalmastered)) %>%
  filter(!is.na(totalhours)) %>%
  mutate(gain = finalmastered-initialtopics, na.rm=TRUE) %>%
  ggvis(x=~totalhours, y=~gain, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
  layer_points() %>%
  layer_smooths() %>%
  add_axis("x", title = "Hours") %>%
  scale_numeric("x", domain=c(0, 130), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Change in topics mastered") %>%
  scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)

## ACT vs. initial topics mastered
aleks.participate %>% 
  filter(!is.na(initialtopics)) %>%
  filter(!is.na(finalmastered)) %>%
  filter(!is.na(actmath)) %>%
  ggvis(x=~actmath, y=~initialtopics, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
  layer_points() %>%
  layer_smooths() %>%
  add_axis("x", title = "ACT Math score") %>%
  scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Initial topics mastered") %>%
  scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)

## ACT vs. minutes per topic mastered
aleks.participate %>% 
  filter(!is.na(minutespertopic)) %>%
  filter(!is.na(actmath)) %>%
  ggvis(x=~actmath, y=~minutespertopic, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
  layer_points() %>%
  layer_smooths() %>%
  add_axis("x", title = "ACT Math score") %>%
  scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Minutes per Topic") %>%
  scale_numeric("y", domain=c(0, 60), nice=FALSE, clamp=TRUE)

## ACT vs. topics per hour
aleks.participate %>% 
  filter(!is.na(topicsperhour)) %>%
  filter(!is.na(actmath)) %>%
  ggvis(x=~actmath, y=~topicsperhour, fill := "steelblue", stroke:="steelblue", fillOpacity:=.6) %>%
  layer_points() %>%
  layer_smooths() %>%
  add_axis("x", title = "ACT Math score") %>%
  scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Topics per Hour") %>%
  scale_numeric("y", domain=c(0, 15), nice=FALSE, clamp=TRUE)

## ACT vs. final topics mastered
aleks.participate %>% 
  filter(!is.na(initialtopics)) %>%
  filter(!is.na(finalmastered)) %>%
  filter(!is.na(actmath)) %>%
  ggvis(x=~actmath, y=~finalmastered, fill := "steelblue", stroke:="steelblue", fillOpacity:=.1) %>%
  layer_points() %>%
  layer_smooths() %>%
  add_axis("x", title = "ACT Math score") %>%
  scale_numeric("x", domain=c(15, 21), nice=FALSE, clamp=TRUE)  %>%
  add_axis("y", title = "Final topics mastered") %>%
  scale_numeric("y", domain=c(0, 152), nice=FALSE, clamp=TRUE)

# Profile plot using ggplot2
p <- ggplot(data = aleks.long.participate, aes(x = time, y = topics, group = id)) +
  ggtitle("Topics Mastered: Initial vs Final") +
  ylab("number of students") 
p + geom_line()

plot of chunk unnamed-chunk-4

# Only include students who finished at less than 90%
aleks.long.participate.didnotfinish = aleks.long.participate %>% filter(topics<137)
p <- ggplot(data = aleks.long.participate.didnotfinish, aes(x = time, y = topics, group = id)) +
  ggtitle("Topics Mastered: Initial vs Final") +
  ylab("number of students") 
p + geom_line()

plot of chunk unnamed-chunk-4

## ggvis Profile plot
aleks.long.participate %>%
  ggvis(x=~time, y=~topics, stroke=~group, strokeOpacity:=0.5) %>%
  group_by(id) %>%
  layer_paths()

# Only include students who finished at less than 90%
aleks.long.participate.didnotfinish %>%
  ggvis(x=~time, y=~topics, strokeOpacity:=0.5) %>%
  group_by(id) %>%
  layer_paths()

## Create panels based on initial topics mastered
p <- ggplot(data = aleks.long.participate, aes(x = time, y = topics, group = id))
p + geom_line() + facet_grid(. ~ group)

plot of chunk unnamed-chunk-4



==========

I want to create a profile plot based on progress per week.

## Convert to long format
aleks2 <- aleks.participate

aleks2.long <- rbind(
  data.frame(time="initial", cohort=aleks2$cohort, minutes=0, 
             topics=aleks2$initialtopics, group=aleks2$i.group, id=aleks2$id),
  data.frame(time="w1", cohort=aleks2$cohort, minutes=aleks2$w1time, 
             topics=aleks2$w1topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w2", cohort=aleks2$cohort, minutes=aleks2$w2time, 
             topics=aleks2$w2topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w3", cohort=aleks2$cohort, minutes=aleks2$w3time, 
             topics=aleks2$w3topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w4", cohort=aleks2$cohort, minutes=aleks2$w4time, 
             topics=aleks2$w4topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w5", cohort=aleks2$cohort, minutes=aleks2$w5time, 
             topics=aleks2$w5topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w6", cohort=aleks2$cohort, minutes=aleks2$w6time, 
             topics=aleks2$w6topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="extend", cohort=aleks2$cohort, minutes=aleks2$extendtime, 
             topics=aleks2$extendtopics, group=aleks2$i.group, id=aleks2$id))

## Get rid of initial topics mastered for the next plot
aleks4.long <- rbind(
  data.frame(time="w1", cohort=aleks2$cohort, minutes=aleks2$w1time, 
             topics=aleks2$w1topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w2", cohort=aleks2$cohort, minutes=aleks2$w2time, 
             topics=aleks2$w2topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w3", cohort=aleks2$cohort, minutes=aleks2$w3time, 
             topics=aleks2$w3topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w4", cohort=aleks2$cohort, minutes=aleks2$w4time, 
             topics=aleks2$w4topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w5", cohort=aleks2$cohort, minutes=aleks2$w5time, 
             topics=aleks2$w5topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w6", cohort=aleks2$cohort, minutes=aleks2$w6time, 
             topics=aleks2$w6topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="extend", cohort=aleks2$cohort, minutes=aleks2$extendtime, 
             topics=aleks2$extendtopics, group=aleks2$i.group, id=aleks2$id))

## Create the plot
p <- ggplot(data = aleks4.long, aes(x = time, y = topics, group = id))
p + geom_line() + facet_grid(. ~ group)

plot of chunk unnamed-chunk-5

# Cohort #1 topics mastered
aleks2.long.c1 <- subset(aleks4.long, cohort == 1)
p <- ggplot(data = aleks2.long.c1, aes(x = time, y = topics, group = id))
p + geom_line()

plot of chunk unnamed-chunk-5

### Look at cumulative time spent
### (maybe using proportion of total time spent per week)
# Generate percent of time per week
aleks2$w1ctp <- aleks2$w1ctime / aleks2$totaltime
aleks2$w2ctp <- aleks2$w2ctime / aleks2$totaltime
aleks2$w3ctp <- aleks2$w3ctime / aleks2$totaltime
aleks2$w4ctp <- aleks2$w4ctime / aleks2$totaltime
aleks2$w5ctp <- aleks2$w5ctime / aleks2$totaltime
aleks2$w6ctp <- aleks2$w6ctime / aleks2$totaltime
aleks2$extendctp <- aleks2$extendctime / aleks2$totaltime

## Convert to long format and use cumulative time
aleks3.long <- rbind(
  data.frame(time="initial", cohort=aleks2$cohort, minutes=0, proptime=0,
             topics=aleks2$initialtopics, group=aleks2$i.group, id=aleks2$id),
  data.frame(time="w1", cohort=aleks2$cohort, minutes=aleks2$w1ctime, proptime=aleks2$w1ctp,
             topics=aleks2$w1topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w2", cohort=aleks2$cohort, minutes=aleks2$w2ctime, proptime=aleks2$w2ctp,
             topics=aleks2$w2topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w3", cohort=aleks2$cohort, minutes=aleks2$w3ctime, proptime=aleks2$w3ctp,
             topics=aleks2$w3topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w4", cohort=aleks2$cohort, minutes=aleks2$w4ctime, proptime=aleks2$w4ctp,
             topics=aleks2$w4topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w5", cohort=aleks2$cohort, minutes=aleks2$w5ctime, proptime=aleks2$w5ctp,
             topics=aleks2$w5topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="w6", cohort=aleks2$cohort, minutes=aleks2$w6ctime, proptime=aleks2$w6ctp,
             topics=aleks2$w6topics, group=aleks2$i.group, id=aleks2$id), 
  data.frame(time="extend", cohort=aleks2$cohort, minutes=aleks2$extendctime, proptime=aleks2$extendctp,
             topics=aleks2$extendtopics, group=aleks2$i.group, id=aleks2$id))

## Create the plot
p <- ggplot(data = aleks3.long, aes(x = time, y = minutes, group = id))
p + geom_line() + facet_grid(. ~ group)

plot of chunk unnamed-chunk-5

## Create plot for average student
averagetime <- aleks3.long %>%
  filter(!is.na(group)) %>%
  group_by(time) %>%
  mutate(hours = minutes/60) %>%
  summarize(hours = mean(hours))
ggplot(data=averagetime, aes(x=time, y=hours, group=1)) + geom_line()

plot of chunk unnamed-chunk-5

## Create plot for average student by initial mastery
averagetime <- aleks3.long %>%
  filter(!is.na(group)) %>%
  group_by(time, group) %>%
  mutate(hours = minutes/60) %>%
  summarize(hours = mean(hours))
ggplot(data=averagetime, aes(x=time, y=hours, group=group)) + geom_line() + facet_grid(. ~ group)

plot of chunk unnamed-chunk-5

## Average proportion of time
averageproptime <- aleks3.long %>%
  filter(!is.na(proptime) & !is.na(group)) %>%
  group_by(time) %>%
  summarize(avgproptime = mean(proptime))
ggplot(data=averageproptime, aes(x=time, y=avgproptime, group=1)) + geom_line()

plot of chunk unnamed-chunk-5

## Create plot for average proportion of time by initial mastery
averageproptime <- aleks3.long %>%
  filter(!is.na(proptime) & !is.na(group)) %>%
  group_by(time, group) %>%
  summarize(avgproptime = mean(proptime))
ggplot(data=averageproptime, aes(x=time, y=avgproptime, group=group)) + geom_line() + facet_grid(. ~ group)

plot of chunk unnamed-chunk-5

## Proportion of students who completed each week
alekspercentcomplete <- read.csv(("~/Desktop/alekspercentcomplete.csv"))
ggplot(data=alekspercentcomplete, aes(x=Week, y=Finished, group=Cohort, color=Cohort)) + geom_line()

plot of chunk unnamed-chunk-5

## Overall proportion
alekspercentcomplete2 <- alekspercentcomplete %>% filter(Cohort=="all")
ggplot(data=alekspercentcomplete2, aes(x=Week, y=Finished, group=Cohort))+ 
  geom_line() +
  ggtitle("Cumulative Proportion of Students Completing") +
  xlab("Week") +
  ylab("proportion of students") 

plot of chunk unnamed-chunk-5

# Only students who DID finish
aleks2.f <- subset(aleks2, finalmastered == 152)
mean(~time, data=aleks2.f)
## [1] 29
mean(~topicsperhour, data=aleks2.f)
## [1] 4
# Only students who did NOT finish - long format
aleks2.nf <- subset(aleks2, finalmastered < 152)

aleks2.nf$w1ctp <- aleks2.nf$w1ctime / aleks2.nf$totaltime
aleks2.nf$w2ctp <- aleks2.nf$w2ctime / aleks2.nf$totaltime
aleks2.nf$w3ctp <- aleks2.nf$w3ctime / aleks2.nf$totaltime
aleks2.nf$w4ctp <- aleks2.nf$w4ctime / aleks2.nf$totaltime
aleks2.nf$w5ctp <- aleks2.nf$w5ctime / aleks2.nf$totaltime
aleks2.nf$w6ctp <- aleks2.nf$w6ctime / aleks2.nf$totaltime
aleks2.nf$extendctp <- aleks2.nf$extendctime / aleks2.nf$totaltime

aleks3.long.nf <- rbind(
  data.frame(time="initial", cohort=aleks2.nf$cohort, minutes=0, proptime=0,
             topics=aleks2.nf$initialtopics, group=aleks2.nf$i.group, id=aleks2.nf$id),
  data.frame(time="w1", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w1ctime, proptime=aleks2.nf$w1ctp,
             topics=aleks2.nf$w1topics, group=aleks2.nf$i.group, id=aleks2.nf$id), 
  data.frame(time="w2", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w2ctime, proptime=aleks2.nf$w2ctp,
             topics=aleks2.nf$w2topics, group=aleks2.nf$i.group, id=aleks2.nf$id), 
  data.frame(time="w3", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w3ctime, proptime=aleks2.nf$w3ctp,
             topics=aleks2.nf$w3topics, group=aleks2.nf$i.group, id=aleks2.nf$id), 
  data.frame(time="w4", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w4ctime, proptime=aleks2.nf$w4ctp,
             topics=aleks2.nf$w4topics, group=aleks2.nf$i.group, id=aleks2.nf$id), 
  data.frame(time="w5", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w5ctime, proptime=aleks2.nf$w5ctp,
             topics=aleks2.nf$w5topics, group=aleks2.nf$i.group, id=aleks2.nf$id), 
  data.frame(time="w6", cohort=aleks2.nf$cohort, minutes=aleks2.nf$w6ctime, proptime=aleks2.nf$w6ctp,
             topics=aleks2.nf$w6topics, group=aleks2.nf$i.group, id=aleks2.nf$id), 
  data.frame(time="extend", cohort=aleks2.nf$cohort, minutes=aleks2.nf$extendctime, proptime=aleks2.nf$extendctp, topics=aleks2.nf$extendtopics, group=aleks2.nf$i.group, id=aleks2.nf$id))


# Cohort #1 cumulative time spent
aleks2.long.c1 <- subset(aleks3.long, cohort == 1)
p <- ggplot(data = aleks2.long.c1, aes(x = time, y = minutes, group = id))
p + geom_line()

plot of chunk unnamed-chunk-5

## Using ggvis
aleks3.long %>%
  filter(cohort==1 & !is.na(group)) %>%
  ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Cohort 2
aleks3.long %>%
  filter(cohort==2 & !is.na(group)) %>%
  ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Cohort 3
aleks3.long %>%
  filter(cohort==3 & !is.na(group)) %>%
  ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Cohort 4
aleks3.long %>%
  filter(cohort==4 & !is.na(group)) %>%
  ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Cohort 5
aleks3.long %>%
  filter(cohort==5 & !is.na(group)) %>%
  ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Cohort 6
aleks3.long %>%
  filter(cohort==6 & !is.na(group)) %>%
  ggvis(x=~time, y=~minutes, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

# Cumulative proportion of time spent
aleks2.long.c1 <- subset(aleks3.long, cohort == 1)
p <- ggplot(data = aleks2.long.c1, aes(x = time, y = proptime, group = id))
p + geom_line()

plot of chunk unnamed-chunk-5

## Using ggvis
aleks3.long %>%
  filter(cohort==1 & !is.na(group)) %>%
  ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

aleks3.long %>%
  filter(cohort==2 & !is.na(group)) %>%
  ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

aleks3.long %>%
  filter(cohort==3 & !is.na(group)) %>%
  ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

aleks3.long %>%
  filter(cohort==4 & !is.na(group)) %>%
  ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

aleks3.long %>%
  filter(cohort==5 & !is.na(group)) %>%
  ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

aleks3.long %>%
  filter(cohort==6 & !is.na(group)) %>%
  ggvis(x=~time, y=~proptime, stroke=~group, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

# Cumulative proportion of time spent for students who did NOT finish
aleks2.long.nf <- subset(aleks3.long.nf)
p <- ggplot(data = aleks2.long.nf, aes(x = time, y = proptime, group = id))
p + geom_line()

plot of chunk unnamed-chunk-5

## Using ggvis
aleks2.long.nf %>%
  ggvis(x=~time, y=~proptime, stroke=~cohort, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Showing total minutes (not proportion of time)
aleks2.long.nf %>%
  ggvis(x=~time, y=~minutes, stroke=~cohort, strokeOpacity:=0.75) %>%
  group_by(id) %>%
  layer_paths()

## Create panels based on initial topics mastered
p <- ggplot(data = aleks3.long.nf, aes(x = time, y = proptime, group = id, color=group))
p + geom_line() + facet_grid(. ~ group)

plot of chunk unnamed-chunk-5

## Create a separate plot for each individual student
library(lattice) 
xyplot(topics ~ time | id, data=aleks3.long.nf, as.table=T, type="l")

plot of chunk unnamed-chunk-5

xyplot(proptime ~ time | id, type = c("p", "l"), data=aleks2.long.c1, as.table=T)

plot of chunk unnamed-chunk-5