Simple Linear Regression
## Load packages: MASS (cats data, rlm), dplyr, ggvis, mosaic (do, shuffle,
## tally, histogram), and car (ncvTest, crPlots)
library(MASS)
library(dplyr)
library(ggvis)
library(mosaic)
library(car)
## Load cat dataset
data(cats)
## Scatterplot
cats %>%
  ggvis(x = ~Bwt, y = ~Hwt, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .4, strokeOpacity := .4) %>%
  layer_points() %>%
  add_axis("x", title = "Body weight (kg)") %>%
  scale_numeric("x", domain = c(1.5, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Heart weight (g)") %>%
  scale_numeric("y", domain = c(6, 22), nice = FALSE, clamp = TRUE)
## Correlation
cor(cats$Bwt, cats$Hwt)
## [1] 0.8041274
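## Sanity check: the correlation is just the covariance scaled by the two
## standard deviations, so this reproduces the value above
with(cats, cov(Bwt, Hwt) / (sd(Bwt) * sd(Hwt)))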
## Summary statistics
cats %>%
  summarize(mean_bwt = mean(Bwt), sd_bwt = sd(Bwt),
            mean_hwt = mean(Hwt), sd_hwt = sd(Hwt))
## mean_bwt sd_bwt mean_hwt sd_hwt
## 1 2.723611 0.4853066 10.63056 2.434636
## Scatterplot with line
cats %>%
  ggvis(x = ~Bwt, y = ~Hwt, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .4, strokeOpacity := .4) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "Body weight (kg)") %>%
  scale_numeric("x", domain = c(1.5, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Heart weight (g)") %>%
  scale_numeric("y", domain = c(6, 22), nice = FALSE, clamp = TRUE)
## Get coefficients for best-fitting line
lm(Hwt~Bwt, data=cats)
##
## Call:
## lm(formula = Hwt ~ Bwt, data = cats)
##
## Coefficients:
## (Intercept) Bwt
## -0.3567 4.0341
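## These coefficients can be verified by hand from the summary statistics:
## slope = r * (sd of y / sd of x), intercept = mean of y - slope * mean of x
r <- cor(cats$Bwt, cats$Hwt)
slope <- r * sd(cats$Hwt) / sd(cats$Bwt)               # 4.0341
intercept <- mean(cats$Hwt) - slope * mean(cats$Bwt)   # -0.3567
c(intercept = intercept, slope = slope)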
## Load high school GPA / SAU GPA data
actgpa <- read.csv("http://www.bradthiessen.com/html5/data/f13s14.csv")
## Select ACT and GPA data
actgpa <- actgpa %>%
  dplyr::select(S1_P49HSGPA, S1_P49FallTermGPA, S1_P49ACTComposite,
                S1_P49ACTMath, S1_P49ACTReading)
## Keep only rows with complete cases
actgpa2 <- actgpa[complete.cases(actgpa), ]
## Scatterplot
actgpa2 %>%
  ggvis(x = ~S1_P49HSGPA, y = ~S1_P49FallTermGPA, fill := "steelblue",
        stroke := "steelblue", fillOpacity := .2, strokeOpacity := .4) %>%
  layer_points() %>%
  add_axis("x", title = "High School GPA") %>%
  scale_numeric("x", domain = c(1.8, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "SAU 1st semester GPA") %>%
  scale_numeric("y", domain = c(0, 4.1), nice = FALSE, clamp = TRUE)
## Correlation and summary statistics
cor(S1_P49FallTermGPA ~ S1_P49HSGPA, data = actgpa2)
## [1] 0.6784345
mean(~S1_P49FallTermGPA, data = actgpa2)
## [1] 2.63624
sd(~S1_P49FallTermGPA, data = actgpa2)
## [1] 0.750968
mean(~S1_P49HSGPA, data = actgpa2)
## [1] 3.235433
sd(~S1_P49HSGPA, data = actgpa2)
## [1] 0.5436225
## Scatterplot with linear prediction
actgpa2 %>%
  ggvis(x = ~S1_P49HSGPA, y = ~S1_P49FallTermGPA, fill := "steelblue",
        stroke := "steelblue", fillOpacity := .2, strokeOpacity := .4) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "High School GPA") %>%
  scale_numeric("x", domain = c(1.8, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "SAU 1st semester GPA") %>%
  scale_numeric("y", domain = c(0, 4.1), nice = FALSE, clamp = TRUE)
## Coefficients of linear model
lm(S1_P49FallTermGPA~S1_P49HSGPA, data=actgpa2)
##
## Call:
## lm(formula = S1_P49FallTermGPA ~ S1_P49HSGPA, data = actgpa2)
##
## Coefficients:
## (Intercept) S1_P49HSGPA
## -0.3960 0.9372
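## For illustration: the predicted first-semester GPA for a hypothetical
## student with a 3.5 high school GPA
gpa_model <- lm(S1_P49FallTermGPA ~ S1_P49HSGPA, data = actgpa2)
predict(gpa_model, data.frame(S1_P49HSGPA = 3.5))   # -0.3960 + 0.9372 * 3.5 = 2.88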
## Input beer dataset
brand <- c("Busch", "MGD", "Bud Light", "Coors Light", "Miller Lite", "Budweiser")
media <- c(8.7, 21.5, 68.7, 76.6, 100.1, 120.0)
ship <- c(8.1, 5.6, 20.7, 13.2, 15.9, 36.3)
beer <- data.frame(brand, media, ship)
## Scatterplot
beer %>%
  ggvis(x = ~media, y = ~ship, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  add_axis("x", title = "Media expenditures (millions of $)") %>%
  scale_numeric("x", domain = c(0, 125), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Shipments (millions of bottles)") %>%
  scale_numeric("y", domain = c(0, 40), nice = FALSE, clamp = TRUE)
## Scatterplot with least squares line
beer %>%
  ggvis(x = ~media, y = ~ship, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_model_predictions(model = "lm", strokeWidth := 3, stroke := "red") %>%
  add_axis("x", title = "Media expenditures (millions of $)", grid = FALSE) %>%
  scale_numeric("x", domain = c(0, 125), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Shipments (millions of bottles)", grid = FALSE) %>%
  scale_numeric("y", domain = c(0, 40), nice = FALSE, clamp = TRUE)
## Scatterplot with a loess smoother (curve)
beer %>%
  ggvis(x = ~media, y = ~ship, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_smooths(strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "Media expenditures (millions of $)") %>%
  scale_numeric("x", domain = c(0, 125), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Shipments (millions of bottles)") %>%
  scale_numeric("y", domain = c(0, 40), nice = FALSE, clamp = TRUE)
## Summary statistics for beer data
beer %>%
  summarize(avgShip = mean(ship), sdShip = sd(ship),
            avgMedia = mean(media), sdMedia = sd(media))
##    avgShip   sdShip avgMedia  sdMedia
## 1 16.63333 11.04711 65.93333 43.50166
cor(ship ~ media, data = beer)
## [1] 0.8288211
## Get coefficients for best-fitting line
lm(ship~media, data=beer)
##
## Call:
## lm(formula = ship ~ media, data = beer)
##
## Coefficients:
## (Intercept) media
## 2.7559 0.2105
## Create an example in which the best-fitting line
## doesn't fit well at all
x <- c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5)
y <- x^2
xy <- data.frame(x, y)
## Create scatterplot
xy %>%
  ggvis(x = ~x, y = ~y, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_model_predictions(model = "lm", strokeWidth := 3, stroke := "red") %>%
  add_axis("x", title = "X", grid = FALSE) %>%
  scale_numeric("x", domain = c(-5, 5), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Y", grid = FALSE) %>%
  scale_numeric("y", domain = c(0, 25), nice = FALSE, clamp = TRUE)
## Create data with no correlation
x <- c(0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6)
y <- c(0, 1, 2, 3, 2, 1, 0, 0, -1, -2, -3, -2, -1, 0)
xy <- data.frame(x, y)
## Scatterplot
xy %>%
  ggvis(x = ~x, y = ~y, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  add_axis("x", title = "X", grid = FALSE) %>%
  scale_numeric("x", domain = c(0, 6), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Y", grid = FALSE) %>%
  scale_numeric("y", domain = c(-5, 5), nice = FALSE, clamp = TRUE)
# Best-fitting line
lm(y~x, data=xy)
##
## Call:
## lm(formula = y ~ x, data = xy)
##
## Coefficients:
## (Intercept) x
## 0 0
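## The zero slope reflects zero correlation: the upper and lower halves of
## the data cancel exactly
cor(xy$x, xy$y)   # 0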
#########
## Get "full" output for beer regression
model = lm(ship~media, data=beer)
summary(model)
##
## Call:
## lm(formula = ship ~ media, data = beer)
##
## Residuals:
## 1 2 3 4 5 6
## 3.513 -1.681 3.484 -5.678 -7.925 8.287
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.75591 5.46812 0.504 0.6408
## media 0.21048 0.07104 2.963 0.0414 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.911 on 4 degrees of freedom
## Multiple R-squared: 0.6869, Adjusted R-squared: 0.6087
## F-statistic: 8.777 on 1 and 4 DF, p-value: 0.04145
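## In simple regression, Multiple R-squared is just the squared correlation
cor(beer$media, beer$ship)^2   # 0.8288211^2 = 0.6869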
coef(model)
## (Intercept) media
## 2.7559140 0.2104765
confint(model)
## 2.5 % 97.5 %
## (Intercept) -12.42603485 17.9378628
## media 0.01322851 0.4077246
vcov(model)
## (Intercept) media
## (Intercept) 29.9003908 -0.332776144
## media -0.3327761 0.005047161
residuals(model)
## 1 2 3 4 5 6
## 3.512940 -1.681159 3.484348 -5.678416 -7.924615 8.286902
fitted(model)
## 1 2 3 4 5 6
## 4.587060 7.281159 17.215652 18.878416 23.824615 28.013098
deviance(model)
## [1] 191.0244
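## deviance() on an lm fit is the residual sum of squares
sum(residuals(model)^2)   # same 191.0244 as above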
predict(model)
## 1 2 3 4 5 6
## 4.587060 7.281159 17.215652 18.878416 23.824615 28.013098
anova(model)
## Analysis of Variance Table
##
## Response: ship
## Df Sum Sq Mean Sq F value Pr(>F)
## media 1 419.17 419.17 8.7773 0.04145 *
## Residuals 4 191.02 47.76
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model)
## [1] 43.79111
model.matrix(model)
## (Intercept) media
## 1 1 8.7
## 2 1 21.5
## 3 1 68.7
## 4 1 76.6
## 5 1 100.1
## 6 1 120.0
## attr(,"assign")
## [1] 0 1
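## The coefficients solve the normal equations (X'X) b = X'y; a quick
## check using the design matrix above
X <- model.matrix(model)
solve(t(X) %*% X, t(X) %*% beer$ship)   # reproduces 2.7559 and 0.2105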
plot(model)
#########
### Randomization-based test of the slope of our regression line
randomizations <- do(10000) *
  coef(lm(S1_P49FallTermGPA ~ shuffle(S1_P49HSGPA), data = actgpa2))
histogram(~S1_P49HSGPA, data = randomizations, col = "grey",
          xlim = c(-.4, 0.5), xlab = "Slope", width = .005)
## Proportion of shuffled slopes at least as large as the observed 0.9372
tally(~(S1_P49HSGPA >= 0.9372), format = "prop", data = randomizations)
##
## TRUE FALSE
## 0 1
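## A companion bootstrap confidence interval for the slope, resampling rows
## of the data instead of shuffling (a sketch using mosaic's resample)
boot <- do(10000) *
  coef(lm(S1_P49FallTermGPA ~ S1_P49HSGPA, data = resample(actgpa2)))
quantile(boot$S1_P49HSGPA, c(.025, .975))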
####################
## Violin example ##
####################
violin <- read.csv("http://www.bradthiessen.com/html5/data/violin.csv")
## Scatterplot
violin %>%
  ggvis(x = ~years, y = ~neural, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .8, strokeOpacity := .8) %>%
  layer_points(size := 100) %>%
  add_axis("x", title = "Years played") %>%
  scale_numeric("x", domain = c(0, 20), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Neural activity") %>%
  scale_numeric("y", domain = c(0, 30), nice = FALSE, clamp = TRUE)
## Summary statistics
violin %>%
  summarize(meanYears = mean(years), sdYears = sd(years),
            meanNeural = mean(neural), sdNeural = sd(neural))
##   meanYears sdYears meanNeural  sdNeural
## 1       7.2 7.24273   15.56667  7.782459
cor(neural ~ years, data = violin)
## [1] 0.9279869
model <- lm(neural~years, data=violin)
summary(model)
##
## Call:
## lm(formula = neural ~ years, data = violin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8644 -2.3730 0.1614 2.3713 4.6471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.3873 1.1149 7.523 4.35e-06 ***
## years 0.9971 0.1110 8.980 6.18e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.009 on 13 degrees of freedom
## Multiple R-squared: 0.8612, Adjusted R-squared: 0.8505
## F-statistic: 80.63 on 1 and 13 DF, p-value: 6.178e-07
anova(model)
## Analysis of Variance Table
##
## Response: neural
## Df Sum Sq Mean Sq F value Pr(>F)
## years 1 730.21 730.21 80.633 6.178e-07 ***
## Residuals 13 117.73 9.06
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model)
######################
## Prestige example ##
######################
## Load data
prestige <- read.csv("http://www.bradthiessen.com/html5/data/prestige.csv")
## Set occupation type as a factor variable
prestige$type <- as.factor(prestige$occ_type)
## Eliminate the job titles
prestige2 <- prestige %>%
  dplyr::select(-title, -blue, -professional, -white)
## Create scatterplot matrix with histograms on diagonals
panel.hist <- function(x, ...) {
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks; nB <- length(breaks)
  y <- h$counts; y <- y / max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}
pairs(prestige2,
      cex = 1, pch = 1, bg = "steelblue",
      diag.panel = panel.hist, cex.labels = 1, font.labels = 2)
## ANOVA, summary statistics, Bartlett's test, Tukey/Bonferroni
model <- lm(prestige ~ as.factor(occ_type), data=prestige)
anova(model)
## Analysis of Variance Table
##
## Response: prestige
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(occ_type) 2 19467 9733.6 92.405 < 2.2e-16 ***
## Residuals 99 10428 105.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prestige %>%
  group_by(occ_type) %>%
  summarize(n = n(), mean = mean(prestige), sd = sd(prestige))
## Source: local data frame [3 x 4]
##
## occ_type n mean sd
## 1 0 49 36.08571 11.347320
## 2 1 23 42.24348 9.515816
## 3 2 30 67.90667 8.819255
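## Equivalently, the regression coefficients recover these group means: the
## intercept is the type-0 mean and each coefficient is an offset from it
## (36.086 + 6.158 = 42.244; 36.086 + 31.821 = 67.907)
coef(model)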
bartlett.test(prestige ~ occ_type, data = prestige)
##
## Bartlett test of homogeneity of variances
##
## data: prestige by occ_type
## Bartlett's K-squared = 2.4469, df = 2, p-value = 0.2942
## TukeyHSD() needs an aov object, so refit the one-way model with aov()
aov_model <- aov(prestige ~ as.factor(occ_type), data=prestige)
TukeyHSD(aov_model, conf.level=.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = prestige ~ as.factor(occ_type), data = prestige)
##
## $`as.factor(occ_type)`
## diff lwr upr p adj
## 1-0 6.157764 -0.01491793 12.33045 0.0507001
## 2-0 31.820952 26.15954398 37.48236 0.0000000
## 2-1 25.663188 18.89483548 32.43154 0.0000000
plot(TukeyHSD(aov_model))
with(prestige, pairwise.t.test(prestige, occ_type, p.adjust.method="bonferroni"))
##
## Pairwise comparisons using t tests with pooled SD
##
## data: prestige and occ_type
##
## 0 1
## 1 0.059 -
## 2 < 2e-16 4.4e-14
##
## P value adjustment method: bonferroni
##### Construct linear model: Prestige = f(income)
## Scatterplot with regression line
prestige %>%
  ggvis(x = ~income, y = ~prestige, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .3, strokeOpacity := .5) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "Income") %>%
  scale_numeric("x", domain = c(0, 28000), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Prestige") %>%
  scale_numeric("y", domain = c(0, 100), nice = FALSE, clamp = TRUE)
## Create model
model <- lm(prestige~income, data=prestige)
# Get coefficients
coef(model)
## (Intercept) income
## 27.141176368 0.002896799
# Summarize model to get R-squared
summary(model)
##
## Call:
## lm(formula = prestige ~ income, data = prestige)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.007 -8.378 -2.378 8.432 32.084
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.714e+01 2.268e+00 11.97 <2e-16 ***
## income 2.897e-03 2.833e-04 10.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.09 on 100 degrees of freedom
## Multiple R-squared: 0.5111, Adjusted R-squared: 0.5062
## F-statistic: 104.5 on 1 and 100 DF, p-value: < 2.2e-16
# Get ANOVA summary table
anova(model)
## Analysis of Variance Table
##
## Response: prestige
## Df Sum Sq Mean Sq F value Pr(>F)
## income 1 15279 15279.3 104.54 < 2.2e-16 ***
## Residuals 100 14616 146.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Get confidence intervals for coefficients
confint(model)
## 2.5 % 97.5 %
## (Intercept) 22.642116976 31.640235760
## income 0.002334692 0.003458907
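# confint() is just estimate ± t* × standard error; e.g., for the income slope
est <- coef(summary(model))["income", "Estimate"]
se  <- coef(summary(model))["income", "Std. Error"]
est + c(-1, 1) * qt(.975, df = 100) * se   # matches the 2.5% / 97.5% limits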
### Create new data (job with income = 7000)
newdata = data.frame(income=7000)
## Find the predicted prestige for that new job
predict(model, newdata)
## 1
## 47.41877
## Confidence interval for the mean prestige of jobs with income = 7000
predict(model, newdata, interval="confidence")
## fit lwr upr
## 1 47.41877 45.04112 49.79642
## Prediction interval for an individual job (wider: it adds residual variation)
predict(model, newdata, interval="prediction")
## fit lwr upr
## 1 47.41877 23.31552 71.52202
## Assumptions
plot(model)
## Homoscedasticity: non-constant variance score test (car::ncvTest)
ncvTest(model)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 3.088455 Df = 1 p = 0.07884962
# Component + residual plot (car::crPlots) to check linearity
crPlots(model)
# Square transformation of the response
lm(prestige^2 ~ income, data=prestige)
##
## Call:
## lm(formula = prestige^2 ~ income, data = prestige)
##
## Coefficients:
## (Intercept) income
## 447.7104 0.2999
## Robust regression
library(MASS)
rlm(prestige ~ income, data=prestige)
## Call:
## rlm(formula = prestige ~ income, data = prestige)
## Converged in 6 iterations
##
## Coefficients:
## (Intercept) income
## 25.121145855 0.003125411
##
## Degrees of freedom: 102 total; 100 residual
## Scale estimate: 13
## Quantile regression (tau = .5 fits the median)
library(quantreg)
rq(prestige ~ income, tau=.5, data=prestige)
## Call:
## rq(formula = prestige ~ income, tau = 0.5, data = prestige)
##
## Coefficients:
## (Intercept) income
## 23.945837640 0.003029339
##
## Degrees of freedom: 102 total; 100 residual
summary(rq(prestige ~ income, tau=.5, data=prestige))
## Warning in rq.fit.br(x, y, tau = tau, ci = TRUE, ...): Solution may be
## nonunique
##
## Call: rq(formula = prestige ~ income, tau = 0.5, data = prestige)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 23.94584 15.10204 26.72000
## income 0.00303 0.00289 0.00453
## Lowess
lowess(prestige$prestige ~ prestige$income)
## $x
## [1] 611 918 1656 1890 2370 2448 2594 2847 2901 3000 3016
## [12] 3116 3148 3161 3472 3485 3485 3582 3617 3643 3739 3910
## [23] 3930 3942 4036 4075 4199 4224 4330 4348 4443 4549 4614
## [34] 4686 4696 4741 4753 4761 5052 5092 5134 5134 5180 5299
## [45] 5449 5511 5562 5648 5795 5811 5902 5959 6112 6197 6259
## [56] 6336 6462 6477 6565 6573 6686 6860 6928 6992 7059 7147
## [67] 7405 7482 7562 7716 7869 7956 8034 8043 8049 8131 8206
## [78] 8258 8316 8403 8425 8780 8845 8865 8880 8891 8895 9271
## [89] 9593 10432 11023 11030 11377 12351 12480 14032 14163 14558 17498
## [100] 19263 25308 25879
##
## $y
## [1] 20.51871 21.82813 24.93925 25.91418 27.90203 28.22461 28.82842
## [8] 29.87773 30.10282 30.51549 30.58219 31.00135 31.13548 31.18997
## [15] 32.50728 32.56291 32.56291 32.97803 33.12781 33.23908 33.65130
## [22] 34.38259 34.46812 34.51943 34.91100 35.07346 35.57983 35.68192
## [29] 36.10845 36.18088 36.56315 36.97804 37.23245 37.51426 37.55561
## [36] 37.74170 37.79132 37.82440 38.96850 39.12933 39.29821 39.29821
## [43] 39.48317 39.96165 40.58139 40.83755 41.06556 41.45005 42.13674
## [50] 42.21148 42.69183 42.99272 43.72096 44.12554 44.44584 44.84364
## [57] 45.45230 45.52476 45.94987 45.98851 46.48047 47.21826 47.50658
## [64] 47.75696 48.01906 48.36332 49.39793 49.72322 50.06118 50.72886
## [71] 51.46755 51.88759 52.30803 52.35654 52.38888 52.83088 53.23514
## [78] 53.51203 53.82086 54.28411 54.40125 56.40358 56.76581 56.87726
## [85] 56.96085 57.02215 57.04444 59.08704 60.70490 63.38973 65.19523
## [92] 65.21742 66.25486 68.37407 68.58814 71.42799 71.70646 72.56689
## [99] 75.02568 76.11860 82.53114 83.12620
prestige %>%
  ggvis(x = ~income, y = ~prestige, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .4, strokeOpacity := .5) %>%
  layer_points() %>%
  layer_smooths(stroke := "red", strokeWidth := 5) %>%
  add_axis("x", title = "Income") %>%
  scale_numeric("x", domain = c(0, 28000), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Prestige") %>%
  scale_numeric("y", domain = c(0, 100), nice = FALSE, clamp = TRUE)
## Nonlinear -- quadratic model
quad = lm(prestige ~ income + I(income^2), data=prestige)
coef(quad)
## (Intercept) income I(income^2)
## 1.418319e+01 6.153506e-03 -1.433097e-07
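## Because the squared term is negative, the fitted curve peaks at
## income = -b1 / (2 * b2), roughly $21,500
-coef(quad)["income"] / (2 * coef(quad)["I(income^2)"])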
summary(quad)
##
## Call:
## lm(formula = prestige ~ income + I(income^2), data = prestige)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.963 -7.967 -2.303 7.847 32.928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.418e+01 3.515e+00 4.035 0.000108 ***
## income 6.154e-03 7.593e-04 8.104 1.43e-12 ***
## I(income^2) -1.433e-07 3.141e-08 -4.562 1.45e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.04 on 99 degrees of freedom
## Multiple R-squared: 0.596, Adjusted R-squared: 0.5879
## F-statistic: 73.03 on 2 and 99 DF, p-value: < 2.2e-16
plot(prestige ~ income, data=prestige)
curve(14.18319 + 0.0061535*x - 0.0000001433097*x^2, add=TRUE)
plot(quad)
## Log model
logmod = lm(prestige ~ log(income), data=prestige)
summary(logmod)
##
## Call:
## lm(formula = prestige ~ log(income), data = prestige)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.114 -9.342 -1.218 8.101 30.454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -139.856 16.954 -8.249 6.6e-13 ***
## log(income) 21.556 1.953 11.037 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.61 on 100 degrees of freedom
## Multiple R-squared: 0.5492, Adjusted R-squared: 0.5447
## F-statistic: 121.8 on 1 and 100 DF, p-value: < 2.2e-16
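## Interpreting the log slope: a 1% increase in income is associated with
## about a 21.556 * log(1.01) ≈ 0.21-point increase in predicted prestige
21.556 * log(1.01)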
plot(prestige ~ income, data=prestige)
curve(-139.856 + 21.556*log(x), add=TRUE)
plot(logmod)