Simple Linear Regression
## Load packages: MASS (cats data, rlm), dplyr, ggvis, mosaic (do, shuffle,
## tally, histogram), and car (ncvTest, crPlots)
library(MASS)
library(dplyr)
library(ggvis)
library(mosaic)
library(car)
## Load cat dataset
data(cats)
## Scatterplot
cats %>%
  ggvis(x = ~Bwt, y = ~Hwt, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .4, strokeOpacity := .4) %>%
  layer_points() %>%
  add_axis("x", title = "Body weight (kg)") %>%
  scale_numeric("x", domain = c(1.5, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Heart weight (g)") %>%
  scale_numeric("y", domain = c(6, 22), nice = FALSE, clamp = TRUE)
## Correlation
cor(cats$Bwt, cats$Hwt)
## [1] 0.8041274
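## Sanity check: the correlation is just the covariance scaled by the two
## standard deviations, so this reproduces the value above
with(cats, cov(Bwt, Hwt) / (sd(Bwt) * sd(Hwt)))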
## Summary statistics
cats %>%
  summarize(mean_bwt = mean(Bwt), sd_bwt = sd(Bwt),
            mean_hwt = mean(Hwt), sd_hwt = sd(Hwt))
## mean_bwt sd_bwt mean_hwt sd_hwt
## 1 2.723611 0.4853066 10.63056 2.434636
## Scatterplot with line
cats %>%
  ggvis(x = ~Bwt, y = ~Hwt, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .4, strokeOpacity := .4) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "Body weight (kg)") %>%
  scale_numeric("x", domain = c(1.5, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Heart weight (g)") %>%
  scale_numeric("y", domain = c(6, 22), nice = FALSE, clamp = TRUE)
## Get coefficients for best-fitting line
lm(Hwt~Bwt, data=cats)
##
## Call:
## lm(formula = Hwt ~ Bwt, data = cats)
##
## Coefficients:
## (Intercept) Bwt
## -0.3567 4.0341
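## These coefficients can be verified by hand from the summary statistics:
## slope = r * (sd of y / sd of x), intercept = mean of y - slope * mean of x
r <- cor(cats$Bwt, cats$Hwt)
slope <- r * sd(cats$Hwt) / sd(cats$Bwt)               # 4.0341
intercept <- mean(cats$Hwt) - slope * mean(cats$Bwt)   # -0.3567
c(intercept = intercept, slope = slope)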
## Load high school GPA / SAU GPA data
actgpa <- read.csv("http://www.bradthiessen.com/html5/data/f13s14.csv")
## Select ACT and GPA data
actgpa <- actgpa %>%
  dplyr::select(S1_P49HSGPA, S1_P49FallTermGPA, S1_P49ACTComposite,
                S1_P49ACTMath, S1_P49ACTReading)
## Keep only rows with complete cases
actgpa2 <- actgpa[complete.cases(actgpa), ]
## Scatterplot
actgpa2 %>%
  ggvis(x = ~S1_P49HSGPA, y = ~S1_P49FallTermGPA, fill := "steelblue",
        stroke := "steelblue", fillOpacity := .2, strokeOpacity := .4) %>%
  layer_points() %>%
  add_axis("x", title = "High School GPA") %>%
  scale_numeric("x", domain = c(1.8, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "SAU 1st semester GPA") %>%
  scale_numeric("y", domain = c(0, 4.1), nice = FALSE, clamp = TRUE)
## Correlation and summary statistics
cor(S1_P49FallTermGPA ~ S1_P49HSGPA, data = actgpa2)
## [1] 0.6784345
mean(~S1_P49FallTermGPA, data = actgpa2)
## [1] 2.63624
sd(~S1_P49FallTermGPA, data = actgpa2)
## [1] 0.750968
mean(~S1_P49HSGPA, data = actgpa2)
## [1] 3.235433
sd(~S1_P49HSGPA, data = actgpa2)
## [1] 0.5436225
## Scatterplot with linear prediction
actgpa2 %>%
  ggvis(x = ~S1_P49HSGPA, y = ~S1_P49FallTermGPA, fill := "steelblue",
        stroke := "steelblue", fillOpacity := .2, strokeOpacity := .4) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "High School GPA") %>%
  scale_numeric("x", domain = c(1.8, 4), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "SAU 1st semester GPA") %>%
  scale_numeric("y", domain = c(0, 4.1), nice = FALSE, clamp = TRUE)
## Coefficients of linear model
lm(S1_P49FallTermGPA~S1_P49HSGPA, data=actgpa2)
##
## Call:
## lm(formula = S1_P49FallTermGPA ~ S1_P49HSGPA, data = actgpa2)
##
## Coefficients:
## (Intercept) S1_P49HSGPA
## -0.3960 0.9372
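## For illustration: the predicted first-semester GPA for a hypothetical
## student with a 3.5 high school GPA
gpa_model <- lm(S1_P49FallTermGPA ~ S1_P49HSGPA, data = actgpa2)
predict(gpa_model, data.frame(S1_P49HSGPA = 3.5))   # -0.3960 + 0.9372 * 3.5 = 2.88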
## Input beer dataset
brand <- c("Busch", "MGD", "Bud Light", "Coors Light", "Miller Lite", "Budweiser")
media <- c(8.7, 21.5, 68.7, 76.6, 100.1, 120.0)
ship <- c(8.1, 5.6, 20.7, 13.2, 15.9, 36.3)
beer <- data.frame(brand, media, ship)
## Scatterplot
beer %>%
  ggvis(x = ~media, y = ~ship, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  add_axis("x", title = "Media expenditures (millions of $)") %>%
  scale_numeric("x", domain = c(0, 125), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Shipments (millions of bottles)") %>%
  scale_numeric("y", domain = c(0, 40), nice = FALSE, clamp = TRUE)
## Scatterplot with least squares line
beer %>%
  ggvis(x = ~media, y = ~ship, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_model_predictions(model = "lm", strokeWidth := 3, stroke := "red") %>%
  add_axis("x", title = "Media expenditures (millions of $)", grid = FALSE) %>%
  scale_numeric("x", domain = c(0, 125), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Shipments (millions of bottles)", grid = FALSE) %>%
  scale_numeric("y", domain = c(0, 40), nice = FALSE, clamp = TRUE)
## Scatterplot with a loess smoother (curve)
beer %>%
  ggvis(x = ~media, y = ~ship, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_smooths(strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "Media expenditures (millions of $)") %>%
  scale_numeric("x", domain = c(0, 125), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Shipments (millions of bottles)") %>%
  scale_numeric("y", domain = c(0, 40), nice = FALSE, clamp = TRUE)
## Summary statistics for beer data
beer %>%
  summarize(avgShip = mean(ship), sdShip = sd(ship),
            avgMedia = mean(media), sdMedia = sd(media))
##    avgShip   sdShip avgMedia  sdMedia
## 1 16.63333 11.04711 65.93333 43.50166
cor(ship ~ media, data = beer)
## [1] 0.8288211
## Get coefficients for best-fitting line
lm(ship~media, data=beer)
##
## Call:
## lm(formula = ship ~ media, data = beer)
##
## Coefficients:
## (Intercept) media
## 2.7559 0.2105
## Create an example in which the best-fitting line
## doesn't fit well at all
x <- c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5)
y <- x^2
xy <- data.frame(x, y)
## Create scatterplot
xy %>%
  ggvis(x = ~x, y = ~y, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_model_predictions(model = "lm", strokeWidth := 3, stroke := "red") %>%
  add_axis("x", title = "X", grid = FALSE) %>%
  scale_numeric("x", domain = c(-5, 5), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Y", grid = FALSE) %>%
  scale_numeric("y", domain = c(0, 25), nice = FALSE, clamp = TRUE)
## Create data with no correlation
x <- c(0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6)
y <- c(0, 1, 2, 3, 2, 1, 0, 0, -1, -2, -3, -2, -1, 0)
xy <- data.frame(x, y)
## Scatterplot
xy %>%
  ggvis(x = ~x, y = ~y, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .9, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  add_axis("x", title = "X", grid = FALSE) %>%
  scale_numeric("x", domain = c(0, 6), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Y", grid = FALSE) %>%
  scale_numeric("y", domain = c(-5, 5), nice = FALSE, clamp = TRUE)
# Best-fitting line
lm(y~x, data=xy)
##
## Call:
## lm(formula = y ~ x, data = xy)
##
## Coefficients:
## (Intercept) x
## 0 0
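## The zero slope reflects zero correlation: the upper and lower halves of
## the data cancel exactly
cor(xy$x, xy$y)   # 0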
#########
## Get "full" output for beer regression
model = lm(ship~media, data=beer)
summary(model)
##
## Call:
## lm(formula = ship ~ media, data = beer)
##
## Residuals:
## 1 2 3 4 5 6
## 3.513 -1.681 3.484 -5.678 -7.925 8.287
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.75591 5.46812 0.504 0.6408
## media 0.21048 0.07104 2.963 0.0414 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.911 on 4 degrees of freedom
## Multiple R-squared: 0.6869, Adjusted R-squared: 0.6087
## F-statistic: 8.777 on 1 and 4 DF, p-value: 0.04145
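## In simple regression, Multiple R-squared is just the squared correlation
cor(beer$media, beer$ship)^2   # 0.8288211^2 = 0.6869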
coef(model)
## (Intercept) media
## 2.7559140 0.2104765
confint(model)
## 2.5 % 97.5 %
## (Intercept) -12.42603485 17.9378628
## media 0.01322851 0.4077246
vcov(model)
## (Intercept) media
## (Intercept) 29.9003908 -0.332776144
## media -0.3327761 0.005047161
residuals(model)
## 1 2 3 4 5 6
## 3.512940 -1.681159 3.484348 -5.678416 -7.924615 8.286902
fitted(model)
## 1 2 3 4 5 6
## 4.587060 7.281159 17.215652 18.878416 23.824615 28.013098
deviance(model)
## [1] 191.0244
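## deviance() on an lm fit is the residual sum of squares
sum(residuals(model)^2)   # same 191.0244 as above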
predict(model)
## 1 2 3 4 5 6
## 4.587060 7.281159 17.215652 18.878416 23.824615 28.013098
anova(model)
## Analysis of Variance Table
##
## Response: ship
## Df Sum Sq Mean Sq F value Pr(>F)
## media 1 419.17 419.17 8.7773 0.04145 *
## Residuals 4 191.02 47.76
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(model)
## [1] 43.79111
model.matrix(model)
## (Intercept) media
## 1 1 8.7
## 2 1 21.5
## 3 1 68.7
## 4 1 76.6
## 5 1 100.1
## 6 1 120.0
## attr(,"assign")
## [1] 0 1
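## The coefficients solve the normal equations (X'X) b = X'y; a quick
## check using the design matrix above
X <- model.matrix(model)
solve(t(X) %*% X, t(X) %*% beer$ship)   # reproduces 2.7559 and 0.2105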
plot(model)
#########
### Randomization-based test of the slope of our regression line
randomizations <- do(10000) *
  coef(lm(S1_P49FallTermGPA ~ shuffle(S1_P49HSGPA), data = actgpa2))
histogram(~S1_P49HSGPA, data = randomizations, col = "grey",
          xlim = c(-.4, 0.5), xlab = "Slope", width = .005)
## Proportion of shuffled slopes at least as large as the observed 0.9372
tally(~(S1_P49HSGPA >= 0.9372), format = "prop", data = randomizations)
##
## TRUE FALSE
## 0 1
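## A companion bootstrap confidence interval for the slope, resampling rows
## of the data instead of shuffling (a sketch using mosaic's resample)
boot <- do(10000) *
  coef(lm(S1_P49FallTermGPA ~ S1_P49HSGPA, data = resample(actgpa2)))
quantile(boot$S1_P49HSGPA, c(.025, .975))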
####################
## Violin example ##
####################
violin <- read.csv("http://www.bradthiessen.com/html5/data/violin.csv")
## Scatterplot
violin %>%
  ggvis(x = ~years, y = ~neural, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .8, strokeOpacity := .8) %>%
  layer_points(size := 100) %>%
  add_axis("x", title = "Years played") %>%
  scale_numeric("x", domain = c(0, 20), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Neural activity") %>%
  scale_numeric("y", domain = c(0, 30), nice = FALSE, clamp = TRUE)
## Summary statistics
violin %>%
  summarize(meanYears = mean(years), sdYears = sd(years),
            meanNeural = mean(neural), sdNeural = sd(neural))
##   meanYears sdYears meanNeural  sdNeural
## 1       7.2 7.24273   15.56667  7.782459
cor(neural ~ years, data = violin)
## [1] 0.9279869
model <- lm(neural~years, data=violin)
summary(model)
##
## Call:
## lm(formula = neural ~ years, data = violin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8644 -2.3730 0.1614 2.3713 4.6471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.3873 1.1149 7.523 4.35e-06 ***
## years 0.9971 0.1110 8.980 6.18e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.009 on 13 degrees of freedom
## Multiple R-squared: 0.8612, Adjusted R-squared: 0.8505
## F-statistic: 80.63 on 1 and 13 DF, p-value: 6.178e-07
anova(model)
## Analysis of Variance Table
##
## Response: neural
## Df Sum Sq Mean Sq F value Pr(>F)
## years 1 730.21 730.21 80.633 6.178e-07 ***
## Residuals 13 117.73 9.06
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model)
######################
## Prestige example ##
######################
## Load data
prestige <- read.csv("http://www.bradthiessen.com/html5/data/prestige.csv")
## Set occupation type as a factor variable
prestige$type <- as.factor(prestige$occ_type)
## Eliminate the job titles
prestige2 <- prestige %>%
  dplyr::select(-title, -blue, -professional, -white)
## Create scatterplot matrix with histograms on diagonals
panel.hist <- function(x, ...) {
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks; nB <- length(breaks)
  y <- h$counts; y <- y / max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}
pairs(prestige2,
      cex = 1, pch = 1, bg = "steelblue",
      diag.panel = panel.hist, cex.labels = 1, font.labels = 2)
## ANOVA, summary statistics, Bartlett's test, Tukey/Bonferroni
model <- lm(prestige ~ as.factor(occ_type), data=prestige)
anova(model)
## Analysis of Variance Table
##
## Response: prestige
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(occ_type) 2 19467 9733.6 92.405 < 2.2e-16 ***
## Residuals 99 10428 105.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
prestige %>%
  group_by(occ_type) %>%
  summarize(n = n(), mean = mean(prestige), sd = sd(prestige))
## Source: local data frame [3 x 4]
##
## occ_type n mean sd
## 1 0 49 36.08571 11.347320
## 2 1 23 42.24348 9.515816
## 3 2 30 67.90667 8.819255
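## Equivalently, the regression coefficients recover these group means: the
## intercept is the type-0 mean and each coefficient is an offset from it
## (36.086 + 6.158 = 42.244; 36.086 + 31.821 = 67.907)
coef(model)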
bartlett.test(prestige ~ occ_type, data = prestige)
##
## Bartlett test of homogeneity of variances
##
## data: prestige by occ_type
## Bartlett's K-squared = 2.4469, df = 2, p-value = 0.2942
## TukeyHSD() needs an aov object, so refit the one-way model with aov()
aov_model <- aov(prestige ~ as.factor(occ_type), data=prestige)
TukeyHSD(aov_model, conf.level=.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = prestige ~ as.factor(occ_type), data = prestige)
##
## $`as.factor(occ_type)`
## diff lwr upr p adj
## 1-0 6.157764 -0.01491793 12.33045 0.0507001
## 2-0 31.820952 26.15954398 37.48236 0.0000000
## 2-1 25.663188 18.89483548 32.43154 0.0000000
plot(TukeyHSD(aov_model))
with(prestige, pairwise.t.test(prestige, occ_type, p.adjust.method="bonferroni"))
##
## Pairwise comparisons using t tests with pooled SD
##
## data: prestige and occ_type
##
## 0 1
## 1 0.059 -
## 2 < 2e-16 4.4e-14
##
## P value adjustment method: bonferroni
##### Construct linear model: Prestige = f(income)
## Scatterplot with regression line
prestige %>%
  ggvis(x = ~income, y = ~prestige, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .3, strokeOpacity := .5) %>%
  layer_points() %>%
  layer_model_predictions(model = "lm", strokeWidth := 5, stroke := "red") %>%
  add_axis("x", title = "Income") %>%
  scale_numeric("x", domain = c(0, 28000), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Prestige") %>%
  scale_numeric("y", domain = c(0, 100), nice = FALSE, clamp = TRUE)
## Create model
model <- lm(prestige~income, data=prestige)
# Get coefficients
coef(model)
## (Intercept) income
## 27.141176368 0.002896799
# Summarize model to get R-squared
summary(model)
##
## Call:
## lm(formula = prestige ~ income, data = prestige)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.007 -8.378 -2.378 8.432 32.084
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.714e+01 2.268e+00 11.97 <2e-16 ***
## income 2.897e-03 2.833e-04 10.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.09 on 100 degrees of freedom
## Multiple R-squared: 0.5111, Adjusted R-squared: 0.5062
## F-statistic: 104.5 on 1 and 100 DF, p-value: < 2.2e-16
# Get ANOVA summary table
anova(model)
## Analysis of Variance Table
##
## Response: prestige
## Df Sum Sq Mean Sq F value Pr(>F)
## income 1 15279 15279.3 104.54 < 2.2e-16 ***
## Residuals 100 14616 146.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Get confidence intervals for coefficients
confint(model)
## 2.5 % 97.5 %
## (Intercept) 22.642116976 31.640235760
## income 0.002334692 0.003458907
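# confint() is just estimate ± t* × standard error; e.g., for the income slope
est <- coef(summary(model))["income", "Estimate"]
se  <- coef(summary(model))["income", "Std. Error"]
est + c(-1, 1) * qt(.975, df = 100) * se   # matches the 2.5% / 97.5% limits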
### Create new data (job with income = 7000)
newdata = data.frame(income=7000)
## Find the predicted prestige for that new job
predict(model, newdata)
## 1
## 47.41877
## Confidence interval for the mean prestige of jobs with income = 7000
predict(model, newdata, interval="confidence")
## fit lwr upr
## 1 47.41877 45.04112 49.79642
## Prediction interval for an individual job (wider: it adds residual variation)
predict(model, newdata, interval="prediction")
## fit lwr upr
## 1 47.41877 23.31552 71.52202
## Assumptions
plot(model)
## Homoscedasticity: non-constant variance score test (car::ncvTest)
ncvTest(model)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 3.088455 Df = 1 p = 0.07884962
# Component + residual plot (car::crPlots) to check linearity
crPlots(model)
# Square transformation of the response
lm(prestige^2 ~ income, data=prestige)
##
## Call:
## lm(formula = prestige^2 ~ income, data = prestige)
##
## Coefficients:
## (Intercept) income
## 447.7104 0.2999
## Robust regression
library(MASS)
rlm(prestige ~ income, data=prestige)
## Call:
## rlm(formula = prestige ~ income, data = prestige)
## Converged in 6 iterations
##
## Coefficients:
## (Intercept) income
## 25.121145855 0.003125411
##
## Degrees of freedom: 102 total; 100 residual
## Scale estimate: 13
## Quantile regression (tau = .5 fits the median)
library(quantreg)
rq(prestige ~ income, tau=.5, data=prestige)
## Call:
## rq(formula = prestige ~ income, tau = 0.5, data = prestige)
##
## Coefficients:
## (Intercept) income
## 23.945837640 0.003029339
##
## Degrees of freedom: 102 total; 100 residual
summary(rq(prestige ~ income, tau=.5, data=prestige))
## Warning in rq.fit.br(x, y, tau = tau, ci = TRUE, ...): Solution may be
## nonunique
##
## Call: rq(formula = prestige ~ income, tau = 0.5, data = prestige)
##
## tau: [1] 0.5
##
## Coefficients:
## coefficients lower bd upper bd
## (Intercept) 23.94584 15.10204 26.72000
## income 0.00303 0.00289 0.00453
## Lowess
lowess(prestige$prestige ~ prestige$income)
## $x
## [1] 611 918 1656 1890 2370 2448 2594 2847 2901 3000 3016
## [12] 3116 3148 3161 3472 3485 3485 3582 3617 3643 3739 3910
## [23] 3930 3942 4036 4075 4199 4224 4330 4348 4443 4549 4614
## [34] 4686 4696 4741 4753 4761 5052 5092 5134 5134 5180 5299
## [45] 5449 5511 5562 5648 5795 5811 5902 5959 6112 6197 6259
## [56] 6336 6462 6477 6565 6573 6686 6860 6928 6992 7059 7147
## [67] 7405 7482 7562 7716 7869 7956 8034 8043 8049 8131 8206
## [78] 8258 8316 8403 8425 8780 8845 8865 8880 8891 8895 9271
## [89] 9593 10432 11023 11030 11377 12351 12480 14032 14163 14558 17498
## [100] 19263 25308 25879
##
## $y
## [1] 20.51871 21.82813 24.93925 25.91418 27.90203 28.22461 28.82842
## [8] 29.87773 30.10282 30.51549 30.58219 31.00135 31.13548 31.18997
## [15] 32.50728 32.56291 32.56291 32.97803 33.12781 33.23908 33.65130
## [22] 34.38259 34.46812 34.51943 34.91100 35.07346 35.57983 35.68192
## [29] 36.10845 36.18088 36.56315 36.97804 37.23245 37.51426 37.55561
## [36] 37.74170 37.79132 37.82440 38.96850 39.12933 39.29821 39.29821
## [43] 39.48317 39.96165 40.58139 40.83755 41.06556 41.45005 42.13674
## [50] 42.21148 42.69183 42.99272 43.72096 44.12554 44.44584 44.84364
## [57] 45.45230 45.52476 45.94987 45.98851 46.48047 47.21826 47.50658
## [64] 47.75696 48.01906 48.36332 49.39793 49.72322 50.06118 50.72886
## [71] 51.46755 51.88759 52.30803 52.35654 52.38888 52.83088 53.23514
## [78] 53.51203 53.82086 54.28411 54.40125 56.40358 56.76581 56.87726
## [85] 56.96085 57.02215 57.04444 59.08704 60.70490 63.38973 65.19523
## [92] 65.21742 66.25486 68.37407 68.58814 71.42799 71.70646 72.56689
## [99] 75.02568 76.11860 82.53114 83.12620
prestige %>%
  ggvis(x = ~income, y = ~prestige, fill := "steelblue", stroke := "steelblue",
        fillOpacity := .4, strokeOpacity := .5) %>%
  layer_points() %>%
  layer_smooths(stroke := "red", strokeWidth := 5) %>%
  add_axis("x", title = "Income") %>%
  scale_numeric("x", domain = c(0, 28000), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Prestige") %>%
  scale_numeric("y", domain = c(0, 100), nice = FALSE, clamp = TRUE)
## Nonlinear -- quadratic model
quad = lm(prestige ~ income + I(income^2), data=prestige)
coef(quad)
## (Intercept) income I(income^2)
## 1.418319e+01 6.153506e-03 -1.433097e-07
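## Because the squared term is negative, the fitted curve peaks at
## income = -b1 / (2 * b2), roughly $21,500
-coef(quad)["income"] / (2 * coef(quad)["I(income^2)"])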
summary(quad)
##
## Call:
## lm(formula = prestige ~ income + I(income^2), data = prestige)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.963 -7.967 -2.303 7.847 32.928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.418e+01 3.515e+00 4.035 0.000108 ***
## income 6.154e-03 7.593e-04 8.104 1.43e-12 ***
## I(income^2) -1.433e-07 3.141e-08 -4.562 1.45e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.04 on 99 degrees of freedom
## Multiple R-squared: 0.596, Adjusted R-squared: 0.5879
## F-statistic: 73.03 on 2 and 99 DF, p-value: < 2.2e-16
plot(prestige ~ income, data=prestige)
curve(14.18319 + 0.0061535*x - 0.0000001433097*x^2, add=TRUE)
plot(quad)
## Log model
logmod = lm(prestige ~ log(income), data=prestige)
summary(logmod)
##
## Call:
## lm(formula = prestige ~ log(income), data = prestige)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.114 -9.342 -1.218 8.101 30.454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -139.856 16.954 -8.249 6.6e-13 ***
## log(income) 21.556 1.953 11.037 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.61 on 100 degrees of freedom
## Multiple R-squared: 0.5492, Adjusted R-squared: 0.5447
## F-statistic: 121.8 on 1 and 100 DF, p-value: < 2.2e-16
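## Interpreting the log slope: a 1% increase in income is associated with
## about a 21.556 * log(1.01) ≈ 0.21-point increase in predicted prestige
21.556 * log(1.01)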
plot(prestige ~ income, data=prestige)
curve(-139.856 + 21.556*log(x), add=TRUE)
plot(logmod)