DPI R Bootcamp
Jared Knowles
In this lesson we hope to learn:
In this tutorial we will use a number of datasets of different types:
stulong: student-level assessment and demographics data (simulated and research ready)midwest_schools.csv: aggregate school level test score averages from a large Midwest stateload("data/midwest_schools.rda")
head(midsch[, 1:12])
## district_id school_id subject grade n1 ss1 n2 ss2 predicted
## 1 14 130 math 4 44 433.1 40 463.0 468.7
## 2 70 20 math 4 18 443.0 20 477.2 476.5
## 3 112 80 math 4 86 445.4 94 472.6 478.4
## 4 119 50 math 4 95 427.1 94 460.7 464.1
## 5 147 60 math 4 27 424.2 27 458.7 461.8
## 6 147 125 math 4 17 423.5 26 463.1 461.2
## residuals resid_z resid_t
## 1 -5.7446 -0.59190 -0.59171
## 2 0.7235 0.07456 0.07452
## 3 -5.7509 -0.59267 -0.59248
## 4 -3.3586 -0.34606 -0.34591
## 5 -3.0937 -0.31877 -0.31863
## 6 1.8530 0.19094 0.19085
table(midsch$test_year, midsch$grade)
##
## 4 5 6 7 8
## 2007 1150 1094 472 638 734
## 2008 1204 1146 462 588 692
## 2009 1173 1092 434 592 668
## 2010 1120 1090 428 610 686
## 2011 1126 1060 420 618 688
length(unique(midsch$district_id))
## [1] 357
length(unique(midsch$school_id))
## [1] 247
table(midsch$subject, midsch$grade)
##
## 4 5 6 7 8
## math 2886 2741 1108 1523 1734
## read 2887 2741 1108 1523 1734
table(midsch$district_id,midsch$grade)library(ggplot2)
qplot(ss1, ss2, data = midsch, alpha = I(0.07)) + theme_dpi() + geom_smooth() +
geom_smooth(method = "lm", se = FALSE, color = "purple")
plot of chunk diag1
data(mtcars) # load the data from R
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
shapiro.test(mtcars$mpg)
##
## Shapiro-Wilk normality test
##
## data: mtcars$mpg
## W = 0.9476, p-value = 0.1229
shapiro.test(mtcars$hp)
##
## Shapiro-Wilk normality test
##
## data: mtcars$hp
## W = 0.9334, p-value = 0.04881
mpg variable thenmean(mtcars$mpg)
## [1] 20.09
t.test(mtcars$mpg, mu = 18, alternative = "greater")
##
## One Sample t-test
##
## data: mtcars$mpg
## t = 1.962, df = 31, p-value = 0.02938
## alternative hypothesis: true mean is greater than 18
## 95 percent confidence interval:
## 18.28 Inf
## sample estimates:
## mean of x
## 20.09
t.test(mtcars$mpg, mu = 22, alternative = "less")
##
## One Sample t-test
##
## data: mtcars$mpg
## t = -1.792, df = 31, p-value = 0.04144
## alternative hypothesis: true mean is less than 22
## 95 percent confidence interval:
## -Inf 21.9
## sample estimates:
## mean of x
## 20.09
t.test(mtcars$mpg,mu=18) test?t.test(mpg ~ am, data = mtcars)
##
## Welch Two Sample t-test
##
## data: mpg by am
## t = -3.767, df = 18.33, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.28 -3.21
## sample estimates:
## mean in group 0 mean in group 1
## 17.15 24.39
wilcox.test(mtcars$hp,mu=102)wilcox.test(hp~am,data=mtcars)aspirin <- matrix(c(189, 104, 10845, 10933), ncol = 2, dimnames = list(c("Placebo",
"Aspirin"), c("MI", "No MI")))
aspirin
## MI No MI
## Placebo 189 10845
## Aspirin 104 10933
chisq.test(aspirin, correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: aspirin
## X-squared = 25.01, df = 1, p-value = 5.692e-07
fisher.test(aspirin)
##
## Fisher's Exact Test for Count Data
##
## data: aspirin
## p-value = 5.033e-07
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.432 2.354
## sample estimates:
## odds ratio
## 1.832
test_year, grade, and subject?nrow(unique(midsch[, c(3, 4, 14)]))
## [1] 50
5th grade, 2011, math scores
midsch_sub <- subset(midsch, midsch$grade == 5 & midsch$test_year == 2011 &
midsch$subject == "math")
midsch_sub?my_mod<-lm(ss2~ss1,data=midsch_sub)
lm function~ character divides the dependent variable ss2 from the independent variable ss1my_mod<-data means we don't have to write: lm(midsch_sub$ss2~midsch_sub$ss1)ss_mod <- lm(ss2 ~ ss1, data = midsch_sub)
summary(ss_mod)
##
## Call:
## lm(formula = ss2 ~ ss1, data = midsch_sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.36 -7.60 -0.42 6.49 58.36
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.1687 11.3446 -0.46 0.65
## ss1 1.0644 0.0242 44.00 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.2 on 528 degrees of freedom
## Multiple R-squared: 0.786, Adjusted R-squared: 0.785
## F-statistic: 1.94e+03 on 1 and 528 DF, p-value: <2e-16
objects(ss_mod)
## [1] "assign" "call" "coefficients" "df.residual"
## [5] "effects" "fitted.values" "model" "qr"
## [9] "rank" "residuals" "terms" "xlevels"
coefficients fitted.values and callqplot(n2, ss2 - ss1, data = midsch, alpha = I(0.1)) + theme_dpi() + geom_smooth()
plot of chunk diagn
ssN1_mod <- lm(ss2 ~ ss1 + n1, data = midsch_sub)
summary(ssN1_mod)
##
## Call:
## lm(formula = ss2 ~ ss1 + n1, data = midsch_sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.39 -7.73 -0.52 6.42 59.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.6849 11.7688 0.14 0.886
## ss1 1.0450 0.0258 40.49 <2e-16 ***
## n1 0.0406 0.0193 2.10 0.036 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.2 on 527 degrees of freedom
## Multiple R-squared: 0.787, Adjusted R-squared: 0.787
## F-statistic: 976 on 2 and 527 DF, p-value: <2e-16
ssN2_mod <- lm(ss2 ~ ss1 + n2, data = midsch_sub)
summary(ssN2_mod)
##
## Call:
## lm(formula = ss2 ~ ss1 + n2, data = midsch_sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.60 -7.62 -0.53 6.52 59.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7971 11.8544 0.15 0.88
## ss1 1.0450 0.0260 40.12 <2e-16 ***
## n2 0.0377 0.0192 1.97 0.05 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.2 on 527 degrees of freedom
## Multiple R-squared: 0.787, Adjusted R-squared: 0.786
## F-statistic: 975 on 2 and 527 DF, p-value: <2e-16
anova(ss_mod, ssN1_mod, ssN2_mod)
## Analysis of Variance Table
##
## Model 1: ss2 ~ ss1
## Model 2: ss2 ~ ss1 + n1
## Model 3: ss2 ~ ss1 + n2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 528 66239
## 2 527 65688 1 551 4.42 0.036 *
## 3 527 65755 0 -67
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(ssN2_mod)
## [1] 4067
AIC(ssN1_mod)
## [1] 4067
n1 and n2 but either improves model fit over the model without itlibrary(lmtest)
resettest(ss_mod, power = 2:4)
##
## RESET test
##
## data: ss_mod
## RESET = 2.642, df1 = 3, df2 = 525, p-value = 0.04866
raintest(ss2 ~ ss1, fraction = 0.5, order.by = ~ss1, data = midsch_sub)
##
## Rainbow test
##
## data: ss2 ~ ss1
## Rain = 1.402, df1 = 265, df2 = 263, p-value = 0.003105
harvtest(ss2 ~ ss1, order.by = ~ss1, data = midsch_sub)
##
## Harvey-Collier test
##
## data: ss2 ~ ss1
## HC = 2.734, df = 527, p-value = 0.006462
ss_poly <- lm(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4), data = midsch_sub)
summary(ss_poly)
##
## Call:
## lm(formula = ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4), data = midsch_sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.89 -6.92 -0.20 6.76 59.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.61e+03 3.61e+04 0.07 0.94
## ss1 -8.72e+00 3.18e+02 -0.03 0.98
## I(ss1^2) -9.51e-03 1.05e+00 -0.01 0.99
## I(ss1^3) 7.21e-05 1.54e-03 0.05 0.96
## I(ss1^4) -6.98e-08 8.42e-07 -0.08 0.93
##
## Residual standard error: 11.1 on 525 degrees of freedom
## Multiple R-squared: 0.789, Adjusted R-squared: 0.787
## F-statistic: 490 on 4 and 525 DF, p-value: <2e-16
anova(ss_mod, ss_poly)
## Analysis of Variance Table
##
## Model 1: ss2 ~ ss1
## Model 2: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 528 66239
## 2 525 65253 3 985 2.64 0.049 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resettest(ss_poly, power = 2:4)
##
## RESET test
##
## data: ss_poly
## RESET = 1.562, df1 = 3, df2 = 522, p-value = 0.1976
raintest(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4), fraction = 0.5, order.by = ~ss1,
data = midsch_sub)
##
## Rainbow test
##
## data: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4)
## Rain = 1.392, df1 = 265, df2 = 260, p-value = 0.003804
harvtest(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4), order.by = ~ss1, data = midsch_sub)
##
## Harvey-Collier test
##
## data: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4)
## HC = NaN, df = 524, p-value = NA
ss_polyn <- lm(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2, data = midsch_sub)
anova(ss_mod, ssN2_mod, ss_poly, ss_polyn)
## Analysis of Variance Table
##
## Model 1: ss2 ~ ss1
## Model 2: ss2 ~ ss1 + n2
## Model 3: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4)
## Model 4: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 528 66239
## 2 527 65755 1 483 3.91 0.049 *
## 3 525 65253 2 502 2.03 0.133
## 4 524 64842 1 411 3.32 0.069 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resettest(ss_polyn, power = 2:4)
##
## RESET test
##
## data: ss_polyn
## RESET = 2.485, df1 = 3, df2 = 521, p-value = 0.05991
raintest(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2, fraction = 0.5, order.by = ~ss1,
data = midsch_sub)
##
## Rainbow test
##
## data: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2
## Rain = 1.381, df1 = 265, df2 = 259, p-value = 0.004606
harvtest(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2, order.by = ~ss1, data = midsch_sub)
##
## Harvey-Collier test
##
## data: ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2
## HC = NA, df = 523, p-value = NA
library(quantreg)
ss_quant <- rq(ss2 ~ ss1, tau = c(seq(0.1, 0.9, 0.1)), data = midsch_sub)
plot(summary(ss_quant, se = "boot", method = "wild"))
plot of chunk quantileregression
ss_quant shows that in the lower quantiles the coefficients for the intercept and ss1 fall outside the confidence interval around the base coefficientss_quant2 <- rq(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2, tau = c(seq(0.1,
0.9, 0.1)), data = midsch_sub)
plot(summary(ss_quant2, se = "boot", method = "wild"))
plot of chunk quantileregression2
ss_quant3 <- rq(ss2 ~ ss1, tau = -1, data = midsch_sub)
qplot(ss_quant3$sol[1, ], ss_quant3$sol[5, ], geom = "line", main = "Continuous Quantiles") +
theme_dpi() + xlab("Quantile") + ylab(expression(beta)) + geom_hline(yintercept = coef(summary(ss_mod))[2,
1]) + geom_hline(yintercept = coef(summary(ss_mod))[2, 1] + (2 * coef(summary(ss_mod))[2,
2]), linetype = 3) + geom_hline(yintercept = coef(summary(ss_mod))[2, 1] -
(2 * coef(summary(ss_mod))[2, 2]), linetype = 3)
plot of chunk betterquantileplot
ss_quant4 <- rq(ss2 ~ ss1 + I(ss1^2) + I(ss1^3) + I(ss1^4) + n2, tau = -1, data = midsch_sub)
qplot(ss_quant4$sol[1, ], ss_quant4$sol[5, ], geom = "line", main = "Continuous Quantiles") +
theme_dpi() + xlab("Quantile") + ylab(expression(beta)) + geom_hline(yintercept = coef(summary(ss_mod))[2,
1]) + geom_hline(yintercept = coef(summary(ss_mod))[2, 1] + (2 * coef(summary(ss_mod))[2,
2]), linetype = 3) + geom_hline(yintercept = coef(summary(ss_mod))[2, 1] -
(2 * coef(summary(ss_mod))[2, 2]), linetype = 3)
plot of chunk betterquantileplot2
dlplylibrary(plyr)
midsch$id <- interaction(midsch$test_year, midsch$grade, midsch$subject)
mods <- dlply(midsch, .(id), lm, formula = ss2 ~ ss1)
objects(mods)[1:10]
## [1] "2007.4.math" "2007.4.read" "2007.5.math" "2007.5.read" "2007.6.math"
## [6] "2007.6.read" "2007.7.math" "2007.7.read" "2007.8.math" "2007.8.read"
mytest <- llply(mods, function(x) resettest(x, power = 2:4))
mytest[[1]]
##
## RESET test
##
## data: x
## RESET = 2.499, df1 = 3, df2 = 570, p-value = 0.05876
mytest[[2]]
##
## RESET test
##
## data: x
## RESET = 0.8864, df1 = 3, df2 = 597, p-value = 0.4478
a1 <- qplot(id, residmean, data = ddply(midsch, .(id), summarize, residmean = mean(residuals)),
geom = "bar", main = "Provided Residuals") + theme_dpi() + opts(axis.text.x = theme_blank(),
axis.ticks = theme_blank()) + ylab("Mean of Residuals") + xlab("Model") +
geom_text(aes(x = 12, y = 0.3), label = "SD of Residuals = 9")
a2 <- qplot(id, V1, data = ldply(mods, function(x) mean(x$residuals)), geom = "bar",
main = "Replication Models") + theme_dpi() + opts(axis.text.x = theme_blank(),
axis.ticks = theme_blank()) + ylab("Mean of Residuals") + xlab("Model") +
geom_text(aes(x = 7, y = 0.3), label = paste("SD =", round(mean(ldply(mods,
function(x) sd(x$residuals))$V1), 2)))
grid.arrange(a1, a2, main = "Comparing Replication and Provided Residual Means by Model")
plot of chunk residplot1
qplot(residuals, data = midsch, geom = "density") + stat_function(fun = dnorm,
aes(colour = "Normal")) + geom_histogram(aes(y = ..density..), alpha = I(0.4)) +
geom_line(aes(y = ..density.., colour = "Empirical"), stat = "density") +
scale_colour_manual(name = "Density", values = c("red", "blue")) + opts(legend.position = c(0.85,
0.85)) + theme_dpi()
plot of chunk residplot
b <- 2 * rnorm(5000)
c <- b + runif(5000)
dem <- lm(c ~ b)
a1 <- qplot(midsch$ss1, abs(midsch$residuals), main = "Residual Plot of Replication Data",
geom = "point", alpha = I(0.1)) + geom_smooth(method = "lm", se = TRUE) +
xlab("SS1") + ylab("Residuals") + geom_smooth(se = FALSE) + ylim(c(0, 50)) +
theme_dpi()
a2 <- qplot(b, abs(lm(c ~ b)$residuals), main = "Well Specified OLS", alpha = I(0.3)) +
theme_dpi() + geom_smooth()
grid.arrange(a1, a2, ncol = 2)
plot of chunk perfectmodel
bptest(ss_mod)
##
## studentized Breusch-Pagan test
##
## data: ss_mod
## BP = 7.499, df = 1, p-value = 0.006172
gqtest(ss_mod)
##
## Goldfeld-Quandt test
##
## data: ss_mod
## GQ = 0.8302, df1 = 263, df2 = 263, p-value = 0.9341
lm objectggplot2 we can run something called fortify on our linear model to get a data frame that tells us a lot of diagnostics about each observationdamodel <- fortify(ss_mod)
summary(damodel)
## ss2 ss1 .hat .sigma
## Min. :416 Min. :392 Min. :0.00189 Min. :10.9
## 1st Qu.:478 1st Qu.:457 1st Qu.:0.00207 1st Qu.:11.2
## Median :495 Median :471 Median :0.00275 Median :11.2
## Mean :494 Mean :468 Mean :0.00377 Mean :11.2
## 3rd Qu.:510 3rd Qu.:483 3rd Qu.:0.00416 3rd Qu.:11.2
## Max. :560 Max. :511 Max. :0.02920 Max. :11.2
## .cooksd .fitted .resid .stdresid
## Min. :0.00000 Min. :412 Min. :-46.36 Min. :-4.148
## 1st Qu.:0.00015 1st Qu.:481 1st Qu.: -7.60 1st Qu.:-0.680
## Median :0.00062 Median :496 Median : -0.42 Median :-0.038
## Mean :0.00225 Mean :494 Mean : 0.00 Mean : 0.000
## 3rd Qu.:0.00179 3rd Qu.:509 3rd Qu.: 6.49 3rd Qu.: 0.581
## Max. :0.06596 Max. :539 Max. : 58.36 Max. : 5.218
dv iv .hat .sigma .cooksd .fitted .resid and .stdresid.fitted is the prediction from our model.resid = dv - .fitted.stdresid = normalized .resid.sigma = estimate of residual standard deviation when observation is dropped from the model.hat is more obscure, but is a measure of the influence an individual observation has on overall model fita <- rnorm(500)
b <- runif(500)
c <- a + b
goodsim <- lm(c ~ a)
goodsim_a <- fortify(goodsim)
qplot(c, .hat, data = goodsim_a) + theme_dpi() + geom_smooth(se = FALSE)
plot of chunk simulatedgoodmodel
qplot(ss2, .hat, data = damodel) + theme_dpi() + geom_smooth(se = FALSE)
plot of chunk nonsim
a <- qplot(c, .hat, data = goodsim_a) + theme_dpi() + geom_smooth(se = FALSE)
b <- qplot(ss2, .hat, data = damodel) + theme_dpi() + geom_smooth(se = FALSE)
grid.arrange(a, b, ncol = 2)
hat value are what we call "high leverage" observations, and on their own are not bad--in fact our good model has lots of themqplot(ss2, .hat, data = damodel) + theme_dpi() + geom_smooth(se = FALSE) + geom_hline(yintercept = 3 *
mean(damodel$.hat), color = I("red"), size = I(1.1))
plot of chunk diagnosticplot
infobs <- which(apply(influence.measures(ss_mod)$is.inf, 1, any))
ssdata <- cbind(fortify(ss_mod), midsch_sub)
ssdata$id3 <- paste(ssdata$district_id, ssdata$school_id, sep = ".")
noinf <- lm(ss2 ~ ss1, data = midsch_sub[-infobs, ])
noinff <- fortify(noinf)
qplot(ss1, ss2, data = ssdata, alpha = I(0.5)) + geom_line(aes(ss1, .fitted,
group = 1), data = ssdata, size = I(1.02)) + geom_line(aes(x = ss1, y = .fitted,
group = 1), data = noinff, linetype = 6, size = 1.1, color = "blue") + theme_dpi() +
xlab("SS1") + ylab("Y")
plot of chunk infobsplot
my_megamod <- lm(ss2 ~ ss1 + grade + test_year + subject, data = midsch)
summary(my_megamod)
##
## Call:
## lm(formula = ss2 ~ ss1 + grade + test_year + subject, data = midsch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.58 -6.38 0.69 6.93 62.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 415.92105 108.01823 3.85 0.00012 ***
## ss1 0.89548 0.00321 278.85 < 2e-16 ***
## grade -0.72909 0.08014 -9.10 < 2e-16 ***
## test_year -0.16754 0.05380 -3.11 0.00185 **
## subjectread -11.53144 0.15245 -75.64 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.7 on 19980 degrees of freedom
## Multiple R-squared: 0.9, Adjusted R-squared: 0.9
## F-statistic: 4.5e+04 on 4 and 19980 DF, p-value: <2e-16
my_megamod2 <- lm(ss2 ~ ss1 + as.factor(grade) + as.factor(test_year) + subject,
data = midsch)
summary(my_megamod2)
##
## Call:
## lm(formula = ss2 ~ ss1 + as.factor(grade) + as.factor(test_year) +
## subject, data = midsch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -77.43 -5.78 0.36 6.18 60.16
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.93813 1.35590 53.79 < 2e-16 ***
## ss1 0.91197 0.00306 298.17 < 2e-16 ***
## as.factor(grade)5 -8.39756 0.20701 -40.57 < 2e-16 ***
## as.factor(grade)6 -0.69535 0.27917 -2.49 0.013 *
## as.factor(grade)7 -2.92812 0.29120 -10.06 < 2e-16 ***
## as.factor(grade)8 -7.64546 0.32318 -23.66 < 2e-16 ***
## as.factor(test_year)2008 -3.08623 0.22493 -13.72 < 2e-16 ***
## as.factor(test_year)2009 -0.46178 0.22667 -2.04 0.042 *
## as.factor(test_year)2010 -1.86967 0.22716 -8.23 < 2e-16 ***
## as.factor(test_year)2011 -1.49652 0.22769 -6.57 5.1e-11 ***
## subjectread -11.59171 0.14416 -80.41 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.2 on 19974 degrees of freedom
## Multiple R-squared: 0.911, Adjusted R-squared: 0.911
## F-statistic: 2.04e+04 on 10 and 19974 DF, p-value: <2e-16
anova(my_megamod, my_megamod2)
## Analysis of Variance Table
##
## Model 1: ss2 ~ ss1 + grade + test_year + subject
## Model 2: ss2 ~ ss1 + as.factor(grade) + as.factor(test_year) + subject
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 19980 2306425
## 2 19974 2061668 6 244757 395 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
*megamodeli <- lm(ss2 ~ ss1 + as.factor(grade) + subject * factor(test_year),
data = midsch)
summary(megamodeli)
##
## Call:
## lm(formula = ss2 ~ ss1 + as.factor(grade) + subject * factor(test_year),
## data = midsch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.38 -5.74 0.32 6.18 60.86
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.82489 1.35266 53.84 < 2e-16
## ss1 0.91331 0.00305 299.53 < 2e-16
## as.factor(grade)5 -8.43203 0.20601 -40.93 < 2e-16
## as.factor(grade)6 -0.74629 0.27784 -2.69 0.0072
## as.factor(grade)7 -3.00776 0.28994 -10.37 < 2e-16
## as.factor(grade)8 -7.74983 0.32188 -24.08 < 2e-16
## subjectread -12.54107 0.31707 -39.55 < 2e-16
## factor(test_year)2008 -4.46270 0.31648 -14.10 < 2e-16
## factor(test_year)2009 0.26387 0.31898 0.83 0.4081
## factor(test_year)2010 -1.58019 0.31991 -4.94 7.9e-07
## factor(test_year)2011 -3.51305 0.32088 -10.95 < 2e-16
## subjectread:factor(test_year)2008 2.74390 0.44713 6.14 8.6e-10
## subjectread:factor(test_year)2009 -1.45665 0.45085 -3.23 0.0012
## subjectread:factor(test_year)2010 -0.58815 0.45192 -1.30 0.1931
## subjectread:factor(test_year)2011 4.02058 0.45290 8.88 < 2e-16
##
## (Intercept) ***
## ss1 ***
## as.factor(grade)5 ***
## as.factor(grade)6 **
## as.factor(grade)7 ***
## as.factor(grade)8 ***
## subjectread ***
## factor(test_year)2008 ***
## factor(test_year)2009
## factor(test_year)2010 ***
## factor(test_year)2011 ***
## subjectread:factor(test_year)2008 ***
## subjectread:factor(test_year)2009 **
## subjectread:factor(test_year)2010
## subjectread:factor(test_year)2011 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.1 on 19970 degrees of freedom
## Multiple R-squared: 0.912, Adjusted R-squared: 0.912
## F-statistic: 1.47e+04 on 14 and 19970 DF, p-value: <2e-16
: in this case only includes the interactions, but not the main effects in every combinationmegamodelii <- lm(ss2 ~ ss1 + as.factor(grade) + subject:factor(test_year),
data = midsch)
summary(megamodelii)
##
## Call:
## lm(formula = ss2 ~ ss1 + as.factor(grade) + subject:factor(test_year),
## data = midsch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.38 -5.74 0.32 6.18 60.86
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.79135 1.37799 44.12 < 2e-16 ***
## ss1 0.91331 0.00305 299.53 < 2e-16 ***
## as.factor(grade)5 -8.43203 0.20601 -40.93 < 2e-16 ***
## as.factor(grade)6 -0.74629 0.27784 -2.69 0.0072 **
## as.factor(grade)7 -3.00776 0.28994 -10.37 < 2e-16 ***
## as.factor(grade)8 -7.74983 0.32188 -24.08 < 2e-16 ***
## subjectmath:factor(test_year)2007 12.03354 0.32069 37.52 < 2e-16 ***
## subjectread:factor(test_year)2007 -0.50753 0.31972 -1.59 0.1124
## subjectmath:factor(test_year)2008 7.57083 0.31980 23.67 < 2e-16 ***
## subjectread:factor(test_year)2008 -2.22633 0.31968 -6.96 3.4e-12 ***
## subjectmath:factor(test_year)2009 12.29741 0.32256 38.12 < 2e-16 ***
## subjectread:factor(test_year)2009 -1.70032 0.32223 -5.28 1.3e-07 ***
## subjectmath:factor(test_year)2010 10.45335 0.32279 32.38 < 2e-16 ***
## subjectread:factor(test_year)2010 -2.67587 0.32275 -8.29 < 2e-16 ***
## subjectmath:factor(test_year)2011 8.52049 0.32321 26.36 < 2e-16 ***
## subjectread:factor(test_year)2011 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.1 on 19970 degrees of freedom
## Multiple R-squared: 0.912, Adjusted R-squared: 0.912
## F-statistic: 1.47e+04 on 14 and 19970 DF, p-value: <2e-16
anova(my_megamod, my_megamod2, megamodelii, megamodeli)
## Analysis of Variance Table
##
## Model 1: ss2 ~ ss1 + grade + test_year + subject
## Model 2: ss2 ~ ss1 + as.factor(grade) + as.factor(test_year) + subject
## Model 3: ss2 ~ ss1 + as.factor(grade) + subject:factor(test_year)
## Model 4: ss2 ~ ss1 + as.factor(grade) + subject * factor(test_year)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 19980 2306425
## 2 19974 2061668 6 244757 399.3 <2e-16 ***
## 3 19970 2040193 4 21475 52.5 <2e-16 ***
## 4 19970 2040193 0 0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
badidea <- lm(ss2 ~ ss1 + as.factor(grade) + as.factor(test_year) + subject +
factor(district_id), data = midsch)
summary(badidea)
##
## Call:
## lm(formula = ss2 ~ ss1 + as.factor(grade) + as.factor(test_year) +
## subject + factor(district_id), data = midsch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.49 -5.48 -0.01 5.47 62.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 148.98751 3.81658 39.04 < 2e-16 ***
## ss1 0.74205 0.00423 175.31 < 2e-16 ***
## as.factor(grade)5 -3.85890 0.21074 -18.31 < 2e-16 ***
## as.factor(grade)6 6.60333 0.30341 21.76 < 2e-16 ***
## as.factor(grade)7 7.73675 0.33706 22.95 < 2e-16 ***
## as.factor(grade)8 5.69441 0.38978 14.61 < 2e-16 ***
## as.factor(test_year)2008 -2.59695 0.21338 -12.17 < 2e-16 ***
## as.factor(test_year)2009 0.17251 0.21602 0.80 0.42453
## as.factor(test_year)2010 -0.84928 0.21778 -3.90 9.7e-05 ***
## as.factor(test_year)2011 -0.45301 0.21753 -2.08 0.03731 *
## subjectread -10.97461 0.13420 -81.78 < 2e-16 ***
## factor(district_id)14 -5.89212 3.61105 -1.63 0.10276
## factor(district_id)70 1.89458 4.47156 0.42 0.67179
## factor(district_id)84 -1.31629 4.71604 -0.28 0.78016
## factor(district_id)91 2.99628 4.17978 0.72 0.47347
## factor(district_id)112 0.30500 3.65142 0.08 0.93343
## factor(district_id)119 3.35189 3.68485 0.91 0.36302
## factor(district_id)126 -2.41477 5.77471 -0.42 0.67583
## factor(district_id)140 -1.61203 3.62384 -0.44 0.65644
## factor(district_id)147 1.75132 3.36281 0.52 0.60252
## factor(district_id)154 -1.94406 3.58967 -0.54 0.58812
## factor(district_id)161 3.79405 5.77435 0.66 0.51116
## factor(district_id)170 -0.95470 3.54819 -0.27 0.78788
## factor(district_id)182 4.67066 3.56335 1.31 0.18996
## factor(district_id)203 0.95483 4.30520 0.22 0.82448
## factor(district_id)217 -2.14888 5.77247 -0.37 0.70970
## factor(district_id)231 1.32681 3.84933 0.34 0.73033
## factor(district_id)238 0.99708 3.81210 0.26 0.79367
## factor(district_id)280 -1.67854 3.49559 -0.48 0.63110
## factor(district_id)308 -4.03393 3.66661 -1.10 0.27127
## factor(district_id)336 0.01280 3.49218 0.00 0.99708
## factor(district_id)350 5.40344 5.09415 1.06 0.28883
## factor(district_id)413 -4.40799 3.38955 -1.30 0.19346
## factor(district_id)422 -1.68519 3.68427 -0.46 0.64739
## factor(district_id)427 19.33171 7.45405 2.59 0.00951 **
## factor(district_id)434 -1.73695 3.65076 -0.48 0.63424
## factor(district_id)469 3.75988 4.71376 0.80 0.42509
## factor(district_id)476 -4.06442 3.72603 -1.09 0.27537
## factor(district_id)485 2.61823 4.30459 0.61 0.54303
## factor(district_id)497 -3.12815 5.09092 -0.61 0.53892
## factor(district_id)602 -1.92506 3.94358 -0.49 0.62545
## factor(district_id)609 -0.43771 4.71448 -0.09 0.92603
## factor(district_id)616 1.28019 3.94472 0.32 0.74554
## factor(district_id)623 3.18066 4.30363 0.74 0.45988
## factor(district_id)637 -0.55437 7.45378 -0.07 0.94071
## factor(district_id)657 12.53789 4.47516 2.80 0.00509 **
## factor(district_id)658 -1.00424 4.30261 -0.23 0.81545
## factor(district_id)665 -1.05043 3.58996 -0.29 0.76983
## factor(district_id)700 -0.79929 3.84847 -0.21 0.83547
## factor(district_id)714 7.22362 3.45606 2.09 0.03662 *
## factor(district_id)721 0.24070 3.65129 0.07 0.94744
## factor(district_id)735 -1.14845 4.08171 -0.28 0.77843
## factor(district_id)777 -1.51765 3.61159 -0.42 0.67433
## factor(district_id)840 7.54670 7.45362 1.01 0.31132
## factor(district_id)870 1.36074 4.17983 0.33 0.74477
## factor(district_id)896 -0.29208 4.71412 -0.06 0.95060
## factor(district_id)903 -1.34798 3.81176 -0.35 0.72361
## factor(district_id)910 -3.96702 4.00659 -0.99 0.32213
## factor(district_id)994 9.29289 5.77557 1.61 0.10763
## factor(district_id)1015 6.43248 3.62528 1.77 0.07602 .
## factor(district_id)1029 6.11486 4.47180 1.37 0.17151
## factor(district_id)1078 1.14949 3.84857 0.30 0.76519
## factor(district_id)1085 3.77354 3.94367 0.96 0.33865
## factor(district_id)1092 -0.18947 3.48134 -0.05 0.95660
## factor(district_id)1134 -2.73460 3.66722 -0.75 0.45587
## factor(district_id)1141 -4.27890 3.66731 -1.17 0.24332
## factor(district_id)1162 5.02019 5.09040 0.99 0.32404
## factor(district_id)1176 1.51987 3.75060 0.41 0.68531
## factor(district_id)1183 1.93317 3.72664 0.52 0.60394
## factor(district_id)1218 -1.35219 4.71286 -0.29 0.77418
## factor(district_id)1232 1.50135 4.71616 0.32 0.75023
## factor(district_id)1253 -4.08712 3.49242 -1.17 0.24190
## factor(district_id)1260 -1.79921 3.70474 -0.49 0.62722
## factor(district_id)1295 -0.65535 4.47422 -0.15 0.88355
## factor(district_id)1309 2.55693 4.17909 0.61 0.54065
## factor(district_id)1316 3.67797 3.65186 1.01 0.31387
## factor(district_id)1376 6.55185 3.56432 1.84 0.06605 .
## factor(district_id)1380 -7.86521 3.49585 -2.25 0.02447 *
## factor(district_id)1407 2.30171 4.30266 0.53 0.59269
## factor(district_id)1414 5.36948 3.62488 1.48 0.13855
## factor(district_id)1428 2.20112 4.08271 0.54 0.58980
## factor(district_id)1449 10.67951 7.45333 1.43 0.15192
## factor(district_id)1499 -2.28246 4.18016 -0.55 0.58506
## factor(district_id)1526 -0.69238 3.62326 -0.19 0.84846
## factor(district_id)1540 2.69523 3.65116 0.74 0.46041
## factor(district_id)1554 1.37737 3.38333 0.41 0.68394
## factor(district_id)1568 0.77716 3.65077 0.21 0.83143
## factor(district_id)1600 5.29397 7.45430 0.71 0.47760
## factor(district_id)1631 2.77177 4.47192 0.62 0.53539
## factor(district_id)1638 2.45280 3.51370 0.70 0.48514
## factor(district_id)1645 4.41788 3.89267 1.13 0.25642
## factor(district_id)1659 2.53142 3.77974 0.67 0.50304
## factor(district_id)1673 -7.60574 5.77207 -1.32 0.18763
## factor(district_id)1687 3.52457 3.59087 0.98 0.32634
## factor(district_id)1694 -0.18870 3.68475 -0.05 0.95916
## factor(district_id)1729 -0.84508 5.77264 -0.15 0.88361
## factor(district_id)1813 -7.13497 5.09373 -1.40 0.16131
## factor(district_id)1848 -10.70569 4.47481 -2.39 0.01675 *
## factor(district_id)1855 -2.50834 5.77242 -0.43 0.66390
## factor(district_id)1862 -0.37137 3.41120 -0.11 0.91331
## factor(district_id)1883 2.20178 3.68386 0.60 0.55006
## factor(district_id)1890 5.96119 4.47214 1.33 0.18256
## factor(district_id)1897 4.40600 3.62546 1.22 0.22427
## factor(district_id)1900 4.66362 3.52007 1.32 0.18523
## factor(district_id)1939 -2.29608 5.09149 -0.45 0.65202
## factor(district_id)1945 0.52588 3.58011 0.15 0.88322
## factor(district_id)1953 2.08524 3.77912 0.55 0.58111
## factor(district_id)2009 -0.54325 4.30303 -0.13 0.89954
## factor(district_id)2044 4.81875 5.09405 0.95 0.34418
## factor(district_id)2051 -1.05207 3.70471 -0.28 0.77643
## factor(district_id)2058 5.48442 3.60238 1.52 0.12791
## factor(district_id)2128 3.17750 5.09021 0.62 0.53248
## factor(district_id)2142 4.70263 3.81229 1.23 0.21739
## factor(district_id)2184 -2.01049 3.68504 -0.55 0.58536
## factor(district_id)2205 5.24323 4.71439 1.11 0.26608
## factor(district_id)2212 -7.82371 5.77328 -1.36 0.17538
## factor(district_id)2217 3.29519 3.77999 0.87 0.38336
## factor(district_id)2226 -8.82873 4.08243 -2.16 0.03058 *
## factor(district_id)2233 1.27707 3.72649 0.34 0.73183
## factor(district_id)2240 -5.08814 4.71348 -1.08 0.28038
## factor(district_id)2289 -1.41013 3.35904 -0.42 0.67463
## factor(district_id)2296 4.88177 3.52538 1.38 0.16614
## factor(district_id)2303 -2.15791 3.51330 -0.61 0.53908
## factor(district_id)2310 0.57489 5.09356 0.11 0.91014
## factor(district_id)2420 6.15664 3.52477 1.75 0.08071 .
## factor(district_id)2422 -2.71442 3.72648 -0.73 0.46637
## factor(district_id)2443 2.17936 3.65091 0.60 0.55056
## factor(district_id)2460 4.63823 3.62429 1.28 0.20064
## factor(district_id)2478 -0.07777 3.65072 -0.02 0.98300
## factor(district_id)2485 -0.32718 4.47159 -0.07 0.94167
## factor(district_id)2523 2.10726 4.08129 0.52 0.60563
## factor(district_id)2527 2.19518 4.17909 0.53 0.59940
## factor(district_id)2534 4.39408 7.45429 0.59 0.55555
## factor(district_id)2541 -7.08520 5.09390 -1.39 0.16427
## factor(district_id)2562 2.36758 3.50406 0.68 0.49926
## factor(district_id)2576 -0.25670 3.89144 -0.07 0.94741
## factor(district_id)2583 3.22269 3.49641 0.92 0.35669
## factor(district_id)2604 4.19445 3.58169 1.17 0.24158
## factor(district_id)2605 4.42663 4.30389 1.03 0.30372
## factor(district_id)2611 5.74927 3.47547 1.65 0.09809 .
## factor(district_id)2618 -3.66167 4.17895 -0.88 0.38092
## factor(district_id)2632 4.82899 4.47407 1.08 0.28045
## factor(district_id)2646 3.15504 4.47427 0.71 0.48072
## factor(district_id)2695 1.17008 3.38716 0.35 0.72976
## factor(district_id)2702 -0.10449 3.72627 -0.03 0.97763
## factor(district_id)2730 -3.09343 4.30366 -0.72 0.47228
## factor(district_id)2737 5.32855 4.47168 1.19 0.23342
## factor(district_id)2744 -4.60344 3.77878 -1.22 0.22315
## factor(district_id)2758 -1.76045 3.61111 -0.49 0.62590
## factor(district_id)2793 -0.56062 3.35586 -0.17 0.86733
## factor(district_id)2800 -0.06760 3.65091 -0.02 0.98523
## factor(district_id)2814 3.62502 4.71318 0.77 0.44183
## factor(district_id)2828 1.44955 3.75220 0.39 0.69926
## factor(district_id)2835 6.56438 3.62457 1.81 0.07014 .
## factor(district_id)2842 10.16507 5.09333 2.00 0.04597 *
## factor(district_id)2849 -1.56283 3.41139 -0.46 0.64687
## factor(district_id)2856 -2.41869 3.75122 -0.64 0.51908
## factor(district_id)2863 0.75008 7.45383 0.10 0.91985
## factor(district_id)2885 -2.90480 3.50402 -0.83 0.40712
## factor(district_id)2898 1.42446 3.75087 0.38 0.70412
## factor(district_id)2912 1.41648 4.71501 0.30 0.76386
## factor(district_id)2940 -7.94843 4.71462 -1.69 0.09183 .
## factor(district_id)2961 7.85789 4.71429 1.67 0.09557 .
## factor(district_id)3087 -4.01147 4.71609 -0.85 0.39501
## factor(district_id)3129 0.38014 3.65092 0.10 0.91707
## factor(district_id)3150 1.97300 3.84836 0.51 0.60818
## factor(district_id)3171 2.00136 4.71435 0.42 0.67119
## factor(district_id)3206 10.75595 7.45256 1.44 0.14896
## factor(district_id)3220 2.40836 3.72648 0.65 0.51810
## factor(district_id)3269 -1.52719 3.35200 -0.46 0.64868
## factor(district_id)3290 -1.75732 3.42052 -0.51 0.60743
## factor(district_id)3297 -0.83698 3.94285 -0.21 0.83189
## factor(district_id)3304 5.20597 4.47435 1.16 0.24464
## factor(district_id)3311 -1.18911 3.72686 -0.32 0.74968
## factor(district_id)3318 -1.50459 5.09382 -0.30 0.76771
## factor(district_id)3325 -3.15125 5.77539 -0.55 0.58532
## factor(district_id)3332 -0.52079 3.72640 -0.14 0.88885
## factor(district_id)3339 5.17735 3.46511 1.49 0.13516
## factor(district_id)3360 0.00374 3.59970 0.00 0.99917
## factor(district_id)3367 -0.24629 3.58983 -0.07 0.94530
## factor(district_id)3381 1.58885 3.68557 0.43 0.66640
## factor(district_id)3409 -0.60769 3.66741 -0.17 0.86839
## factor(district_id)3427 -2.25874 4.47161 -0.51 0.61347
## factor(district_id)3428 5.82348 5.77326 1.01 0.31313
## factor(district_id)3430 -5.60499 3.47808 -1.61 0.10708
## factor(district_id)3434 -8.43723 3.72691 -2.26 0.02359 *
## factor(district_id)3437 4.23683 3.54239 1.20 0.23170
## factor(district_id)3444 -2.75734 3.53509 -0.78 0.43541
## factor(district_id)3479 8.07864 3.49078 2.31 0.02066 *
## factor(district_id)3484 -6.21708 4.47217 -1.39 0.16449
## factor(district_id)3500 1.48277 3.58078 0.41 0.67881
## factor(district_id)3510 11.97253 3.75368 3.19 0.00143 **
## factor(district_id)3528 8.12939 3.72929 2.18 0.02928 *
## factor(district_id)3549 6.71485 3.42833 1.96 0.05017 .
## factor(district_id)3612 0.58480 3.78012 0.15 0.87706
## factor(district_id)3619 -11.40273 3.33859 -3.42 0.00064 ***
## factor(district_id)3640 -1.95526 4.47171 -0.44 0.66193
## factor(district_id)3654 0.16701 4.47190 0.04 0.97021
## factor(district_id)3661 0.98847 5.77469 0.17 0.86409
## factor(district_id)3668 5.59218 5.77236 0.97 0.33266
## factor(district_id)3675 4.46823 3.66813 1.22 0.22319
## factor(district_id)3682 1.05085 3.75157 0.28 0.77940
## factor(district_id)3689 -0.81978 4.17866 -0.20 0.84447
## factor(district_id)3696 0.61941 5.77433 0.11 0.91458
## factor(district_id)3787 -1.82025 3.65125 -0.50 0.61812
## factor(district_id)3794 3.50740 3.65132 0.96 0.33677
## factor(district_id)3822 6.69083 3.49384 1.92 0.05550 .
## factor(district_id)3857 3.63767 3.51487 1.03 0.30071
## factor(district_id)3862 9.15740 3.65499 2.51 0.01224 *
## factor(district_id)3871 -3.21138 3.72616 -0.86 0.38878
## factor(district_id)3892 3.30407 3.42625 0.96 0.33489
## factor(district_id)3899 -2.46155 4.08164 -0.60 0.54646
## factor(district_id)3906 -3.12682 3.65131 -0.86 0.39181
## factor(district_id)3913 -1.24485 4.47425 -0.28 0.78084
## factor(district_id)3925 5.25080 3.48699 1.51 0.13213
## factor(district_id)3934 4.35825 5.09372 0.86 0.39222
## factor(district_id)3941 -0.21718 4.08117 -0.05 0.95756
## factor(district_id)3948 -4.38526 3.81242 -1.15 0.25005
## factor(district_id)3955 -0.73287 3.63626 -0.20 0.84028
## factor(district_id)3962 0.49831 3.61097 0.14 0.89024
## factor(district_id)3969 -1.32823 5.09142 -0.26 0.79419
## factor(district_id)3983 -3.93047 3.68462 -1.07 0.28611
## factor(district_id)3990 -8.58757 5.09367 -1.69 0.09183 .
## factor(district_id)4011 1.01859 4.30367 0.24 0.81291
## factor(district_id)4018 -0.46530 3.43564 -0.14 0.89227
## factor(district_id)4060 4.70264 3.48238 1.35 0.17690
## factor(district_id)4067 -2.17809 3.68491 -0.59 0.55447
## factor(district_id)4074 -1.54639 3.75127 -0.41 0.68017
## factor(district_id)4088 -5.26138 3.77880 -1.39 0.16383
## factor(district_id)4095 3.15058 3.51345 0.90 0.36988
## factor(district_id)4144 2.90557 3.75214 0.77 0.43872
## factor(district_id)4151 0.82096 3.66734 0.22 0.82287
## factor(district_id)4165 -0.15343 3.77881 -0.04 0.96761
## factor(district_id)4179 0.88401 3.38827 0.26 0.79417
## factor(district_id)4186 -4.82044 4.71338 -1.02 0.30646
## factor(district_id)4207 2.35274 5.09413 0.46 0.64419
## factor(district_id)4221 0.11081 7.45369 0.01 0.98814
## factor(district_id)4228 -0.55538 3.81152 -0.15 0.88415
## factor(district_id)4235 6.42321 3.65247 1.76 0.07866 .
## factor(district_id)4270 -2.05746 4.08349 -0.50 0.61437
## factor(district_id)4305 -0.84025 3.72717 -0.23 0.82164
## factor(district_id)4312 4.54308 3.75327 1.21 0.22613
## factor(district_id)4330 -3.29387 5.09215 -0.65 0.51773
## factor(district_id)4347 3.15451 4.71379 0.67 0.50337
## factor(district_id)4368 -2.06023 3.94404 -0.52 0.60142
## factor(district_id)4375 -4.94303 7.45176 -0.66 0.50712
## factor(district_id)4389 3.52896 3.81179 0.93 0.35456
## factor(district_id)4459 -1.21356 4.71317 -0.26 0.79681
## factor(district_id)4473 0.48775 4.00598 0.12 0.90309
## factor(district_id)4501 2.66716 3.48896 0.76 0.44460
## factor(district_id)4515 1.68297 3.65202 0.46 0.64492
## factor(district_id)4529 -2.79105 4.71634 -0.59 0.55400
## factor(district_id)4536 -1.22878 3.94418 -0.31 0.75539
## factor(district_id)4543 -3.70921 3.94404 -0.94 0.34699
## factor(district_id)4571 6.56348 4.17899 1.57 0.11629
## factor(district_id)4578 -0.34842 3.89196 -0.09 0.92867
## factor(district_id)4606 -5.12270 4.47217 -1.15 0.25203
## factor(district_id)4613 3.71168 3.58992 1.03 0.30119
## factor(district_id)4620 -5.72089 3.36191 -1.70 0.08883 .
## factor(district_id)4627 0.42263 3.59009 0.12 0.90629
## factor(district_id)4634 0.53901 3.94257 0.14 0.89126
## factor(district_id)4641 2.86318 3.84934 0.74 0.45700
## factor(district_id)4686 -1.31770 4.47447 -0.29 0.76838
## factor(district_id)4690 4.97604 3.94520 1.26 0.20722
## factor(district_id)4753 -2.89608 3.56281 -0.81 0.41630
## factor(district_id)4760 -0.49195 5.09203 -0.10 0.92304
## factor(district_id)4781 -2.04888 3.77917 -0.54 0.58772
## factor(district_id)4802 1.20556 3.63632 0.33 0.74025
## factor(district_id)4843 2.64340 4.47498 0.59 0.55472
## factor(district_id)4851 -0.02014 3.72589 -0.01 0.99569
## factor(district_id)4872 3.02402 3.70453 0.82 0.41434
## factor(district_id)4893 2.42670 3.61136 0.67 0.50162
## factor(district_id)4956 -1.02238 5.77526 -0.18 0.85949
## factor(district_id)4963 0.09122 5.77239 0.02 0.98739
## factor(district_id)4970 1.84765 3.46417 0.53 0.59379
## factor(district_id)4998 0.33248 3.75226 0.09 0.92939
## factor(district_id)5019 1.99548 3.65133 0.55 0.58472
## factor(district_id)5026 -2.27390 3.65125 -0.62 0.53344
## factor(district_id)5068 0.76529 3.58969 0.21 0.83118
## factor(district_id)5100 2.39017 3.65142 0.65 0.51274
## factor(district_id)5138 -0.12093 3.68491 -0.03 0.97382
## factor(district_id)5264 -1.97448 3.57126 -0.55 0.58035
## factor(district_id)5271 -2.39441 3.39648 -0.70 0.48084
## factor(district_id)5278 0.02451 3.70488 0.01 0.99472
## factor(district_id)5306 0.08681 4.17874 0.02 0.98343
## factor(district_id)5348 1.60339 4.47423 0.36 0.72008
## factor(district_id)5355 7.44106 3.53756 2.10 0.03544 *
## factor(district_id)5362 -10.94444 5.77429 -1.90 0.05806 .
## factor(district_id)5369 -3.42761 3.94394 -0.87 0.38481
## factor(district_id)5390 4.57097 3.68549 1.24 0.21489
## factor(district_id)5432 -1.67459 3.66743 -0.46 0.64795
## factor(district_id)5439 0.22307 3.51339 0.06 0.94938
## factor(district_id)5457 3.90093 4.71348 0.83 0.40790
## factor(district_id)5460 -1.62756 3.75091 -0.43 0.66436
## factor(district_id)5467 -4.42693 4.71391 -0.94 0.34768
## factor(district_id)5474 -1.44116 3.65127 -0.39 0.69307
## factor(district_id)5523 0.43580 4.71383 0.09 0.92634
## factor(district_id)5593 3.46994 3.77893 0.92 0.35851
## factor(district_id)5607 2.12929 3.39691 0.63 0.53078
## factor(district_id)5614 7.36388 5.09060 1.45 0.14804
## factor(district_id)5621 2.28187 3.63714 0.63 0.53042
## factor(district_id)5628 -4.38937 5.09158 -0.86 0.38865
## factor(district_id)5642 1.17074 3.68471 0.32 0.75069
## factor(district_id)5656 3.25016 3.42249 0.95 0.34230
## factor(district_id)5663 -2.10336 3.51346 -0.60 0.54941
## factor(district_id)5740 1.62524 5.77432 0.28 0.77836
## factor(district_id)5747 -2.07069 3.48823 -0.59 0.55277
## factor(district_id)5754 -0.82441 3.65073 -0.23 0.82134
## factor(district_id)5757 1.25904 4.47154 0.28 0.77828
## factor(district_id)5810 0.59861 4.47433 0.13 0.89357
## factor(district_id)5817 0.19100 3.58970 0.05 0.95757
## factor(district_id)5824 -1.87195 3.75177 -0.50 0.61782
## factor(district_id)5859 -0.37260 3.58995 -0.10 0.91734
## factor(district_id)5866 -1.26506 3.89199 -0.33 0.74515
## factor(district_id)5901 4.99285 3.43009 1.46 0.14552
## factor(district_id)5985 0.48091 3.70479 0.13 0.89672
## factor(district_id)6022 0.59604 3.75057 0.16 0.87373
## factor(district_id)6027 3.73754 4.47270 0.84 0.40337
## factor(district_id)6069 6.75909 7.45406 0.91 0.36454
## factor(district_id)6113 3.01917 3.54343 0.85 0.39420
## factor(district_id)6118 2.15589 4.17919 0.52 0.60596
## factor(district_id)6125 -3.01876 3.50402 -0.86 0.38897
## factor(district_id)6174 -0.84132 3.38023 -0.25 0.80345
## factor(district_id)6181 4.72504 3.68737 1.28 0.20006
## factor(district_id)6195 1.50596 3.65099 0.41 0.67999
## factor(district_id)6216 -0.28851 3.84864 -0.07 0.94024
## factor(district_id)6223 -0.98643 3.40467 -0.29 0.77203
## factor(district_id)6237 -1.21340 3.65132 -0.33 0.73965
## factor(district_id)6244 5.09525 3.46210 1.47 0.14111
## factor(district_id)6251 -7.39975 7.45382 -0.99 0.32085
## factor(district_id)6293 -4.72362 5.77298 -0.82 0.41324
## factor(district_id)6300 -1.23379 3.39171 -0.36 0.71604
## factor(district_id)6307 3.35074 3.42736 0.98 0.32826
## factor(district_id)6321 2.70223 4.30224 0.63 0.52995
## factor(district_id)6328 2.52815 3.58970 0.70 0.48127
## factor(district_id)6335 -5.87878 4.00720 -1.47 0.14238
## factor(district_id)6354 4.41717 5.77437 0.76 0.44430
## factor(district_id)6370 1.61430 3.68449 0.44 0.66129
## factor(district_id)6384 3.15642 3.94354 0.80 0.42349
## factor(district_id)6410 4.04696 4.71351 0.86 0.39058
## factor(district_id)6412 -1.06433 4.47409 -0.24 0.81197
## factor(district_id)6419 7.85542 3.60295 2.18 0.02925 *
## factor(district_id)6426 -4.29733 4.71493 -0.91 0.36208
## factor(district_id)6461 -1.73587 3.72576 -0.47 0.64128
## factor(district_id)6470 5.11143 3.58076 1.43 0.15346
## factor(district_id)6475 3.70838 4.71326 0.79 0.43141
## factor(district_id)6482 6.38815 3.94488 1.62 0.10539
## factor(district_id)6608 1.08436 3.81156 0.28 0.77604
## factor(district_id)6678 -1.38829 3.62290 -0.38 0.70158
## factor(district_id)6685 0.95113 3.41806 0.28 0.78081
## factor(district_id)6692 0.54324 3.65124 0.15 0.88173
## factor(district_id)6713 5.17383 5.09167 1.02 0.30958
## factor(district_id)6720 0.05296 3.75167 0.01 0.98874
## factor(district_id)6734 2.26972 3.94448 0.58 0.56502
## factor(district_id)6748 0.91425 3.94414 0.23 0.81670
## factor(district_id)8101 12.82290 7.45221 1.72 0.08532 .
## factor(district_id)8105 -7.72771 3.59165 -2.15 0.03144 *
## factor(district_id)8106 -9.86535 3.59567 -2.74 0.00608 **
## factor(district_id)8108 -9.30716 3.59433 -2.59 0.00962 **
## factor(district_id)8109 -12.35445 3.65396 -3.38 0.00072 ***
## factor(district_id)8110 -7.54074 3.59052 -2.10 0.03573 *
## factor(district_id)8111 -2.87774 3.59103 -0.80 0.42293
## factor(district_id)8112 -16.47665 3.95843 -4.16 3.2e-05 ***
## factor(district_id)8113 -2.60778 3.94245 -0.66 0.50832
## factor(district_id)8114 -5.19282 4.71499 -1.10 0.27076
## factor(district_id)8121 -2.72249 4.00629 -0.68 0.49679
## factor(district_id)8122 -17.15444 5.77587 -2.97 0.00298 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.42 on 19618 degrees of freedom
## Multiple R-squared: 0.925, Adjusted R-squared: 0.923
## F-statistic: 657 on 366 and 19618 DF, p-value: <2e-16
sandwich package, which allows us to manipulate the variance-covariance matrix and adjust our estimates of standard errors appropriately to correct for non-independencelibrary(sandwich)
vcovHC(ss_mod, type = "HC4")
## (Intercept) ss1
## (Intercept) 182.3905 -0.3858415
## ss1 -0.3858 0.0008173
gls.correct <- function(lm) {
require(sandwich)
rob <- t(sapply(c("const", "HC0", "HC1", "HC2", "HC3", "HC4", "HC5"), function(x) sqrt(diag(vcovHC(lm,
type = x)))))
a <- c(NA, (rob[2, 1] - rob[1, 1])/rob[1, 1], (rob[3, 1] - rob[1, 1])/rob[1,
1], (rob[4, 1] - rob[1, 1])/rob[1, 1], (rob[5, 1] - rob[1, 1])/rob[1,
1], (rob[6, 1] - rob[1, 1])/rob[1, 1], (rob[7, 1] - rob[1, 1])/rob[1,
1])
rob <- cbind(rob, round(a * 100, 2))
a <- c(NA, (rob[2, 2] - rob[1, 2])/rob[1, 2], (rob[3, 2] - rob[1, 2])/rob[1,
2], (rob[4, 2] - rob[1, 2])/rob[1, 2], (rob[5, 2] - rob[1, 2])/rob[1,
2], (rob[6, 2] - rob[1, 2])/rob[1, 2], (rob[7, 2] - rob[1, 2])/rob[1,
2])
rob <- cbind(rob, round(a * 100, 2))
rob <- as.data.frame(rob)
names(rob) <- c("alpha", "beta", "alpha.pchange", "beta.pchange")
rob$id2 <- rownames(rob)
rob
}
gls.correct(ss_mod)