% Tutorial 6: Visualizations
% DPI R Bootcamp
% Jared Knowles
In this lesson we hope to learn:
names(object) helps
hist(df$readSS)
# Base-graphics scatterplot: readSS on the x axis, mathSS on the y axis.
plot(df$readSS, df$mathSS)
# The same scatterplot again, this time with a lowess smoother on top.
plot(df$readSS, df$mathSS)
# lowess() reads a formula as y ~ x, so mathSS (the plot's y variable) goes on
# the left-hand side; otherwise the curve is drawn with its axes swapped
# relative to the scatterplot above.
lines(lowess(df$mathSS ~ df$readSS), col = "red")
ggplot2 is pretty much the new standard in R.
library(ggplot2)
# Quick ggplot2 plot of readSS (x) against mathSS (y); both names are looked
# up as columns of df.
qplot(readSS, mathSS, data = df)
ggplot2, an R package, does just this by breaking plots into a few basic components.
# Same variables as before, with alpha given as the literal value 0.3 (I()
# passes it through as-is instead of treating it as a variable to map).
# NOTE(review): theme_dpi() is not defined in this file; presumably it comes
# from the eeptools package listed in the session info below -- confirm.
qplot(readSS, mathSS, data = df, alpha = I(0.3)) + theme_dpi()
readSS is the x coordinate and mathSS is the y coordinate for each observation in our data
df$mathSS using 3 separate geoms
qplot(mathSS, readSS, data = df) + theme_dpi()
# Single-variable plot of mathSS; no geom is given, so qplot picks its default.
qplot(mathSS, data = df) + theme_dpi()
# Line geom: grade (converted to a factor) on x, mathSS on y. group = stuid
# draws one line per stuid value, and alpha is fixed at 0.2 via I().
qplot(factor(grade), mathSS, data = df, geom = "line", group = stuid, alpha = I(0.2)) +
theme_dpi()
ggplot2 has an extended syntax that makes this obvious:
ggplot(df, aes(x = readSS, y = mathSS)) + geom_point()
# Identical to: qplot(readSS,mathSS,data=df)
aes says we are specifying aesthetics; here we specified x and y to make a two-dimensional graphic.
data(mpg)
# Baseline plot from the mpg data: displ on x, cty on y, nothing else mapped.
qplot(displ, cty, data = mpg) + theme_dpi()
# Same plot with size mapped to cyl.
qplot(displ, cty, data = mpg, size = cyl) + theme_dpi()
# Same plot with shape mapped to drv; size is fixed at 3 via I().
qplot(displ, cty, data = mpg, shape = drv, size = I(3)) + theme_dpi()
# Same plot with color mapped to class.
qplot(displ, cty, data = mpg, color = class) + theme_dpi()
# Back to df (first 100 rows only): size mapped to race, alpha fixed at 0.8.
qplot(mathSS, readSS, data = df[1:100, ], size = race, alpha = I(0.8)) + theme_dpi()
# Build an ordered copy of proflvl so the sequential color palette follows the
# rank of the proficiency levels. The levels are listed in the same order this
# file uses when it re-levels proflvl itself (advanced, proficient, basic,
# below basic); the earlier order put "basic" ahead of "proficient", which
# scrambled the sequential color ramp.
df$proflvl2 <- factor(
  df$proflvl,
  levels = c("advanced", "proficient", "basic", "below basic"),
  ordered = TRUE
)
# First 100 rows: mathSS on x, readSS on y, color mapped to the ordered factor,
# point size fixed at 3.
qplot(mathSS, readSS, data = df[1:100, ], color = proflvl2, size = I(3)) +
  scale_color_brewer(type = "seq") +
  theme_dpi()
mathSS to, and what can we map discrete characteristics like race to?
qplot(factor(grade), readSS, data = df[1:100, ], color = mathSS, geom = "jitter",
size = I(3.2)) + theme_dpi()
# Jittered points for the first 100 rows: grade (as a factor) on x, readSS on
# y, color mapped to dist, point size fixed at 3.2.
qplot(factor(grade), readSS, data = df[1:100, ], color = dist, geom = "jitter",
size = I(3.2)) + theme_dpi()
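| Aesthetic | Discrete                   | Continuous                     |
|-----------|----------------------------|--------------------------------|
| Color     | Disparate colors           | Sequential or divergent colors |
| Size      | Unique size for each value | mapping to radius of value     |
| Shape     | A shape for each value     | does not make sense            |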
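| Aesthetic | Ordered                         | Unordered              |
|-----------|---------------------------------|------------------------|
| Color     | Sequential or divergent colors  | Rainbow                |
| Size      | Increasing or decreasing radius | does not make sense    |
| Shape     | does not make sense             | A shape for each value |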
# readSS (x) against mathSS (y), wrapped into one panel per grade, with an
# "lm" smoother and no standard-error band (se = FALSE) added to each panel.
qplot(readSS, mathSS, data = df) +
  facet_wrap(~grade) +
  theme_dpi(base_size = 12) +
  geom_smooth(method = "lm", se = FALSE, size = I(1.2))

# The same plot laid out with facet_grid() using the formula ell ~ grade.
qplot(readSS, mathSS, data = df) +
  facet_grid(ell ~ grade) +
  theme_dpi(base_size = 12) +
  geom_smooth(method = "lm", se = FALSE, size = I(1.2))
# Mosaic plots with the vcd package.
library(vcd)
# Fix the level order of proflvl (advanced -> below basic) before tabulating.
df$proflvl <- factor(df$proflvl, levels = c("advanced", "proficient", "basic",
"below basic"))
# Build a structable of proflvl by race from df and draw it as a mosaic with
# shading turned on.
a <- structable(proflvl ~ race, data = df)
mosaic(a, shade = TRUE)
# Second mosaic: female by race.
# NOTE(review): vcd is already attached above and proflvl is re-leveled to the
# same levels here; neither line looks necessary for this plot, which only
# uses female and race. `a` is overwritten by the new table.
library(vcd)
df$proflvl <- factor(df$proflvl, levels = c("advanced", "proficient", "basic",
"below basic"))
a <- structable(female ~ race, data = df)
mosaic(a, shade = TRUE)
## [1] "plum" "violet" "darkmagenta" "magenta4" "magenta3"
## [6] "magenta2" "magenta" "magenta1" "orchid4" "orchid"
## [1] "salmon1" "darksalmon" "orangered4" "orangered3"
## [5] "coral" "orangered2" "orangered" "orangered1"
## [9] "lightsalmon2" "lightsalmon" "peru" "tan3"
## [13] "darkorange2" "darkorange4" "darkorange3" "darkorange1"
## [17] "linen" "bisque3" "bisque1" "bisque2"
## [21] "darkorange" "antiquewhite3" "antiquewhite1" "papayawhip"
## [25] "moccasin" "orange2" "orange" "orange1"
## [29] "orange4" "wheat4" "orange3" "wheat"
## [33] "oldlace"
## [1] "snow1" "snow2" "rosybrown" "rosybrown1" "rosybrown2"
## [6] "rosybrown3" "rosybrown4" "lightcoral" "indianred" "indianred1"
## [11] "indianred3" "brown" "brown4" "brown1" "brown3"
## [16] "brown2" "firebrick" "firebrick1" "chocolate" "chocolate4"
## [21] "saddlebrown" "seashell3" "seashell2" "seashell4" "sandybrown"
## [26] "peachpuff2" "peachpuff3"
+ scale_color_brewer(palette = "X")
library(grid)
# Embed one ggplot inside another using a grid viewport.

# p1 is the full-size plot: a density geom of readSS, filled by race, with
# position = "fill" and a qualitative Brewer palette.
p1 <- qplot(readSS, ..density..,
  data = df, fill = race,
  position = "fill", geom = "density"
) +
  scale_fill_brewer(type = "qual", palette = 2)

# p2 is the inset: same data and fill, with the legend, axis text, axis ticks
# and axis labels stripped.
# NOTE(review): p1 maps y to ..density.. but this plot maps y to ..fill..;
# kept as in the original -- confirm this is intended and not a typo.
# opts()/theme_blank() are replaced by theme()/element_blank(), the
# non-deprecated equivalents in the ggplot2 version this document was built
# with. panel.margin keeps its original name.
# NOTE(review): newer ggplot2 releases call this setting panel.spacing --
# confirm against the installed version.
p2 <- qplot(readSS, ..fill..,
  data = df, fill = race,
  position = "fill", geom = "density"
) +
  scale_fill_brewer(type = "qual", palette = 2) +
  ylim(c(0, 1)) +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.margin = unit(0, "lines")
  ) +
  ylab("") +
  xlab("")

# Viewport for the inset: positioned at (0.65, 0.73) and sized 0.2 x 0.2, all
# in "npc" units.
vp <- viewport(
  x = unit(0.65, "npc"), y = unit(0.73, "npc"),
  width = unit(0.2, "npc"), height = unit(0.2, "npc")
)

# Draw the main plot first, then print the inset into the viewport on top.
print(p1)
print(p2, vp = vp)
Embed one plot in another plot in R using two different data elements from our data set. For example, plot a histogram of readSS inside a scatterplot of readSS and mathSS
Explore some examples on the ggplot2 website. What are some ways to overlay more than 3 dimensions of data in a single plot?
What types of data work best for what types of visualizations?
It is good to include the session info, e.g. this document is produced with knitr version 0.9.6. Here is my session info:
# Print the session info (R version, platform, attached and loaded packages)
# with the locale section suppressed.
print(sessionInfo(), locale = FALSE)
## R version 2.15.2 (2012-10-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
##
## attached base packages:
## [1] splines grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] R2SWF_0.4 snow_0.3-10 gbm_1.6-3.2 reshape_0.8.4
## [5] caret_5.15-048 foreach_1.4.0 cluster_1.14.3 reshape2_1.2.2
## [9] lme4_0.999999-0 Matrix_1.0-10 lattice_0.20-10 xtable_1.7-0
## [13] vcd_1.2-13 colorspace_1.2-0 MASS_7.3-22 Hmisc_3.10-1
## [17] survival_2.37-2 sandwich_2.2-9 quantreg_4.94 SparseM_0.96
## [21] gridExtra_0.9.1 mgcv_1.7-22 eeptools_0.1 mapproj_1.2-0
## [25] maps_2.3-0 proto_0.3-10 plyr_1.8 stringr_0.6.2
## [29] ggplot2_0.9.3 lmtest_0.9-30 zoo_1.7-9 knitr_0.9.6
##
## loaded via a namespace (and not attached):
## [1] codetools_0.2-8 compiler_2.15.2 dichromat_1.2-4
## [4] digest_0.6.0 evaluate_0.4.3 formatR_0.7
## [7] gtable_0.1.2 iterators_1.0.6 labeling_0.1
## [10] markdown_0.5.3 munsell_0.4 nlme_3.1-106
## [13] RColorBrewer_1.0-5 scales_0.2.3 stats4_2.15.2
## [16] tools_2.15.2
This work (R Tutorial for Education, by Jared E. Knowles), in service of the Wisconsin Department of Public Instruction, is free of known copyright restrictions.