mfrow and layout)
png, pdf, jpg and in Markdown)
ggplot
Using the World Development Indicators data from the last lab:
# Read the World Development Indicators table and store region as a factor.
wdi <- read.csv("WDI.csv")
wdi[["region"]] <- factor(wdi[["region"]])

# One fill colour per region; each colour is named after its region.
cols <- c("steelblue", "forestgreen", "goldenrod", "darkorange",
          "mediumpurple", "firebrick", "sienna")
names(cols) <- c("East Asia & Pacific",
                 "Europe & Central Asia",
                 "Latin America & Caribbean",
                 "Middle East & North Africa",
                 "North America",
                 "South Asia",
                 "Sub-Saharan Africa")
Recall:
# Compact overview of the data frame: each column's name, type and first values.
str(wdi)
## 'data.frame': 208 obs. of 16 variables:
## $ country : chr "India" "China" "United States" "Indonesia" ...
## $ iso2c : chr "IN" "CN" "US" "ID" ...
## $ iso3c : chr "IND" "CHN" "USA" "IDN" ...
## $ year : int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ status : logi NA NA NA NA NA NA ...
## $ lastupdated: chr "2026-01-28" "2026-01-28" "2026-01-28" "2026-01-28" ...
## $ LifeExp : num 71.7 78.2 77.4 70.9 67.4 ...
## $ GDP : num 2347 12971 76657 4731 1538 ...
## $ Area : num 2973190 9388210 9147420 1892555 770880 ...
## $ Population : int 1425423212 1412175000 334017321 278830529 243700667 223150896 210306415 169384897 144236933 128613117 ...
## $ region : Factor w/ 7 levels "East Asia & Pacific",..: 6 1 5 1 6 7 3 6 2 3 ...
## $ capital : chr "New Delhi" "Beijing" "Washington D.C." "Jakarta" ...
## $ longitude : num 77.2 116.3 -77 106.8 72.8 ...
## $ latitude : num 28.6 40 38.9 -6.2 30.5 ...
## $ income : chr "Lower middle income" "Upper middle income" "High income" "Upper middle income" ...
## $ lending : chr "IBRD" "IBRD" "Not classified" "IBRD" ...
mfrow()
par(mfrow = c(2,1))
# First panel: GDP per capita (in thousands) by region, on a log y axis.
boxplot(GDP/1000 ~ region, data = wdi,
        col = cols, las = 1, log = "y")
# Second panel: life expectancy by region.
boxplot(LifeExp ~ region, data = wdi,
        col = cols, las = 1)
oma and outer
par(mfrow = c(2,1),
mar = c(0,4,1,2), # margins around each figure
oma = c(3,0,2,0), # outer margins
cex.axis = 0.8,
tck = 0.01, bty ="l", mgp = c(2,.25,0))
# First panel: x axis and x label are both suppressed.
boxplot(GDP/1000 ~ region, data = wdi,
        xaxt = "n", xlab = "",
        col = cols, las = 1, log = "y")
# Second panel keeps its default x axis.
boxplot(LifeExp ~ region, data = wdi, col = cols, las = 1)
# One heading for the whole figure, drawn in the outer margin.
title(main = "Global Indicators across regions", outer = TRUE)
# Two stacked boxplots with no x axes of their own; the region names are
# written once, underneath the lower panel, with mtext().
par(mfrow = c(2,1),
    mar = c(0,4,1,2), # margins around each figure
    oma = c(3,0,2,0), # outer margins
    cex.axis = 0.8,
    tck = 0.01, bty = "l", mgp = c(2,.25,0))
# varwidth = TRUE makes box widths reflect the number of observations per group.
boxplot(GDP/1000 ~ region, data = wdi, log = "y", las = 1, col = cols,
        varwidth = TRUE,
        xlab = "", xaxt = "n") # no axis labelling
boxplot(LifeExp ~ region, data = wdi, las = 1,
        varwidth = TRUE,
        col = cols, xaxt = "n")
title("Global Indicators across regions", outer = TRUE)
# Boxes are drawn in factor-level order at x = 1, 2, ..., so the labels are
# taken from the same levels and positioned by index rather than a fixed 1:7.
regions <- levels(wdi$region)
# Break each name after the ampersand so long labels wrap onto two lines.
region_labels <- gsub("&", "&\n", regions, fixed = TRUE)
mtext(region_labels, side = 1, at = seq_along(region_labels),
      line = 0, padj = 1)
layout
layout(rbind(c(1,2), c(1,3)))
# Outline the three figure regions of the current layout to check the arrangement.
layout.show(3)
# library() stops immediately if scales is missing; require() would only
# return FALSE and let the first alpha() call fail further down.
library(scales) # for transparency!
# Panel 1 fills the left column; panels 2 and 3 are stacked in the right column.
layout(rbind(c(1,2), c(1,3)))
par(cex.lab = 1.2, las = 1,
    mgp = c(2, .25, 0), tck = 0.01,
    bty = "l", mar = c(0,4,0,2),
    oma = c(4,0,4,0), xpd = NA)
# Colour for each country, picked by the position of its region among the levels.
cont_col <- cols[match(wdi$region, levels(wdi$region))]
# Symbol size: square root of population relative to the largest, scaled by 15.
cex_pop <- sqrt(wdi$Population / max(wdi$Population, na.rm = TRUE)) * 15
# Panel 1: set up empty axes (type = "n"), then add the bubbles.
plot(wdi$GDP/1e3, wdi$LifeExp, log = "x",
     ylab = "Life expectancy (years)",
     xlab = "GDP per capita (1000 USD, log scale)", type = "n")
points(wdi$GDP/1e3, wdi$LifeExp,
       cex = cex_pop + .5, pch = 21, col = cont_col,
       bg = alpha(cont_col, .7))
legend("bottomright", title = "Region",
       legend = levels(wdi$region),
       pt.bg = cols, col = "grey40",
       pch = 21, pt.cex = 1.8, bty = "n")
# Panel 2: GDP per capita by region, semi-transparent fill with solid borders.
boxplot(GDP/1000 ~ region, data = wdi, log = "y", las = 1,
        varwidth = TRUE,
        col = alpha(cols, .7), border = cols,
        xlab = "", xaxt = "n") # no axis labelling
# Panel 3: life expectancy by region; carries the shared "Region" axis label.
boxplot(LifeExp ~ region, data = wdi, las = 1,
        varwidth = TRUE,
        col = alpha(cols, .7), border = cols,
        xaxt = "n", xlab = "Region")
title("Global Indicators across regions", outer = TRUE)
We’ll talk about functions more later in class. But one very basic application for using functions is bundling a bunch of code into a single quick line.
For example:
# Apply the graphical parameters shared by the panels of the WDI figure.
# Takes no arguments; changes the settings of the current graphics device.
setPars <- function(){
  panel_settings <- list(
    cex.lab = 1.2,
    las = 1,
    mgp = c(2, .25, 0),
    tck = 0.01,
    bty = "l",
    mar = c(0,4,0,2),
    oma = c(4,0,4,0),
    xpd = NA
  )
  # par() accepts the settings as a single list of tagged values.
  par(panel_settings)
}
# Bubble plot of life expectancy against GDP per capita (log x axis).
# Takes no arguments: reads the global `wdi` and `cols`, uses alpha() for
# transparency, and draws on the current graphics device.
plotWDI <- function(){
  gdp_k <- wdi$GDP/1e3
  life_exp <- wdi$LifeExp

  # Colour per country, by the position of its region among the factor levels.
  region_idx <- match(wdi$region, levels(wdi$region))
  bubble_col <- cols[region_idx]

  # Symbol size scales with the square root of population relative to the max.
  bubble_cex <- sqrt(wdi$Population / max(wdi$Population, na.rm = TRUE)) * 15

  # Empty axes first (type = "n"), then the bubbles on top.
  plot(gdp_k, life_exp,
       type = "n", log = "x",
       xlab = "GDP per capita (1000 USD, log scale)",
       ylab = "Life expectancy (years)")
  points(gdp_k, life_exp,
         pch = 21, cex = bubble_cex + .5,
         col = bubble_col, bg = alpha(bubble_col, .7))
  legend("bottomright",
         legend = levels(wdi$region), title = "Region",
         pch = 21, pt.cex = 1.8,
         pt.bg = cols, col = "grey40", bty = "n")
}
# Two regional boxplots: GDP per capita (log y axis), then life expectancy.
# Takes no arguments: reads the global `wdi` and `cols`, uses alpha() for the
# fill, and draws into the next two panels of the current device.
boxplotWDI <- function(){
  # Same semi-transparent fill for both plots; borders keep the solid colour.
  box_fill <- alpha(cols, .7)

  # No x axis or x label on the first plot.
  boxplot(GDP/1000 ~ region, data = wdi,
          col = box_fill, border = cols,
          varwidth = TRUE, log = "y", las = 1,
          xaxt = "n", xlab = "")
  # The second plot also has no x axis but carries the "Region" label.
  boxplot(LifeExp ~ region, data = wdi,
          col = box_fill, border = cols,
          varwidth = TRUE, las = 1,
          xaxt = "n", xlab = "Region")
}
Note - these functions ONLY do the plotting - none of the setup. Also they have no arguments and they return nothing. They just do the thing. Code then becomes very simple:
# Panel 1 fills the left column; panels 2 and 3 are stacked on the right.
layout(rbind(c(1,2), c(1,3)))
# Graphical parameters must be set before any panel is drawn.
setPars()
# Fills panel 1.
plotWDI()
# Fills panels 2 and 3.
boxplotWDI()
When R draws a plot, it sends output to a graphics
device — a destination that knows how to render lines, points,
text, and colors. By default that destination is your screen (the
interactive device: windows() on Windows,
quartz() on Mac). To save a figure to a file, you open a
file device instead, draw your plot, then close the
device with dev.off(). The pattern is always the same:
device_function("filename.ext", width = ..., height = ...) # open
# ... plotting code ...
dev.off() # close and write
Nothing appears on screen — output goes straight to the file.
Forgetting dev.off() is the most common mistake: the file
stays open and incomplete.
The two most useful formats are PDF (vector) and PNG (raster). They store images differently and suit different purposes.
| | PDF | PNG |
|---|---|---|
| Type | Vector | Raster (bitmap) |
| Stores images as | Mathematical shape descriptions | Grid of pixels |
| Scaling | Perfect at any size | Degrades if enlarged |
| Best for | Publications, LaTeX, print | Web, Word, presentations |
| Resolution setting | Not applicable | Critical — use res = |
PDF is resolution-independent: a line is stored as “from point A to point B,” so it renders crisp at any zoom level or print size. Ideal for journal submissions.
PNG stores a fixed pixel grid. At low resolution it
looks blurry in print; at high resolution files get large. The
res argument (dots per inch) controls the tradeoff.
# Open a PDF file device; everything drawn until dev.off() goes to this file.
pdf("WDImegaplot.pdf", width = 12, height = 6) # dimensions in inches
layout(rbind(c(1,2), c(1,3)))
setPars()
plotWDI()
boxplotWDI()
# Close the device so the file is completed and written.
dev.off()
- width and height are always inches
- Default size is 7 × 7 inches

png("WDImegaplot.png", width = 2400, height = 1200, res = 200) # pixels
# Same drawing sequence as on screen; output goes to the open file device.
layout(rbind(c(1,2), c(1,3)))
setPars()
plotWDI()
boxplotWDI()
# Close the device so the file is completed and written.
dev.off()
- width and height are pixels by default
- res sets dots per inch — controls how large text and
  points appear relative to the canvas
- Physical size is width / res, so
  width = 800, height = 600, res = 150 gives a ~5.3 × 4 inch plot
- For print use res = 300; for screen
  res = 96 or res = 150 is fine

In an .Rmd file you don’t open and close devices
manually — knitr handles that. Instead, control figure size through
chunk options:
```{r, fig.width = 7, fig.height = 5, dpi = 150}
# ... plotting code ...
```
- fig.width / fig.height — dimensions in inches (like PDF)
- dpi — resolution of the rendered PNG (knitr defaults to
  72; use 150–300 for print)

To set defaults for every chunk:
knitr::opts_chunk$set(fig.width = 7, fig.height = 5, dpi = 150)

jpeg() — when to use it
jpeg() works identically to png() but uses
lossy compression:
# Open a JPEG file device; quality = 90 sets the compression level.
jpeg("myplot.jpg", width = 800, height = 600, res = 150, quality = 90)
# ... plotting code ...
# Close the device so the file is completed and written.
dev.off()
- quality ranges 0–100; lower = smaller file, more
  compression artifacts

ggplot2
ggplot2 was created by Hadley Wickham in 2005, based on
Leland Wilkinson’s book The Grammar of Graphics (1999) — hence
the “gg”. The core idea is that every statistical graphic can be
described by a small set of components assembled in a consistent
way:
In base R, you build a plot by issuing a sequence of imperative
commands (plot(), then points(), then
legend(), …). In ggplot2, you declare
what you want and the package figures out the rendering. The result is a
single object you can inspect, modify, and print.
This is the single most important constraint: every variable
you want to plot must be a column in a data frame. You cannot
pass a loose vector to ggplot2 the way you can to
plot(). This is sometimes inconvenient, but it forces a
discipline that pays off with complex data.
# Attach ggplot2, then reload the WDI table with region stored as a factor.
library(ggplot2)
wdi <- read.csv("WDI.csv")
wdi[["region"]] <- factor(wdi[["region"]])
A ggplot2 call has two required ingredients:
ggplot() sets up the data and aesthetic mappings; a
geom_*() function specifies what to draw.
# Minimal plot: data plus aesthetic mapping in ggplot(), then one geom layer.
ggplot(wdi, mapping = aes(x = GDP / 1e3, y = LifeExp)) +
  geom_point()
aes() maps variables to visual
channels:
| aesthetic | visual property |
|---|---|
| `x`, `y` | position |
| `color` | point/line color (outline) |
| `fill` | filled shape color |
| `size` | point size |
| `alpha` | transparency |
| `shape` | point character |
Common geom_* functions:
| geom | what it draws |
|---|---|
| `geom_point()` | scatter plot |
| `geom_line()` | lines connecting observations |
| `geom_smooth()` | smoothed trend with confidence band |
| `geom_boxplot()` | box-and-whisker |
| `geom_histogram()` | histogram |
| `geom_bar()` / `geom_col()` | bar chart |
| `geom_text()` | text labels |
In base R, encoding a third or fourth variable requires computing
color vectors, size scalings, and legend calls by hand. In
ggplot2, you just add aesthetics — scales and legends are
generated automatically.
Compare adding color, size, and a log scale:
# Position, colour and size are all mapped in one aes(); legends come for free.
ggplot(wdi, aes(GDP / 1e3, LifeExp, colour = region, size = Population / 1e6)) +
  geom_point(alpha = 0.7) +
  scale_x_log10() +
  theme_bw()
Four variables (GDP, life expectancy, region, population) in about 4 lines, with automatic legends for both color and size.
ggplot2’s layered system makes common tasks easy but
uncommon tasks surprisingly hard. Moving a legend to an exact
coordinate, controlling tick positions on a secondary axis, mixing fonts
within a plot title, nudging individual labels — all of these require
either digging into theme() (which has ~100 arguments) or
reaching for extension packages like ggtext,
ggnewscale, or ggrepel.
In base R, arbitrary customization is usually a few
par() arguments or a call to text() /
mtext(). The same result in ggplot2 may
require a stack of overrides.
# Moving legend inside the plot, changing its background,
# adjusting axis text size, removing the minor grid,
# and aligning the title with the plot edge — each a separate theme() argument
theme(
  # Since ggplot2 3.5.0 a numeric legend.position is deprecated: request an
  # inside legend, then give its relative coordinates separately.
  legend.position = "inside",
  legend.position.inside = c(0.15, 0.85),
  legend.background = element_rect(fill = "white", color = "grey70"),
  axis.text = element_text(size = 11),
  panel.grid.minor = element_blank(),
  plot.title.position = "plot"
)
Customizing a ggplot is one of the better uses of large language models.
Faceting splits the data into subplots by one or more categorical
variables — something that requires manual layout() calls
and repeated plotting code in base R. In ggplot2 it is a
single line.
# size is mapped on the point layer only: if geom_smooth() inherited it,
# ggplot2 would warn that the aesthetic was dropped during the statistical
# transformation. The smoothing formula is spelled out to avoid the
# "using formula" message.
ggplot(wdi, aes(x = GDP/1e3, y = LifeExp)) +
  geom_point(aes(size = Population/1e6), alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", formula = y ~ x,
              color = "firebrick", linewidth = 0.7) +
  scale_x_log10() +
  facet_wrap(~ region) # key line of code
facet_wrap(~ region) produces one panel per region, all
on the same scales, with a trend line in each. Doing this in base R
would require a loop, careful margin management, and separate legend
placement.
facet_grid(rows ~ cols) extends this to two categorical
variables simultaneously.
The equivalent task in base R plotting is much harder.
| | base R | ggplot2 |
|---|---|---|
| Learning curve | Steeper for basics | Steeper for customization |
| Multi-variable plots | Manual (but explicit) | Fast (automatic legends/scales) |
| Faceting | Manual `layout()` | `facet_wrap()` / `facet_grid()` |
| Fine-grained control | Excellent | Difficult |
| Default aesthetics | Minimal, print-friendly | Ugly (IMO) |
| Data requirement | Any form of data is OK | Data frame required |
The two approaches are complementary. ggplot2 is often
faster for exploratory work and for panel figures; base R is often
better when you need precise, publication-ready control over every
element.