This document is adapted from the Case Studies section of the Altair Example Gallery.

Our first step is to set up our environment:

Anscombe’s Quartet

Altair example

Data

# Print a compact column-by-column overview of the Anscombe dataset.
vega_data$anscombe() %>% glimpse()
#> Rows: 44
#> Columns: 3
#> $ Series <chr> "I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "II", "…
#> $ X      <dbl> 10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5, 10, 8, 13, 9, 11, 14, 6,…
#> $ Y      <dbl> 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.81, 5…

Chart

# Anscombe's Quartet: circles of Y against X, split into columns by Series.
# Neither positional scale is forced to include zero.
anscombe_x <- alt$X("X", scale = alt$Scale(zero = FALSE))
anscombe_y <- alt$Y("Y", scale = alt$Scale(zero = FALSE))

chart <- alt$Chart(vega_data$anscombe())$mark_circle()
chart <- chart$encode(x = anscombe_x, y = anscombe_y, column = "Series")
chart <- chart$properties(width = 180, height = 180)

chart

Atmospheric CO2 Concentration

Altair example

Data

# Print a compact column-by-column overview of the CO2 concentration dataset.
vega_data$co2_concentration() %>% glimpse()
#> Rows: 713
#> Columns: 2
#> $ Date <chr> "1958-03-01", "1958-04-01", "1958-05-01", "1958-07-01", "1958-08…
#> $ CO2  <dbl> 315.70, 317.46, 317.51, 315.86, 314.93, 313.21, 313.33, 314.67, …

Chart

# Reference the dataset by its URL instead of passing a data frame.
source <- vega_data$co2_concentration$url

# Sort specification for the window transform: ascending by scaled_date.
co2_window_sort <- list(
  alt$EncodingSortField(field = "scaled_date", order = "ascending")
)

# Positional encodings shared by every layer built on `base`.
co2_x <- alt$X(
  "scaled_date:Q",
  axis = alt$Axis(title = "Year into Decade", tickCount = 11)
)
co2_y <- alt$Y(
  "CO2:Q",
  title = "CO2 concentration in ppm",
  scale = alt$Scale(zero = FALSE)
)

# Base chart: derived fields plus x/y encodings; no mark is chosen here.
base <- alt$Chart(source, title = "Carbon Dioxide in the Atmosphere")

# Derive the calendar year, the decade index, and the position within the
# decade (year within decade plus month fraction).
base <- base$transform_calculate(year = "year(datum.Date)")
base <- base$transform_calculate(decade = "floor(datum.year / 10)")
base <- base$transform_calculate(
  scaled_date = "(datum.year % 10) + (month(datum.Date)/12)"
)

# Within each decade, record the first and last scaled_date.
# NOTE(review): frame = list(NULL, NULL) presumably makes the window span the
# whole group -- confirm against the Vega-Lite window transform docs.
base <- base$transform_window(
  first_date = "first_value(scaled_date)",
  last_date = "last_value(scaled_date)",
  sort = co2_window_sort,
  groupby = list("decade"),
  frame = list(NULL, NULL)
)

# Tag each row as the 'first' or 'last' point of its decade, or null otherwise.
base <- base$transform_calculate(
  end = "datum.first_date === datum.scaled_date ? 'first' : datum.last_date === datum.scaled_date ? 'last' : null"
)

base <- base$encode(x = co2_x, y = co2_y)

# Line layer: colour by decade using the "magma" scheme; legend = NULL is
# passed through to alt$Color.
decade_color <- alt$Color(
  "decade:O",
  scale = alt$Scale(scheme = "magma"),
  legend = NULL
)

line <- base$mark_line()$encode(color = decade_color)

# Text layers: label rows with their year, keeping only the rows that the
# `end` field marks as 'first' or 'last'.
text <- base$encode(text = "year:N")

# Labels for rows tagged 'first', drawn with a "top" baseline.
start_year <- text$transform_filter("datum.end == 'first'")
start_year <- start_year$mark_text(baseline = "top")

# Labels for rows tagged 'last', drawn with a "bottom" baseline.
end_year <- text$transform_filter("datum.end == 'last'")
end_year <- end_year$mark_text(baseline = "bottom")

# Combine the line and the two label layers with `+`, then set the text
# configuration and the overall chart size.
chart <- (line + start_year + end_year)$
  configure_text(align = "left", dx = 1, dy = 3)$
  properties(width = 600, height = 375)

chart

Becker’s Barley Trellis Plot

Altair example

Data

# Print a compact column-by-column overview of the barley dataset.
vega_data$barley() %>% glimpse()
#> Rows: 120
#> Columns: 4
#> $ yield   <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 4…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuri…
#> $ year    <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1…
#> $ site    <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Ra…

Chart

# Build the sort specification used by both the variety axis and the site
# rows: summed yield, descending. A fresh object is created on every call.
sort_by_total_yield <- function() {
  alt$EncodingSortField(field = "yield", op = "sum", order = "descending")
}

# x: yield, without forcing zero into the scale and without grid lines.
barley_x <- alt$X(
  "yield",
  title = "Barley Yield (bushels/acre)",
  scale = alt$Scale(zero = FALSE),
  axis = alt$Axis(grid = FALSE)
)

# y: variety, untitled, with grid lines.
barley_y <- alt$Y(
  "variety",
  title = "",
  sort = sort_by_total_yield(),
  axis = alt$Axis(grid = TRUE)
)

barley_color <- alt$Color("year:N", legend = alt$Legend(title = "Year"))

# One row per site, untitled.
barley_row <- alt$Row("site:N", title = "", sort = sort_by_total_yield())

chart <- alt$Chart(vega_data$barley())$mark_point()
chart <- chart$encode(
  x = barley_x,
  y = barley_y,
  color = barley_color,
  row = barley_row
)
chart <- chart$properties(height = alt$Step(20))
chart <- chart$configure_view(stroke = "transparent")

chart

Cumulative Wikipedia Donations

Altair example

This chart shows cumulative donations to Wikipedia over the past 10 years. Inspired by this Reddit post but using lines instead of areas.

Data

# Download the daily donation figures. read_csv() parses `date` as a Date
# column; it is converted back to character strings here.
# NOTE(review): presumably so the values reach Altair as plain date strings
# -- confirm.
data <- mutate(
  read_csv("https://frdata.wikimedia.org/donationdata-vs-day.csv"),
  date = as.character(date)
)
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   date = col_date(format = ""),
#>   sum = col_double(),
#>   refund_sum = col_double(),
#>   donations = col_double(),
#>   refunds = col_double(),
#>   avg = col_double(),
#>   max = col_double(),
#>   ytdsum = col_double(),
#>   ytdloss = col_double()
#> )

# Print a compact column-by-column overview of the donation data.
data %>% glimpse()
#> Rows: 4,434
#> Columns: 9
#> $ date       <chr> "2007-12-31", "2008-11-03", "2008-11-04", "2008-11-05", "2…
#> $ sum        <dbl> 250.00, 24.00, 2882.16, 97295.89, 97362.30, 75486.06, 6515…
#> $ refund_sum <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
#> $ donations  <dbl> 1, 6, 73, 3715, 3537, 2823, 2334, 2415, 2142, 2046, 1863, …
#> $ refunds    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
#> $ avg        <dbl> 250.00000, 4.00000, 39.48164, 26.19001, 27.52680, 26.73966…
#> $ max        <dbl> 250.00, 10.00, 375.00, 1287.00, 2500.00, 1000.00, 1000.00,…
#> $ ytdsum     <dbl> 250.00, 24.00, 2906.16, 100202.05, 197564.35, 273050.41, 3…
#> $ ytdloss    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…

Chart

# Cumulative donations over the course of a year, one line per calendar year.
chart <-
  alt$Chart(data)$
  mark_line()$
  encode(
    # x: `date` with the "monthdate" time unit; ticks formatted as month names.
    x = alt$X(
      "date:T",
      timeUnit = "monthdate",
      axis = alt$Axis(format = "%B", title = "Month")
    ),
    # y: the maximum year-to-date sum for each x position, unstacked.
    y = alt$Y(
      "max(ytdsum):Q",
      stack = NULL,
      axis = alt$Axis(title = "Cumulative Donations")
    ),
    # colour: one ordinal category per year of `date`.
    color = alt$Color(
      "date:O",
      timeUnit = "year",
      legend = alt$Legend(title = "Year")
    ),
    # order: must reference the `date` column, the same field the colour
    # channel uses; the data frame has no column named `data`.
    order = alt$Order("date:O", timeUnit = "year")
  )

chart