Case Studies

This document is adapted from the Case Studies section of the Altair Example Gallery.

Our first step is to set up our environment:

library("altair")
library("tibble")
library("dplyr")
library("readr")
library("jsonlite")
library("stringr")

vega_data <- import_vega_data()

Anscombe’s Quartet

Altair example

Data

glimpse(vega_data$anscombe())
#> Rows: 44
#> Columns: 3
#> $ Series <chr> "I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "I", "II", "I…
#> $ X      <dbl> 10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5, 10, 8, 13, 9, 11, 14, 6, …
#> $ Y      <dbl> 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.81, 5.…

Chart

chart <- 
  alt$Chart(vega_data$anscombe())$
  mark_circle()$
  encode(
    x = alt$X("X", scale = alt$Scale(zero = FALSE)),
    y = alt$Y("Y", scale = alt$Scale(zero = FALSE)),
    column = "Series"
  )$
  properties(width = 180, height = 180)

chart

Atmospheric CO2 Concentration

Altair example

Data

glimpse(vega_data$co2_concentration())
#> Rows: 713
#> Columns: 2
#> $ Date <chr> "1958-03-01", "1958-04-01", "1958-05-01", "1958-07-01", "1958-08-…
#> $ CO2  <dbl> 315.70, 317.46, 317.51, 315.86, 314.93, 313.21, 313.33, 314.67, 3…

Chart

source <- vega_data$co2_concentration$url

base <-
  alt$Chart(source, title="Carbon Dioxide in the Atmosphere")$
    transform_calculate(year = "year(datum.Date)")$
    transform_calculate(decade = "floor(datum.year / 10)")$
    transform_calculate(scaled_date = "(datum.year % 10) + (month(datum.Date)/12)")$
    transform_window(
      first_date = "first_value(scaled_date)",
      last_date = "last_value(scaled_date)",
      sort = list(alt$EncodingSortField(
        field = "scaled_date", # field to use for the sort
        order = "ascending"    # order to sort in
      )
    ),
    groupby = list("decade"),
    frame = list(NULL, NULL)
  )$transform_calculate(
     end = "datum.first_date === datum.scaled_date ? 'first' : datum.last_date === datum.scaled_date ? 'last' : null"
  )$encode(
    x = alt$X(
      "scaled_date:Q",
      axis = alt$Axis(title = "Year into Decade", tickCount = 11)
    ),
    y=alt$Y(
      "CO2:Q",
      title = "CO2 concentration in ppm",
      scale = alt$Scale(zero = FALSE)
    )
  )

line <- 
  base$
  mark_line()$
  encode(
    color=alt$Color(
      "decade:O",
      scale = alt$Scale(scheme = "magma"),
      legend = NULL
    )
  )

text <- base$encode(text = "year:N")

start_year <- 
  text$
    transform_filter("datum.end == 'first'")$
    mark_text(baseline = "top")

end_year <- 
  text$
  transform_filter("datum.end == 'last'")$
  mark_text(baseline="bottom")

chart <- (line + start_year + end_year)

chart <- 
  chart$
    configure_text(align = "left", dx = 1, dy = 3)$
    properties(width = 600, height = 375)

#chart$to_json(validate = FALSE) %>% as_vegaspec()
chart

Becker’s Barley Trellis Plot

Altair example

Data

glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield   <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year    <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site    <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…

Chart

chart <- 
  alt$Chart(vega_data$barley())$
  mark_point()$
  encode(
    x = alt$X(
      "yield", 
      title = "Barley Yield (bushels/acre)",
      scale = alt$Scale(zero = FALSE),
      axis = alt$Axis(grid = FALSE)
    ),
    y = alt$Y(
      "variety",
      title = "",
      sort = alt$EncodingSortField(
        field = "yield", 
        op = "sum", 
        order = "descending"
      ),
      axis = alt$Axis(grid = TRUE)
    ),
    color = alt$Color("year:N", legend = alt$Legend(title = "Year")),
    row=alt$Row(
      "site:N",
      title = "",
      sort = alt$EncodingSortField(
        field = "yield", 
        op = "sum", 
        order = "descending"
      )
    )
  )$
  properties(height = alt$Step(20))$
  configure_view(stroke = "transparent")

chart

Cumulative Wikipedia Donations

Altair example

This chart shows cumulative donations to Wikipedia over the past 10 years. Inspired by this Reddit post but using lines instead of areas.

Data

data <- 
  read_csv("https://frdata.wikimedia.org/donationdata-vs-day.csv") %>%
  mutate(date = as.character(date))
#> Rows: 5397 Columns: 9
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl  (8): sum, refund_sum, donations, refunds, avg, max, ytdsum, ytdloss
#> date (1): date
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

glimpse(data)
#> Rows: 5,397
#> Columns: 9
#> $ date       <chr> "2007-12-31", "2008-11-03", "2008-11-04", "2008-11-05", "20…
#> $ sum        <dbl> 250.00, 24.00, 2882.16, 97295.89, 97362.30, 75486.06, 65156…
#> $ refund_sum <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ donations  <dbl> 1, 6, 73, 3715, 3537, 2823, 2334, 2415, 2142, 2046, 1863, 1…
#> $ refunds    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ avg        <dbl> 250.00000, 4.00000, 39.48164, 26.19001, 27.52680, 26.73966,…
#> $ max        <dbl> 250.00, 10.00, 375.00, 1287.00, 2500.00, 1000.00, 1000.00, …
#> $ ytdsum     <dbl> 250.00, 24.00, 2906.16, 100202.05, 197564.35, 273050.41, 33…
#> $ ytdloss    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

Chart

alt$data_transformers$disable_max_rows()
#> DataTransformerRegistry.enable('default')

chart <- 
  alt$Chart(data)$
  mark_line()$
  encode(
    x = alt$X(
      "date:T", 
      timeUnit = "monthdate",
      axis = alt$Axis(format = "%B", title = "Month")
    ),
    y = alt$Y(
      "max(ytdsum):Q", 
      stack = NULL,
      axis = alt$Axis(title = "Cumulative Donations")
    ),
    color = alt$Color(
      "date:O", 
      timeUnit = "year", 
      legend = alt$Legend(title = "Year")
    ),
    order = alt$Order("data:O", timeUnit = "year")
  )
chart

Falkensee

Altair example

Data

Definition

source <- fromJSON('[
    {"year": "1875", "population": 1309},
    {"year": "1890", "population": 1558},
    {"year": "1910", "population": 4512},
    {"year": "1925", "population": 8180},
    {"year": "1933", "population": 15915},
    {"year": "1939", "population": 24824},
    {"year": "1946", "population": 28275},
    {"year": "1950", "population": 29189},
    {"year": "1964", "population": 29881},
    {"year": "1971", "population": 26007},
    {"year": "1981", "population": 24029},
    {"year": "1985", "population": 23340},
    {"year": "1989", "population": 22307},
    {"year": "1990", "population": 22087},
    {"year": "1991", "population": 22139},
    {"year": "1992", "population": 22105},
    {"year": "1993", "population": 22242},
    {"year": "1994", "population": 22801},
    {"year": "1995", "population": 24273},
    {"year": "1996", "population": 25640},
    {"year": "1997", "population": 27393},
    {"year": "1998", "population": 29505},
    {"year": "1999", "population": 32124},
    {"year": "2000", "population": 33791},
    {"year": "2001", "population": 35297},
    {"year": "2002", "population": 36179},
    {"year": "2003", "population": 36829},
    {"year": "2004", "population": 37493},
    {"year": "2005", "population": 38376},
    {"year": "2006", "population": 39008},
    {"year": "2007", "population": 39366},
    {"year": "2008", "population": 39821},
    {"year": "2009", "population": 40179},
    {"year": "2010", "population": 40511},
    {"year": "2011", "population": 40465},
    {"year": "2012", "population": 40905},
    {"year": "2013", "population": 41258},
    {"year": "2014", "population": 41777}
  ]')

source2 <- fromJSON('[
    {
      "start": "1933",
      "end": "1945",
      "event": "Nazi Rule"
    },
    {
      "start": "1948",
      "end": "1989",
      "event": "GDR (East Germany)"
    }
  ]')

glimpse(source)
#> Rows: 38
#> Columns: 2
#> $ year       <chr> "1875", "1890", "1910", "1925", "1933", "1939", "1946", "19…
#> $ population <int> 1309, 1558, 4512, 8180, 15915, 24824, 28275, 29189, 29881, …

glimpse(source2)
#> Rows: 2
#> Columns: 3
#> $ start <chr> "1933", "1948"
#> $ end   <chr> "1945", "1989"
#> $ event <chr> "Nazi Rule", "GDR (East Germany)"

Chart

line <-
  alt$Chart(source)$
  mark_line(color="#333")$
  encode(
    x = alt$X("year:T", axis = alt$Axis(format = "%Y")),
    y = "population"
  )$
  properties(
    width = 500,
    height = 300
  )
point <- line$mark_point(color="#333")
rect <- 
  alt$Chart(source2)$
  mark_rect()$
  encode(
    x = "start:T",
    x2 = "end:T",
    color = "event:N"
  )
chart <- (rect + line + point)
chart

Gapminder Bubble Plot

Altair example

This example shows how to make a bubble plot showing the correlation between health and income for 187 countries in the world (modified from an example in Lisa Charlotte Rost’s blog post ‘One Chart, Twelve Charting Libraries’).

Data

glimpse(vega_data$gapminder_health_income())
#> Rows: 187
#> Columns: 4
#> $ country    <chr> "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "…
#> $ income     <dbl> 1925, 10620, 13434, 46577, 7615, 21049, 17344, 7763, 44056,…
#> $ health     <dbl> 57.63, 76.00, 76.50, 84.10, 61.00, 75.20, 76.20, 74.40, 81.…
#> $ population <dbl> 32526562, 2896679, 39666519, 70473, 25021974, 91818, 434167…

Chart

chart <- 
  alt$Chart(vega_data$gapminder_health_income$url)$
  mark_circle()$
  encode(
    alt$X("income:Q", scale = alt$Scale(type = "log")), 
    alt$Y("health:Q", scale = alt$Scale(zero = FALSE)), 
    size = "population:Q"
  )
chart

Iowa’s renewable energy boom

Altair example

This example is a fully developed stacked chart using the sample dataset of Iowa’s electricity sources.

Data

glimpse(vega_data$iowa_electricity())
#> Rows: 51
#> Columns: 3
#> $ year           <dttm> 2001-01-01, 2002-01-01, 2003-01-01, 2004-01-01, 2005-0…
#> $ source         <chr> "Fossil Fuels", "Fossil Fuels", "Fossil Fuels", "Fossil…
#> $ net_generation <dbl> 35361, 35991, 36234, 36205, 36883, 37014, 41389, 42734,…

Chart

source = vega_data$iowa_electricity()

chart <- 
  alt$Chart(
    source, 
    title="Iowa's renewable energy boom"
  )$
  mark_area()$
  encode(
    x=alt$X(
      "year:T",
      title = "Year"
    ),
    y=alt$Y(
      "net_generation:Q",
      stack = "normalize",
      title = "Share of net generation",
      axis = alt$Axis(format = ".0%")
    ),
    color=alt$Color(
      "source:N",
      legend = alt$Legend(title = "Electricity source")
    )
  )

chart

Isotype Visualization

Altair example

Isotype Visualization shows the distribution of animals across UK and US. Inspired by Only An Ocean Between, 1943. Population Live Stock, p.13. This is adapted from a [Vega-Lite example])https://vega.github.io/editor/#/examples/vega-lite/isotype_bar_chart).

Data

Definition

source <- fromJSON('[
      {"country": "Great Britain", "animal": "cattle"},
      {"country": "Great Britain", "animal": "cattle"},
      {"country": "Great Britain", "animal": "cattle"},
      {"country": "Great Britain", "animal": "pigs"},
      {"country": "Great Britain", "animal": "pigs"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"}
  ]')

glimpse(source)
#> Rows: 37
#> Columns: 2
#> $ country <chr> "Great Britain", "Great Britain", "Great Britain", "Great Brit…
#> $ animal  <chr> "cattle", "cattle", "cattle", "pigs", "pigs", "sheep", "sheep"…

Chart

Scales

domains = list('person', 'cattle', 'pigs', 'sheep')

shape_scale = alt$Scale(
  domain=domains,
  range=list(
    'M1.7 -1.7h-0.8c0.3 -0.2 0.6 -0.5 0.6 -0.9c0 -0.6 -0.4 -1 -1 -1c-0.6 0 -1 0.4 -1 1c0 0.4 0.2 0.7 0.6 0.9h-0.8c-0.4 0 -0.7 0.3 -0.7 0.6v1.9c0 0.3 0.3 0.6 0.6 0.6h0.2c0 0 0 0.1 0 0.1v1.9c0 0.3 0.2 0.6 0.3 0.6h1.3c0.2 0 0.3 -0.3 0.3 -0.6v-1.8c0 0 0 -0.1 0 -0.1h0.2c0.3 0 0.6 -0.3 0.6 -0.6v-2c0.2 -0.3 -0.1 -0.6 -0.4 -0.6z',
    'M4 -2c0 0 0.9 -0.7 1.1 -0.8c0.1 -0.1 -0.1 0.5 -0.3 0.7c-0.2 0.2 1.1 1.1 1.1 1.2c0 0.2 -0.2 0.8 -0.4 0.7c-0.1 0 -0.8 -0.3 -1.3 -0.2c-0.5 0.1 -1.3 1.6 -1.5 2c-0.3 0.4 -0.6 0.4 -0.6 0.4c0 0.1 0.3 1.7 0.4 1.8c0.1 0.1 -0.4 0.1 -0.5 0c0 0 -0.6 -1.9 -0.6 -1.9c-0.1 0 -0.3 -0.1 -0.3 -0.1c0 0.1 -0.5 1.4 -0.4 1.6c0.1 0.2 0.1 0.3 0.1 0.3c0 0 -0.4 0 -0.4 0c0 0 -0.2 -0.1 -0.1 -0.3c0 -0.2 0.3 -1.7 0.3 -1.7c0 0 -2.8 -0.9 -2.9 -0.8c-0.2 0.1 -0.4 0.6 -0.4 1c0 0.4 0.5 1.9 0.5 1.9l-0.5 0l-0.6 -2l0 -0.6c0 0 -1 0.8 -1 1c0 0.2 -0.2 1.3 -0.2 1.3c0 0 0.3 0.3 0.2 0.3c0 0 -0.5 0 -0.5 0c0 0 -0.2 -0.2 -0.1 -0.4c0 -0.1 0.2 -1.6 0.2 -1.6c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 0 -2.7 -0.2 -2.7c-0.1 0 -0.4 2 -0.4 2c0 0 0 0.2 -0.2 0.5c-0.1 0.4 -0.2 1.1 -0.2 1.1c0 0 -0.2 -0.1 -0.2 -0.2c0 -0.1 -0.1 -0.7 0 -0.7c0.1 -0.1 0.3 -0.8 0.4 -1.4c0 -0.6 0.2 -1.3 0.4 -1.5c0.1 -0.2 0.6 -0.4 0.6 -0.4z',
    'M1.2 -2c0 0 0.7 0 1.2 0.5c0.5 0.5 0.4 0.6 0.5 0.6c0.1 0 0.7 0 0.8 0.1c0.1 0 0.2 0.2 0.2 0.2c0 0 -0.6 0.2 -0.6 0.3c0 0.1 0.4 0.9 0.6 0.9c0.1 0 0.6 0 0.6 0.1c0 0.1 0 0.7 -0.1 0.7c-0.1 0 -1.2 0.4 -1.5 0.5c-0.3 0.1 -1.1 0.5 -1.1 0.7c-0.1 0.2 0.4 1.2 0.4 1.2l-0.4 0c0 0 -0.4 -0.8 -0.4 -0.9c0 -0.1 -0.1 -0.3 -0.1 -0.3l-0.2 0l-0.5 1.3l-0.4 0c0 0 -0.1 -0.4 0 -0.6c0.1 -0.1 0.3 -0.6 0.3 -0.7c0 0 -0.8 0 -1.5 -0.1c-0.7 -0.1 -1.2 -0.3 -1.2 -0.2c0 0.1 -0.4 0.6 -0.5 0.6c0 0 0.3 0.9 0.3 0.9l-0.4 0c0 0 -0.4 -0.5 -0.4 -0.6c0 -0.1 -0.2 -0.6 -0.2 -0.5c0 0 -0.4 0.4 -0.6 0.4c-0.2 0.1 -0.4 0.1 -0.4 0.1c0 0 -0.1 0.6 -0.1 0.6l-0.5 0l0 -1c0 0 0.5 -0.4 0.5 -0.5c0 -0.1 -0.7 -1.2 -0.6 -1.4c0.1 -0.1 0.1 -1.1 0.1 -1.1c0 0 -0.2 0.1 -0.2 0.1c0 0 0 0.9 0 1c0 0.1 -0.2 0.3 -0.3 0.3c-0.1 0 0 -0.5 0 -0.9c0 -0.4 0 -0.4 0.2 -0.6c0.2 -0.2 0.6 -0.3 0.8 -0.8c0.3 -0.5 1 -0.6 1 -0.6z',
    'M-4.1 -0.5c0.2 0 0.2 0.2 0.5 0.2c0.3 0 0.3 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.5 -0.2c0.2 0 0.2 0.2 0.4 0.2c0.2 0 0.2 -0.2 0.4 -0.2c0.1 0 0.2 0.2 0.4 0.1c0.2 0 0.2 -0.2 0.4 -0.3c0.1 0 0.1 -0.1 0.4 0c0.3 0 0.3 -0.4 0.6 -0.4c0.3 0 0.6 -0.3 0.7 -0.2c0.1 0.1 1.4 1 1.3 1.4c-0.1 0.4 -0.3 0.3 -0.4 0.3c-0.1 0 -0.5 -0.4 -0.7 -0.2c-0.3 0.2 -0.1 0.4 -0.2 0.6c-0.1 0.1 -0.2 0.2 -0.3 0.4c0 0.2 0.1 0.3 0 0.5c-0.1 0.2 -0.3 0.2 -0.3 0.5c0 0.3 -0.2 0.3 -0.3 0.6c-0.1 0.2 0 0.3 -0.1 0.5c-0.1 0.2 -0.1 0.2 -0.2 0.3c-0.1 0.1 0.3 1.1 0.3 1.1l-0.3 0c0 0 -0.3 -0.9 -0.3 -1c0 -0.1 -0.1 -0.2 -0.3 -0.2c-0.2 0 -0.3 0.1 -0.4 0.4c0 0.3 -0.2 0.8 -0.2 0.8l-0.3 0l0.3 -1c0 0 0.1 -0.6 -0.2 -0.5c-0.3 0.1 -0.2 -0.1 -0.4 -0.1c-0.2 -0.1 -0.3 0.1 -0.4 0c-0.2 -0.1 -0.3 0.1 -0.5 0c-0.2 -0.1 -0.1 0 -0.3 0.3c-0.2 0.3 -0.4 0.3 -0.4 0.3l0.2 1.1l-0.3 0l-0.2 -1.1c0 0 -0.4 -0.6 -0.5 -0.4c-0.1 0.3 -0.1 0.4 -0.3 0.4c-0.1 -0.1 -0.2 1.1 -0.2 1.1l-0.3 0l0.2 -1.1c0 0 -0.3 -0.1 -0.3 -0.5c0 -0.3 0.1 -0.5 0.1 -0.7c0.1 -0.2 -0.1 -1 -0.2 -1.1c-0.1 -0.2 -0.2 -0.8 -0.2 -0.8c0 0 -0.1 -0.5 0.4 -0.8z'
  ))

color_scale <- alt$Scale(
  domain=domains,
  range=list('rgb(162,160,152)', 'rgb(194,81,64)', 'rgb(93,93,93)', 'rgb(91,131,149)')
)

chart <- 
  alt$Chart(source)$
  mark_point(filled = TRUE, opacity = 1, size = 100)$
  encode(
    alt$X("x:O", axis = NULL),
    alt$Y("animal:O", axis = NULL),
    alt$Row("country:N", header = alt$Header(title = "")),
    alt$Shape("animal:N", legend = NULL, scale = shape_scale),
    alt$Color("animal:N", legend = NULL, scale = color_scale)
  )$
  transform_window(
    x="rank()",
    groupby = list("country", "animal")
  )$
  properties(width = 550, height = 140)

chart

Isotype Visualization with Emoji

Altair example

Isotype Visualization shows the distribution of animals across UK and US, using unicode emoji marks rather than custom SVG paths (see ). This is adapted from Vega-Lite example .

Data

Definition

source <- fromJSON('[
      {"country": "Great Britain", "animal": "cattle"},
      {"country": "Great Britain", "animal": "cattle"},
      {"country": "Great Britain", "animal": "cattle"},
      {"country": "Great Britain", "animal": "pigs"},
      {"country": "Great Britain", "animal": "pigs"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "Great Britain", "animal": "sheep"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "cattle"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "pigs"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"},
      {"country": "United States", "animal": "sheep"}
  ]')

glimpse(source)
#> Rows: 37
#> Columns: 2
#> $ country <chr> "Great Britain", "Great Britain", "Great Britain", "Great Brit…
#> $ animal  <chr> "cattle", "cattle", "cattle", "pigs", "pigs", "sheep", "sheep"…

Chart

chart <- 
  alt$Chart(source)$
  mark_text(size = 45, baseline = "middle")$
  encode(
    x = alt$X("x:O", axis = NULL),
    y = alt$Y("animal:O", axis = NULL),
    text = alt$Text("emoji:N"),
    row = alt$Row("country:N", header = alt$Header(title = ""))
  )$
  transform_calculate(
    emoji = "{'cattle': '🐄', 'pigs': '🐖', 'sheep': '🐏'}[datum.animal]"
  )$
  transform_window(
    x = "rank()",
    groupby = list("country", "animal")
  )$
  properties(width=550, height=140)

chart

Locations of US Airports

Altair example

Data

us <- vega_data$us_10m$url

airports <- vega_data$airports()

glimpse(airports)
#> Rows: 3,376
#> Columns: 7
#> $ iata      <chr> "00M", "00R", "00V", "01G", "01J", "01M", "02A", "02C", "02G…
#> $ name      <chr> "Thigpen", "Livingston Municipal", "Meadow Lake", "Perry-War…
#> $ city      <chr> "Bay Springs", "Livingston", "Colorado Springs", "Perry", "H…
#> $ state     <chr> "MS", "TX", "CO", "NY", "FL", "MS", "AL", "WI", "OH", "MO", …
#> $ country   <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA…
#> $ latitude  <dbl> 31.95376, 30.68586, 38.94575, 42.74135, 30.68801, 34.49167, …
#> $ longitude <dbl> -89.23450, -95.01793, -104.56989, -78.05208, -81.90594, -88.…

Chart

states <- alt$topo_feature(us, feature = "states")
# US states background
background <-
  alt$Chart(states)$
  mark_geoshape(
    fill = "lightgray",
    stroke = "white"
  )$
  properties(width = 500, height = 300)$
  project("albersUsa")
# airport positions on background
points <- 
  alt$Chart(airports)$
  mark_circle()$
  encode(
    longitude = "longitude:Q",
    latitude = "latitude:Q",
    size = alt$value(10),
    color = alt$value("steelblue")
  )
chart <- (background + points)
chart

London Tube Lines

Altair example

This example shows the London tube lines against the background of the borough boundaries. It is based on the Vega-Lite example.

Data

boroughs <- alt$topo_feature(vega_data$londonBoroughs$url, "boroughs")
tubelines <- alt$topo_feature(vega_data$londonTubeLines$url, "line")
glimpse(vega_data$londonCentroids())
#> Rows: 33
#> Columns: 3
#> $ name <chr> "Kingston upon Thames", "Croydon", "Bromley", "Hounslow", "Ealing…
#> $ cx   <dbl> -0.28691466, -0.08716546, 0.05153659, -0.36715164, -0.33101932, 0…
#> $ cy   <dbl> 51.38789, 51.35532, 51.37198, 51.46838, 51.52248, 51.56437, 51.54…

Chart

domain_line <- 
  list("Bakerloo", "Central", "Circle", "District", "DLR", 
       "Hammersmith & City", "Jubilee", "Metropolitan", "Northern",
       "Piccadilly", "Victoria", "Waterloo & City" )

range_line <- 
  list("rgb(137,78,36)", "rgb(220,36,30)", "rgb(255,206,0)",
       "rgb(1,114,41)", "rgb(0,175,173)", "rgb(215,153,175)",
       "rgb(106,114,120)", "rgb(114,17,84)", "rgb(0,0,0)",
       "rgb(0,24,168)", "rgb(0,160,226)", "rgb(106,187,170)")

background <-
  alt$Chart(boroughs)$
  mark_geoshape(stroke = "white", strokeWidth = 2)$
  encode(
    color = alt$value("#eee")
  )$
  properties(width = 700, height = 500)
labels <- 
  alt$Chart(vega_data$londonCentroids$url)$
  mark_text()$
  encode(
    longitude = "cx:Q",
    latitude = "cy:Q",
    text = "bLabel:N",
    size = alt$value(8),
    opacity = alt$value(0.6)
  )$
  transform_calculate("bLabel", "indexof (datum.name,' ') > 0  ? substring(datum.name,0,indexof(datum.name, ' ')) : datum.name")

line_scale <- 
  alt$Scale(domain = domain_line, range = range_line)

lines <- 
  alt$Chart(tubelines)$
  mark_geoshape(filled = FALSE, strokeWidth = 2)$
  encode(
    color = alt$Color(
      "id:N",
      legend = alt$Legend(
        title = NULL,
        orient = "bottom-right",
        offset = 0
      )
    )
  )

chart <- (background + labels + lines)
chart

Natural Disasters

Altair example

Data

glimpse(vega_data$disasters())
#> Rows: 803
#> Columns: 3
#> $ Entity <chr> "All natural disasters", "All natural disasters", "All natural …
#> $ Year   <dbl> 1900, 1901, 1902, 1903, 1905, 1906, 1907, 1908, 1909, 1910, 191…
#> $ Deaths <dbl> 1267360, 200018, 46037, 6506, 22758, 42970, 1325641, 75033, 151…

Chart

chart <- 
  alt$Chart(vega_data$disasters$url)$
  mark_circle(opacity = 0.8, stroke = "black", strokeWidth = 1)$
  encode(
    x = alt$X("Year:O", axis = alt$Axis(labelAngle = 0)),
    y = alt$Y("Entity:N"),
    size = alt$Size(
      "Deaths:Q",
      scale = alt$Scale(range = c(0, 5000)),
      legend = alt$Legend(title = "Annual Global Deaths")
    ),
    color = alt$Color("Entity:N", legend = NULL)
  )$
  properties(
    width = 480,
    height = 320
  )$
  transform_filter("datum.Entity != 'All natural disasters'")
chart

One Dot Per Zipcode

Altair example

If you are building a chart using a local data frame, the default is that there has to be less than 5000 observations. If you build a chart using a URL (local or remote), there is no such default.

To modify this default:

alt$data_transformers$enable('default', max_rows=<your new max>)

Data

glimpse(vega_data$zipcodes())
#> Rows: 42,049
#> Columns: 6
#> $ zip_code  <chr> "00501", "00544", "00601", "00602", "00603", "00604", "00605…
#> $ latitude  <dbl> 40.92233, 40.92233, 18.16527, 18.39310, 18.45591, 18.49352, …
#> $ longitude <dbl> -72.63708, -72.63708, -66.72258, -67.18095, -67.14578, -67.1…
#> $ city      <chr> "Holtsville", "Holtsville", "Adjuntas", "Aguada", "Aguadilla…
#> $ state     <chr> "NY", "NY", "PR", "PR", "PR", "PR", "PR", "PR", "PR", "PR", …
#> $ county    <chr> "Suffolk", "Suffolk", "Adjuntas", "Aguada", "Aguadilla", "Ag…

Chart

chart <- 
  alt$Chart(vega_data$zipcodes$url)$
  mark_circle(size = 3)$
  encode(
    longitude = "longitude:Q",
    latitude = "latitude:Q",
    color = "digit:N"
  )$
  properties(
    projection = list(type = "albersUsa"),
    width = 650,
    height = 400
  )$
  transform_calculate("digit", "substring(datum.zip_code, 0, 1)")
chart

Seattle Weather Heatmap

Altair Example

This example shows the 2010 daily high temperature (F) in Seattle, WA.

Data

glimpse(vega_data$seattle_temps())
#> Rows: 8,759
#> Columns: 2
#> $ date <dttm> 2010-01-01 00:00:00, 2010-01-01 01:00:00, 2010-01-01 02:00:00, 2…
#> $ temp <dbl> 39.4, 39.2, 39.0, 38.9, 38.8, 38.7, 38.7, 38.6, 38.7, 39.2, 40.1,…

Chart

# Since the data is more than 5,000 rows we'll import it from a URL
source <- vega_data$seattle_temps$url

chart <-
  alt$Chart(
    source,
    title = "2010 Daily High Temperature (F) in Seattle, WA"
  )$
  mark_rect()$
  encode(
    x = "date(date):O",
    y = "month(date):O",
    color = alt$Color("max(temp):Q", scale = alt$Scale(scheme = "inferno")),
    tooltip = list(
      alt$Tooltip("monthdate(date):T", title = "Date"),
      alt$Tooltip("max(temp):Q", title = "Max Temp")
    )
  )$
  properties(width = 550)

chart

Seattle Weather Interactive

Altair example

This chart provides an interactive exploration of Seattle weather over the course of the year. It includes a one-axis brush selection to easily see the distribution of weather types in a particular date range. #### Data

glimpse(vega_data$seattle_weather())
#> Rows: 1,461
#> Columns: 6
#> $ date          <dttm> 2012-01-01, 2012-01-02, 2012-01-03, 2012-01-04, 2012-01…
#> $ precipitation <dbl> 0.0, 10.9, 0.8, 20.3, 1.3, 2.5, 0.0, 0.0, 4.3, 1.0, 0.0,…
#> $ temp_max      <dbl> 12.8, 10.6, 11.7, 12.2, 8.9, 4.4, 7.2, 10.0, 9.4, 6.1, 6…
#> $ temp_min      <dbl> 5.0, 2.8, 7.2, 5.6, 2.8, 2.2, 2.8, 2.8, 5.0, 0.6, -1.1, …
#> $ wind          <dbl> 4.7, 4.5, 2.3, 4.7, 6.1, 2.2, 2.3, 2.0, 3.4, 3.4, 5.1, 1…
#> $ weather       <chr> "drizzle", "rain", "rain", "rain", "rain", "rain", "rain…

Chart

scale <- alt$Scale(
  domain = list("sun", "fog", "drizzle", "rain", "snow"),
  range = list("#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd")
)

color <- alt$Color("weather:N", scale = scale)

# We create two selections:
# - a brush that is active on the top panel
# - a multi-click that is active on the bottom panel
brush <- alt$selection_interval(encodings = list("x"))
click <- alt$selection_multi(encodings = list("color"))

# Top panel is scatter plot of temperature vs time
points <- 
  alt$Chart(data = vega_data$seattle_weather())$
  mark_point()$
  encode(
    x = alt$X(
      "date:T", 
      timeUnit = "monthdate", 
      axis = alt$Axis(title = "Date")
    ),
    alt$Y(
      "temp_max:Q",
      axis = alt$Axis(title = "Maximum Daily Temperature (C)"),
      scale = alt$Scale(domain = list(-5, 40))
    ),
    color = alt$condition(brush, color, alt$value("lightgray")),
    size = alt$Size("precipitation:Q", scale = alt$Scale(range = list(5, 200)))
  )$
  properties(width = 600, height = 300, selection = brush)$
  transform_filter(click)

# Bottom panel is a bar chart of weather type
bars <- 
  alt$Chart(data = vega_data$seattle_weather())$
  mark_bar()$
  encode(
    x = "count(weather)",
    y = "weather:N",
    color = alt$condition(click, color, alt$value("lightgray"))
  )$
  transform_filter(brush)$
  properties(width = 600, selection = click)

chart <- 
  (points & bars)$ 
  properties(title = "Seattle Weather: 2012-2015")  

chart

The U.S. employment crash during the Great Recession

Altair example

Data

glimpse(vega_data$us_employment())
#> Rows: 120
#> Columns: 24
#> $ month                              <chr> "2006-01-01", "2006-02-01", "2006-0…
#> $ nonfarm                            <dbl> 135450, 135762, 136059, 136227, 136…
#> $ private                            <dbl> 113603, 113884, 114156, 114308, 114…
#> $ goods_producing                    <dbl> 22467, 22535, 22572, 22631, 22597, …
#> $ service_providing                  <dbl> 112983, 113227, 113487, 113596, 113…
#> $ private_service_providing          <dbl> 91136, 91349, 91584, 91677, 91735, …
#> $ mining_and_logging                 <dbl> 656, 662, 669, 679, 681, 686, 690, …
#> $ construction                       <dbl> 7601, 7664, 7689, 7726, 7713, 7699,…
#> $ manufacturing                      <dbl> 14210, 14209, 14214, 14226, 14203, …
#> $ durable_goods                      <dbl> 8982, 8986, 9000, 9020, 9017, 9028,…
#> $ nondurable_goods                   <dbl> 5228, 5223, 5214, 5206, 5186, 5185,…
#> $ trade_transportation_utilties      <dbl> 26162, 26196, 26239, 26230, 26223, …
#> $ wholesale_trade                    <dbl> 5840.4, 5854.8, 5873.3, 5886.9, 589…
#> $ retail_trade                       <dbl> 15351.5, 15361.3, 15388.0, 15348.5,…
#> $ transportation_and_warehousing     <dbl> 4420.0, 4429.4, 4429.7, 4445.4, 445…
#> $ utilities                          <dbl> 549.8, 550.1, 547.5, 548.9, 548.3, …
#> $ information                        <dbl> 3052, 3052, 3055, 3046, 3039, 3036,…
#> $ financial_activities               <dbl> 8307, 8332, 8348, 8369, 8376, 8362,…
#> $ professional_and_business_services <dbl> 17299, 17365, 17438, 17462, 17512, …
#> $ education_and_health_services      <dbl> 17946, 17998, 18045, 18070, 18100, …
#> $ leisure_and_hospitality            <dbl> 12945, 12980, 13034, 13074, 13052, …
#> $ other_services                     <dbl> 5425, 5426, 5425, 5426, 5433, 5432,…
#> $ government                         <dbl> 21847, 21878, 21903, 21919, 21926, …
#> $ nonfarm_change                     <dbl> 282, 312, 297, 168, 31, 79, 206, 17…

Chart

source <- vega_data$us_employment()
presidents = fromJSON('[
    {
        "start": "2006-01-01",
        "end": "2009-01-19",
        "president": "Bush"
    },
    {
        "start": "2009-01-20",
        "end": "2015-12-31",
        "president": "Obama"
    }
    ]')

bars <-
  alt$Chart(
    source,
    title = "The U.S. employment crash during the Great Recession"
  )$mark_bar()$
  encode(
    x = alt$X("month:T", title = ""),
    y = alt$Y(
      "nonfarm_change:Q", 
      title = "Change in non-farm employment (in thousands)"
    ),
    color=alt$condition(
      "datum.nonfarm_change > 0",
      alt$value("steelblue"),
      alt$value("orange")
    )
  )

rule <-
  alt$Chart(presidents)$
  mark_rule(color = "black",strokeWidth = 2)$
  encode(
    x = "end:T"
  )$
  transform_filter('datum.president == "Bush"')

text <- 
  alt$Chart(presidents)$
  mark_text(
    align = "left",
    baseline = "middle",
    dx = 7,
    dy = -135,
    size = 11
  )$encode(
    x = "start:T",
    x2 = "end:T",
    text = "president",
    color = alt$value("#000000")
  )

chart <- (bars + rule + text)$properties(width = 600)

chart

Top K Items

Altair Example

This example shows how to use the window and transformation filter to display the Top items of a long list of items in decreasing order. The sorting of the x-axis is needed for vega-lite and for the example does not do anything since we already have a unique value. Here we sort the top 10 highest ranking movies of IMDB.

Data

glimpse(fromJSON(vega_data$movies$url))
#> Rows: 3,201
#> Columns: 16
#> $ Title                  <chr> "The Land Girls", "First Love, Last Rites", "I …
#> $ US_Gross               <int> 146083, 10876, 203134, 373615, 1009819, 24551, …
#> $ Worldwide_Gross        <dbl> 146083, 10876, 203134, 373615, 1087521, 2624551…
#> $ US_DVD_Sales           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ Production_Budget      <int> 8000000, 300000, 250000, 300000, 1000000, 16000…
#> $ Release_Date           <chr> "Jun 12 1998", "Aug 07 1998", "Aug 28 1998", "S…
#> $ MPAA_Rating            <chr> "R", "R", NA, NA, "R", NA, "R", "R", "R", NA, N…
#> $ Running_Time_min       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ Distributor            <chr> "Gramercy", "Strand", "Lionsgate", "Fine Line",…
#> $ Source                 <chr> NA, NA, NA, NA, "Original Screenplay", NA, NA, …
#> $ Major_Genre            <chr> NA, "Drama", "Comedy", "Comedy", "Drama", NA, N…
#> $ Creative_Type          <chr> NA, NA, NA, NA, "Contemporary Fiction", NA, NA,…
#> $ Director               <chr> NA, NA, NA, NA, NA, NA, "Christopher Nolan", NA…
#> $ Rotten_Tomatoes_Rating <int> NA, NA, NA, 13, 62, NA, NA, NA, 25, 86, 81, 84,…
#> $ IMDB_Rating            <dbl> 6.1, 6.9, 6.8, NA, 3.4, NA, 7.7, 3.8, 5.8, 7.0,…
#> $ IMDB_Votes             <int> 1071, 207, 865, NA, 165, NA, 15133, 353, 3275, …

Chart

source <- vega_data$movies$url

# Top 10 movies by IMBD rating
chart <-
  alt$Chart(source)$
  mark_bar()$
  encode(
    x = alt$X(
      "Title:N",
      sort = alt$EncodingSortField(
        field = "IMDB_Rating", 
        op = "mean", 
        order = "descending"
      )
    ),
    y = alt$Y("IMDB_Rating:Q"),
    color = alt$Color("IMDB_Rating:Q")
  )$
  transform_window(
    rank = "rank(IMDB_Rating)",
    sort = list(alt$SortField("IMDB_Rating", order = "descending"))
  )$
  transform_filter("datum.rank < 10")

chart

Top K Letters

Altair Example

This example shows how to use a window transform in order to display only the top K categories by number of entries. In this case, we rank the characters in the first paragraph of Dickens’ A Tale of Two Cities by number of occurrences.

Data

Definition

txt = "It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, we were all going direct to Heaven, we were all going direct the other way - in short, the period was so far like the present period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree of comparison only."


ttxt <- txt %>% 
  str_remove_all(pattern = "[[:punct:]]") %>%
  str_remove_all(' ') %>% 
  str_split(pattern = "")

source <- tibble(letters = ttxt[[1]])

glimpse(source)
#> Rows: 475
#> Columns: 1
#> $ letters <chr> "I", "t", "w", "a", "s", "t", "h", "e", "b", "e", "s", "t", "o…

Chart

chart <-
  alt$Chart(source)$
  transform_aggregate(count = "count()", groupby = list("letters"))$
  transform_window(
    rank = "rank(count)",
    sort = list(alt$SortField("count", order = "descending"))
  )$
  transform_filter("datum.rank < 10")$
  mark_bar()$
  encode(
    y = alt$Y(
      "letters:N",
      sort = alt$EncodingSortField(
        field = "count", 
        op = "sum", 
        order = "descending"
      )
    ),
    x = "count:Q"
  )

chart

U.S. state capitals overlayed on a map of the U.S

Altair example

Data

glimpse(vega_data$us_state_capitals())
#> Rows: 50
#> Columns: 4
#> $ lon   <dbl> -86.27912, -134.41974, -112.07384, -92.33112, -121.46893, -104.9…
#> $ lat   <dbl> 32.36154, 58.30194, 33.44846, 34.73601, 38.55560, 39.73917, 41.7…
#> $ state <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Color…
#> $ city  <chr> "Montgomery", "Juneau", "Phoenix", "Little Rock", "Sacramento", …

Chart

states <- alt$topo_feature(vega_data$us_10m$url, "states")
capitals <- vega_data$us_state_capitals$url

# US states background
background <- 
  alt$Chart(states)$
  mark_geoshape(fill = "lightgray",stroke = "white")$
  properties(title = "US State Capitols", width = 700, height = 400)$
  project("albersUsa")

# Points and text
hover <- 
  alt$selection(
    type = "single", 
    on = "mouseover", 
    nearest = TRUE,
    fields = list("lat", "lon")
  )

base <- 
  alt$Chart(capitals)$
  encode(
    longitude = "lon:Q",
    latitude = "lat:Q"
  )

text <- 
  base$
  mark_text(dy = -5, align = "right")$
  encode(
    alt$Text("city", type = "nominal"),
    # switched from reference
    opacity = alt$condition(hover, alt$value(1), alt$value(0))
  )

points <- 
  base$
  mark_point()$
  encode(
    color = alt$value("black"),
    # switched from reference
    size = alt$condition(hover, alt$value(100), alt$value(30))
  )$
  properties(selection = hover)

chart <- (background + points + text)

chart

US Population Over Time

Altair example

This chart visualizes the age distribution of the US population over time. It uses a slider widget that is bound to the year to visualize the age distribution over time. #### Data

glimpse(vega_data$population())
#> Rows: 570
#> Columns: 4
#> $ year   <dbl> 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 185…
#> $ age    <dbl> 0, 0, 5, 5, 10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40,…
#> $ sex    <dbl> 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, …
#> $ people <dbl> 1483789, 1450376, 1411067, 1359668, 1260099, 1216114, 1077133, …

Chart

pop <- vega_data$population$url

pink_blue <- 
  alt$Scale(
    domain = list("Male", "Female"),
    range = list("steelblue", "salmon")
  )
slider <- alt$binding_range(min = 1900, max = 2000, step = 10)
year <- alt$selection_single(
  name = "year", 
  fields = list("year"), 
  bind = slider
)

chart <- 
  alt$Chart(pop)$
  mark_bar()$
  encode(
    x = alt$X("sex:N", axis = alt$Axis(title = NULL)),
    y = alt$Y("people:Q", scale = alt$Scale(domain = c(0, 1.2e7))),
    color = alt$Color("sex:N", scale = pink_blue),
    column = "age:O"
  )$
  properties(width = 20, selection = year)$
  transform_calculate("sex", "if(datum.sex == 1, 'Male', 'Female')")$
  transform_filter(year$ref())

chart

US Population Pyramid Over Time

Altair Example

A population pyramid shows the distribution of age groups within a population. It uses a slider widget that is bound to the year to visualize the age distribution over time.

Data

glimpse(vega_data$population())
#> Rows: 570
#> Columns: 4
#> $ year   <dbl> 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 185…
#> $ age    <dbl> 0, 0, 5, 5, 10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40,…
#> $ sex    <dbl> 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, …
#> $ people <dbl> 1483789, 1450376, 1411067, 1359668, 1260099, 1216114, 1077133, …

Chart

source <- vega_data$population$url

slider <- alt$binding_range(min = 1850, max = 2000, step = 10)

select_year <- 
  alt$selection_single(
    name = "year", 
    fields = list("year"),
    bind = slider,
    init = list(year = 2000)
  )

base <-
  alt$
  Chart(source)$
  add_selection(select_year)$
  transform_filter(select_year)$
  transform_calculate("gender", "if(datum.sex == 1, 'Male', 'Female')")$
  properties(width=250)


color_scale <-
  alt$Scale(
    domain = list("Male", "Female"),
    range = list("#1f77b4", "#e377c2")
  )

left <-
  base$
  transform_filter("datum.gender == 'Female'")$
  encode(
    y = alt$Y("age:O", axis=NULL),
    x = alt$X(
      "sum(people):Q",
      title = "population",
      sort = alt$SortOrder("descending")
    ),
    color = alt$Color("gender:N", scale = color_scale, legend = NULL)
  )$
  mark_bar()$
  properties(title = "Female")

middle <- 
  base$
  encode(
    y = alt$Y("age:O", axis = NULL),
    text = alt$Text("age:Q")
  )$
  mark_text()$
  properties(width = 20)

right <- 
  base$
  transform_filter("datum.gender == 'Male'")$
  encode(
    y = alt$Y("age:O", axis = NULL),
    x = alt$X("sum(people):Q", title = "population"),
    color = alt$Color("gender:N", scale = color_scale, legend = NULL)
  )$
  mark_bar()$
  properties(title = "Male")

chart <- alt$concat(left, middle, right, spacing = 5)

chart

Altair Example

This chart visualizes the age distribution of the US population over time, using a wrapped faceting of the data by decade.

Data

glimpse(vega_data$population())
#> Rows: 570
#> Columns: 4
#> $ year   <dbl> 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 1850, 185…
#> $ age    <dbl> 0, 0, 5, 5, 10, 10, 15, 15, 20, 20, 25, 25, 30, 30, 35, 35, 40,…
#> $ sex    <dbl> 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, …
#> $ people <dbl> 1483789, 1450376, 1411067, 1359668, 1260099, 1216114, 1077133, …

Chart

source <- vega_data$population$url

chart <- 
  alt$Chart(source)$
  mark_area()$
  encode(
    x = "age:O",
    y = alt$Y(
      "sum(people):Q",
      title = "Population",
      axis = alt$Axis(format = "~s")
    ),
    facet = alt$Facet("year:O", columns = 5)
  )$
  properties(
    title = "US Age Distribution By Year",
    width = 90,
    height = 80
  )

chart

Wheat and Wages

Altair Example

A recreation of William Playfair’s classic chart visualizing the price of wheat, the wages of a mechanic, and the reigning British monarch.

This is a more polished version of the simpler chart in Bar Chart with Line on Dual Axis.

Data

glimpse(vega_data$wheat())
#> Rows: 52
#> Columns: 3
#> $ year  <dbl> 1565, 1570, 1575, 1580, 1585, 1590, 1595, 1600, 1605, 1610, 1615…
#> $ wheat <dbl> 41.0, 45.0, 42.0, 49.0, 41.5, 47.0, 64.0, 27.0, 33.0, 32.0, 33.0…
#> $ wages <dbl> 5.00, 5.05, 5.08, 5.12, 5.15, 5.25, 5.54, 5.61, 5.69, 5.78, 5.94…

Chart

base_wheat <-
  alt$Chart(vega_data$wheat$url)$
    transform_calculate(year_end = "+datum.year + 5")

base_monarchs <- 
  alt$Chart(vega_data$monarchs$url)$
    transform_calculate(
      offset = "((!datum.commonwealth && datum.index % 2) ? -1: 1) * 2 + 95",
      off2 = "((!datum.commonwealth && datum.index % 2) ? -1: 1) + 95",
      y = "95",
      x = "+datum.start + (+datum.end - +datum.start)/2"
    )

bars <-
  base_wheat$
  mark_bar(fill = "#aaa", stroke = "#999")$
  encode(
    x = alt$X("year:Q", axis = alt$Axis(format = "d", tickCount = 5)),
    y = alt$Y("wheat:Q", axis = alt$Axis(zindex = 1)),
    x2 = alt$X2("year_end")
  )

area <-
  base_wheat$
  mark_area(color="#a4cedb", opacity = 0.7)$
  encode(
    x = alt$X("year:Q"),
    y = alt$Y("wages:Q")
  )

area_line_1 <- area$mark_line(color = "#000", opacity = 0.7)
area_line_2 <-area$mark_line(yOffset= -2, color = "#EE8182")

top_bars <- 
  base_monarchs$
  mark_bar(stroke = "#000")$
  encode(
    x = alt$X("start:Q"),
    x2 = alt$X2("end"),
    y = alt$Y("y:Q"),
    y2 = alt$Y2("offset"),
    fill = alt$Fill(
      "commonwealth:N", 
      legend = NULL, 
      scale = alt$Scale(range = list("black", "white"))
    )
  )

top_text <-
  base_monarchs$
  mark_text(yOffset = 14, fontSize = 9, fontStyle ="italic")$
  encode(
    x = alt$X("x:Q"),
    y = alt$Y("off2:Q"),
    text = alt$Text("name:N")
  )

chart <- 
  (bars + area + area_line_1 + area_line_2 + top_bars + top_text)$
  properties(width = 900, height = 400)$
  configure_axis(
    title = NULL, 
    gridColor = "white", 
    gridOpacity = 0.25, 
    domain = FALSE
  )$
  configure_view(stroke = "transparent")

chart

Haley Jeppson

2023-09-04

Anscombe’s Quartet

Data

Chart

Atmospheric CO2 Concentration

Data

Chart

Becker’s Barley Trellis Plot

Data

Chart

Cumulative Wikipedia Donations

Data

Chart

Falkensee

Data

Chart

Gapminder Bubble Plot

Data

Chart

Iowa’s renewable energy boom

Data

Chart

Isotype Visualization

Data

Chart

Isotype Visualization with Emoji

Data

Chart

Locations of US Airports

Data

Chart

London Tube Lines

Data

Chart

Natural Disasters

Data

Chart

One Dot Per Zipcode

Data

Chart

Seattle Weather Heatmap

Data

Chart

Seattle Weather Interactive

Chart

The U.S. employment crash during the Great Recession

Data

Chart

Top K Items

Data

Chart

Top K Letters

Data

Chart

U.S. state capitals overlayed on a map of the U.S

Data

Chart

US Population Over Time

Chart

US Population Pyramid Over Time

Data

Chart

US Population: Wrapped Facet

Data

Chart

Wheat and Wages

Data

Chart