This document is adapted from the Scatter Plots section of the Altair Example Gallery.

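Our first step is to set up our environment. The examples below assume that altair's `alt` object and a `vega_data` datasets object are available, together with the packages that provide the helpers used throughout (`glimpse()`, `fromJSON()`, `map()`, `crossing()`, `nest()`, `mutate()` and the `%>%` pipe).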

Binned Scatterplot

Altair example

Data

# Read the movies JSON from its URL and peek at the columns.
vega_data$movies$url %>%
  fromJSON() %>%
  glimpse()
#> Rows: 3,201
#> Columns: 16
#> $ Title                  <chr> "The Land Girls", "First Love, Last Rites", "I…
#> $ US_Gross               <int> 146083, 10876, 203134, 373615, 1009819, 24551,…
#> $ Worldwide_Gross        <dbl> 146083, 10876, 203134, 373615, 1087521, 262455…
#> $ US_DVD_Sales           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ Production_Budget      <int> 8000000, 300000, 250000, 300000, 1000000, 1600…
#> $ Release_Date           <chr> "Jun 12 1998", "Aug 07 1998", "Aug 28 1998", "…
#> $ MPAA_Rating            <chr> "R", "R", NA, NA, "R", NA, "R", "R", "R", NA, …
#> $ Running_Time_min       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ Distributor            <chr> "Gramercy", "Strand", "Lionsgate", "Fine Line"…
#> $ Source                 <chr> NA, NA, NA, NA, "Original Screenplay", NA, NA,…
#> $ Major_Genre            <chr> NA, "Drama", "Comedy", "Comedy", "Drama", NA, …
#> $ Creative_Type          <chr> NA, NA, NA, NA, "Contemporary Fiction", NA, NA…
#> $ Director               <chr> NA, NA, NA, NA, NA, NA, "Christopher Nolan", N…
#> $ Rotten_Tomatoes_Rating <int> NA, NA, NA, 13, 62, NA, NA, NA, 25, 86, 81, 84…
#> $ IMDB_Rating            <dbl> 6.1, 6.9, 6.8, NA, 3.4, NA, 7.7, 3.8, 5.8, 7.0…
#> $ IMDB_Votes             <int> 1071, 207, 865, NA, 165, NA, 15133, 353, 3275,…

Chart

# Binned scatterplot: both rating fields are binned and the circle size
# encodes the record count.
x_binned <- alt$X("IMDB_Rating:Q", bin = TRUE)
y_binned <- alt$Y("Rotten_Tomatoes_Rating:Q", bin = TRUE)

# The chart reads the data directly from the dataset URL
movies_base <- alt$Chart(vega_data$movies$url)

chart <- movies_base$mark_circle()$encode(
  x = x_binned,
  y = y_binned,
  size = "count()"
)

chart

Brushing Scatter Plot to show data on a table

Altair example

Data

# Peek at the cars dataset: column names, types and leading values.
vega_data$cars() %>% glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Linked views: an interval brush on the scatter plot drives three text
# "tables" that list rows from the brushed selection.
source <- vega_data$cars()

# Interval selection shared by the scatter plot and the tables
brush <- alt$selection(type = "interval")

# Brushed points are colored by cylinder count; everything else is grey
brushed_color <- alt$condition(brush, "Cylinders:O", alt$value("grey"))

# Scatter plot that owns the brush
points <-
  alt$Chart(source)$
  mark_point()$
  encode(
    x = "Horsepower:Q",
    y = "Miles_per_Gallon:Q",
    color = brushed_color
  )$
  add_selection(brush)

# Shared base for the tables: number the rows, keep only the brushed ones,
# rank what is left and keep ranks below 20
ranked_text <-
  alt$Chart(source)$
  mark_text()$
  encode(y = alt$Y("row_number:O", axis = NULL))$
  transform_window(row_number = "row_number()")$
  transform_filter(brush)$
  transform_window(rank = "rank(row_number)")$
  transform_filter("datum.rank<20")

# Build one titled text column of the table from a field shorthand
table_column <- function(field, title) {
  ranked_text$encode(text = field)$properties(title = title)
}

horsepower <- table_column("Horsepower:N", "Horsepower")
mpg <- table_column("Miles_per_Gallon:N", "MPG")
origin <- table_column("Origin:N", "Origin")

# Combine the three columns into one table
text <- (horsepower | mpg | origin)

# Scatter plot next to the table, with independent color legends
chart <- (points | text)$resolve_legend(color = "independent")

chart

Bubble Plot

Altair example

Data

# Peek at the cars dataset: column names, types and leading values.
vega_data$cars() %>% glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Bubble plot: horsepower against fuel economy, with the point size mapped
# to the Acceleration field.
cars_points <- alt$Chart(vega_data$cars())$mark_point()

chart <- cars_points$encode(
  alt$X("Horsepower:Q"),
  alt$Y("Miles_per_Gallon:Q"),
  size = "Acceleration"
)

chart

Connected Scatterplot (Lines with Custom Paths)

Altair example

This example shows how layering can be used to build a plot. This dataset tracks miles driven per capita along with gas prices annually from 1956 to 2010. It is based on the May 2, 2010 New York Times article ‘Driving Shifts Into Reverse’. See this reference.

Data

# Peek at the driving dataset: column names, types and leading values.
vega_data$driving() %>% glimpse()
#> Rows: 55
#> Columns: 4
#> $ side  <chr> "left", "right", "bottom", "top", "right", "bottom", "right", "…
#> $ year  <dbl> 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 196…
#> $ miles <dbl> 3675, 3706, 3766, 3905, 3935, 3977, 4085, 4218, 4369, 4538, 467…
#> $ gas   <dbl> 2.38, 2.40, 2.26, 2.31, 2.27, 2.25, 2.22, 2.12, 2.11, 2.14, 2.1…

Chart

# Connected scatterplot: a line that follows the observations in year order,
# layered with a circle for every observation.
driving <- vega_data$driving()

# Fresh scale definition that does not force the axis to include zero
free_scale <- function() {
  alt$Scale(zero = FALSE)
}

# Path through (miles, gas), ordered by year rather than by x value
lines <-
  alt$Chart(driving)$
  mark_line()$
  encode(
    alt$X("miles", scale = free_scale()),
    alt$Y("gas", scale = free_scale()),
    order = "year"
  )

# One circle per observation, on the same scales as the line
points <-
  alt$Chart(driving)$
  mark_circle()$
  encode(
    x = alt$X("miles", scale = free_scale()),
    y = alt$Y("gas", scale = free_scale())
  )

# Layer the circles on top of the line
chart <- (lines + points)

chart

Dot Dash Plot

Altair example

This example shows how to make a dot-dash plot presented in Edward Tufte’s book Visual Display of Quantitative Information on page 133. This example is based on g3o2’s block.

Data

# Peek at the cars dataset: column names, types and leading values.
vega_data$cars() %>% glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Dot-dash plot: a brushable scatter plot flanked by tick marks along each
# axis. All three views share the same interval selection, attached with
# add_selection() as in the brushing example, rather than through the legacy
# properties(selection = ...) form.
cars <- vega_data$cars()

brush <- alt$selection(type = "interval")

# Axis with labels, domain line and tick marks switched off
tick_axis <- alt$Axis(labels = FALSE, domain = FALSE, ticks = FALSE)

# Same bare axis, with the title blanked as well
tick_axis_notitle <-
  alt$Axis(labels = FALSE, domain = FALSE, ticks = FALSE, title = "")

# Central scatter plot; points outside the brush turn grey
points <-
  alt$Chart(cars)$
  mark_point()$
  encode(
    x = alt$X("Miles_per_Gallon", axis = alt$Axis(title = "")),
    y = alt$Y("Horsepower", axis = alt$Axis(title = "")),
    color = alt$condition(brush, "Origin", alt$value("grey"))
  )$
  add_selection(brush)

# Ticks of Miles_per_Gallon, one row per Origin
x_ticks <-
  alt$Chart(cars)$
  mark_tick()$
  encode(
    x = alt$X("Miles_per_Gallon", axis = tick_axis),
    y = alt$Y("Origin", axis = tick_axis_notitle),
    color = alt$condition(brush, "Origin", alt$value("lightgrey"))
  )$
  add_selection(brush)

# Ticks of Horsepower, one column per Origin
y_ticks <-
  alt$Chart(cars)$
  mark_tick()$
  encode(
    x = alt$X("Origin", axis = tick_axis_notitle),
    y = alt$Y("Horsepower", axis = tick_axis),
    color = alt$condition(brush, "Origin", alt$value("lightgrey"))
  )$
  add_selection(brush)

# Horsepower ticks beside the scatter plot, which is combined with the
# Miles_per_Gallon ticks
chart <- (y_ticks | (points & x_ticks))

chart

Multifeature Scatter Plot

Altair example

Data

We will use the cars dataset here.

# Peek at the cars dataset: column names, types and leading values.
vega_data$cars() %>% glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Multifeature scatter plot: weight against fuel economy, colored by
# cylinder count and sized by displacement.
x_weight <- alt$X("Weight_in_lbs", scale = alt$Scale(zero = FALSE))
y_mpg <-
  alt$Y("Miles_per_Gallon", scale = alt$Scale(zero = FALSE, padding = 1))

cars_circles <- alt$Chart(vega_data$cars())$mark_circle()

chart <- cars_circles$encode(
  x_weight,
  y_mpg,
  alt$Color("Cylinders:N"),
  size = "Displacement"
)

chart

Polynomial Fit Plot

Altair example

Data

Definition

# Simulate n noisy observations of 10 - 1 / (x + 0.1), fit polynomials of
# several degrees to them, and evaluate every fit on a common grid of x values.
n <- 40

# tibble() replaces the deprecated data_frame(); columns are built in order,
# so y can refer to the x column defined just before it.
data <-
  tibble(
    x = runif(n)^2,
    y = 10 - 1 / (x + 0.1) + runif(n)
  )

degree <- c(1, 3, 5)

# One linear model per polynomial degree, in the same order as `degree`
model_poly <-
  map(degree, ~ lm(y ~ poly(x, degree = .x), data = data))

# Evenly spaced grid spanning the observed range of x
x_pred <- seq(min(data$x), max(data$x), length.out = 500)

# One row per degree, with its grid of x values nested in the list-column
# `data`; the rows are ordered by degree, so they line up with `model_poly`.
# Inside mutate(), `data` refers to that list-column, not to the data frame
# defined above. Naming the nested column and the columns to unnest avoids the
# tidyr warnings raised by the bare nest(x) / unnest() calls.
data_predict <-
  crossing(degree, x = x_pred) %>%
  nest(data = x) %>%
  mutate(y = map2(model_poly, data, ~ predict(.x, newdata = .y))) %>%
  unnest(cols = c(data, y))
# Peek at the simulated observations.
data %>% glimpse()
#> Rows: 40
#> Columns: 2
#> $ x <dbl> 0.27976022, 0.16820252, 0.17982285, 0.24862808, 0.09311060, 0.28002…
#> $ y <dbl> 8.1312547, 6.3581626, 7.1428275, 7.4862283, 5.3096406, 8.2037433, 8…
# Peek at the predictions: one row per degree and grid value.
data_predict %>% glimpse()
#> Rows: 1,500
#> Columns: 3
#> $ degree <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ x      <dbl> 0.0005093288, 0.0023561494, 0.0042029701, 0.0060497907, 0.0078…
#> $ y      <dbl> 3.899416, 3.916723, 3.934030, 3.951337, 3.968644, 3.985951, 4.…

Chart

# Polynomial fit plot: the observations as black circles, overlaid with one
# fitted line per polynomial degree.

# Observed points
chart_data <- alt$Chart(data)$mark_circle(color = "black")$encode(
  alt$X("x:Q"),
  alt$Y("y:Q")
)

# Fitted curves, one color per degree
chart_predict <- alt$Chart(data_predict)$mark_line()$encode(
  alt$X("x:Q"),
  alt$Y("y:Q"),
  color = "degree:N"
)

# Layer the fitted lines over the observations
chart <- (chart_data + chart_predict)

chart

Scatter Matrix

Altair example

Data

# Peek at the cars dataset: column names, types and leading values.
vega_data$cars() %>% glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Scatter matrix: one 150 x 150 scatter plot per (row field, column field)
# pair, colored by origin, with interactive() enabled.

# Fields repeated down the rows and across the columns
matrix_rows <- list("Horsepower", "Acceleration", "Miles_per_Gallon")
matrix_columns <- list("Miles_per_Gallon", "Acceleration", "Horsepower")

# Template for a single cell; x and y are filled in by the repeat operator
matrix_cell <-
  alt$Chart(vega_data$cars())$
  mark_circle()$
  encode(
    x = alt$X(alt$`repeat`("column"), type = "quantitative"),
    y = alt$Y(alt$`repeat`("row"), type = "quantitative"),
    color = "Origin:N"
  )$
  properties(width = 150, height = 150)

chart <-
  matrix_cell$
  `repeat`(row = matrix_rows, column = matrix_columns)$
  interactive()

chart

Scatter Plot with Href

Altair example

Data

# Peek at the cars dataset: column names, types and leading values.
vega_data$cars() %>% glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Scatter plot with href: each point links to a URL calculated from the
# car's name, and the tooltip shows both the name and the URL.

# Expression that builds the link target for each row
search_url <- "'https://www.google.com/search?q=' + datum.Name"

chart <-
  alt$Chart(vega_data$cars())$
  mark_point()$
  transform_calculate(as_ = "url", calculate = search_url)$
  encode(
    alt$X("Horsepower:Q"),
    alt$Y("Miles_per_Gallon:Q"),
    color = "Origin:N",
    href = "url:N",
    tooltip = list("Name:N", "url:N")
  )

chart

Scatter Plot with Rolling Mean

Altair example

Data

# Peek at the Seattle weather dataset: column names, types and leading values.
vega_data$seattle_weather() %>% glimpse()
#> Rows: 1,461
#> Columns: 6
#> $ date          <dttm> 2012-01-01, 2012-01-02, 2012-01-03, 2012-01-04, 2012-0…
#> $ precipitation <dbl> 0.0, 10.9, 0.8, 20.3, 1.3, 2.5, 0.0, 0.0, 4.3, 1.0, 0.0…
#> $ temp_max      <dbl> 12.8, 10.6, 11.7, 12.2, 8.9, 4.4, 7.2, 10.0, 9.4, 6.1, …
#> $ temp_min      <dbl> 5.0, 2.8, 7.2, 5.6, 2.8, 2.2, 2.8, 2.8, 5.0, 0.6, -1.1,…
#> $ wind          <dbl> 4.7, 4.5, 2.3, 4.7, 6.1, 2.2, 2.3, 2.0, 3.4, 3.4, 5.1, …
#> $ weather       <chr> "drizzle", "rain", "rain", "rain", "rain", "rain", "rai…

Chart

# Daily maximum temperatures as points, overlaid with a red rolling-mean line
# computed over a window frame of c(-15, 15) around each observation.
source <- vega_data$seattle_weather()

# The window transform adds the `rolling_mean` field plotted by the line
line <-
  alt$Chart(source)$
  transform_window(
    rolling_mean = "mean(temp_max)",
    frame = c(-15, 15)
  )$
  mark_line(color = "red", size = 3)$
  encode(
    x = "date:T",
    y = "rolling_mean:Q"
  )

# Axis title for the raw observations
max_temp_axis <- alt$Axis(title = "Max Temp")

points <-
  alt$Chart(source)$
  mark_point()$
  encode(
    x = "date:T",
    y = alt$Y("temp_max:Q", axis = max_temp_axis)
  )

# Layer the rolling mean over the points
points + line