This document is adapted from the Histograms section of the Altair Example Gallery.

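Our first step is to set up our environment. The examples below assume that the `alt` and `vega_data` objects are available, along with helper functions such as `glimpse()`, `fromJSON()`, and `tibble()`, so the packages that provide them must be loaded before running the code.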

Histogram with a Global Mean Overlay

Altair example

Data

# Read the movies dataset from the URL stored in `vega_data`, then print a
# compact column-by-column overview of it.
vega_data$movies$url %>%
  fromJSON() %>%
  glimpse()
#> Rows: 3,201
#> Columns: 16
#> $ Title                  <chr> "The Land Girls", "First Love, Last Rites", "I …
#> $ US_Gross               <int> 146083, 10876, 203134, 373615, 1009819, 24551, …
#> $ Worldwide_Gross        <dbl> 146083, 10876, 203134, 373615, 1087521, 2624551…
#> $ US_DVD_Sales           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ Production_Budget      <int> 8000000, 300000, 250000, 300000, 1000000, 16000…
#> $ Release_Date           <chr> "Jun 12 1998", "Aug 07 1998", "Aug 28 1998", "S…
#> $ MPAA_Rating            <chr> "R", "R", NA, NA, "R", NA, "R", "R", "R", NA, N…
#> $ Running_Time_min       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
#> $ Distributor            <chr> "Gramercy", "Strand", "Lionsgate", "Fine Line",…
#> $ Source                 <chr> NA, NA, NA, NA, "Original Screenplay", NA, NA, …
#> $ Major_Genre            <chr> NA, "Drama", "Comedy", "Comedy", "Drama", NA, N…
#> $ Creative_Type          <chr> NA, NA, NA, NA, "Contemporary Fiction", NA, NA,…
#> $ Director               <chr> NA, NA, NA, NA, NA, NA, "Christopher Nolan", NA…
#> $ Rotten_Tomatoes_Rating <int> NA, NA, NA, 13, 62, NA, NA, NA, 25, 86, 81, 84,…
#> $ IMDB_Rating            <dbl> 6.1, 6.9, 6.8, NA, 3.4, NA, 7.7, 3.8, 5.8, 7.0,…
#> $ IMDB_Votes             <int> 1071, 207, 865, NA, 165, NA, 15133, 353, 3275, …

Chart

# The charts are given the dataset's URL rather than an in-memory data frame.
movies_url <- vega_data$movies$url

# Histogram layer: IMDB_Rating binned on x (`axis = NULL` is passed through so
# no x axis is drawn for this layer), number of records per bin on y.
rating_bins <- alt$X("IMDB_Rating:Q", bin = TRUE, axis = NULL)
histogram <- alt$Chart(movies_url)$mark_bar()
histogram <- histogram$encode(x = rating_bins, y = "count()")

# Overlay layer: a red rule positioned at the mean IMDB_Rating, with a fixed
# size value of 5.
mean_line <- alt$Chart(movies_url)$mark_rule(color = "red")
mean_line <- mean_line$encode(
  x = "mean(IMDB_Rating):Q",
  size = alt$value(5)
)

# `+` layers the rule on top of the histogram.
chart <- histogram + mean_line

chart

Layered Histogram

Altair example

Data

Definition
# Simulate three experiments in long format: one row per draw, with the
# experiment label in `Experiment` and the drawn value in `Measurement`.
#
# The table is built directly in long form, so no reshaping step (such as the
# superseded `tidyr::gather()`) is needed. Draws are generated in the order
# Trial_A, Trial_B, Trial_C and stacked in that same order, one experiment
# after the other.
n_draws <- 1000

data <-
  tibble(
    Experiment = rep(c("Trial_A", "Trial_B", "Trial_C"), each = n_draws),
    Measurement = c(
      rnorm(n_draws, mean = 0, sd = 0.8),
      rnorm(n_draws, mean = -2, sd = 1),
      rnorm(n_draws, mean = 3, sd = 2)
    )
  )
glimpse(data)
#> Rows: 3,000
#> Columns: 2
#> $ Experiment  <chr> "Trial_A", "Trial_A", "Trial_A", "Trial_A", "Trial_A", "Tr…
#> $ Measurement <dbl> -0.538663759, -0.004160715, -0.863691771, 0.259060487, -0.…

Chart

# Encodings for the layered histogram:
#   x     - Measurement, binned with at most 100 bins
#   y     - number of records per bin, with `stack = NULL` so the experiments
#           are overlaid rather than stacked
#   color - one colour per Experiment
x_bins <- alt$X("Measurement", bin = alt$Bin(maxbins = 100))
y_count <- alt$Y("count()", stack = NULL)
by_experiment <- alt$Color("Experiment")

# Semi-transparent step-interpolated areas keep overlapping layers visible.
chart <- alt$Chart(data)
chart <- chart$mark_area(opacity = 0.3, interpolate = "step")
chart <- chart$encode(x = x_bins, y = y_count, color = by_experiment)

chart

Trellis Histogram

Altair example

This example is originally from Vega-Lite.

Data

# Load the cars dataset from `vega_data` and print a compact overview of it.
vega_data$cars() %>%
  glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "pl…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, NaN…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8,…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 133…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 115…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425,…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8…
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 1970…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA…

Chart

# Trellis histogram: Horsepower binned on x, count on y, and one row facet
# per value of Origin.
horsepower_bins <- alt$X("Horsepower:Q", bin = TRUE)

chart <- alt$Chart(vega_data$cars())
chart <- chart$mark_bar()
chart <- chart$encode(
  x = horsepower_bins,
  y = "count(Horsepower)",
  row = "Origin"
)

chart