This document is adapted from the Histograms section of the Altair Example Gallery.

Our first step is to set up our environment:

Histogram with a Global Mean Overlay

Altair example

Data

# Download the movies dataset from its URL, parse the JSON, and preview it.
vega_data$movies$url %>%
  fromJSON() %>%
  glimpse()
#> Rows: 3,201
#> Columns: 16
#> $ Title                  <chr> "The Land Girls", "First Love, Last Rites", "I…
#> $ US_Gross               <int> 146083, 10876, 203134, 373615, 1009819, 24551,…
#> $ Worldwide_Gross        <dbl> 146083, 10876, 203134, 373615, 1087521, 262455…
#> $ US_DVD_Sales           <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ Production_Budget      <int> 8000000, 300000, 250000, 300000, 1000000, 1600…
#> $ Release_Date           <chr> "Jun 12 1998", "Aug 07 1998", "Aug 28 1998", "…
#> $ MPAA_Rating            <chr> "R", "R", NA, NA, "R", NA, "R", "R", "R", NA, …
#> $ Running_Time_min       <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
#> $ Distributor            <chr> "Gramercy", "Strand", "Lionsgate", "Fine Line"…
#> $ Source                 <chr> NA, NA, NA, NA, "Original Screenplay", NA, NA,…
#> $ Major_Genre            <chr> NA, "Drama", "Comedy", "Comedy", "Drama", NA, …
#> $ Creative_Type          <chr> NA, NA, NA, NA, "Contemporary Fiction", NA, NA…
#> $ Director               <chr> NA, NA, NA, NA, NA, NA, "Christopher Nolan", N…
#> $ Rotten_Tomatoes_Rating <int> NA, NA, NA, 13, 62, NA, NA, NA, 25, 86, 81, 84…
#> $ IMDB_Rating            <dbl> 6.1, 6.9, 6.8, NA, 3.4, NA, 7.7, 3.8, 5.8, 7.0…
#> $ IMDB_Votes             <int> 1071, 207, 865, NA, 165, NA, 15133, 353, 3275,…

Chart

# Both layers read the same movies dataset, referenced by its URL.
data <- vega_data$movies$url

# Histogram layer: binned IMDB ratings on x (no axis drawn), record counts on y.
bar <- alt$Chart(data)$mark_bar()$encode(
  x = alt$X("IMDB_Rating:Q", bin = TRUE, axis = NULL),
  y = alt$Y("count()")
)

# Overlay layer: a red rule of size 5 placed at the mean IMDB rating.
rule <- alt$Chart(data)$mark_rule(color = "red")$encode(
  x = alt$X("mean(IMDB_Rating):Q"),
  size = alt$value(5)
)

# Layer the rule on top of the bars.
chart <- alt$layer(bar, rule)

chart

Layered Histogram

Altair example

Data

Definition

# Simulate three trials of 1000 normally distributed measurements each, then
# reshape from one column per trial to long form: one row per measurement,
# labelled by the experiment it belongs to.
data <-
  tibble(
    Trial_A = rnorm(1000, mean = 0, sd = 0.8),
    Trial_B = rnorm(1000, mean = -2, sd = 1),
    Trial_C = rnorm(1000, mean = 3, sd = 2)
  ) %>%
  # pivot_longer() is the current replacement for the superseded gather().
  pivot_longer(
    cols = everything(),
    names_to = "Experiment",
    values_to = "Measurement"
  ) %>%
  # pivot_longer() interleaves the trials row by row; sorting by experiment
  # restores the trial-by-trial row order that gather() produced.
  dplyr::arrange(Experiment)
glimpse(data)
#> Rows: 3,000
#> Columns: 2
#> $ Experiment  <chr> "Trial_A", "Trial_A", "Trial_A", "Trial_A", "Trial_A", "T…
#> $ Measurement <dbl> -0.8325134888, 0.5649151821, 0.2099688836, -1.5610892592,…

Chart

# Draw each experiment as a semi-transparent step area over finely binned
# measurements. Stacking is turned off (stack = NULL) so the three
# distributions overlap instead of piling on top of one another.
chart <- alt$Chart(data)$mark_area(opacity = 0.3, interpolate = "step")$encode(
  x = alt$X("Measurement", bin = alt$Bin(maxbins = 100)),
  y = alt$Y("count()", stack = NULL),
  color = alt$Color("Experiment")
)

chart

Trellis Histogram

Altair example

This example is originally from Vega-Lite.

Data

# Load the cars dataset and preview its columns.
vega_data$cars() %>%
  glimpse()
#> Rows: 406
#> Columns: 9
#> $ Name             <chr> "chevrolet chevelle malibu", "buick skylark 320", "p…
#> $ Miles_per_Gallon <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, NaN, NaN, Na…
#> $ Cylinders        <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8…
#> $ Displacement     <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 13…
#> $ Horsepower       <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 11…
#> $ Weight_in_lbs    <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425…
#> $ Acceleration     <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, …
#> $ Year             <dttm> 1970-01-01, 1970-01-01, 1970-01-01, 1970-01-01, 197…
#> $ Origin           <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "US…

Chart

# Histogram of binned horsepower, split into one row of bars per origin.
chart <- alt$Chart(vega_data$cars())$mark_bar()$encode(
  x = alt$X("Horsepower:Q", bin = TRUE),
  y = alt$Y("count(Horsepower)"),
  row = alt$Row("Origin")
)

chart