vignettes/example-gallery-02-bar-charts.Rmd
example-gallery-02-bar-charts.Rmd
This document is adapted from the Bar Charts section of the Altair Example Gallery.
Our first step is to set up our environment:
library("altair")
library("tibble")
library("jsonlite")
# Handle to the example datasets used throughout this document; an individual
# dataset is retrieved by calling it as a function, e.g. vega_data$wheat().
vega_data <- import_vega_data()
glimpse(vega_data$wheat())
#> Rows: 52
#> Columns: 3
#> $ year <dbl> 1565, 1570, 1575, 1580, 1585, 1590, 1595, 1600, 1605, 1610, 1615…
#> $ wheat <dbl> 41.0, 45.0, 42.0, 49.0, 41.5, 47.0, 64.0, 27.0, 33.0, 32.0, 33.0…
#> $ wages <dbl> 5.00, 5.05, 5.08, 5.12, 5.15, 5.25, 5.54, 5.61, 5.69, 5.78, 5.94…
# Bar chart of wheat by year in which the bar for 1810 is picked out with
# its own color.
chart <- alt$Chart(vega_data$wheat())
chart <- chart$mark_bar()
chart <- chart$encode(
  x = "year:O",
  y = "wheat:Q",
  # Conditional color: the first argument is the test, the second is the
  # value used when the test passes, the third is the fallback value.
  color = alt$condition(
    "datum.year == 1810",
    alt$value("orange"),
    alt$value("steelblue")
  )
)
chart <- chart$properties(width = 600)
chart
glimpse(vega_data$wheat())
#> Rows: 52
#> Columns: 3
#> $ year <dbl> 1565, 1570, 1575, 1580, 1585, 1590, 1595, 1600, 1605, 1610, 1615…
#> $ wheat <dbl> 41.0, 45.0, 42.0, 49.0, 41.5, 47.0, 64.0, 27.0, 33.0, 32.0, 33.0…
#> $ wages <dbl> 5.00, 5.05, 5.08, 5.12, 5.15, 5.25, 5.54, 5.61, 5.69, 5.78, 5.94…
glimpse(vega_data$wheat())
#> Rows: 52
#> Columns: 3
#> $ year <dbl> 1565, 1570, 1575, 1580, 1585, 1590, 1595, 1600, 1605, 1610, 1615…
#> $ wheat <dbl> 41.0, 45.0, 42.0, 49.0, 41.5, 47.0, 64.0, 27.0, 33.0, 32.0, 33.0…
#> $ wages <dbl> 5.00, 5.05, 5.08, 5.12, 5.15, 5.25, 5.54, 5.61, 5.69, 5.78, 5.94…
This seems exemplary of why dual-axes should be used with caution.
glimpse(vega_data$wheat())
#> Rows: 52
#> Columns: 3
#> $ year <dbl> 1565, 1570, 1575, 1580, 1585, 1590, 1595, 1600, 1605, 1610, 1615…
#> $ wheat <dbl> 41.0, 45.0, 42.0, 49.0, 41.5, 47.0, 64.0, 27.0, 33.0, 32.0, 33.0…
#> $ wages <dbl> 5.00, 5.05, 5.08, 5.12, 5.15, 5.25, 5.54, 5.61, 5.69, 5.78, 5.94…
glimpse(vega_data$us_employment())
#> Rows: 120
#> Columns: 24
#> $ month <chr> "2006-01-01", "2006-02-01", "2006-0…
#> $ nonfarm <dbl> 135450, 135762, 136059, 136227, 136…
#> $ private <dbl> 113603, 113884, 114156, 114308, 114…
#> $ goods_producing <dbl> 22467, 22535, 22572, 22631, 22597, …
#> $ service_providing <dbl> 112983, 113227, 113487, 113596, 113…
#> $ private_service_providing <dbl> 91136, 91349, 91584, 91677, 91735, …
#> $ mining_and_logging <dbl> 656, 662, 669, 679, 681, 686, 690, …
#> $ construction <dbl> 7601, 7664, 7689, 7726, 7713, 7699,…
#> $ manufacturing <dbl> 14210, 14209, 14214, 14226, 14203, …
#> $ durable_goods <dbl> 8982, 8986, 9000, 9020, 9017, 9028,…
#> $ nondurable_goods <dbl> 5228, 5223, 5214, 5206, 5186, 5185,…
#> $ trade_transportation_utilties <dbl> 26162, 26196, 26239, 26230, 26223, …
#> $ wholesale_trade <dbl> 5840.4, 5854.8, 5873.3, 5886.9, 589…
#> $ retail_trade <dbl> 15351.5, 15361.3, 15388.0, 15348.5,…
#> $ transportation_and_warehousing <dbl> 4420.0, 4429.4, 4429.7, 4445.4, 445…
#> $ utilities <dbl> 549.8, 550.1, 547.5, 548.9, 548.3, …
#> $ information <dbl> 3052, 3052, 3055, 3046, 3039, 3036,…
#> $ financial_activities <dbl> 8307, 8332, 8348, 8369, 8376, 8362,…
#> $ professional_and_business_services <dbl> 17299, 17365, 17438, 17462, 17512, …
#> $ education_and_health_services <dbl> 17946, 17998, 18045, 18070, 18100, …
#> $ leisure_and_hospitality <dbl> 12945, 12980, 13034, 13074, 13052, …
#> $ other_services <dbl> 5425, 5426, 5425, 5426, 5433, 5432,…
#> $ government <dbl> 21847, 21878, 21903, 21919, 21926, …
#> $ nonfarm_change <dbl> 282, 312, 297, 168, 31, 79, 206, 17…
glimpse(data)
#> Rows: 7
#> Columns: 3
#> $ project <chr> "a", "b", "c", "d", "e", "f", "g"
#> $ score <dbl> 25, 57, 23, 19, 8, 47, 8
#> $ goal <dbl> 25, 47, 30, 27, 38, 19, 4
In the Altair example, we see some code like this:
alt.WindowFieldDef(op='sum', field='Time', **{'as': 'TotalTime'})
In R, supply the contents of the `**{}` as additional named arguments to the function:
alt$WindowFieldDef(op = "sum", field = "Time", as = "TotalTime")
See the Field Guide to Python Issues for more details and more examples.
glimpse(activities)
#> Rows: 5
#> Columns: 2
#> $ Activity <chr> "Sleeping", "Eating", "TV", "Work", "Exercise"
#> $ Time <dbl> 8, 2, 4, 8, 2
This example shows a diverging stacked bar chart for sentiments towards a set of eight questions, displayed as percentages with neutral responses straddling the 0% mark.
# Survey responses as inline JSON: one record per (question, type) pair,
# 8 questions x 5 response types = 40 records. Each record carries the
# response count (`value`), its share (`percentage`), and the precomputed
# horizontal extent of its bar segment (`percentage_start` to
# `percentage_end`), which the chart below maps to `x` and `x2`.
data <- fromJSON('[
{
"question": "Question 1",
"type": "Strongly disagree",
"value": 24,
"percentage": 0.7,
"percentage_start": -19.1,
"percentage_end": -18.4
},
{
"question": "Question 1",
"type": "Disagree",
"value": 294,
"percentage": 9.1,
"percentage_start": -18.4,
"percentage_end": -9.2
},
{
"question": "Question 1",
"type": "Neither agree nor disagree",
"value": 594,
"percentage": 18.5,
"percentage_start": -9.2,
"percentage_end": 9.2
},
{
"question": "Question 1",
"type": "Agree",
"value": 1927,
"percentage": 59.9,
"percentage_start": 9.2,
"percentage_end": 69.2
},
{
"question": "Question 1",
"type": "Strongly agree",
"value": 376,
"percentage": 11.7,
"percentage_start": 69.2,
"percentage_end": 80.9
},
{
"question": "Question 2",
"type": "Strongly disagree",
"value": 2,
"percentage": 18.2,
"percentage_start": -36.4,
"percentage_end": -18.2
},
{
"question": "Question 2",
"type": "Disagree",
"value": 2,
"percentage": 18.2,
"percentage_start": -18.2,
"percentage_end": 0
},
{
"question": "Question 2",
"type": "Neither agree nor disagree",
"value": 0,
"percentage": 0,
"percentage_start": 0,
"percentage_end": 0
},
{
"question": "Question 2",
"type": "Agree",
"value": 7,
"percentage": 63.6,
"percentage_start": 0,
"percentage_end": 63.6
},
{
"question": "Question 2",
"type": "Strongly agree",
"value": 11,
"percentage": 0,
"percentage_start": 63.6,
"percentage_end": 63.6
},
{
"question": "Question 3",
"type": "Strongly disagree",
"value": 2,
"percentage": 20,
"percentage_start": -30,
"percentage_end": -10
},
{
"question": "Question 3",
"type": "Disagree",
"value": 0,
"percentage": 0,
"percentage_start": -10,
"percentage_end": -10
},
{
"question": "Question 3",
"type": "Neither agree nor disagree",
"value": 2,
"percentage": 20,
"percentage_start": -10,
"percentage_end": 10
},
{
"question": "Question 3",
"type": "Agree",
"value": 4,
"percentage": 40,
"percentage_start": 10,
"percentage_end": 50
},
{
"question": "Question 3",
"type": "Strongly agree",
"value": 2,
"percentage": 20,
"percentage_start": 50,
"percentage_end": 70
},
{
"question": "Question 4",
"type": "Strongly disagree",
"value": 0,
"percentage": 0,
"percentage_start": -15.6,
"percentage_end": -15.6
},
{
"question": "Question 4",
"type": "Disagree",
"value": 2,
"percentage": 12.5,
"percentage_start": -15.6,
"percentage_end": -3.1
},
{
"question": "Question 4",
"type": "Neither agree nor disagree",
"value": 1,
"percentage": 6.3,
"percentage_start": -3.1,
"percentage_end": 3.1
},
{
"question": "Question 4",
"type": "Agree",
"value": 7,
"percentage": 43.8,
"percentage_start": 3.1,
"percentage_end": 46.9
},
{
"question": "Question 4",
"type": "Strongly agree",
"value": 6,
"percentage": 37.5,
"percentage_start": 46.9,
"percentage_end": 84.4
},
{
"question": "Question 5",
"type": "Strongly disagree",
"value": 0,
"percentage": 0,
"percentage_start": -10.4,
"percentage_end": -10.4
},
{
"question": "Question 5",
"type": "Disagree",
"value": 1,
"percentage": 4.2,
"percentage_start": -10.4,
"percentage_end": -6.3
},
{
"question": "Question 5",
"type": "Neither agree nor disagree",
"value": 3,
"percentage": 12.5,
"percentage_start": -6.3,
"percentage_end": 6.3
},
{
"question": "Question 5",
"type": "Agree",
"value": 16,
"percentage": 66.7,
"percentage_start": 6.3,
"percentage_end": 72.9
},
{
"question": "Question 5",
"type": "Strongly agree",
"value": 4,
"percentage": 16.7,
"percentage_start": 72.9,
"percentage_end": 89.6
},
{
"question": "Question 6",
"type": "Strongly disagree",
"value": 1,
"percentage": 6.3,
"percentage_start": -18.8,
"percentage_end": -12.5
},
{
"question": "Question 6",
"type": "Disagree",
"value": 1,
"percentage": 6.3,
"percentage_start": -12.5,
"percentage_end": -6.3
},
{
"question": "Question 6",
"type": "Neither agree nor disagree",
"value": 2,
"percentage": 12.5,
"percentage_start": -6.3,
"percentage_end": 6.3
},
{
"question": "Question 6",
"type": "Agree",
"value": 9,
"percentage": 56.3,
"percentage_start": 6.3,
"percentage_end": 62.5
},
{
"question": "Question 6",
"type": "Strongly agree",
"value": 3,
"percentage": 18.8,
"percentage_start": 62.5,
"percentage_end": 81.3
},
{
"question": "Question 7",
"type": "Strongly disagree",
"value": 0,
"percentage": 0,
"percentage_start": -10,
"percentage_end": -10
},
{
"question": "Question 7",
"type": "Disagree",
"value": 0,
"percentage": 0,
"percentage_start": -10,
"percentage_end": -10
},
{
"question": "Question 7",
"type": "Neither agree nor disagree",
"value": 1,
"percentage": 20,
"percentage_start": -10,
"percentage_end": 10
},
{
"question": "Question 7",
"type": "Agree",
"value": 4,
"percentage": 80,
"percentage_start": 10,
"percentage_end": 90
},
{
"question": "Question 7",
"type": "Strongly agree",
"value": 0,
"percentage": 0,
"percentage_start": 90,
"percentage_end": 90
},
{
"question": "Question 8",
"type": "Strongly disagree",
"value": 0,
"percentage": 0,
"percentage_start": 0,
"percentage_end": 0
},
{
"question": "Question 8",
"type": "Disagree",
"value": 0,
"percentage": 0,
"percentage_start": 0,
"percentage_end": 0
},
{
"question": "Question 8",
"type": "Neither agree nor disagree",
"value": 0,
"percentage": 0,
"percentage_start": 0,
"percentage_end": 0
},
{
"question": "Question 8",
"type": "Agree",
"value": 0,
"percentage": 0,
"percentage_start": 0,
"percentage_end": 0
},
{
"question": "Question 8",
"type": "Strongly agree",
"value": 2,
"percentage": 100,
"percentage_start": 0,
"percentage_end": 100
}
]')
glimpse(data)
#> Rows: 40
#> Columns: 6
#> $ question <chr> "Question 1", "Question 1", "Question 1", "Question 1…
#> $ type <chr> "Strongly disagree", "Disagree", "Neither agree nor d…
#> $ value <int> 24, 294, 594, 1927, 376, 2, 2, 0, 7, 11, 2, 0, 2, 4, …
#> $ percentage <dbl> 0.7, 9.1, 18.5, 59.9, 11.7, 18.2, 18.2, 0.0, 63.6, 0.…
#> $ percentage_start <dbl> -19.1, -18.4, -9.2, 9.2, 69.2, -36.4, -18.2, 0.0, 0.0…
#> $ percentage_end <dbl> -18.4, -9.2, 9.2, 69.2, 80.9, -18.2, 0.0, 0.0, 63.6, …
# Axis for the question labels: titled "Question", with ticks and the domain
# line switched off.
y_axis <- alt$Axis(
  title = "Question",
  offset = 5,
  ticks = FALSE,
  minExtent = 60,
  domain = FALSE
)

# Explicit color scale: five response types in `domain`, five hex colors in
# `range`.
color_scale <- alt$Scale(
  domain = list(
    "Strongly disagree",
    "Disagree",
    "Neither agree nor disagree",
    "Agree",
    "Strongly agree"
  ),
  range = list("#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab")
)

# Each bar segment spans percentage_start..percentage_end on the x axis, is
# placed on the row of its question, and is colored by response type.
chart <- alt$Chart(data)
chart <- chart$mark_bar()
chart <- chart$encode(
  x = "percentage_start:Q",
  x2 = "percentage_end:Q",
  y = alt$Y("question:N", axis = y_axis),
  color = alt$Color(
    "type:N",
    legend = alt$Legend(title = "Response"),
    scale = color_scale
  )
)
chart
source <- vega_data$barley()
glimpse(source)
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
This example shows how to show error bars using confidence intervals. The confidence intervals are computed internally in vega by a non-parametric bootstrap of the mean.
source <- vega_data$barley()
glimpse(source)
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
# Neither layer is given data of its own; the shared data is passed once to
# alt$layer() below.

# Layer 1: bars showing the mean yield per year.
bars <- alt$Chart()
bars <- bars$mark_bar()
bars <- bars$encode(
  x = "year:O",
  y = alt$Y("mean(yield):Q", axis = alt$Axis(title = "Mean Yield")),
  color = "year:N"
)

# Layer 2: error bars over the raw yield values, with extent "ci".
error_bars <- alt$Chart()
error_bars <- error_bars$mark_errorbar(extent = "ci")
error_bars <- error_bars$encode(x = "year:O", y = "yield:Q")

# Stack the two layers on the shared data, then facet into one column per site.
chart <- alt$layer(bars, error_bars, data = source)
chart <- chart$facet(column = "site:N")
chart
Note that the argument to the `transform_filter()` function is a JavaScript expression that refers to a variable in the data using the `datum.` prefix.
glimpse(vega_data$wheat())
#> Rows: 52
#> Columns: 3
#> $ year <dbl> 1565, 1570, 1575, 1580, 1585, 1590, 1595, 1600, 1605, 1610, 1615…
#> $ wheat <dbl> 41.0, 45.0, 42.0, 49.0, 41.5, 47.0, 64.0, 27.0, 33.0, 32.0, 33.0…
#> $ wages <dbl> 5.00, 5.05, 5.08, 5.12, 5.15, 5.25, 5.54, 5.61, 5.69, 5.78, 5.94…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
glimpse(vega_data$iowa_electricity())
#> Rows: 51
#> Columns: 3
#> $ year <dttm> 2001-01-01, 2002-01-01, 2003-01-01, 2004-01-01, 2005-0…
#> $ source <chr> "Fossil Fuels", "Fossil Fuels", "Fossil Fuels", "Fossil…
#> $ net_generation <dbl> 35361, 35991, 36234, 36205, 36883, 37014, 41389, 42734,…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…
# Stacked horizontal bar chart of total yield per variety (stacked by site),
# with the summed yield printed as a text label on each segment.
source <- vega_data$barley()

# Bars: summed yield on x, stacked from zero, one color per site.
bars <-
  alt$Chart(source)$
  mark_bar()$
  encode(
    x = alt$X("sum(yield):Q", stack = "zero"),
    y = "variety:N",
    color = "site:N"
  )

# Labels: same x/y encoding and stacking as the bars. `detail = "site:N"`
# splits the text marks by site, mirroring the bars' color grouping, and the
# label is the summed yield formatted to one decimal place.
text <-
  alt$Chart(source)$
  mark_text(dx = -15, dy = 3, color = "white")$
  encode(
    x = alt$X("sum(yield):Q", stack = "zero"),
    y = "variety:N",
    detail = "site:N",
    text = alt$Text("sum(yield):Q", format = ".1f")
  )

# Layer the labels on top of the bars.
chart <- bars + text
chart
glimpse(vega_data$barley())
#> Rows: 120
#> Columns: 4
#> $ yield <dbl> 27.00000, 48.86667, 27.43334, 39.93333, 32.96667, 28.96667, 43…
#> $ variety <chr> "Manchuria", "Manchuria", "Manchuria", "Manchuria", "Manchuria…
#> $ year <dbl> 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 1931, 19…
#> $ site <chr> "University Farm", "Waseca", "Morris", "Crookston", "Grand Rap…