sorting_and_grouping_bars.ipynb

This commit is contained in:
James Turk 2024-10-27 18:57:37 -05:00
parent f6b4870339
commit 49068a469f
2 changed files with 399 additions and 19 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,378 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "228d57af-2bd1-458d-860b-e17b2fe7d445",
"metadata": {},
"source": [
"## Example: Bar Ordering\n",
"\n",
"Don't neglect the role of ordering when working with categorical variables. These provide an additional opportunity to emphasize or highlight.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "279c3be4-53f3-4db5-a4e3-8f55019529af",
"metadata": {},
"outputs": [],
"source": [
"import altair as alt\n",
"import polars as pl"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b071e9be-1fad-4852-87b8-38157be6de66",
"metadata": {},
"outputs": [],
"source": [
"countries = {\n",
" \"AA\": 13, \"AB\": 45, \"AC\": 30, \"AD\": 14, \"AE\": 21,\n",
" \"BA\": 17, \"BB\": 25, \"BC\": 29, \"BD\": 16, \"BE\": 21,\n",
" \"CA\": 20, \"CB\": 22, \"CC\": 28, \"CD\": 18, \"CE\": 24,\n",
" \"DA\": 40, \"DB\": 33, \"DC\": 30, \"DD\": 13, \"CE\": 28,\n",
" \"EA\": 16, \"EB\": 45, \"EC\": 27, \"ED\": 80, \"CE\": 33,\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f9d54624-a50e-414e-a29c-c8371fe5b98f",
"metadata": {},
"outputs": [],
"source": [
"df = pl.DataFrame(countries).unpivot(variable_name=\"country\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "334fe526-2536-478c-9457-aad9e166eb1d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<style>\n",
" #altair-viz-6128fc8afb7940f1ad0eba49c5b71e04.vega-embed {\n",
" width: 100%;\n",
" display: flex;\n",
" }\n",
"\n",
" #altair-viz-6128fc8afb7940f1ad0eba49c5b71e04.vega-embed details,\n",
" #altair-viz-6128fc8afb7940f1ad0eba49c5b71e04.vega-embed details summary {\n",
" position: relative;\n",
" }\n",
"</style>\n",
"<div id=\"altair-viz-6128fc8afb7940f1ad0eba49c5b71e04\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-6128fc8afb7940f1ad0eba49c5b71e04\") {\n",
" outputDiv = document.getElementById(\"altair-viz-6128fc8afb7940f1ad0eba49c5b71e04\");\n",
" }\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n",
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
" };\n",
"\n",
" function maybeLoadScript(lib, version) {\n",
" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
" return (VEGA_DEBUG[key] == version) ?\n",
" Promise.resolve(paths[lib]) :\n",
" new Promise(function(resolve, reject) {\n",
" var s = document.createElement('script');\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" s.async = true;\n",
" s.onload = () => {\n",
" VEGA_DEBUG[key] = version;\n",
" return resolve(paths[lib]);\n",
" };\n",
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
" s.src = paths[lib];\n",
" });\n",
" }\n",
"\n",
" function showError(err) {\n",
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
" throw err;\n",
" }\n",
"\n",
" function displayChart(vegaEmbed) {\n",
" vegaEmbed(outputDiv, spec, embedOpt)\n",
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
" }\n",
"\n",
" if(typeof define === \"function\" && define.amd) {\n",
" requirejs.config({paths});\n",
" require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
" } else {\n",
" maybeLoadScript(\"vega\", \"5\")\n",
" .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
" .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n",
" }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-9bd5b15557267b851df082199b597c82\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"country\", \"type\": \"nominal\"}, \"y\": {\"field\": \"value\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-9bd5b15557267b851df082199b597c82\": [{\"country\": \"AA\", \"value\": 13}, {\"country\": \"AB\", \"value\": 45}, {\"country\": \"AC\", \"value\": 30}, {\"country\": \"AD\", \"value\": 14}, {\"country\": \"AE\", \"value\": 21}, {\"country\": \"BA\", \"value\": 17}, {\"country\": \"BB\", \"value\": 25}, {\"country\": \"BC\", \"value\": 29}, {\"country\": \"BD\", \"value\": 16}, {\"country\": \"BE\", \"value\": 21}, {\"country\": \"CA\", \"value\": 20}, {\"country\": \"CB\", \"value\": 22}, {\"country\": \"CC\", \"value\": 28}, {\"country\": \"CD\", \"value\": 18}, {\"country\": \"CE\", \"value\": 33}, {\"country\": \"DA\", \"value\": 40}, {\"country\": \"DB\", \"value\": 33}, {\"country\": \"DC\", \"value\": 30}, {\"country\": \"DD\", \"value\": 13}, {\"country\": \"EA\", \"value\": 16}, {\"country\": \"EB\", \"value\": 45}, {\"country\": \"EC\", \"value\": 27}, {\"country\": \"ED\", \"value\": 80}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>"
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alt.Chart(df).mark_bar().encode(x=\"country\", y=\"value\")"
]
},
{
"cell_type": "markdown",
"id": "e163a712-3674-48bf-b0d3-f1d279c5ba2b",
"metadata": {},
"source": [
"This alphabetical ordering may serve you well if you want people to quickly be able to find their country. But you can consider other orderings & groupings that might make your point better."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "7883c59e-0bb4-4713-8772-aca9441e6800",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<style>\n",
" #altair-viz-1d055bbfea574170835834169a6b5fbf.vega-embed {\n",
" width: 100%;\n",
" display: flex;\n",
" }\n",
"\n",
" #altair-viz-1d055bbfea574170835834169a6b5fbf.vega-embed details,\n",
" #altair-viz-1d055bbfea574170835834169a6b5fbf.vega-embed details summary {\n",
" position: relative;\n",
" }\n",
"</style>\n",
"<div id=\"altair-viz-1d055bbfea574170835834169a6b5fbf\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-1d055bbfea574170835834169a6b5fbf\") {\n",
" outputDiv = document.getElementById(\"altair-viz-1d055bbfea574170835834169a6b5fbf\");\n",
" }\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n",
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
" };\n",
"\n",
" function maybeLoadScript(lib, version) {\n",
" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
" return (VEGA_DEBUG[key] == version) ?\n",
" Promise.resolve(paths[lib]) :\n",
" new Promise(function(resolve, reject) {\n",
" var s = document.createElement('script');\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" s.async = true;\n",
" s.onload = () => {\n",
" VEGA_DEBUG[key] = version;\n",
" return resolve(paths[lib]);\n",
" };\n",
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
" s.src = paths[lib];\n",
" });\n",
" }\n",
"\n",
" function showError(err) {\n",
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
" throw err;\n",
" }\n",
"\n",
" function displayChart(vegaEmbed) {\n",
" vegaEmbed(outputDiv, spec, embedOpt)\n",
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
" }\n",
"\n",
" if(typeof define === \"function\" && define.amd) {\n",
" requirejs.config({paths});\n",
" require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
" } else {\n",
" maybeLoadScript(\"vega\", \"5\")\n",
" .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
" .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n",
" }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-9bd5b15557267b851df082199b597c82\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"x\": {\"field\": \"country\", \"sort\": {\"field\": \"value\", \"order\": \"ascending\"}, \"type\": \"nominal\"}, \"y\": {\"field\": \"value\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-9bd5b15557267b851df082199b597c82\": [{\"country\": \"AA\", \"value\": 13}, {\"country\": \"AB\", \"value\": 45}, {\"country\": \"AC\", \"value\": 30}, {\"country\": \"AD\", \"value\": 14}, {\"country\": \"AE\", \"value\": 21}, {\"country\": \"BA\", \"value\": 17}, {\"country\": \"BB\", \"value\": 25}, {\"country\": \"BC\", \"value\": 29}, {\"country\": \"BD\", \"value\": 16}, {\"country\": \"BE\", \"value\": 21}, {\"country\": \"CA\", \"value\": 20}, {\"country\": \"CB\", \"value\": 22}, {\"country\": \"CC\", \"value\": 28}, {\"country\": \"CD\", \"value\": 18}, {\"country\": \"CE\", \"value\": 33}, {\"country\": \"DA\", \"value\": 40}, {\"country\": \"DB\", \"value\": 33}, {\"country\": \"DC\", \"value\": 30}, {\"country\": \"DD\", \"value\": 13}, {\"country\": \"EA\", \"value\": 16}, {\"country\": \"EB\", \"value\": 45}, {\"country\": \"EC\", \"value\": 27}, {\"country\": \"ED\", \"value\": 80}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>"
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ordering by value to emphasize outliers\n",
"alt.Chart(df).mark_bar().encode(\n",
" alt.X(\"country\", sort=alt.EncodingSortField(field=\"value\", order=\"ascending\")\n",
" ),\n",
" y=\"value\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "1b965691-7e32-45d7-b1db-833e353c230d",
"metadata": {},
"outputs": [],
"source": [
"# adding a grouping column and using it for coloring/grouping\n",
"# using first letter of country for a pretend grouping -- \n",
"# in reality you could group by region/characteristics\n",
"df_grouped = df.with_columns(grouping=pl.col(\"country\").str.slice(0, 1))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c7735212-e6aa-405d-9b76-87a92565e88b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"<style>\n",
" #altair-viz-21fe0d7284524faaa93049adb64dd794.vega-embed {\n",
" width: 100%;\n",
" display: flex;\n",
" }\n",
"\n",
" #altair-viz-21fe0d7284524faaa93049adb64dd794.vega-embed details,\n",
" #altair-viz-21fe0d7284524faaa93049adb64dd794.vega-embed details summary {\n",
" position: relative;\n",
" }\n",
"</style>\n",
"<div id=\"altair-viz-21fe0d7284524faaa93049adb64dd794\"></div>\n",
"<script type=\"text/javascript\">\n",
" var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
" (function(spec, embedOpt){\n",
" let outputDiv = document.currentScript.previousElementSibling;\n",
" if (outputDiv.id !== \"altair-viz-21fe0d7284524faaa93049adb64dd794\") {\n",
" outputDiv = document.getElementById(\"altair-viz-21fe0d7284524faaa93049adb64dd794\");\n",
" }\n",
" const paths = {\n",
" \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
" \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
" \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n",
" \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
" };\n",
"\n",
" function maybeLoadScript(lib, version) {\n",
" var key = `${lib.replace(\"-\", \"\")}_version`;\n",
" return (VEGA_DEBUG[key] == version) ?\n",
" Promise.resolve(paths[lib]) :\n",
" new Promise(function(resolve, reject) {\n",
" var s = document.createElement('script');\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" s.async = true;\n",
" s.onload = () => {\n",
" VEGA_DEBUG[key] = version;\n",
" return resolve(paths[lib]);\n",
" };\n",
" s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
" s.src = paths[lib];\n",
" });\n",
" }\n",
"\n",
" function showError(err) {\n",
" outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
" throw err;\n",
" }\n",
"\n",
" function displayChart(vegaEmbed) {\n",
" vegaEmbed(outputDiv, spec, embedOpt)\n",
" .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
" }\n",
"\n",
" if(typeof define === \"function\" && define.amd) {\n",
" requirejs.config({paths});\n",
" require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
" } else {\n",
" maybeLoadScript(\"vega\", \"5\")\n",
" .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
" .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
" .catch(showError)\n",
" .then(() => displayChart(vegaEmbed));\n",
" }\n",
" })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-e05227b311c61468e366e00d8c0d9aa3\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"color\": {\"field\": \"grouping\", \"type\": \"nominal\"}, \"x\": {\"field\": \"country\", \"sort\": {\"field\": \"group\", \"order\": \"ascending\"}, \"type\": \"nominal\"}, \"y\": {\"field\": \"value\", \"type\": \"quantitative\"}}, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-e05227b311c61468e366e00d8c0d9aa3\": [{\"country\": \"AA\", \"value\": 13, \"grouping\": \"A\"}, {\"country\": \"AB\", \"value\": 45, \"grouping\": \"A\"}, {\"country\": \"AC\", \"value\": 30, \"grouping\": \"A\"}, {\"country\": \"AD\", \"value\": 14, \"grouping\": \"A\"}, {\"country\": \"AE\", \"value\": 21, \"grouping\": \"A\"}, {\"country\": \"BA\", \"value\": 17, \"grouping\": \"B\"}, {\"country\": \"BB\", \"value\": 25, \"grouping\": \"B\"}, {\"country\": \"BC\", \"value\": 29, \"grouping\": \"B\"}, {\"country\": \"BD\", \"value\": 16, \"grouping\": \"B\"}, {\"country\": \"BE\", \"value\": 21, \"grouping\": \"B\"}, {\"country\": \"CA\", \"value\": 20, \"grouping\": \"C\"}, {\"country\": \"CB\", \"value\": 22, \"grouping\": \"C\"}, {\"country\": \"CC\", \"value\": 28, \"grouping\": \"C\"}, {\"country\": \"CD\", \"value\": 18, \"grouping\": \"C\"}, {\"country\": \"CE\", \"value\": 33, \"grouping\": \"C\"}, {\"country\": \"DA\", \"value\": 40, \"grouping\": \"D\"}, {\"country\": \"DB\", \"value\": 33, \"grouping\": \"D\"}, {\"country\": \"DC\", \"value\": 30, \"grouping\": \"D\"}, {\"country\": \"DD\", \"value\": 13, \"grouping\": \"D\"}, {\"country\": \"EA\", \"value\": 16, \"grouping\": \"E\"}, {\"country\": \"EB\", \"value\": 45, \"grouping\": \"E\"}, {\"country\": \"EC\", \"value\": 27, \"grouping\": \"E\"}, {\"country\": \"ED\", \"value\": 80, \"grouping\": \"E\"}]}}, {\"mode\": \"vega-lite\"});\n",
"</script>"
],
"text/plain": [
"alt.Chart(...)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alt.Chart(df_grouped).mark_bar().encode(\n",
" alt.X(\"country\", sort=alt.EncodingSortField(field=\"group\", order=\"ascending\")),\n",
" y=\"value\",\n",
" color=\"grouping:N\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a721eddb-a9b1-457a-9648-e9606b62f044",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}