{ "cells": [ { "cell_type": "markdown", "id": "aad6179d-be3f-4e61-a80b-3428f17e12b3", "metadata": {}, "source": [ "## Example: Highlighting Subset of Data\n", "\n", "Let's say you have a lot of something: countries or counties maybe, represented by lines or points, and you want to show a lot of them for context, but there are 1-5 that are very important.\n", "\n", "This is an example of how you might desaturate all but the key items, and ensure that the data that you care about stands out.\n", "\n", "This can turn a pile of spaghetti into a clear indication of a point." ] }, { "cell_type": "code", "execution_count": 3, "id": "07eb67c8-41d0-4c84-892d-822d90504cad", "metadata": {}, "outputs": [], "source": [ "import altair as alt\n", "import polars as pl\n", "import random" ] }, { "cell_type": "code", "execution_count": 44, "id": "eac46c04-ab06-44ea-82af-6471c71f486b", "metadata": {}, "outputs": [], "source": [ "countries_initial = {\n", " \"AA\": 20, \"AB\": 25, \"AC\": 30, \"AD\": 27, \"AE\": 21,\n", " \"BA\": 20, \"BB\": 25, \"BC\": 30, \"BD\": 27, \"BE\": 21,\n", " \"CA\": 20, \"CB\": 25, \"CC\": 30, \"CD\": 27, \"CE\": 21,\n", " \"DA\": 20, \"DB\": 25, \"DC\": 30, \"DD\": 27, \"CE\": 21,\n", " \"EA\": 20, \"EB\": 25, \"EC\": 30, \"ED\": 27, \"CE\": 21,\n", " }\n", "def generate_fake_data():\n", " countries = {\n", " \"AA\": 20, \"AB\": 25, \"AC\": 30, \"AD\": 27, \"AE\": 21,\n", " \"BA\": 20, \"BB\": 25, \"BC\": 30, \"BD\": 27, \"BE\": 21,\n", " \"CA\": 20, \"CB\": 25, \"CC\": 30, \"CD\": 27, \"CE\": 21,\n", " \"DA\": 20, \"DB\": 25, \"DC\": 30, \"DD\": 27, \"CE\": 21,\n", " \"EA\": 20, \"EB\": 25, \"EC\": 30, \"ED\": 27, \"CE\": 21,\n", " }\n", " # 100 records in our time series\n", " for t in range(100):\n", " # update all 25\n", " for country in countries:\n", " countries[country] += random.random() - 0.3\n", " yield {\"country\": country, \"val\": countries[country], \"t\": t} \n", "\n", "# just need some fake time-series like data\n", "# scatterplots can also benefit a lot from these ideas\n", "data = pl.DataFrame(generate_fake_data())" ] }, { "cell_type": "code", "execution_count": null, "id": "fd90289f-46bb-4102-bb02-bf5732dee01c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 48, "id": "17e6f86a-bd9f-44a7-af5f-917237c3393b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Spaghetti\n", "alt.Chart(data).mark_line().encode(\n", " color=\"country\", y=\"val\", x=\"t\"\n", ")" ] }, { "cell_type": "code", "execution_count": 46, "id": "c5fc9ebc-0286-4d87-aeaa-c589127871bb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Example: highlighting a subset of lines\n", "\n", "# If you're seeing overlaps, reorder data so your highlights are last\n", "highlights = [\"EC\", \"ED\"] \n", "\n", "alt.Chart(data).mark_line().encode( \n", " y=\"val\",\n", " x=\"t\",\n", " color=alt.condition(\n", " alt.FieldOneOfPredicate(field='country', oneOf=highlights),\n", " alt.value('orange'), # Color for the highlighted countries\n", " alt.value('lightgrey') # Color for the rest\n", " ),\n", " # this is needed to preserve grouping when using condition\n", " detail=\"country\",\n", ")" ] }, { "cell_type": "code", "execution_count": 49, "id": "31ba7622-d1da-4f52-bf44-26e3e1ced0d5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# alternately, using a mapping\n", "color_mapping = {\"EC\": \"blue\", \"ED\": \"red\", \"default\": \"#ddd\"}\n", "alt.Chart(data).mark_line().encode( \n", " y=\"val\",\n", " x=\"t\",\n", " color=alt.Color(\"country:N\", scale=alt.Scale(\n", " domain=list(countries_initial.keys()),\n", " range=[color_mapping.get(c, color_mapping['default']) for c in countries_initial] \n", " ), legend=None),\n", " #detail=\"country\",\n", ") " ] }, { "cell_type": "code", "execution_count": null, "id": "b547cf94-0d51-4f82-bb04-ccb7984b002c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.15" } }, "nbformat": 4, "nbformat_minor": 5 }