switch to Jupyter, marimo was having strange issues
This commit is contained in:
parent
d795795b89
commit
cd42f56eae
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
*.pyc
|
||||
.ipynb_checkpoints
|
||||
|
@ -233,7 +233,7 @@ Final project will have a place where D3 will be helpful, but other options will
|
||||
After introductory lecture, some examples will continue to be in D3, but you will not need to understand their inner workings.
|
||||
|
||||
<!--
|
||||
It has however, become a "library's library" in some ways. Most developers interact with d3 through a higher-level interface.
|
||||
It has, however, become a "library's library" in some ways. Most developers interact with D3 through a higher-level interface.
|
||||
|
||||
We will be learning Altair, which generates Vega JSON, which in turn is drawn using D3.
|
||||
|
||||
@ -260,9 +260,10 @@ So, if you are here to learn visualization, I think that it is fair that you can
|
||||
## Course Staff
|
||||
|
||||
- James Turk
|
||||
- TODO
|
||||
- Krisha Mehta
|
||||
- Sam Huang
|
||||
|
||||
**All official information will be on the course site and/or Ed as appropriate.**
|
||||
**All official information will be on the course site and/or Ed.**
|
||||
|
||||
|
||||
---
|
||||
|
@ -1,226 +0,0 @@
|
||||
import marimo
|
||||
|
||||
__generated_with = "0.8.20"
|
||||
app = marimo.App(width="medium")
|
||||
|
||||
|
||||
@app.cell
|
||||
def __():
|
||||
import marimo as mo
|
||||
import altair as alt
|
||||
import polars as pl
|
||||
from pathlib import Path
|
||||
return Path, alt, mo, pl
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(mo):
|
||||
mo.md(
|
||||
"""
|
||||
## Tidy Data
|
||||
|
||||
Altair expects our data to be [tidy](http://vita.had.co.nz/papers/tidy-data.html).
|
||||
|
||||
- Each variable is a column.
|
||||
- Each observation is a row.
|
||||
- Each type of observational unit is a table.
|
||||
|
||||
You can use `pandas` or `polars` DataFrames.
|
||||
"""
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(Path, __file__, pl):
|
||||
# first let's load and look at a dataframe with three columns
|
||||
# there is an observation for each state legislature, showing how many bills they introduced in a given year
|
||||
df = pl.read_csv(Path(__file__).parent / "midwest_bills.csv")
|
||||
# (having a dataframe or chart as the last line in a notebook cell will automatically display it)
|
||||
df
|
||||
return (df,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, df):
|
||||
# Let's make our own charts of this data. First, we bind the data to a new chart object
|
||||
chart = alt.Chart(df)
|
||||
return (chart,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(chart):
|
||||
# we add a geometry, we'll start with a point (at this point *something* can be displayed, but it won't be useful)
|
||||
chart.mark_point()
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(chart):
|
||||
# We use encodings to map our data to particular dimensions.
|
||||
# Altair will then make appropriate choices based upon the type of data.
|
||||
chart.mark_point().encode(
|
||||
y="state",
|
||||
x="num_bills"
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __():
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, chart):
|
||||
# what happens when we try to add color?
|
||||
chart.mark_point().encode(
|
||||
alt.Y("state"),
|
||||
alt.X("num_bills"),
|
||||
alt.Color("session_start_year"),
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, chart):
|
||||
# the prior example treated year as Quantitative because it was numeric
|
||||
# instead we would treat it as Nominal for this data
|
||||
# we can use :Q, :O, :N, :T to mark the type that should be used
|
||||
by_year = chart.mark_point().encode(
|
||||
alt.Y("state:N"),
|
||||
alt.X("num_bills:Q"),
|
||||
alt.Color("session_start_year:N"),
|
||||
)
|
||||
# we're saving this one for later
|
||||
by_year
|
||||
return (by_year,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, chart):
|
||||
# Here we make a different chart from the same base data
|
||||
# by re-using our `chart` variable.
|
||||
#
|
||||
# We choose a different shape (parameters that don't need to vary can be passed into the mark_* functions)
|
||||
# We also use an aggregate function average(num_bills)
|
||||
avgs = chart.mark_point(shape="wedge", color="black").encode(
|
||||
alt.Y("state"),
|
||||
alt.X("average(num_bills)"),
|
||||
)
|
||||
avgs
|
||||
return (avgs,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(avgs, by_year):
|
||||
# two charts with compatible data can be layered with +
|
||||
by_year + avgs
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, by_year, chart):
|
||||
# perhaps we don't want to use mark_point anymore, maybe a bar?
|
||||
bar_avgs = chart.mark_bar(color="#ccc").encode(
|
||||
alt.Y("state"),
|
||||
alt.X("average(num_bills)"),
|
||||
)
|
||||
bar_avgs + by_year
|
||||
return (bar_avgs,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, chart):
|
||||
# We can customize titles and other details by using `.title` and `.properties`
|
||||
# the latter sets chart-wide properties.
|
||||
final = chart.mark_point(shape="diamond").encode(
|
||||
alt.Y("state:N"),
|
||||
alt.X("num_bills:Q"),
|
||||
alt.Color("session_start_year:N").title("Session Year"),
|
||||
) + chart.mark_bar(color="#70905050").encode(
|
||||
alt.Y("state"),
|
||||
alt.X("average(num_bills)").title("Number of Bills Introduced"),
|
||||
)
|
||||
final.properties(
|
||||
title='Midwest Bills by State',
|
||||
background='#f5f5dc'
|
||||
)
|
||||
return (final,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, chart):
|
||||
# Let's say we instead want to see if there are trends by year.
|
||||
# create a new chart object with year on the X-axis, and bills on the Y-axis
|
||||
# Also, make the chart print/colorblind friendly by encoding state in multiple ways.
|
||||
new_chart = chart.mark_point().encode(
|
||||
alt.Y("num_bills"),
|
||||
alt.X("session_start_year:N"),
|
||||
alt.Color("state"),
|
||||
alt.Shape("state"),
|
||||
)
|
||||
new_chart.properties(
|
||||
title='Midwest Bills by Year',
|
||||
background='#f5f5dc'
|
||||
)
|
||||
return (new_chart,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(mo):
|
||||
mo.md(
|
||||
"""
|
||||
### Recommended Reading
|
||||
|
||||
Altair Tutorial
|
||||
|
||||
- Specifying Data (you can stop when you hit 'Generated Data')
|
||||
- Encodings
|
||||
- Encodings -> Channels (skim Channel Options)
|
||||
- Marks (skim a few of the mark guides, including Bar & Point)
|
||||
- Data Transformations (skim a few, including Regression)
|
||||
- Layered and Multi-View Charts
|
||||
- Customizing Visualizations
|
||||
|
||||
Once you've read the above you have the core ideas of Altair.
|
||||
The remaining sections are useful as reference, and as you use Altair you will find your way to them as you ask yourself questions like "how do I work with geospatial data" or "how can I combine these axes"?
|
||||
|
||||
The other common thing you will use the documentation for is "what arguments can I pass to this?"
|
||||
|
||||
For that, use the [API Reference](https://altair-viz.github.io/user_guide/api.html) and find the class you're working with.
|
||||
|
||||
Example:
|
||||
|
||||
- Let's say we want to adjust the color scheme, start with <https://altair-viz.github.io/user_guide/generated/channels/altair.Color.html>
|
||||
- Note that it can take a scale, and click to <https://altair-viz.github.io/user_guide/generated/core/altair.Scale.html#altair.Scale>
|
||||
"""
|
||||
)
|
||||
return
|
||||
|
||||
|
||||
@app.cell
|
||||
def __(alt, chart):
|
||||
color_scheme = alt.Scale(scheme="set2")
|
||||
chart.mark_line().encode(
|
||||
alt.Y("num_bills"),
|
||||
alt.X("session_start_year:N"),
|
||||
alt.Color("state", scale=color_scheme),
|
||||
) + chart.mark_point().encode(
|
||||
alt.Y("num_bills").title("Bills Introduced"),
|
||||
alt.X("session_start_year:N").title("Session Year"),
|
||||
alt.Color("state", scale=color_scheme),
|
||||
alt.Shape("state"),
|
||||
).properties(
|
||||
title='Midwest Bills by Session',
|
||||
)
|
||||
return (color_scheme,)
|
||||
|
||||
|
||||
@app.cell
|
||||
def __():
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run()
|
1099
01.gog-altair/altair.ipynb
Normal file
1099
01.gog-altair/altair.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -135,10 +135,80 @@ Possible Exceptions?
|
||||
|
||||
---
|
||||
|
||||
## Altair's Grammar
|
||||
## Altair
|
||||
|
||||
Altair condenses several of the different pieces of the grammar to _"encoding channels"_.
|
||||
Altair is a Python visualization library that allows us to work from a grammar of graphics perspective.
|
||||
|
||||
We've seen X, Y, and color, let's take a look at some examples of other encoding channels.
|
||||
It also is very flexible in output formats, which will be useful if you want to modify your graphics or make them interactive.
|
||||
|
||||
Altair is built on top of **Vega-Lite**.
|
||||
|
||||
Vega-Lite is a system that represents graphics in a JSON schema, and a set of tools that convert these JSON representations to images or interactive graphics.
|
||||
|
||||
---
|
||||
|
||||
## Vega-Lite Example
|
||||
|
||||
```json
|
||||
{
|
||||
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
|
||||
"description": "A scatterplot showing horsepower and miles per gallons for various cars.",
|
||||
"data": {"url": "data/cars.json"},
|
||||
"mark": "point",
|
||||
"encoding": {
|
||||
"x": {"field": "Horsepower", "type": "quantitative"},
|
||||
"y": {"field": "Miles_per_Gallon", "type": "quantitative"}
|
||||
}
|
||||
}
|
||||
```
|
||||
Vega condenses several of the different pieces of the grammar to _"encoding channels"_.
|
||||
|
||||
---
|
||||
|
||||
![](vega.png)
|
||||
|
||||
---
|
||||
|
||||
## Altair
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import altair as alt
|
||||
|
||||
df = pd.read_csv("cars.csv")
|
||||
alt.Chart(df).encode(
|
||||
x="Horsepower:Q", # shorthand for simple features
|
||||
alt.Y("Miles_per_Gallon:Q").title("Miles Per Gallon"), # longer form w/ customization
|
||||
)
|
||||
```
|
||||
|
||||
Altair is a Pythonic wrapper to create Vega-Lite JSON. If you use it in a notebook, the resulting graphs will render inline.
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Altair Notebook
|
||||
|
||||
<!-- at this point, see the Jupyter notebook (altair.ipynb) in this directory -->
|
||||
|
||||
---
|
||||
|
||||
## Learning Altair
|
||||
|
||||
To master a library like Altair, you'll go through the following phases:
|
||||
|
||||
1. Learn the key concepts.
|
||||
- Goal: Understand how the authors of Altair think about visualization.
|
||||
- Achieved by: Reading user guide & watching tutorials.
|
||||
2. Internalize concepts & API.
|
||||
- Goal: Be able to do common tasks without referring to documentation. (You'll always lean on documentation for specifics.)
|
||||
- Achieved by: Working on assignments & experimentation. Reading API reference as needed.
|
||||
3. Mastery (not this quarter!)
|
||||
- Goal: Be able to manipulate library to achieve most tasks. Understand limits.
|
||||
- Achieved by: Regular use over months/years. Reading API reference and/or source code.
|
||||
|
||||
---
|
||||
|
||||
## Altair Assignment
|
||||
|
||||
<!-- walk through of assignment setup & how it'll be graded -->
|
||||
|
BIN
01.gog-altair/vega.png
Normal file
BIN
01.gog-altair/vega.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 63 KiB |
17
README.md
17
README.md
@ -14,19 +14,20 @@ Inside each directory, you're likely to find:
|
||||
|
||||
- `slides.md` - My slides in raw markdown.
|
||||
- `slides.html` - My slides converted to a presentation. (using [`marp`](https://marpit.marp.app)) You can open this in your web browser (Type `open slides.html` from the command line.)
|
||||
- `*-notebook.py` - These are marimo notebooks (see below).
|
||||
- `*.ipynb` - These are Jupyter notebooks (see below).
|
||||
|
||||
Not every week will have slides & a notebook, but one or the other should generally exist.
|
||||
Not every week will have both slides & a notebook.
|
||||
|
||||
Other files, such as images & data will be kept in the appropriate folder.
|
||||
|
||||
### Marimo Notebooks
|
||||
### Jupyter Notebooks
|
||||
|
||||
Marimo notebooks are similar to Jupyter notebooks, but work much better with Git and have some other nice features I appreciate.
|
||||
You have a few options for working with `.ipynb` notebooks:
|
||||
|
||||
If you have ever looked at a Jupyter notebook file (.ipynb) in an editor, you know they are large JSON files, and once they are checked into Git changes become very difficult to track.
|
||||
- `uv run jupyter lab` - the newer UI, will start a server and open it in your browser
|
||||
- `uv run jupyter notebook` - the older UI, perfectly functional still
|
||||
- VS Code will open these in its own editor
|
||||
|
||||
To interact with a notebook, run:
|
||||
|
||||
`uv run marimo edit <notebook-file>`
|
||||
If you run one of the `uv run` options, you'll need to navigate to the .ipynb file in the window that opens in your browser.
|
||||
|
||||
**Note:** To stop a server, press `Ctrl-C` and then 'y' to the prompt.
|
||||
|
Loading…
Reference in New Issue
Block a user