working prototype; quite fast in prod

This commit is contained in:
jpt 2025-06-29 16:16:57 -05:00
parent 24938ed408
commit 3c80b44910
5 changed files with 2383 additions and 27 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
shp/

2261
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

15
Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
[package]
name = "polipoly-rs"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.98"
approx = "0.5.1"
geo = "0.30.0"
geo-types = { version = "0.7.16", features = ["approx", "rstar_0_12"] }
glob = "0.3.2"
reqwest = { version = "0.12.20", features = ["blocking"] }
rstar = "0.12.2"
shapefile = { version = "0.7.0", features = ["geo-types"] }
zip = "4.2.0"

42
src/bin/download.rs Normal file
View file

@ -0,0 +1,42 @@
use std::fs::{create_dir_all, File};
use std::io::{copy, Cursor};
use std::path::Path;
use zip::ZipArchive;
const DIRNAME: &str = "shp/cd/";
fn main() -> Result<(), Box<dyn std::error::Error>> {
create_dir_all(DIRNAME)?;
let client = reqwest::blocking::Client::new();
for i in 1..=56 {
// skipped FIPS
if i == 3 || i == 7 || i == 14 || i == 43 || i == 52 {
continue;
}
let url = format!(
"https://www2.census.gov/geo/tiger/TIGER2024/CD/tl_2024_{:02}_cd119.zip",
i
);
let path = format!("{}/tl_2024_{:02}_cd119.shp", DIRNAME, i);
if Path::new(&path).exists() {
println! {"{} exists, skipping", path}
continue;
}
println!("fetching {}", url);
let resp = client.get(&url).send()?;
println!("{:?}", resp.status());
if resp.status() != 200 {
println!("{:?}, skipping", resp.status());
continue;
}
let bytes = resp.bytes()?;
let mut archive = ZipArchive::new(Cursor::new(bytes))?;
for j in 0..archive.len() {
let mut file = archive.by_index(j)?;
let mut outfile = File::create(format!("{}/{}", DIRNAME, file.name()))?;
copy(&mut file, &mut outfile)?;
}
}
Ok(())
}

View file

@ -1,45 +1,63 @@
use geo::{BoundingRect, Contains, Distance, Euclidean};
use anyhow::Result;
use geo::{BoundingRect, Contains};
use geo::{LineString, Point, Polygon};
use glob::glob;
use rstar::{PointDistance, RTree, RTreeObject, AABB};
use shapefile::Reader;
use shapefile::{dbase::Record, Reader};
use std::path::PathBuf;
#[derive(Debug)]
struct PolygonWrapper(Polygon<f64>);
pub enum PoliticalGeometryType {
// SLDU,
// SLDL,
CD,
}
impl RTreeObject for PolygonWrapper {
#[derive(Debug)]
pub struct PoliticalGeometry {
pub poly: Polygon<f64>,
pub level: PoliticalGeometryType,
pub geoid: String,
}
impl PoliticalGeometry {
pub fn new(poly: Polygon<f64>, record: &Record) -> Self {
Self {
poly,
level: PoliticalGeometryType::CD,
geoid: record.get("GEOID").unwrap().to_string(),
}
}
}
impl RTreeObject for PoliticalGeometry {
type Envelope = AABB<[f64; 2]>;
fn envelope(&self) -> Self::Envelope {
let bbox = self.0.bounding_rect().unwrap();
let bbox = self.poly.bounding_rect().unwrap();
AABB::from_corners([bbox.min().x, bbox.min().y], [bbox.max().x, bbox.max().y])
}
}
impl PointDistance for PolygonWrapper {
impl PointDistance for PoliticalGeometry {
fn contains_point(&self, point: &[f64; 2]) -> bool {
self.0.contains(&Point::new(point[0], point[1]))
self.poly.contains(&Point::new(point[0], point[1]))
}
fn distance_2(&self, point: &[f64; 2]) -> f64 {
3.0
// Euclidean
// .distance(
// self.0,
// &Point::new(point[0], point[1])
// )
// .powi(2)
2.0
//self.poly.hausdorff_distance(coord! {point})
}
}
fn load_polygons(path: &str) -> Result<Vec<Polygon<f64>>, Box<dyn std::error::Error>> {
fn load_polygons(path: &PathBuf) -> Result<Vec<PoliticalGeometry>> {
let mut reader = Reader::from_path(path)?;
let mut geometries = Vec::new();
for shape_record in reader.iter_shapes_and_records() {
let (shape, _) = shape_record?;
let (shape, record) = shape_record?;
if let shapefile::Shape::Polygon(polygon) = shape {
let mut polygons = Vec::new();
let mut current_exterior: Option<LineString<f64>> = None;
let mut current_holes = Vec::new();
@ -49,7 +67,10 @@ fn load_polygons(path: &str) -> Result<Vec<Polygon<f64>>, Box<dyn std::error::Er
if is_clockwise(&line_string) {
if let Some(exterior) = current_exterior.take() {
polygons.push(Polygon::new(exterior, current_holes.drain(..).collect()));
geometries.push(PoliticalGeometry::new(
Polygon::new(exterior, current_holes.drain(..).collect()),
&record,
))
}
current_exterior = Some(line_string);
} else {
@ -58,10 +79,11 @@ fn load_polygons(path: &str) -> Result<Vec<Polygon<f64>>, Box<dyn std::error::Er
}
if let Some(exterior) = current_exterior {
polygons.push(Polygon::new(exterior, current_holes));
geometries.push(PoliticalGeometry::new(
Polygon::new(exterior, current_holes),
&record,
));
}
geometries.extend(polygons);
}
}
@ -79,18 +101,32 @@ fn is_clockwise(ring: &LineString<f64>) -> bool {
sum > 0.0
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let polygons = load_polygons("il_cd119/tl_2024_17_cd119.shp")?;
println!("Loaded {} polygons", polygons.len());
fn main() -> Result<()> {
let mut polygons = Vec::new();
let wrapped: Vec<_> = polygons.into_iter().map(PolygonWrapper).collect();
let rtree = RTree::bulk_load(wrapped);
for entry in glob("shp/cd/*.shp").expect("Failed to read glob pattern") {
match entry {
Ok(path) => {
polygons.extend(load_polygons(&path)?);
println!("Loaded {} polygons", polygons.len());
}
Err(e) => println!("{:?}", e),
}
}
let rtree = RTree::bulk_load(polygons);
let query_point = Point::new(-87.630, 41.878);
let results = rtree.locate_all_at_point(&[query_point.x(), query_point.y()]);
for poly in results {
println!("Polygon contains query point: {:?}", poly.geoid);
}
let query_point = Point::new(-97.630, 31.878);
let results = rtree.locate_all_at_point(&[query_point.x(), query_point.y()]);
for poly in results {
println!("Polygon contains query point: {:?}", poly.0.exterior());
println!("Polygon contains query point: {:?}", poly.geoid);
}
Ok(())