create master people db

This commit is contained in:
James Turk 2024-09-16 20:18:41 -05:00
parent ff5654c931
commit a628562f6e
2 changed files with 29 additions and 9 deletions

View File

@ -19,11 +19,16 @@ def load_people_yaml(dir_path: pathlib.Path):
# ensure all tables exist # ensure all tables exist
db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice]) db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice])
# output stats
unused = set()
created = 0
files = list(dir_path.glob("*.yml")) files = list(dir_path.glob("*.yml"))
print(f"preparing to load {len(files)} files") print(f"preparing to load {len(files)} files from {dir_path}")
for file in files: for file in files:
pdata = yaml.safe_load(file.read_text()) pdata = yaml.safe_load(file.read_text())
created += 1
person = Person.create( person = Person.create(
id=pdata.pop("id"), id=pdata.pop("id"),
name=pdata.pop("name"), name=pdata.pop("name"),
@ -32,23 +37,23 @@ def load_people_yaml(dir_path: pathlib.Path):
birth_date=pdata.pop("birth_date", None), birth_date=pdata.pop("birth_date", None),
gender=pdata.pop("gender"), gender=pdata.pop("gender"),
email=pdata.pop("email", ""), email=pdata.pop("email", ""),
image=pdata.pop("image"), image=pdata.pop("image", ""),
party=pdata.pop("party"), party=pdata.pop("party"),
extras=pdata.pop("extras", {}), extras=pdata.pop("extras", {}),
) )
to_links(person, pdata.pop("links"), PersonLink) to_links(person, pdata.pop("links", []), PersonLink)
to_links(person, pdata.pop("sources"), PersonSource) to_links(person, pdata.pop("sources", []), PersonSource)
for role in pdata.pop("roles"): for role in pdata.pop("roles"):
PersonRole.create( PersonRole.create(
person=person, person=person,
jurisdiction=role.pop("jurisdiction"), jurisdiction=role.pop("jurisdiction"),
district=role.pop("district"), district=role.pop("district", ""),
type=role.pop('type'), type=role.pop('type'),
start_date=role.pop('start_date', None), start_date=role.pop('start_date', None),
end_date=role.pop('end_date', None), end_date=role.pop('end_date', None),
) )
for office in pdata.pop("offices"): for office in pdata.pop("offices", []):
PersonOffice.create( PersonOffice.create(
person=person, person=person,
classification=office.pop("classification"), classification=office.pop("classification"),
@ -59,10 +64,25 @@ def load_people_yaml(dir_path: pathlib.Path):
# currently not using other_names, other_identifiers # currently not using other_names, other_identifiers
if pdata.keys(): if pdata.keys():
print(pdata.keys(), "left unused") unused.update(pdata.keys())
if unused:
print(unused, "left unused")
return created
if __name__ == "__main__": if __name__ == "__main__":
path = pathlib.Path(sys.argv[1]) path = pathlib.Path(sys.argv[1])
load_people_yaml(path) n_people = 0
if path.name == "data":
# load all states
states = list(path.glob("??"))
for state in sorted(states):
n_people += load_people_yaml(state / "legislature")
else:
# exact path, one state
n_people += load_people_yaml(path)
print(f"Created {n_people} people")

View File

@ -2,7 +2,7 @@ from peewee import SqliteDatabase, Model
from playhouse.sqlite_ext import SqliteExtDatabase from playhouse.sqlite_ext import SqliteExtDatabase
db = SqliteExtDatabase('openstates.db', pragmas=( db = SqliteExtDatabase('openstates.db', pragmas=(
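('cache_size', -1024 * 64), # 64MB page-cache. ('cache_size', 1024 * 64), # 65536 pages (~256MB at the default 4KiB page size); a negative value is a size in KiB, so -1024 * 64 was the 64MB setting.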
('journal_mode', 'wal'), # Use WAL-mode (you should always use this!). ('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
('foreign_keys', 1)) ('foreign_keys', 1))
) )