create master people db

This commit is contained in:
James Turk 2024-09-16 20:18:41 -05:00
parent ff5654c931
commit a628562f6e
2 changed files with 29 additions and 9 deletions

View File

@ -19,11 +19,16 @@ def load_people_yaml(dir_path: pathlib.Path):
# ensure all tables exist
db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice])
# output stats
unused = set()
created = 0
files = list(dir_path.glob("*.yml"))
print(f"preparing to load {len(files)} files")
print(f"preparing to load {len(files)} files from {dir_path}")
for file in files:
pdata = yaml.safe_load(file.read_text())
created += 1
person = Person.create(
id=pdata.pop("id"),
name=pdata.pop("name"),
@ -32,23 +37,23 @@ def load_people_yaml(dir_path: pathlib.Path):
birth_date=pdata.pop("birth_date", None),
gender=pdata.pop("gender"),
email=pdata.pop("email", ""),
image=pdata.pop("image"),
image=pdata.pop("image", ""),
party=pdata.pop("party"),
extras=pdata.pop("extras", {}),
)
to_links(person, pdata.pop("links"), PersonLink)
to_links(person, pdata.pop("sources"), PersonSource)
to_links(person, pdata.pop("links", []), PersonLink)
to_links(person, pdata.pop("sources", []), PersonSource)
for role in pdata.pop("roles"):
PersonRole.create(
person=person,
jurisdiction=role.pop("jurisdiction"),
district=role.pop("district"),
district=role.pop("district", ""),
type=role.pop('type'),
start_date=role.pop('start_date', None),
end_date=role.pop('end_date', None),
)
for office in pdata.pop("offices"):
for office in pdata.pop("offices", []):
PersonOffice.create(
person=person,
classification=office.pop("classification"),
@ -59,10 +64,25 @@ def load_people_yaml(dir_path: pathlib.Path):
# currently not using other_names, other_identifiers
if pdata.keys():
print(pdata.keys(), "left unused")
unused.update(pdata.keys())
if unused:
print(unused, "left unused")
return created
if __name__ == "__main__":
    # Command-line entry point: load people YAML files and report how many
    # people were created.
    #
    # Usage: <script> PATH
    #   PATH named "data": every two-character child of PATH is visited in
    #       sorted order and its "legislature" subdirectory is loaded.
    #   any other PATH: passed to load_people_yaml directly.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} PATH")
    path = pathlib.Path(sys.argv[1])
    n_people = 0
    # Each directory is handed to load_people_yaml exactly once; calling it
    # a second time on the same path would try to create every row again.
    if path.name == "data":
        # load all states
        for state in sorted(path.glob("??")):
            n_people += load_people_yaml(state / "legislature")
    else:
        # exact path, one state
        n_people += load_people_yaml(path)
    print(f"Created {n_people} people")

View File

@ -2,7 +2,7 @@ from peewee import SqliteDatabase, Model
from playhouse.sqlite_ext import SqliteExtDatabase
# Module-level database object bound to the SQLite file 'openstates.db'.
db = SqliteExtDatabase('openstates.db', pragmas=(
# NOTE(review): cache_size is listed twice below; presumably only the later
# entry ends up in effect -- confirm which value is intended and drop the other.
('cache_size', -1024 * 64), # 64MB page-cache.
# SQLite reads a negative cache_size as a size in KiB and a positive one as a
# number of pages, so the entry below asks for 65536 pages (about 256MB at a
# 4096-byte page size -- TODO confirm the page size in use), not 64MB.
('cache_size', 1024 * 64), # 65536 pages, not 64MB (see note above).
('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
('foreign_keys', 1))
)