create master people db
This commit is contained in:
parent
ff5654c931
commit
a628562f6e
@ -19,11 +19,16 @@ def load_people_yaml(dir_path: pathlib.Path):
|
||||
# ensure all tables exist
|
||||
db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice])
|
||||
|
||||
# output stats
|
||||
unused = set()
|
||||
created = 0
|
||||
|
||||
files = list(dir_path.glob("*.yml"))
|
||||
print(f"preparing to load {len(files)} files")
|
||||
print(f"preparing to load {len(files)} files from {dir_path}")
|
||||
for file in files:
|
||||
pdata = yaml.safe_load(file.read_text())
|
||||
|
||||
created += 1
|
||||
person = Person.create(
|
||||
id=pdata.pop("id"),
|
||||
name=pdata.pop("name"),
|
||||
@ -32,23 +37,23 @@ def load_people_yaml(dir_path: pathlib.Path):
|
||||
birth_date=pdata.pop("birth_date", None),
|
||||
gender=pdata.pop("gender"),
|
||||
email=pdata.pop("email", ""),
|
||||
image=pdata.pop("image"),
|
||||
image=pdata.pop("image", ""),
|
||||
party=pdata.pop("party"),
|
||||
extras=pdata.pop("extras", {}),
|
||||
)
|
||||
to_links(person, pdata.pop("links"), PersonLink)
|
||||
to_links(person, pdata.pop("sources"), PersonSource)
|
||||
to_links(person, pdata.pop("links", []), PersonLink)
|
||||
to_links(person, pdata.pop("sources", []), PersonSource)
|
||||
|
||||
for role in pdata.pop("roles"):
|
||||
PersonRole.create(
|
||||
person=person,
|
||||
jurisdiction=role.pop("jurisdiction"),
|
||||
district=role.pop("district"),
|
||||
district=role.pop("district", ""),
|
||||
type=role.pop('type'),
|
||||
start_date=role.pop('start_date', None),
|
||||
end_date=role.pop('end_date', None),
|
||||
)
|
||||
for office in pdata.pop("offices"):
|
||||
for office in pdata.pop("offices", []):
|
||||
PersonOffice.create(
|
||||
person=person,
|
||||
classification=office.pop("classification"),
|
||||
@ -59,10 +64,25 @@ def load_people_yaml(dir_path: pathlib.Path):
|
||||
|
||||
# currently not using other_names, other_identifiers
|
||||
if pdata.keys():
|
||||
print(pdata.keys(), "left unused")
|
||||
unused.update(pdata.keys())
|
||||
|
||||
if unused:
|
||||
print(unused, "left unused")
|
||||
|
||||
return created
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
path = pathlib.Path(sys.argv[1])
|
||||
load_people_yaml(path)
|
||||
n_people = 0
|
||||
if path.name == "data":
|
||||
# load all states
|
||||
states = list(path.glob("??"))
|
||||
for state in sorted(states):
|
||||
n_people += load_people_yaml(state / "legislature")
|
||||
else:
|
||||
# exact path, one state
|
||||
n_people += load_people_yaml(path)
|
||||
|
||||
print(f"Created {n_people} people")
|
||||
|
@ -2,7 +2,7 @@ from peewee import SqliteDatabase, Model
|
||||
from playhouse.sqlite_ext import SqliteExtDatabase
|
||||
|
||||
db = SqliteExtDatabase('openstates.db', pragmas=(
|
||||
('cache_size', -1024 * 64), # 64MB page-cache.
|
||||
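('cache_size', 1024 * 64), # 65536 pages: a positive cache_size is a page count, not KiB (~256MB at SQLite's default 4KB page size; -1024 * 64 would be 64MB).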
|
||||
('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
|
||||
('foreign_keys', 1))
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user