create master people db
This commit is contained in:
parent
ff5654c931
commit
a628562f6e
@ -19,11 +19,16 @@ def load_people_yaml(dir_path: pathlib.Path):
|
|||||||
# ensure all tables exist
|
# ensure all tables exist
|
||||||
db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice])
|
db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice])
|
||||||
|
|
||||||
|
# output stats
|
||||||
|
unused = set()
|
||||||
|
created = 0
|
||||||
|
|
||||||
files = list(dir_path.glob("*.yml"))
|
files = list(dir_path.glob("*.yml"))
|
||||||
print(f"preparing to load {len(files)} files")
|
print(f"preparing to load {len(files)} files from {dir_path}")
|
||||||
for file in files:
|
for file in files:
|
||||||
pdata = yaml.safe_load(file.read_text())
|
pdata = yaml.safe_load(file.read_text())
|
||||||
|
|
||||||
|
created += 1
|
||||||
person = Person.create(
|
person = Person.create(
|
||||||
id=pdata.pop("id"),
|
id=pdata.pop("id"),
|
||||||
name=pdata.pop("name"),
|
name=pdata.pop("name"),
|
||||||
@ -32,23 +37,23 @@ def load_people_yaml(dir_path: pathlib.Path):
|
|||||||
birth_date=pdata.pop("birth_date", None),
|
birth_date=pdata.pop("birth_date", None),
|
||||||
gender=pdata.pop("gender"),
|
gender=pdata.pop("gender"),
|
||||||
email=pdata.pop("email", ""),
|
email=pdata.pop("email", ""),
|
||||||
image=pdata.pop("image"),
|
image=pdata.pop("image", ""),
|
||||||
party=pdata.pop("party"),
|
party=pdata.pop("party"),
|
||||||
extras=pdata.pop("extras", {}),
|
extras=pdata.pop("extras", {}),
|
||||||
)
|
)
|
||||||
to_links(person, pdata.pop("links"), PersonLink)
|
to_links(person, pdata.pop("links", []), PersonLink)
|
||||||
to_links(person, pdata.pop("sources"), PersonSource)
|
to_links(person, pdata.pop("sources", []), PersonSource)
|
||||||
|
|
||||||
for role in pdata.pop("roles"):
|
for role in pdata.pop("roles"):
|
||||||
PersonRole.create(
|
PersonRole.create(
|
||||||
person=person,
|
person=person,
|
||||||
jurisdiction=role.pop("jurisdiction"),
|
jurisdiction=role.pop("jurisdiction"),
|
||||||
district=role.pop("district"),
|
district=role.pop("district", ""),
|
||||||
type=role.pop('type'),
|
type=role.pop('type'),
|
||||||
start_date=role.pop('start_date', None),
|
start_date=role.pop('start_date', None),
|
||||||
end_date=role.pop('end_date', None),
|
end_date=role.pop('end_date', None),
|
||||||
)
|
)
|
||||||
for office in pdata.pop("offices"):
|
for office in pdata.pop("offices", []):
|
||||||
PersonOffice.create(
|
PersonOffice.create(
|
||||||
person=person,
|
person=person,
|
||||||
classification=office.pop("classification"),
|
classification=office.pop("classification"),
|
||||||
@ -59,10 +64,25 @@ def load_people_yaml(dir_path: pathlib.Path):
|
|||||||
|
|
||||||
# currently not using other_names, other_identifiers
|
# currently not using other_names, other_identifiers
|
||||||
if pdata.keys():
|
if pdata.keys():
|
||||||
print(pdata.keys(), "left unused")
|
unused.update(pdata.keys())
|
||||||
|
|
||||||
|
if unused:
|
||||||
|
print(unused, "left unused")
|
||||||
|
|
||||||
|
return created
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
path = pathlib.Path(sys.argv[1])
|
path = pathlib.Path(sys.argv[1])
|
||||||
load_people_yaml(path)
|
n_people = 0
|
||||||
|
if path.name == "data":
|
||||||
|
# load all states
|
||||||
|
states = list(path.glob("??"))
|
||||||
|
for state in sorted(states):
|
||||||
|
n_people += load_people_yaml(state / "legislature")
|
||||||
|
else:
|
||||||
|
# exact path, one state
|
||||||
|
n_people += load_people_yaml(path)
|
||||||
|
|
||||||
|
print(f"Created {n_people} people")
|
||||||
|
@ -2,7 +2,7 @@ from peewee import SqliteDatabase, Model
|
|||||||
from playhouse.sqlite_ext import SqliteExtDatabase
|
from playhouse.sqlite_ext import SqliteExtDatabase
|
||||||
|
|
||||||
db = SqliteExtDatabase('openstates.db', pragmas=(
|
db = SqliteExtDatabase('openstates.db', pragmas=(
|
||||||
('cache_size', -1024 * 64), # 64MB page-cache.
|
('cache_size', 1024 * 64), # NOTE: positive = 65536 pages, not 64MB; SQLite needs a negative value (-1024 * 64 KiB) for a 64MB page-cache.
|
||||||
('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
|
('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
|
||||||
('foreign_keys', 1))
|
('foreign_keys', 1))
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user