From ff5654c9313e544d431c747e0dbe5e78799e7cb5 Mon Sep 17 00:00:00 2001
From: James Turk
Date: Mon, 16 Sep 2024 20:03:59 -0500
Subject: [PATCH] ossql initial commit

---
 .gitignore                    |  4 ++
 .python-version               |  1 +
 README.md                     |  1 +
 pyproject.toml                | 11 ++++
 src/ossql/__init__.py         |  0
 src/ossql/people_to_sqlite.py | 99 +++++++++++++++++++++++++++++++++++
 src/ossql/schemas/__init__.py |  0
 src/ossql/schemas/common.py   | 19 +++++++
 src/ossql/schemas/people.py   | 59 +++++++++++++++++++++
 uv.lock                       | 49 +++++++++++++++++
 10 files changed, 243 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .python-version
 create mode 100644 README.md
 create mode 100644 pyproject.toml
 create mode 100644 src/ossql/__init__.py
 create mode 100644 src/ossql/people_to_sqlite.py
 create mode 100644 src/ossql/schemas/__init__.py
 create mode 100644 src/ossql/schemas/common.py
 create mode 100644 src/ossql/schemas/people.py
 create mode 100644 uv.lock

diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+openstates.db
+openstates.db-shm
+openstates.db-wal
+*.pyc
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..e4fba21
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..934bac4
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Experiment using OS data.
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..758553d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,11 @@
+[project]
+name = "ossql"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = ["peewee>=3.17.6", "pyyaml>=6.0.2"]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/src/ossql/__init__.py b/src/ossql/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/ossql/people_to_sqlite.py b/src/ossql/people_to_sqlite.py
new file mode 100644
--- /dev/null
+++ b/src/ossql/people_to_sqlite.py
@@ -0,0 +1,99 @@
+"""Load a directory of people YAML files into the ossql SQLite database.
+
+Run as a module so the package-relative imports resolve::
+
+    python -m ossql.people_to_sqlite PEOPLE_DIR
+"""
+
+import sys
+import pathlib
+import yaml
+from .schemas.common import db
+from .schemas.people import Person, PersonLink, PersonSource, PersonRole, PersonOffice
+
+
+def to_links(person, dict_list, cls):
+    """Create one ``cls`` row per entry of ``dict_list``, attached to ``person``.
+
+    ``cls`` is created with ``person``, ``url`` and ``note``, which is how
+    ``PersonLink`` and ``PersonSource`` are declared. Every entry must have a
+    ``url`` key; ``note`` is optional and stored as "" when absent. Both keys
+    are popped, so the dicts in ``dict_list`` are modified in place.
+    """
+    for dl in dict_list:
+        cls.create(
+            person=person,
+            url=dl.pop("url"),
+            note=dl.pop("note", ""),
+        )
+
+
+def load_people_yaml(dir_path: pathlib.Path):
+    """Load every ``*.yml`` file found directly in ``dir_path`` into the database.
+
+    Each file is parsed as one person mapping and stored as a ``Person`` row plus
+    its ``PersonLink``, ``PersonSource``, ``PersonRole`` and ``PersonOffice``
+    rows. Keys read with a plain ``pop(key)`` are required and raise
+    ``KeyError`` when missing; keys that are never read are printed as
+    "left unused".
+
+    The whole load runs in a single transaction, so an exception leaves the
+    database without any rows from this call.
+    """
+    # ensure all tables exist
+    db.create_tables([Person, PersonLink, PersonSource, PersonRole, PersonOffice])
+
+    # sorted: glob order is arbitrary, and a stable order makes runs comparable
+    files = sorted(dir_path.glob("*.yml"))
+    print(f"preparing to load {len(files)} files")
+
+    # One transaction instead of a commit per create(): much faster for bulk
+    # inserts, and a failure part-way through cannot leave half-loaded people
+    # behind that would collide on primary key when the load is re-run.
+    with db.atomic():
+        for file in files:
+            # explicit encoding: do not depend on the platform default
+            pdata = yaml.safe_load(file.read_text(encoding="utf-8"))
+
+            person = Person.create(
+                id=pdata.pop("id"),
+                name=pdata.pop("name"),
+                given_name=pdata.pop("given_name"),
+                family_name=pdata.pop("family_name"),
+                birth_date=pdata.pop("birth_date", None),
+                gender=pdata.pop("gender"),
+                email=pdata.pop("email", ""),
+                image=pdata.pop("image"),
+                party=pdata.pop("party"),
+                extras=pdata.pop("extras", {}),
+            )
+            to_links(person, pdata.pop("links"), PersonLink)
+            to_links(person, pdata.pop("sources"), PersonSource)
+
+            for role in pdata.pop("roles"):
+                PersonRole.create(
+                    person=person,
+                    jurisdiction=role.pop("jurisdiction"),
+                    district=role.pop("district"),
+                    type=role.pop("type"),
+                    start_date=role.pop("start_date", None),
+                    end_date=role.pop("end_date", None),
+                )
+            for office in pdata.pop("offices"):
+                PersonOffice.create(
+                    person=person,
+                    classification=office.pop("classification"),
+                    address=office.pop("address", ""),
+                    voice=office.pop("voice", ""),
+                    fax=office.pop("fax", ""),
+                )
+
+            # currently not using other_names, other_identifiers
+            if pdata:
+                print(pdata.keys(), "left unused")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit("usage: python -m ossql.people_to_sqlite PEOPLE_DIR")
+    load_people_yaml(pathlib.Path(sys.argv[1]))
diff --git a/src/ossql/schemas/__init__.py b/src/ossql/schemas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/ossql/schemas/common.py b/src/ossql/schemas/common.py
new file mode 100644
--- /dev/null
+++ b/src/ossql/schemas/common.py
@@ -0,0 +1,19 @@
+"""Database handle and base model shared by every ossql schema module."""
+
+from peewee import SqliteDatabase, Model
+from playhouse.sqlite_ext import SqliteExtDatabase
+
+# 'openstates.db' is a relative path, so the file is created in (or opened
+# from) the current working directory of the running process.
+db = SqliteExtDatabase('openstates.db', pragmas=(
+    ('cache_size', -1024 * 64),  # 64MB page-cache.
+    ('journal_mode', 'wal'),  # Use WAL-mode (you should always use this!).
+    ('foreign_keys', 1))  # Enforce foreign-key constraints.
+)
+
+
+class BaseModel(Model):
+    """Base class that binds every model to the shared ``db`` handle."""
+
+    class Meta:
+        database = db
diff --git a/src/ossql/schemas/people.py b/src/ossql/schemas/people.py
new file mode 100644
--- /dev/null
+++ b/src/ossql/schemas/people.py
@@ -0,0 +1,59 @@
+"""Peewee models for people and their links, sources, roles and offices."""
+
+from .common import BaseModel
+from peewee import ForeignKeyField, TextField, DateField
+from playhouse.sqlite_ext import JSONField
+
+
+class Person(BaseModel):
+    """One person; ``id`` is the caller-supplied text primary key."""
+
+    id = TextField(primary_key=True)
+    name = TextField()
+    given_name = TextField()
+    family_name = TextField()
+    # The loader passes birth_date (None when the YAML omits it); without this
+    # column peewee would accept the keyword but never store the value.
+    birth_date = DateField(null=True)
+    gender = TextField()
+    email = TextField()
+    image = TextField()
+    party = TextField()
+    extras = JSONField()
+
+
+class PersonLink(BaseModel):
+    """A URL and note attached to a person (``Person.links``)."""
+
+    person = ForeignKeyField(Person, backref="links")
+    url = TextField()
+    note = TextField()
+
+
+class PersonSource(BaseModel):
+    """A source URL and note attached to a person (``Person.sources``)."""
+
+    person = ForeignKeyField(Person, backref="sources")
+    url = TextField()
+    note = TextField()
+
+
+class PersonRole(BaseModel):
+    """A role attached to a person; either date may be NULL."""
+
+    person = ForeignKeyField(Person, backref="roles")
+    start_date = DateField(null=True)
+    end_date = DateField(null=True)
+    type = TextField()
+    jurisdiction = TextField()
+    district = TextField()
+
+
+class PersonOffice(BaseModel):
+    """Address, voice and fax details for one of a person's offices."""
+
+    person = ForeignKeyField(Person, backref="offices")
+    classification = TextField()
+    address = TextField()
+    voice = TextField()
+    fax = TextField()
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000..e2749b3
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,49 @@
+version = 1
+requires-python = ">=3.12"
+
+[[package]]
+name = "ossql"
+version = "0.1.0"
+source = { editable = "." }
+dependencies = [
+    { name = "peewee" },
+    { name = "pyyaml" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "peewee", specifier = ">=3.17.6" },
+    { name = "pyyaml", specifier = ">=6.0.2" },
+]
+
+[[package]]
+name = "peewee"
+version = "3.17.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/bd/be/e9c886b4601a19f4c34a1b75c5fe8b98a2115dd964251a76b24c977c369d/peewee-3.17.6.tar.gz", hash = "sha256:cea5592c6f4da1592b7cff8eaf655be6648a1f5857469e30037bf920c03fb8fb", size = 2954075 }
+
+[[package]]
+name = "pyyaml"
+version = "6.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 },
+    { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 },
+    { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 },
+    { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 },
+    { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 },
+    { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 },
+    { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 },
+    { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 },
+    { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 },
+    { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309 },
+    { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679 },
+    { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428 },
+    { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361 },
+    { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523 },
+    { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660 },
+    { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 },
+    { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 },
+    { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 },
+]