scrapple/scripts/mirror_congress_api.py

53 lines
1.1 KiB
Python
Raw Normal View History

2025-01-10 22:59:05 +00:00
import httpx
import json
import pathlib
import time
from pprint import pprint
# Key sent as the ``api_key`` query parameter on every request made by
# get_json(). It is empty here — presumably it has to be filled in locally
# before the script can talk to the API (TODO confirm).
API_KEY = ""
# First listing page to fetch: the v3 ``hearing/118/house`` endpoint with
# ``limit=200``. Later pages are reached through the pagination links.
BASE_URL = "https://api.congress.gov/v3/hearing/118/house/?limit=200"
# Output directory (relative to the current working directory); one
# ``<jacketNumber>.json`` file per hearing is written into it.
BASE_DIR = pathlib.Path("congress-hearing-118-house")
def get_json(url):
    """Fetch ``url`` with the API key attached and return the decoded JSON.

    Sleeps for half a second before every request, appends ``api_key`` to
    the query string (using ``&`` when the URL already has a ``?``, ``?``
    otherwise), and parses the response body as JSON.

    Args:
        url: Endpoint to request, with or without an existing query string.

    Returns:
        The parsed JSON document.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # Fixed pause ahead of each call — presumably to stay under the API's
    # rate limit (TODO confirm the actual limit).
    time.sleep(0.5)
    key_param = f"&api_key={API_KEY}" if "?" in url else f"?api_key={API_KEY}"
    response = httpx.get(url + key_param)
    response.raise_for_status()
    return json.loads(response.text)
def save_hearing(hearing):
    """Download one hearing's detail record and write it to ``BASE_DIR``.

    The record is stored as ``<jacketNumber>.json``.

    Args:
        hearing: A hearing entry from a listing page. It must provide
            ``"jacketNumber"`` (used for the file name) and ``"url"``
            (the detail endpoint passed to ``get_json``).

    Raises:
        KeyError: If ``hearing`` lacks ``"jacketNumber"`` or ``"url"``.
        Exception: Anything raised by ``get_json`` propagates unchanged.
    """
    jacket_number = hearing["jacketNumber"]
    # Fetch BEFORE opening the output file. Opening with "w" truncates
    # immediately, so a failed request would otherwise leave an empty file
    # behind (or wipe a previously saved copy).
    data = get_json(hearing["url"])
    with open(BASE_DIR / f"{jacket_number}.json", "w", encoding="utf-8") as f:
        json.dump(data, f)
    print("saved", jacket_number)
def get_jackets():
    """Walk the paginated hearing listing and save every hearing found.

    Starts at ``BASE_URL``, passes each entry of the page's ``"hearings"``
    list to ``save_hearing``, then follows the page's pagination ``"next"``
    link until there is none.

    Raises:
        KeyError: If a page has no ``"hearings"`` key.
        Exception: Anything raised by ``get_json`` or ``save_hearing``
            propagates unchanged.
    """
    url = BASE_URL
    while url:
        page = get_json(url)
        for hearing in page["hearings"]:
            save_hearing(hearing)
        # Use .get() so a final page without a "next" link (or without a
        # pagination object at all) ends the loop cleanly instead of
        # raising KeyError after everything has been saved.
        url = page.get("pagination", {}).get("next")
def get_structure():
    """Pretty-print a reduced listing response to show its layout.

    The request URL is ``BASE_URL`` with every ``"200"`` replaced by
    ``"2"``, which turns ``limit=200`` into ``limit=2``.
    """
    print("structure")
    sample_url = BASE_URL.replace("200", "2")
    sample = get_json(sample_url)
    pprint(sample)
def main():
    """Create the output directory if it is missing, then mirror all hearings."""
    # Debug aid: call get_structure() here to preview the response layout.
    BASE_DIR.mkdir(exist_ok=True)
    get_jackets()
# Run the mirror only when executed as a script, not when imported.
if __name__ == "__main__":
    main()