"""Download every House hearing record of the 118th Congress from api.congress.gov.

The listing endpoint is walked page by page; for each hearing in a page the
detail record at the hearing's ``url`` is fetched and written to
``BASE_DIR/<jacketNumber>.json``.
"""

import httpx
import json
import pathlib
import time
from pprint import pprint

# API key appended to every request as the ``api_key`` query parameter.
# It is blank here; fill it in before running.
API_KEY = ""
# First page of the hearing listing (200 items per page).
BASE_URL = "https://api.congress.gov/v3/hearing/118/house/?limit=200"
# Directory that receives one JSON file per hearing.
BASE_DIR = pathlib.Path("congress-hearing-118-house")


def get_json(url):
    """Fetch *url* with the API key attached and return the decoded JSON body.

    Sleeps 0.5 seconds before each request as a crude rate limit.

    Raises:
        httpx.HTTPStatusError: if the response has a 4xx/5xx status.
    """
    time.sleep(0.5)
    # The URL may or may not already carry a query string (the listing and
    # "next" URLs do), so pick the separator accordingly.
    separator = "&" if "?" in url else "?"
    resp = httpx.get(f"{url}{separator}api_key={API_KEY}")
    resp.raise_for_status()
    return json.loads(resp.text)


def save_hearing(hearing):
    """Fetch one hearing's detail record and save it under ``BASE_DIR``.

    *hearing* is one item of a listing page; its ``url`` and ``jacketNumber``
    keys are read. The output file is named ``<jacketNumber>.json``.
    """
    # Download first: opening the file before the request would leave an
    # empty/truncated file behind whenever the request fails.
    data = get_json(hearing["url"])
    target = BASE_DIR / f"{hearing['jacketNumber']}.json"
    with open(target, "w", encoding="utf-8") as f:
        json.dump(data, f)
    print("saved", hearing["jacketNumber"])


def get_jackets():
    """Walk every page of the hearing listing and save each hearing found."""
    url = BASE_URL
    while url:
        page = get_json(url)
        for hearing in page["hearings"]:
            save_hearing(hearing)
        # Use .get() so a final page whose pagination object has no "next"
        # entry ends the loop instead of raising KeyError.
        url = page["pagination"].get("next")


def get_structure():
    """Pretty-print a two-item listing page to inspect the response layout."""
    print("structure")
    # Target the limit parameter explicitly rather than any "200" substring.
    pprint(get_json(BASE_URL.replace("limit=200", "limit=2")))


def main():
    """Create the output directory and download all hearings."""
    # Debug aid: uncomment to print the shape of a listing response.
    # get_structure()
    BASE_DIR.mkdir(exist_ok=True)
    get_jackets()


if __name__ == "__main__":
    main()