53 lines
1.1 KiB
Python
53 lines
1.1 KiB
Python
|
import httpx
|
||
|
import json
|
||
|
import pathlib
|
||
|
import time
|
||
|
from pprint import pprint
|
||
|
|
||
|
# Key appended to every request as the ``api_key`` query parameter by
# get_json(). Left empty here; fill it in before running.
API_KEY = ""

# First page of the hearing listing; later pages are followed through the
# "next" link in each response (see get_jackets()).
BASE_URL = "https://api.congress.gov/v3/hearing/118/house/?limit=200"

# Directory that receives one ``<jacketNumber>.json`` file per hearing.
BASE_DIR = pathlib.Path("congress-hearing-118-house")
|
||
|
|
||
|
|
||
|
def get_json(url):
    """Fetch *url* with the API key attached and return the decoded JSON.

    Sleeps for half a second before every request, then issues a GET with
    ``api_key`` appended to the query string. Raises whatever
    ``raise_for_status()`` raises on an error response.
    """
    # Fixed pause before each call (presumably to stay under a rate limit).
    time.sleep(0.5)

    # Extend an existing query string with "&", otherwise start one with "?".
    separator = "&" if "?" in url else "?"
    resp = httpx.get(url + f"{separator}api_key={API_KEY}")
    resp.raise_for_status()
    return json.loads(resp.text)
|
||
|
|
||
|
|
||
|
def save_hearing(hearing):
    """Download one hearing's detail record and store it as JSON.

    *hearing* is a mapping with at least ``"jacketNumber"`` (used for the
    output file name) and ``"url"`` (the detail endpoint to fetch). The
    result is written to ``BASE_DIR / "<jacketNumber>.json"``.
    """
    jacket_number = hearing["jacketNumber"]

    # Fetch BEFORE opening the output file: opening with "w" truncates
    # immediately, so a failed request would otherwise leave an empty file
    # behind (and wipe out a previously saved good copy).
    data = get_json(hearing["url"])

    with open(BASE_DIR / f"{jacket_number}.json", "w") as f:
        json.dump(data, f)
    print("saved", jacket_number)
|
||
|
|
||
|
|
||
|
def get_jackets():
    """Walk every page of the hearing listing and save each hearing.

    Starts at ``BASE_URL`` and follows the ``pagination.next`` link of each
    response, calling ``save_hearing`` for every entry under ``"hearings"``.
    The loop ends when a page provides no further ``next`` link.
    """
    url = BASE_URL

    while url:
        page = get_json(url)
        for hearing in page["hearings"]:
            save_hearing(hearing)
        # The last page may carry no "next" link (or no "pagination" object
        # at all); treat a missing key as the end of the listing instead of
        # crashing with KeyError after all the work is done.
        url = page.get("pagination", {}).get("next")
|
||
|
|
||
|
|
||
|
def get_structure():
    """Pretty-print a small sample of the listing to inspect its layout.

    Requests ``BASE_URL`` with every "200" replaced by "2" (which turns the
    ``limit=200`` of the current URL into ``limit=2``) and dumps the decoded
    response with ``pprint``.
    """
    print("structure")
    sample_url = BASE_URL.replace("200", "2")
    sample = get_json(sample_url)
    pprint(sample)
|
||
|
|
||
|
|
||
|
def main():
    """Create the output directory if needed and download every hearing."""
    # Debug aid, disabled by default; enable to print a sample response:
    # get_structure()
    BASE_DIR.mkdir(exist_ok=True)
    get_jackets()
|
||
|
|
||
|
|
||
|
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|