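# a recipe that loads agency URLs from agencies.csv, splits out records with
# usable URLs, fetches each one, and collects the responses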
import httpx
from beakers.recipe import Recipe, Beaker


async def add_response(obj_with_url):
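    """Fetch obj_with_url["url"] and return the URL, status code, and response body."""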
    print(obj_with_url["url"])
    url = obj_with_url["url"]
    # httpx.get() is synchronous and cannot be awaited; use an AsyncClient instead
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
    return {
        "url": url,
        "status_code": response.status_code,
        "response_body": response.text,
    }


# current thinking: beakers exist within a recipe
recipe = Recipe("fetch urls")
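# declare the beakers this recipe uses; temp=True presumably marks
# good_urls/missing_urls as intermediate scratch collections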
recipe.declare_beaker("agencies")
recipe.declare_beaker("responses")
recipe.declare_beaker("good_urls", temp=True)
recipe.declare_beaker("missing_urls", temp=True)
recipe.csv_to_beaker("agencies.csv", "agencies")
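# split agencies into good_urls / missing_urls based on whether each record's
# url column looks like an http(s) URL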
recipe.add_split(
    "agencies",
    lambda x: x["url"].startswith("http"),
    if_true="good_urls",
    if_false="missing_urls",
)
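# pour good_urls into responses, presumably applying add_response to each record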
recipe.add_pour("good_urls", "responses", add_response)
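# run_linearly presumably executes the declared steps in order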
recipe.run_linearly()