seeds
This commit is contained in:
parent
11003ef872
commit
84c2f1a641
@ -2,7 +2,6 @@ import datetime
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import lxml
|
import lxml
|
||||||
from beakers import Recipe
|
from beakers import Recipe
|
||||||
from beakers.filters import ConditionalFilter
|
|
||||||
from beakers.http import HttpRequest
|
from beakers.http import HttpRequest
|
||||||
|
|
||||||
|
|
||||||
@ -65,7 +64,7 @@ other = [
|
|||||||
ArticleURL(url="https://nytimes.com", source="nytimes"),
|
ArticleURL(url="https://nytimes.com", source="nytimes"),
|
||||||
]
|
]
|
||||||
|
|
||||||
# recipe.add_seed(
|
recipe.add_seed(
|
||||||
# "article_url",
|
"url",
|
||||||
# npr_examples + other,
|
npr_examples + other,
|
||||||
# )
|
)
|
||||||
|
@ -60,6 +60,9 @@ def run(
|
|||||||
start: Optional[str] = typer.Option(None),
|
start: Optional[str] = typer.Option(None),
|
||||||
end: Optional[str] = typer.Option(None),
|
end: Optional[str] = typer.Option(None),
|
||||||
):
|
):
|
||||||
|
if ctx.obj.seeds:
|
||||||
|
typer.secho("Seeding beakers", fg=typer.colors.GREEN)
|
||||||
|
ctx.obj.process_seeds()
|
||||||
has_data = any(ctx.obj.beakers.values())
|
has_data = any(ctx.obj.beakers.values())
|
||||||
if not has_data and not input:
|
if not has_data and not input:
|
||||||
typer.secho("No data; pass --input to seed beaker(s)", fg=typer.colors.RED)
|
typer.secho("No data; pass --input to seed beaker(s)", fg=typer.colors.RED)
|
||||||
|
@ -1,9 +0,0 @@
|
|||||||
class ConditionalFilter:
|
|
||||||
def __init__(self, condition):
|
|
||||||
self.condition = condition
|
|
||||||
|
|
||||||
def __call__(self, item):
|
|
||||||
if self.condition(item):
|
|
||||||
return item
|
|
||||||
else:
|
|
||||||
return None
|
|
@ -4,6 +4,10 @@ import datetime
|
|||||||
|
|
||||||
|
|
||||||
class HttpResponse(pydantic.BaseModel):
|
class HttpResponse(pydantic.BaseModel):
|
||||||
|
"""
|
||||||
|
Beaker data type that represents an HTTP response.
|
||||||
|
"""
|
||||||
|
|
||||||
url: str
|
url: str
|
||||||
status_code: int
|
status_code: int
|
||||||
response_body: str
|
response_body: str
|
||||||
@ -13,7 +17,16 @@ class HttpResponse(pydantic.BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class HttpRequest:
|
class HttpRequest:
|
||||||
|
"""
|
||||||
|
Filter that converts from a beaker with a URL to a beaker with an HTTP response.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, beaker: str, field: str):
|
def __init__(self, beaker: str, field: str):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
beaker: The name of the beaker that contains the URL.
|
||||||
|
field: The name of the field in the beaker that contains the URL.
|
||||||
|
"""
|
||||||
self.beaker = beaker
|
self.beaker = beaker
|
||||||
self.field = field
|
self.field = field
|
||||||
|
|
||||||
|
@ -5,8 +5,10 @@ import inspect
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import hashlib
|
import hashlib
|
||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass
|
|
||||||
import networkx
|
import networkx
|
||||||
|
from collections import defaultdict, Counter
|
||||||
|
from dataclasses import dataclass # TODO: pydantic?
|
||||||
|
from typing import Iterable
|
||||||
from structlog import get_logger
|
from structlog import get_logger
|
||||||
|
|
||||||
from .beakers import Beaker, SqliteBeaker, TempBeaker
|
from .beakers import Beaker, SqliteBeaker, TempBeaker
|
||||||
@ -39,6 +41,7 @@ class Recipe:
|
|||||||
self.name = name
|
self.name = name
|
||||||
self.graph = networkx.DiGraph()
|
self.graph = networkx.DiGraph()
|
||||||
self.beakers = {}
|
self.beakers = {}
|
||||||
|
self.seeds = defaultdict(list)
|
||||||
self.db = sqlite3.connect(db_name)
|
self.db = sqlite3.connect(db_name)
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
@ -114,6 +117,15 @@ class Recipe:
|
|||||||
if_cond_false,
|
if_cond_false,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def add_seed(self, beaker_name: str, data: Iterable) -> None:
|
||||||
|
self.seeds[beaker_name].append(data)
|
||||||
|
|
||||||
|
def process_seeds(self) -> None:
|
||||||
|
log.info("process_seeds", recipe=self.name)
|
||||||
|
for beaker_name, seeds in self.seeds.items():
|
||||||
|
for seed in seeds:
|
||||||
|
self.beakers[beaker_name].add_item(seed)
|
||||||
|
|
||||||
def get_metadata(self, table_name) -> dict:
|
def get_metadata(self, table_name) -> dict:
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
@ -168,6 +180,10 @@ class Recipe:
|
|||||||
return beaker
|
return beaker
|
||||||
|
|
||||||
def show(self):
|
def show(self):
|
||||||
|
seed_count = Counter(self.seeds.keys())
|
||||||
|
typer.secho("Seeds", fg=typer.colors.GREEN)
|
||||||
|
for beaker, count in seed_count.items():
|
||||||
|
typer.secho(f" {beaker} ({count})", fg=typer.colors.GREEN)
|
||||||
graph_data = self.graph_data()
|
graph_data = self.graph_data()
|
||||||
for node in graph_data:
|
for node in graph_data:
|
||||||
if node["temp"]:
|
if node["temp"]:
|
||||||
|
Loading…
Reference in New Issue
Block a user