avoid duplicate processing

This commit is contained in:
James Turk 2023-07-13 16:54:13 -05:00
parent 0998fe1e6a
commit c21954ebba
3 changed files with 16 additions and 4 deletions

View File

@ -43,15 +43,19 @@ recipe.add_conditional(
lambda cw: cw.is_fruit,
"fruits",
)
recipe.add_transform("fruits", "sentence", lambda x: f"I love a fresh {x['word']}")
# Register a transform from "fruits" to "sentence": each input item is turned
# into a Sentence whose `sentence` field is the whitespace-split token list of
# "I love a fresh <normalized_word>".
# NOTE(review): presumably "fruits" and "sentence" are beaker names and
# `normalized_word` is an attribute of the Word model -- confirm.
recipe.add_transform(
"fruits",
"sentence",
lambda x: Sentence(sentence=f"I love a fresh {x.normalized_word}".split()),
)
# Seed data registered under the name "word": a list of Word items.
# The list deliberately contains case variants of the same word
# ("banana"/"bAnAnA", "egg"/"EGG").
# NOTE(review): presumably these exercise word normalization / duplicate
# handling downstream -- confirm against the Word model.
recipe.add_seed(
"word",
[
Word(word="apple"),
Word(word="banana"),
Word(word="bAnAnA"),
Word(word="hammer"),
Word(word="orange"),
Word(word="egg"),
Word(word="EGG"),
],
)

View File

@ -40,6 +40,9 @@ class Beaker(abc.ABC):
for item in items:
self.add_item(item)
def id_set(self) -> set[str]:
    """Return the set of ids of all items currently held.

    The ids are the first element of each pair yielded by ``self.items()``;
    the second element of each pair is ignored.
    """
    # Set comprehension instead of set(<generator>); the loop variable is
    # named ``item_id`` so the ``id`` builtin is not shadowed.
    return {item_id for item_id, _ in self.items()}
class TempBeaker(Beaker):
def __init__(self, name: str, model: PydanticModel, recipe: "Recipe"):

View File

@ -272,11 +272,14 @@ class Recipe:
from_beaker = self.beakers[from_b]
to_beaker = self.beakers[to_b]
already_processed = from_beaker.id_set() & to_beaker.id_set()
log.info(
"transform",
from_b=from_b,
to_b=to_b,
items=len(from_beaker),
to_process=len(from_beaker) - len(already_processed),
already_processed=len(already_processed),
transform=edge["transform"].name,
)
@ -289,6 +292,8 @@ class Recipe:
t_func = transform.transform_func
for id, item in from_beaker.items():
if id in already_processed:
continue
try:
transformed = t_func(item)
if transformed: