avoid duplicate processing

This commit is contained in:
James Turk 2023-07-13 16:54:13 -05:00
parent 0998fe1e6a
commit c21954ebba
3 changed files with 16 additions and 4 deletions

View File

@@ -43,15 +43,19 @@ recipe.add_conditional(
lambda cw: cw.is_fruit, lambda cw: cw.is_fruit,
"fruits", "fruits",
) )
recipe.add_transform("fruits", "sentence", lambda x: f"I love a fresh {x['word']}") recipe.add_transform(
"fruits",
"sentence",
lambda x: Sentence(sentence=f"I love a fresh {x.normalized_word}".split()),
)
recipe.add_seed( recipe.add_seed(
"word", "word",
[ [
Word(word="apple"), Word(word="apple"),
Word(word="banana"), Word(word="bAnAnA"),
Word(word="hammer"), Word(word="hammer"),
Word(word="orange"), Word(word="orange"),
Word(word="egg"), Word(word="EGG"),
], ],
) )

View File

@@ -40,6 +40,9 @@ class Beaker(abc.ABC):
for item in items: for item in items:
self.add_item(item) self.add_item(item)
def id_set(self) -> set[str]:
    """Return the set of ids of every item currently in this beaker.

    ``self.items()`` is iterated as ``(id, item)`` pairs; only the id half
    of each pair is kept, so duplicate ids collapse to a single entry.

    Returns:
        A new ``set`` holding each item id exactly once (empty when the
        beaker yields no items).
    """
    # A set comprehension avoids the throwaway generator passed to set(),
    # and naming the loop variable `item_id` keeps the builtin `id` usable.
    return {item_id for item_id, _ in self.items()}
class TempBeaker(Beaker): class TempBeaker(Beaker):
def __init__(self, name: str, model: PydanticModel, recipe: "Recipe"): def __init__(self, name: str, model: PydanticModel, recipe: "Recipe"):

View File

@@ -272,11 +272,14 @@ class Recipe:
from_beaker = self.beakers[from_b] from_beaker = self.beakers[from_b]
to_beaker = self.beakers[to_b] to_beaker = self.beakers[to_b]
already_processed = from_beaker.id_set() & to_beaker.id_set()
log.info( log.info(
"transform", "transform",
from_b=from_b, from_b=from_b,
to_b=to_b, to_b=to_b,
items=len(from_beaker), to_process=len(from_beaker) - len(already_processed),
already_processed=len(already_processed),
transform=edge["transform"].name, transform=edge["transform"].name,
) )
@@ -289,6 +292,8 @@ class Recipe:
t_func = transform.transform_func t_func = transform.transform_func
for id, item in from_beaker.items(): for id, item in from_beaker.items():
if id in already_processed:
continue
try: try:
transformed = t_func(item) transformed = t_func(item)
if transformed: if transformed: