From c21954ebba4a0da953511c80066861f7a9a5e8f9 Mon Sep 17 00:00:00 2001 From: James Turk Date: Thu, 13 Jul 2023 16:54:13 -0500 Subject: [PATCH] avoid duplicate processing --- examples/fruits.py | 10 +++++++--- src/beakers/beakers.py | 3 +++ src/beakers/recipe.py | 7 ++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/examples/fruits.py b/examples/fruits.py index eff576a..c9615ed 100644 --- a/examples/fruits.py +++ b/examples/fruits.py @@ -43,15 +43,19 @@ recipe.add_conditional( lambda cw: cw.is_fruit, "fruits", ) -recipe.add_transform("fruits", "sentence", lambda x: f"I love a fresh {x['word']}") +recipe.add_transform( + "fruits", + "sentence", + lambda x: Sentence(sentence=f"I love a fresh {x.normalized_word}".split()), +) recipe.add_seed( "word", [ Word(word="apple"), - Word(word="banana"), + Word(word="bAnAnA"), Word(word="hammer"), Word(word="orange"), - Word(word="egg"), + Word(word="EGG"), ], ) diff --git a/src/beakers/beakers.py b/src/beakers/beakers.py index 07ae9e2..8908362 100644 --- a/src/beakers/beakers.py +++ b/src/beakers/beakers.py @@ -40,6 +40,9 @@ class Beaker(abc.ABC): for item in items: self.add_item(item) + def id_set(self) -> set[str]: + return set(id for id, _ in self.items()) + class TempBeaker(Beaker): def __init__(self, name: str, model: PydanticModel, recipe: "Recipe"): diff --git a/src/beakers/recipe.py b/src/beakers/recipe.py index cef4232..9ff1c1b 100644 --- a/src/beakers/recipe.py +++ b/src/beakers/recipe.py @@ -272,11 +272,14 @@ class Recipe: from_beaker = self.beakers[from_b] to_beaker = self.beakers[to_b] + already_processed = from_beaker.id_set() & to_beaker.id_set() + log.info( "transform", from_b=from_b, to_b=to_b, - items=len(from_beaker), + to_process=len(from_beaker) - len(already_processed), + already_processed=len(already_processed), transform=edge["transform"].name, ) @@ -289,6 +292,8 @@ class Recipe: t_func = transform.transform_func for id, item in from_beaker.items(): + if id in already_processed: + continue try: transformed = t_func(item) if transformed: