avoid duplicate processing
This commit is contained in:
parent
0998fe1e6a
commit
c21954ebba
@ -43,15 +43,19 @@ recipe.add_conditional(
|
||||
lambda cw: cw.is_fruit,
|
||||
"fruits",
|
||||
)
|
||||
recipe.add_transform("fruits", "sentence", lambda x: f"I love a fresh {x['word']}")
|
||||
recipe.add_transform(
|
||||
"fruits",
|
||||
"sentence",
|
||||
lambda x: Sentence(sentence=f"I love a fresh {x.normalized_word}".split()),
|
||||
)
|
||||
|
||||
recipe.add_seed(
|
||||
"word",
|
||||
[
|
||||
Word(word="apple"),
|
||||
Word(word="banana"),
|
||||
Word(word="bAnAnA"),
|
||||
Word(word="hammer"),
|
||||
Word(word="orange"),
|
||||
Word(word="egg"),
|
||||
Word(word="EGG"),
|
||||
],
|
||||
)
|
||||
|
@ -40,6 +40,9 @@ class Beaker(abc.ABC):
|
||||
for item in items:
|
||||
self.add_item(item)
|
||||
|
||||
def id_set(self) -> set[str]:
    """Return the set of ids for every item this object currently yields.

    Iterates ``self.items()``, which is expected to yield ``(id, item)``
    pairs, and keeps only the id half of each pair. Duplicate ids collapse
    into a single entry because the result is a set.

    Returns:
        A new set containing each id exactly once; empty when
        ``self.items()`` yields nothing.
    """
    # Named ``item_id`` rather than ``id`` so the builtin ``id()`` is not
    # shadowed inside the comprehension.
    return {item_id for item_id, _ in self.items()}
|
||||
|
||||
|
||||
class TempBeaker(Beaker):
|
||||
def __init__(self, name: str, model: PydanticModel, recipe: "Recipe"):
|
||||
|
@ -272,11 +272,14 @@ class Recipe:
|
||||
|
||||
from_beaker = self.beakers[from_b]
|
||||
to_beaker = self.beakers[to_b]
|
||||
already_processed = from_beaker.id_set() & to_beaker.id_set()
|
||||
|
||||
log.info(
|
||||
"transform",
|
||||
from_b=from_b,
|
||||
to_b=to_b,
|
||||
items=len(from_beaker),
|
||||
to_process=len(from_beaker) - len(already_processed),
|
||||
already_processed=len(already_processed),
|
||||
transform=edge["transform"].name,
|
||||
)
|
||||
|
||||
@ -289,6 +292,8 @@ class Recipe:
|
||||
t_func = transform.transform_func
|
||||
|
||||
for id, item in from_beaker.items():
|
||||
if id in already_processed:
|
||||
continue
|
||||
try:
|
||||
transformed = t_func(item)
|
||||
if transformed:
|
||||
|
Loading…
Reference in New Issue
Block a user