avoid duplicate processing
This commit is contained in:
		
							parent
							
								
									0998fe1e6a
								
							
						
					
					
						commit
						c21954ebba
					
				
					 3 changed files with 16 additions and 4 deletions
				
			
		|  | @ -43,15 +43,19 @@ recipe.add_conditional( | |||
|     lambda cw: cw.is_fruit, | ||||
|     "fruits", | ||||
| ) | ||||
| recipe.add_transform("fruits", "sentence", lambda x: f"I love a fresh {x['word']}") | ||||
| recipe.add_transform( | ||||
|     "fruits", | ||||
|     "sentence", | ||||
|     lambda x: Sentence(sentence=f"I love a fresh {x.normalized_word}".split()), | ||||
| ) | ||||
| 
 | ||||
| recipe.add_seed( | ||||
|     "word", | ||||
|     [ | ||||
|         Word(word="apple"), | ||||
|         Word(word="banana"), | ||||
|         Word(word="bAnAnA"), | ||||
|         Word(word="hammer"), | ||||
|         Word(word="orange"), | ||||
|         Word(word="egg"), | ||||
|         Word(word="EGG"), | ||||
|     ], | ||||
| ) | ||||
|  |  | |||
|  | @ -40,6 +40,9 @@ class Beaker(abc.ABC): | |||
|         for item in items: | ||||
|             self.add_item(item) | ||||
| 
 | ||||
|     def id_set(self) -> set[str]: | ||||
|         return set(id for id, _ in self.items()) | ||||
| 
 | ||||
| 
 | ||||
| class TempBeaker(Beaker): | ||||
|     def __init__(self, name: str, model: PydanticModel, recipe: "Recipe"): | ||||
|  |  | |||
|  | @ -272,11 +272,14 @@ class Recipe: | |||
| 
 | ||||
|                 from_beaker = self.beakers[from_b] | ||||
|                 to_beaker = self.beakers[to_b] | ||||
|                 already_processed = from_beaker.id_set() & to_beaker.id_set() | ||||
| 
 | ||||
|                 log.info( | ||||
|                     "transform", | ||||
|                     from_b=from_b, | ||||
|                     to_b=to_b, | ||||
|                     items=len(from_beaker), | ||||
|                     to_process=len(from_beaker) - len(already_processed), | ||||
|                     already_processed=len(already_processed), | ||||
|                     transform=edge["transform"].name, | ||||
|                 ) | ||||
| 
 | ||||
|  | @ -289,6 +292,8 @@ class Recipe: | |||
|                     t_func = transform.transform_func | ||||
| 
 | ||||
|                 for id, item in from_beaker.items(): | ||||
|                     if id in already_processed: | ||||
|                         continue | ||||
|                     try: | ||||
|                         transformed = t_func(item) | ||||
|                         if transformed: | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue
	
	 James Turk
						James Turk