From 04e43f0ab112faccf6c2bf1a86f6d4a4fc24f88a Mon Sep 17 00:00:00 2001 From: James Turk Date: Tue, 11 Jul 2023 18:49:23 -0500 Subject: [PATCH] serialize/deserialize --- examples/articles.py | 1 + examples/fruits.py | 73 +++++++++++++++++++++++++++++------------- src/beakers/beakers.py | 47 ++++++++++++++++++++------- src/beakers/recipe.py | 6 ++-- 4 files changed, 91 insertions(+), 36 deletions(-) diff --git a/examples/articles.py b/examples/articles.py index 70abdf2..322c735 100644 --- a/examples/articles.py +++ b/examples/articles.py @@ -53,6 +53,7 @@ recipe.add_transform( "article", extract_npr_article, ) +recipe.add_transform("archived_article") npr_examples = [ diff --git a/examples/fruits.py b/examples/fruits.py index f632ca5..eff576a 100644 --- a/examples/fruits.py +++ b/examples/fruits.py @@ -1,28 +1,57 @@ from beakers.recipe import Recipe +import pydantic -recipe = Recipe("example01") -words_beaker = recipe.add_beaker("words", temp=True) -recipe.add_beaker("fruits") -recipe.add_beaker("other") -recipe.add_beaker("sentences") + +class Word(pydantic.BaseModel): + word: str + + +class ClassifiedWord(pydantic.BaseModel): + normalized_word: str + is_fruit: bool + + +class Sentence(pydantic.BaseModel): + sentence: list[str] + + +def word_classifier(item) -> ClassifiedWord: + return ClassifiedWord( + normalized_word=item.word.lower(), + is_fruit=item.word.lower() + in ( + "apple", + "banana", + "fig", + "grape", + "lemon", + "mango", + "orange", + "pear", + "raspberry", + ), + ) + + +recipe = Recipe("fruits-example") +recipe.add_beaker("word", Word) +recipe.add_beaker("classified_word", ClassifiedWord) +recipe.add_beaker("sentence", Sentence) +recipe.add_transform("word", "classified_word", word_classifier) recipe.add_conditional( - "words", - lambda x: x["word"] - in ( - "apple", - "banana", - "fig", - "grape", - "lemon", - "mango", - "orange", - "pear", - "raspberry", - ), + "classified_word", + lambda cw: cw.is_fruit, "fruits", - "other", ) -recipe.add_transform("fruits", "sentences", lambda x: f"I like to eat {x['word']}") -recipe.add_transform( - "other", "sentences", lambda x: f"I'm not so sure about {x['word']}" +recipe.add_transform("fruits", "sentence", lambda x: f"I love a fresh {x['word']}") + +recipe.add_seed( + "word", + [ + Word(word="apple"), + Word(word="banana"), + Word(word="hammer"), + Word(word="orange"), + Word(word="egg"), + ], ) diff --git a/src/beakers/beakers.py b/src/beakers/beakers.py index e9c8582..fdc9174 100644 --- a/src/beakers/beakers.py +++ b/src/beakers/beakers.py @@ -4,13 +4,31 @@ import sqlite3 import uuid +class DataObject: + def __init__(self, id: str | None = None): + self._id = id if id else str(uuid.uuid4()) + self._data = {} + + def __getattr__(self, name): + return self._data[name] + + def __setattr__(self, name, value): + if name.startswith("_"): + super().__setattr__(name, value) + elif name not in self._data: + self._data[name] = value + else: + raise AttributeError(f"DataObject attribute {name} already exists") + + class Beaker(abc.ABC): - def __init__(self, name: str, recipe): + def __init__(self, name: str, model: type, recipe: "Recipe"): self.name = name + self.model = model self.recipe = recipe def __repr__(self): - return f"Beaker({self.name})" + return f"Beaker({self.name}, {self.model.__name__})" @abc.abstractmethod def items(self): @@ -21,23 +39,29 @@ class Beaker(abc.ABC): pass @abc.abstractmethod - def add_item(self, item: dict, id: str | None = None) -> None: + def add_item(self, item: "T", id: str | None = None) -> None: pass @abc.abstractmethod def reset(self): pass + def add_items(self, items: list["T"]) -> None: + for item in items: + self.add_item(item) + class TempBeaker(Beaker): - def __init__(self, name: str, recipe): - super().__init__(name, recipe) + def __init__(self, name: str, model: type, recipe: "Recipe"): + super().__init__(name, model, recipe) self._items = [] def __len__(self): return len(self._items) - def add_item(self, item: dict, id=None) -> None: + def add_item(self, item: "T", id=None) -> None: + if id is None: + id = str(uuid.uuid1()) self._items.append((id, item)) def items(self): @@ -48,8 +72,8 @@ class TempBeaker(Beaker): class SqliteBeaker(Beaker): - def __init__(self, name: str, recipe): - super().__init__(name, recipe) + def __init__(self, name: str, model: type, recipe: "Recipe"): + super().__init__(name, model, recipe) # create table if it doesn't exist self.cursor = self.recipe.db.cursor() self.cursor.row_factory = sqlite3.Row @@ -61,18 +85,19 @@ class SqliteBeaker(Beaker): self.cursor.execute(f"SELECT uuid, data FROM {self.name}") data = self.cursor.fetchall() for item in data: - yield item["uuid"], json.loads(item["data"]) + yield item["uuid"], self.model(**json.loads(item["data"])) def __len__(self): self.cursor.execute(f"SELECT COUNT(*) FROM {self.name}") return self.cursor.fetchone()[0] - def add_item(self, item: dict, id: str | None = None) -> None: + def add_item(self, item: "T", id: str | None = None) -> None: if id is None: id = str(uuid.uuid1()) + print("UUID", id, item) self.cursor.execute( f"INSERT INTO {self.name} (uuid, data) VALUES (?, ?)", - (id, json.dumps(item)), + (id, item.model_dump_json()), ) self.recipe.db.commit() diff --git a/src/beakers/recipe.py b/src/beakers/recipe.py index 9379d84..a8247f4 100644 --- a/src/beakers/recipe.py +++ b/src/beakers/recipe.py @@ -54,9 +54,9 @@ class Recipe: def add_beaker(self, name: str, datatype: type | None) -> Beaker: self.graph.add_node(name, datatype=datatype) if datatype is None: - self.beakers[name] = TempBeaker(name, self) + self.beakers[name] = TempBeaker(name, datatype, self) else: - self.beakers[name] = SqliteBeaker(name, self) + self.beakers[name] = SqliteBeaker(name, datatype, self) return self.beakers[name] def add_transform( @@ -124,7 +124,7 @@ class Recipe: log.info("process_seeds", recipe=self.name) for beaker_name, seeds in self.seeds.items(): for seed in seeds: - self.beakers[beaker_name].add_item(seed) + self.beakers[beaker_name].add_items(seed) def get_metadata(self, table_name) -> dict: cursor = self.db.cursor()