diff --git a/examples/test-splitter.py b/examples/test-splitter.py index f338bf3..f592b24 100644 --- a/examples/test-splitter.py +++ b/examples/test-splitter.py @@ -1,6 +1,7 @@ from saucebrush.filters import Splitter, PhoneNumberCleaner, FieldMerger, FieldAdder from saucebrush.emitters import DebugEmitter import operator +from itertools import count import saucebrush data = [{'person': {'firstname': 'James', 'lastname': 'Turk'}, @@ -10,6 +11,6 @@ data = [{'person': {'firstname': 'James', 'lastname': 'Turk'}, namemerger = FieldMerger({'name': ('firstname', 'lastname')}, lambda x,y: ' '.join((x,y))) phonecleaner = PhoneNumberCleaner(('phone',)) splitter = Splitter({'person':[namemerger], 'phones':[phonecleaner]}) -ider = FieldAdder('id', [1,2,3,4,5]) +ider = FieldAdder('id', count()) -saucebrush.run_recipe(data, [ider, splitter, DebugEmitter()]) +saucebrush.run_recipe(data, ider, splitter, DebugEmitter()) diff --git a/saucebrush/__init__.py b/saucebrush/__init__.py index e58e098..9951e46 100644 --- a/saucebrush/__init__.py +++ b/saucebrush/__init__.py @@ -5,37 +5,39 @@ import filters, emitters, sources, utils class Recipe(filters.Filter): - + def __init__(self, *filter_args): self._filter_args = filter_args self.rejected = [] - + def get_filters(self): - + filters = [] - + for filter_ in self._filter_args: - + # check to see if this is a filter or a recipe - + if hasattr(filter_, 'get_filters'): # load filters from child recipe filters.extend(filter_.get_filters()) - else: filters.append(filter_) - + return filters - + + def reject_record(self): + self.rejected.append((record, message)) + def run(self, source): - + # load filters filters = self.get_filters() - + # connect datapath data = source for filter_ in filters: - data = filter_(self, data) + data = filter_(data, recipe=self) # actually run the data through (causes iterators to actually be called) for record in data: @@ -51,7 +53,7 @@ class Recipe(filters.Filter): def run_recipe(source, *filter_args): """ Process data, taking it from a source and applying any number of filters """ - + r = Recipe(*filter_args) r.run(source) return r diff --git a/saucebrush/filters.py b/saucebrush/filters.py index 0050576..a63a59d 100644 --- a/saucebrush/filters.py +++ b/saucebrush/filters.py @@ -22,9 +22,6 @@ class Filter(object): takes a single record (python dictionary) and returns a result. """ - def __init__(self): - pass - def process_record(self, record): """ Abstract method to be overridden. @@ -34,14 +31,15 @@ class Filter(object): self.__class__.__name__) def reject_record(self, record, message): - if hasattr(self, '_recipe'): - self._recipe.rejected.append((record, message)) + recipe = getattr(self, '_recipe') + if recipe: + recipe.reject_record(record, message) - def __call__(self, recipe, source): + def __call__(self, source, recipe=None): self._recipe = recipe for record in source: result = self.process_record(record) - if not result is None: + if result is not None: yield result @@ -52,10 +50,8 @@ class YieldFilter(Filter): it is passed, it should yield back as many records as needed and the filter must derive from YieldFilter. """ - def __init__(self): - super(YieldFilter, self).__init__() - def __call__(self, recipe, source): + def __call__(self, source, recipe=None): self._recipe = recipe for record in source: for result in self.process_record(record): @@ -207,6 +203,8 @@ class FieldAdder(Filter): super(FieldAdder, self).__init__() self._field_name = field_name self._field_value = field_value + if hasattr(self._field_value, '__iter__'): + self._field_value = iter(self._field_value).next self._replace = replace def process_record(self, record): @@ -291,7 +289,7 @@ class Splitter(Filter): # if a list or tuple, use __call__ elif isinstance(subrecord, (list, tuple)): for filter_ in filters: - subrecord = filter_(subrecord) + subrecord = filter_(subrecord, recipe=self._recipe) subrecord = [r for r in subrecord] # unchain generators # place back from whence it came