some cleanups discovered via test-splitter.py failure

* recipe is now a kwarg to Filter.__call__
* recipe.reject_record
* Splitter works with iterables
This commit is contained in:
James Turk 2010-02-20 11:14:23 -05:00
parent 293c7a0a78
commit 5421c12acd
3 changed files with 27 additions and 26 deletions

View File

@ -1,6 +1,7 @@
from saucebrush.filters import Splitter, PhoneNumberCleaner, FieldMerger, FieldAdder from saucebrush.filters import Splitter, PhoneNumberCleaner, FieldMerger, FieldAdder
from saucebrush.emitters import DebugEmitter from saucebrush.emitters import DebugEmitter
import operator import operator
from itertools import count
import saucebrush import saucebrush
data = [{'person': {'firstname': 'James', 'lastname': 'Turk'}, data = [{'person': {'firstname': 'James', 'lastname': 'Turk'},
@ -10,6 +11,6 @@ data = [{'person': {'firstname': 'James', 'lastname': 'Turk'},
namemerger = FieldMerger({'name': ('firstname', 'lastname')}, lambda x,y: ' '.join((x,y))) namemerger = FieldMerger({'name': ('firstname', 'lastname')}, lambda x,y: ' '.join((x,y)))
phonecleaner = PhoneNumberCleaner(('phone',)) phonecleaner = PhoneNumberCleaner(('phone',))
splitter = Splitter({'person':[namemerger], 'phones':[phonecleaner]}) splitter = Splitter({'person':[namemerger], 'phones':[phonecleaner]})
ider = FieldAdder('id', [1,2,3,4,5]) ider = FieldAdder('id', count())
saucebrush.run_recipe(data, [ider, splitter, DebugEmitter()]) saucebrush.run_recipe(data, ider, splitter, DebugEmitter())

View File

@ -5,37 +5,39 @@
import filters, emitters, sources, utils import filters, emitters, sources, utils
class Recipe(filters.Filter): class Recipe(filters.Filter):
def __init__(self, *filter_args): def __init__(self, *filter_args):
self._filter_args = filter_args self._filter_args = filter_args
self.rejected = [] self.rejected = []
def get_filters(self): def get_filters(self):
filters = [] filters = []
for filter_ in self._filter_args: for filter_ in self._filter_args:
# check to see if this is a filter or a recipe # check to see if this is a filter or a recipe
if hasattr(filter_, 'get_filters'): if hasattr(filter_, 'get_filters'):
# load filters from child recipe # load filters from child recipe
filters.extend(filter_.get_filters()) filters.extend(filter_.get_filters())
else: else:
filters.append(filter_) filters.append(filter_)
return filters return filters
def reject_record(self):
self.rejected.append((record, message))
def run(self, source): def run(self, source):
# load filters # load filters
filters = self.get_filters() filters = self.get_filters()
# connect datapath # connect datapath
data = source data = source
for filter_ in filters: for filter_ in filters:
data = filter_(self, data) data = filter_(data, recipe=self)
# actually run the data through (causes iterators to actually be called) # actually run the data through (causes iterators to actually be called)
for record in data: for record in data:
@ -51,7 +53,7 @@ class Recipe(filters.Filter):
def run_recipe(source, *filter_args): def run_recipe(source, *filter_args):
""" Process data, taking it from a source and applying any number of filters """ Process data, taking it from a source and applying any number of filters
""" """
r = Recipe(*filter_args) r = Recipe(*filter_args)
r.run(source) r.run(source)
return r return r

View File

@ -22,9 +22,6 @@ class Filter(object):
takes a single record (python dictionary) and returns a result. takes a single record (python dictionary) and returns a result.
""" """
def __init__(self):
pass
def process_record(self, record): def process_record(self, record):
""" Abstract method to be overridden. """ Abstract method to be overridden.
@ -34,14 +31,15 @@ class Filter(object):
self.__class__.__name__) self.__class__.__name__)
def reject_record(self, record, message): def reject_record(self, record, message):
if hasattr(self, '_recipe'): recipe = getattr(self, '_recipe')
self._recipe.rejected.append((record, message)) if recipe:
recipe.reject_record(record, message)
def __call__(self, recipe, source): def __call__(self, source, recipe=None):
self._recipe = recipe self._recipe = recipe
for record in source: for record in source:
result = self.process_record(record) result = self.process_record(record)
if not result is None: if result is not None:
yield result yield result
@ -52,10 +50,8 @@ class YieldFilter(Filter):
it is passed, it should yield back as many records as needed and the it is passed, it should yield back as many records as needed and the
filter must derive from YieldFilter. filter must derive from YieldFilter.
""" """
def __init__(self):
super(YieldFilter, self).__init__()
def __call__(self, recipe, source): def __call__(self, source, recipe=None):
self._recipe = recipe self._recipe = recipe
for record in source: for record in source:
for result in self.process_record(record): for result in self.process_record(record):
@ -207,6 +203,8 @@ class FieldAdder(Filter):
super(FieldAdder, self).__init__() super(FieldAdder, self).__init__()
self._field_name = field_name self._field_name = field_name
self._field_value = field_value self._field_value = field_value
if hasattr(self._field_value, '__iter__'):
self._field_value = iter(self._field_value).next
self._replace = replace self._replace = replace
def process_record(self, record): def process_record(self, record):
@ -291,7 +289,7 @@ class Splitter(Filter):
# if a list or tuple, use __call__ # if a list or tuple, use __call__
elif isinstance(subrecord, (list, tuple)): elif isinstance(subrecord, (list, tuple)):
for filter_ in filters: for filter_ in filters:
subrecord = filter_(subrecord) subrecord = filter_(subrecord, recipe=self._recipe)
subrecord = [r for r in subrecord] # unchain generators subrecord = [r for r in subrecord] # unchain generators
# place back from whence it came # place back from whence it came