diff --git a/saucebrush/__init__.py b/saucebrush/__init__.py index c7bdb19..62f3b14 100644 --- a/saucebrush/__init__.py +++ b/saucebrush/__init__.py @@ -1,18 +1,25 @@ +""" + Saucebrush is a data loading & manipulation framework written in python. +""" + import filters, emitters, sources, utils -def run_recipe(source, *filters): +def run_recipe(source, *filter_args): + """ Process data, taking it from a source and applying any number of filters + """ + # connect datapath data = source - for filter in filters: - data = filter(data) + for filter_ in filter_args: + data = filter_(data) # actually run the data through (causes iterators to actually be called) for record in data: pass # try and call done() on all filters - for filter in filters: + for filter_ in filter_args: try: - filter.done() + filter_.done() except AttributeError: pass # don't care if there isn't a done method diff --git a/saucebrush/emitters.py b/saucebrush/emitters.py index 756c5e4..58129ec 100644 --- a/saucebrush/emitters.py +++ b/saucebrush/emitters.py @@ -3,10 +3,8 @@ it in some manner. 
""" -from exceptions import NotImplementedError from saucebrush.filters import Filter - class Emitter(Filter): """ ABC for emitters @@ -88,10 +86,10 @@ class SqliteEmitter(Emitter): """ def __init__(self, dbname, table_name, fieldnames=None): - self(SqliteEmitter, self).__init__() + super(SqliteEmitter, self).__init__() import sqlite3 self._conn = sqlite3.connect(dbname) - self._cursor = self.conn.cursor() + self._cursor = self._conn.cursor() self._table_name = table_name if fieldnames: create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name, @@ -130,10 +128,12 @@ class SqlDumpEmitter(Emitter): self._outfile = sys.stderr else: self._outfile = outfile - self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (table_name, '`,`'.join(fieldnames)) + self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % ( + table_name, '`,`'.join(fieldnames)) def quote(self, item): - return "'%s'" % item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0') + item = item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0') + return "'%s'" % item def emit_record(self, record): quoted_data = [self.quote(record[field]) for field in self._fieldnames] diff --git a/saucebrush/filters.py b/saucebrush/filters.py index 80fd61c..36971c5 100644 --- a/saucebrush/filters.py +++ b/saucebrush/filters.py @@ -7,7 +7,6 @@ (or process_field for FieldFilter). 
""" -from exceptions import NotImplementedError from saucebrush import utils ###################### @@ -99,8 +98,8 @@ class FieldModifier(FieldFilter): return self._filter_func(item) def __unicode__(self): - return '%s( %s, %s )' % (self.__class__.__name__, str(self._target_keys), - str(self._filter_func)) + return '%s( %s, %s )' % (self.__class__.__name__, + str(self._target_keys), str(self._filter_func)) class FieldRemover(Filter): @@ -138,7 +137,7 @@ class FieldMerger(Filter): self._merge_func = merge_func def process_record(self, record): - for to_col,from_cols in self._field_mapping.iteritems(): + for to_col, from_cols in self._field_mapping.iteritems(): values = [record.pop(col, None) for col in from_cols] record[to_col] = self._merge_func(*values) return record @@ -204,13 +203,13 @@ class Splitter(Filter): # if a dict, use process_record directly if isinstance(subrecord, dict): - for filter in filters: - subrecord = filter.process_record(subrecord) + for filter_ in filters: + subrecord = filter_.process_record(subrecord) # if a list or tuple, use __call__ elif isinstance(subrecord, (list, tuple)): - for filter in filters: - subrecord = filter(subrecord) + for filter_ in filters: + subrecord = filter_(subrecord) subrecord = [r for r in subrecord] # unchain generators # place back from whence it came @@ -232,18 +231,6 @@ class Flattener(Filter): def __init__(self): super(Flattener, self).__init__() - '''def process_field(self, item): - # create a list of dictionaries with concatenated keys - retlist = [] - for subitem in item: - newitem = {} - for key1,subdict in subitem.iteritems(): - for key2,value in subdict.iteritems(): - newitem[key1+'_'+key2] = value - retlist.append(newitem) - return retlist - ''' - def process_record(self, record): return utils.flatten(record) diff --git a/saucebrush/sources.py b/saucebrush/sources.py index b4bc1a2..90e4456 100644 --- a/saucebrush/sources.py +++ b/saucebrush/sources.py @@ -6,6 +6,7 @@ """ import string +from saucebrush 
import utils class CSVSource(object): """ Saucebrush source for reading from CSV files. @@ -25,7 +26,7 @@ class CSVSource(object): import csv self._dictreader = csv.DictReader(csvfile, fieldnames) for _ in xrange(skiprows): - self.dictreader.next() + self._dictreader.next() def __iter__(self): return self._dictreader @@ -61,8 +62,8 @@ class FixedWidthFileSource(object): def next(self): line = self._fwfile.next() record = {} - for name, range in self._fields_dict.iteritems(): - record[name] = line[range[0]:range[1]].rstrip(self._fillchars) + for name, range_ in self._fields_dict.iteritems(): + record[name] = line[range_[0]:range_[1]].rstrip(self._fillchars) return record @@ -97,13 +98,14 @@ class HtmlTableSource(object): # determine the fieldnames if not fieldnames: - self._fieldnames = [td.string for td in self.rows[0].findAll(('td','th'))] + self._fieldnames = [td.string + for td in self._rows[0].findAll(('td','th'))] else: self._fieldnames = fieldnames - def process_tr(): + def process_tr(self): for row in self._rows: - strings = [string_dig(td) for td in row.findAll('td')] + strings = [utils.string_dig(td) for td in row.findAll('td')] yield dict(zip(self._fieldnames, strings)) def __iter__(self): @@ -122,10 +124,11 @@ class DjangoModelSource(object): settings.py. 
""" def __init__(self, dj_settings, app_label, model_name): - dbmodel = get_django_model(dj_settings, app_label, model_name) + dbmodel = utils.get_django_model(dj_settings, app_label, model_name) # only get values defined in model (no extra fields from custom manager) - self._data = dbmodel.objects.values(*[f.name for f in dbmodel._meta.fields]) + self._data = dbmodel.objects.values(*[f.name + for f in dbmodel._meta.fields]) def __iter__(self): return iter(self._data) diff --git a/saucebrush/utils.py b/saucebrush/utils.py index 42974f7..69ae323 100644 --- a/saucebrush/utils.py +++ b/saucebrush/utils.py @@ -16,7 +16,7 @@ def get_django_model(dj_settings, app_label, model_name): DATABASE_HOST=dj_settings.DATABASE_HOST, INSTALLED_APPS=dj_settings.INSTALLED_APPS) from django.db.models import get_model - dbmodel = get_model(app_label, model_name) + return get_model(app_label, model_name) def string_dig(element, joiner=''): @@ -29,7 +29,8 @@ def string_dig(element, joiner=''): if element.string: return element.string else: - return joiner.join([string_dig(child) for child in element.findAll(True)]) + return joiner.join([string_dig(child) + for child in element.findAll(True)]) def flatten(item, prefix=''):