minor pylint cleanups

This commit is contained in:
James Turk 2008-11-04 21:39:40 +00:00
parent 8ead922b4c
commit 0bbb1ee4e4
5 changed files with 39 additions and 41 deletions

View File

@ -1,18 +1,25 @@
"""
Saucebrush is a data loading & manipulation framework written in python.
"""
import filters, emitters, sources, utils
def run_recipe(source, *filter_args):
    """ Process data, taking it from a source and applying any number of filters

        source is any iterable of records.  Each entry in filter_args is
        called with the output of the previous stage (the first one receives
        source) and must return an iterable.  Once the whole chain has been
        consumed, done() is invoked on every filter that provides it.

        Returns None; the recipe is run for its side effects.
    """
    # connect datapath
    data = source
    for filter_ in filter_args:
        data = filter_(data)

    # actually run the data through (causes iterators to actually be called)
    for _ in data:
        pass

    # Call done() on the filters that define it.  The attribute is looked up
    # explicitly rather than catching AttributeError around the call, so an
    # AttributeError raised *inside* a done() implementation is not swallowed.
    for filter_ in filter_args:
        done = getattr(filter_, 'done', None)
        if done is not None:
            done()

View File

@ -3,10 +3,8 @@
it in some manner.
"""
from exceptions import NotImplementedError
from saucebrush.filters import Filter
class Emitter(Filter):
""" ABC for emitters
@ -88,10 +86,10 @@ class SqliteEmitter(Emitter):
"""
def __init__(self, dbname, table_name, fieldnames=None):
self(SqliteEmitter, self).__init__()
super(SqliteEmitter, self).__init__()
import sqlite3
self._conn = sqlite3.connect(dbname)
self._cursor = self.conn.cursor()
self._cursor = self._conn.cursor()
self._table_name = table_name
if fieldnames:
create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name,
@ -130,10 +128,12 @@ class SqlDumpEmitter(Emitter):
self._outfile = sys.stderr
else:
self._outfile = outfile
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (table_name, '`,`'.join(fieldnames))
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (
table_name, '`,`'.join(fieldnames))
def quote(self, item):
    """ Return item wrapped in single quotes with special characters escaped.

        item must be a string (it is escaped with str.replace).  Backslashes
        are doubled first so that the backslashes introduced by the later
        escapes are not doubled again; single quotes become \\' and NUL
        characters become the two-character escape \\0 (MySQL-style) instead
        of being turned into a literal "0", which silently changed the data.
    """
    escaped = item.replace("\\", "\\\\")
    escaped = escaped.replace("'", "\\'")
    escaped = escaped.replace(chr(0), "\\0")
    return "'%s'" % escaped
def emit_record(self, record):
quoted_data = [self.quote(record[field]) for field in self._fieldnames]

View File

@ -7,7 +7,6 @@
(or process_field for FieldFilter).
"""
from exceptions import NotImplementedError
from saucebrush import utils
######################
@ -99,8 +98,8 @@ class FieldModifier(FieldFilter):
return self._filter_func(item)
def __unicode__(self):
    """ Describe the filter as 'ClassName( target_keys, filter_func )'. """
    class_name = self.__class__.__name__
    keys_repr = str(self._target_keys)
    func_repr = str(self._filter_func)
    return '%s( %s, %s )' % (class_name, keys_repr, func_repr)
class FieldRemover(Filter):
@ -138,7 +137,7 @@ class FieldMerger(Filter):
self._merge_func = merge_func
def process_record(self, record):
    """ Merge groups of source fields into single target fields.

        For every (target, sources) pair in the field mapping, the source
        columns are popped from record (None when absent) and the merge
        function is called with them as positional arguments; its result
        is stored under the target key.  The record is modified in place
        and returned.
    """
    for target, sources in self._field_mapping.iteritems():
        popped = []
        for source in sources:
            popped.append(record.pop(source, None))
        record[target] = self._merge_func(*popped)
    return record
@ -204,13 +203,13 @@ class Splitter(Filter):
# if a dict, use process_record directly
if isinstance(subrecord, dict):
for filter in filters:
subrecord = filter.process_record(subrecord)
for filter_ in filters:
subrecord = filter_.process_record(subrecord)
# if a list or tuple, use __call__
elif isinstance(subrecord, (list, tuple)):
for filter in filters:
subrecord = filter(subrecord)
for filter_ in filters:
subrecord = filter_(subrecord)
subrecord = [r for r in subrecord] # unchain generators
# place back from whence it came
@ -232,18 +231,6 @@ class Flattener(Filter):
def __init__(self):
    """ Set up the filter; takes no options and only runs the base initializer. """
    super(Flattener, self).__init__()
'''def process_field(self, item):
# create a list of dictionaries with concatenated keys
retlist = []
for subitem in item:
newitem = {}
for key1,subdict in subitem.iteritems():
for key2,value in subdict.iteritems():
newitem[key1+'_'+key2] = value
retlist.append(newitem)
return retlist
'''
def process_record(self, record):
    """ Return the result of passing record through utils.flatten. """
    flattened = utils.flatten(record)
    return flattened

View File

@ -6,6 +6,7 @@
"""
import string
from saucebrush import utils
class CSVSource(object):
""" Saucebrush source for reading from CSV files.
@ -25,7 +26,7 @@ class CSVSource(object):
import csv
self._dictreader = csv.DictReader(csvfile, fieldnames)
for _ in xrange(skiprows):
self.dictreader.next()
self._dictreader.next()
def __iter__(self):
    """ Iterate over the rows by handing back the underlying csv.DictReader. """
    reader = self._dictreader
    return reader
@ -61,8 +62,8 @@ class FixedWidthFileSource(object):
def next(self):
    """ Read the next line of the file and slice it into a record dict.

        Each entry of the fields dict maps a field name to a sequence whose
        first two items are the start and end offsets of the slice taken
        from the line.  Trailing fill characters are stripped from every
        value.  Whatever the underlying file's next() raises at end of
        input propagates to the caller.
    """
    line = self._fwfile.next()
    record = {}
    # range_ (not the builtin range) holds the slice bounds for this field;
    # both bounds must come from it.
    for name, range_ in self._fields_dict.iteritems():
        record[name] = line[range_[0]:range_[1]].rstrip(self._fillchars)
    return record
@ -97,13 +98,14 @@ class HtmlTableSource(object):
# determine the fieldnames
if not fieldnames:
self._fieldnames = [td.string for td in self.rows[0].findAll(('td','th'))]
self._fieldnames = [td.string
for td in self._rows[0].findAll(('td','th'))]
else:
self._fieldnames = fieldnames
def process_tr(self):
    """ Generate one dict per table row.

        The text of every 'td' cell in a row is extracted with
        utils.string_dig and paired positionally with the field names.
    """
    for tr in self._rows:
        cell_text = []
        for cell in tr.findAll('td'):
            cell_text.append(utils.string_dig(cell))
        yield dict(zip(self._fieldnames, cell_text))
def __iter__(self):
@ -122,10 +124,11 @@ class DjangoModelSource(object):
settings.py.
"""
def __init__(self, dj_settings, app_label, model_name):
    """ Look up the Django model and prepare a values() query over its fields.

        The model is resolved through utils.get_django_model using the
        given settings, app label and model name.
    """
    model = utils.get_django_model(dj_settings, app_label, model_name)

    # only get values defined in model (no extra fields from custom manager)
    field_names = [field.name for field in model._meta.fields]
    self._data = model.objects.values(*field_names)
def __iter__(self):
    """ Return a fresh iterator over the stored model data. """
    rows = self._data
    return iter(rows)

View File

@ -16,7 +16,7 @@ def get_django_model(dj_settings, app_label, model_name):
DATABASE_HOST=dj_settings.DATABASE_HOST,
INSTALLED_APPS=dj_settings.INSTALLED_APPS)
from django.db.models import get_model
dbmodel = get_model(app_label, model_name)
return get_model(app_label, model_name)
def string_dig(element, joiner=''):
@ -29,7 +29,8 @@ def string_dig(element, joiner=''):
if element.string:
return element.string
else:
return joiner.join([string_dig(child) for child in element.findAll(True)])
return joiner.join([string_dig(child)
for child in element.findAll(True)])
def flatten(item, prefix=''):