minor pylint cleanups

This commit is contained in:
James Turk 2008-11-04 21:39:40 +00:00
parent 8ead922b4c
commit 0bbb1ee4e4
5 changed files with 39 additions and 41 deletions

View File

@ -1,18 +1,25 @@
"""
Saucebrush is a data loading & manipulation framework written in python.
"""
import filters, emitters, sources, utils import filters, emitters, sources, utils
def run_recipe(source, *filters): def run_recipe(source, *filter_args):
""" Process data, taking it from a source and applying any number of filters
"""
# connect datapath # connect datapath
data = source data = source
for filter in filters: for filter_ in filter_args:
data = filter(data) data = filter_(data)
# actually run the data through (causes iterators to actually be called) # actually run the data through (causes iterators to actually be called)
for record in data: for record in data:
pass pass
# try and call done() on all filters # try and call done() on all filters
for filter in filters: for filter_ in filter_args:
try: try:
filter.done() filter_.done()
except AttributeError: except AttributeError:
pass # don't care if there isn't a done method pass # don't care if there isn't a done method

View File

@ -3,10 +3,8 @@
it in some manner. it in some manner.
""" """
from exceptions import NotImplementedError
from saucebrush.filters import Filter from saucebrush.filters import Filter
class Emitter(Filter): class Emitter(Filter):
""" ABC for emitters """ ABC for emitters
@ -88,10 +86,10 @@ class SqliteEmitter(Emitter):
""" """
def __init__(self, dbname, table_name, fieldnames=None): def __init__(self, dbname, table_name, fieldnames=None):
self(SqliteEmitter, self).__init__() super(SqliteEmitter, self).__init__()
import sqlite3 import sqlite3
self._conn = sqlite3.connect(dbname) self._conn = sqlite3.connect(dbname)
self._cursor = self.conn.cursor() self._cursor = self._conn.cursor()
self._table_name = table_name self._table_name = table_name
if fieldnames: if fieldnames:
create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name, create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name,
@ -130,10 +128,12 @@ class SqlDumpEmitter(Emitter):
self._outfile = sys.stderr self._outfile = sys.stderr
else: else:
self._outfile = outfile self._outfile = outfile
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (table_name, '`,`'.join(fieldnames)) self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (
table_name, '`,`'.join(fieldnames))
def quote(self, item): def quote(self, item):
return "'%s'" % item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0') item = item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0')
return "'%s'" % item
def emit_record(self, record): def emit_record(self, record):
quoted_data = [self.quote(record[field]) for field in self._fieldnames] quoted_data = [self.quote(record[field]) for field in self._fieldnames]

View File

@ -7,7 +7,6 @@
(or process_field for FieldFilter). (or process_field for FieldFilter).
""" """
from exceptions import NotImplementedError
from saucebrush import utils from saucebrush import utils
###################### ######################
@ -99,8 +98,8 @@ class FieldModifier(FieldFilter):
return self._filter_func(item) return self._filter_func(item)
def __unicode__(self): def __unicode__(self):
return '%s( %s, %s )' % (self.__class__.__name__, str(self._target_keys), return '%s( %s, %s )' % (self.__class__.__name__,
str(self._filter_func)) str(self._target_keys), str(self._filter_func))
class FieldRemover(Filter): class FieldRemover(Filter):
@ -204,13 +203,13 @@ class Splitter(Filter):
# if a dict, use process_record directly # if a dict, use process_record directly
if isinstance(subrecord, dict): if isinstance(subrecord, dict):
for filter in filters: for filter_ in filters:
subrecord = filter.process_record(subrecord) subrecord = filter_.process_record(subrecord)
# if a list or tuple, use __call__ # if a list or tuple, use __call__
elif isinstance(subrecord, (list, tuple)): elif isinstance(subrecord, (list, tuple)):
for filter in filters: for filter_ in filters:
subrecord = filter(subrecord) subrecord = filter_(subrecord)
subrecord = [r for r in subrecord] # unchain generators subrecord = [r for r in subrecord] # unchain generators
# place back from whence it came # place back from whence it came
@ -232,18 +231,6 @@ class Flattener(Filter):
def __init__(self): def __init__(self):
super(Flattener, self).__init__() super(Flattener, self).__init__()
'''def process_field(self, item):
# create a list of dictionaries with concatenated keys
retlist = []
for subitem in item:
newitem = {}
for key1,subdict in subitem.iteritems():
for key2,value in subdict.iteritems():
newitem[key1+'_'+key2] = value
retlist.append(newitem)
return retlist
'''
def process_record(self, record): def process_record(self, record):
return utils.flatten(record) return utils.flatten(record)

View File

@ -6,6 +6,7 @@
""" """
import string import string
from saucebrush import utils
class CSVSource(object): class CSVSource(object):
""" Saucebrush source for reading from CSV files. """ Saucebrush source for reading from CSV files.
@ -25,7 +26,7 @@ class CSVSource(object):
import csv import csv
self._dictreader = csv.DictReader(csvfile, fieldnames) self._dictreader = csv.DictReader(csvfile, fieldnames)
for _ in xrange(skiprows): for _ in xrange(skiprows):
self.dictreader.next() self._dictreader.next()
def __iter__(self): def __iter__(self):
return self._dictreader return self._dictreader
@ -61,8 +62,8 @@ class FixedWidthFileSource(object):
def next(self): def next(self):
line = self._fwfile.next() line = self._fwfile.next()
record = {} record = {}
for name, range in self._fields_dict.iteritems(): for name, range_ in self._fields_dict.iteritems():
record[name] = line[range[0]:range[1]].rstrip(self._fillchars) record[name] = line[range[0]:range_[1]].rstrip(self._fillchars)
return record return record
@ -97,13 +98,14 @@ class HtmlTableSource(object):
# determine the fieldnames # determine the fieldnames
if not fieldnames: if not fieldnames:
self._fieldnames = [td.string for td in self.rows[0].findAll(('td','th'))] self._fieldnames = [td.string
for td in self._rows[0].findAll(('td','th'))]
else: else:
self._fieldnames = fieldnames self._fieldnames = fieldnames
def process_tr(): def process_tr(self):
for row in self._rows: for row in self._rows:
strings = [string_dig(td) for td in row.findAll('td')] strings = [utils.string_dig(td) for td in row.findAll('td')]
yield dict(zip(self._fieldnames, strings)) yield dict(zip(self._fieldnames, strings))
def __iter__(self): def __iter__(self):
@ -122,10 +124,11 @@ class DjangoModelSource(object):
settings.py. settings.py.
""" """
def __init__(self, dj_settings, app_label, model_name): def __init__(self, dj_settings, app_label, model_name):
dbmodel = get_django_model(dj_settings, app_label, model_name) dbmodel = utils.get_django_model(dj_settings, app_label, model_name)
# only get values defined in model (no extra fields from custom manager) # only get values defined in model (no extra fields from custom manager)
self._data = dbmodel.objects.values(*[f.name for f in dbmodel._meta.fields]) self._data = dbmodel.objects.values(*[f.name
for f in dbmodel._meta.fields])
def __iter__(self): def __iter__(self):
return iter(self._data) return iter(self._data)

View File

@ -16,7 +16,7 @@ def get_django_model(dj_settings, app_label, model_name):
DATABASE_HOST=dj_settings.DATABASE_HOST, DATABASE_HOST=dj_settings.DATABASE_HOST,
INSTALLED_APPS=dj_settings.INSTALLED_APPS) INSTALLED_APPS=dj_settings.INSTALLED_APPS)
from django.db.models import get_model from django.db.models import get_model
dbmodel = get_model(app_label, model_name) return get_model(app_label, model_name)
def string_dig(element, joiner=''): def string_dig(element, joiner=''):
@ -29,7 +29,8 @@ def string_dig(element, joiner=''):
if element.string: if element.string:
return element.string return element.string
else: else:
return joiner.join([string_dig(child) for child in element.findAll(True)]) return joiner.join([string_dig(child)
for child in element.findAll(True)])
def flatten(item, prefix=''): def flatten(item, prefix=''):