minor pylint cleanups
This commit is contained in:
parent
8ead922b4c
commit
0bbb1ee4e4
@ -1,18 +1,25 @@
|
||||
"""
|
||||
Saucebrush is a data loading & manipulation framework written in python.
|
||||
"""
|
||||
|
||||
import filters, emitters, sources, utils
|
||||
|
||||
def run_recipe(source, *filters):
|
||||
def run_recipe(source, *filter_args):
|
||||
""" Process data, taking it from a source and applying any number of filters
|
||||
"""
|
||||
|
||||
# connect datapath
|
||||
data = source
|
||||
for filter in filters:
|
||||
data = filter(data)
|
||||
for filter_ in filter_args:
|
||||
data = filter_(data)
|
||||
|
||||
# actually run the data through (causes iterators to actually be called)
|
||||
for record in data:
|
||||
pass
|
||||
|
||||
# try and call done() on all filters
|
||||
for filter in filters:
|
||||
for filter_ in filter_args:
|
||||
try:
|
||||
filter.done()
|
||||
filter_.done()
|
||||
except AttributeError:
|
||||
pass # don't care if there isn't a done method
|
||||
|
@ -3,10 +3,8 @@
|
||||
it in some manner.
|
||||
"""
|
||||
|
||||
from exceptions import NotImplementedError
|
||||
from saucebrush.filters import Filter
|
||||
|
||||
|
||||
class Emitter(Filter):
|
||||
""" ABC for emitters
|
||||
|
||||
@ -88,10 +86,10 @@ class SqliteEmitter(Emitter):
|
||||
"""
|
||||
|
||||
def __init__(self, dbname, table_name, fieldnames=None):
|
||||
self(SqliteEmitter, self).__init__()
|
||||
super(SqliteEmitter, self).__init__()
|
||||
import sqlite3
|
||||
self._conn = sqlite3.connect(dbname)
|
||||
self._cursor = self.conn.cursor()
|
||||
self._cursor = self._conn.cursor()
|
||||
self._table_name = table_name
|
||||
if fieldnames:
|
||||
create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name,
|
||||
@ -130,10 +128,12 @@ class SqlDumpEmitter(Emitter):
|
||||
self._outfile = sys.stderr
|
||||
else:
|
||||
self._outfile = outfile
|
||||
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (table_name, '`,`'.join(fieldnames))
|
||||
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (
|
||||
table_name, '`,`'.join(fieldnames))
|
||||
|
||||
def quote(self, item):
|
||||
return "'%s'" % item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0')
|
||||
item = item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0')
|
||||
return "'%s'" % item
|
||||
|
||||
def emit_record(self, record):
|
||||
quoted_data = [self.quote(record[field]) for field in self._fieldnames]
|
||||
|
@ -7,7 +7,6 @@
|
||||
(or process_field for FieldFilter).
|
||||
"""
|
||||
|
||||
from exceptions import NotImplementedError
|
||||
from saucebrush import utils
|
||||
|
||||
######################
|
||||
@ -99,8 +98,8 @@ class FieldModifier(FieldFilter):
|
||||
return self._filter_func(item)
|
||||
|
||||
def __unicode__(self):
|
||||
return '%s( %s, %s )' % (self.__class__.__name__, str(self._target_keys),
|
||||
str(self._filter_func))
|
||||
return '%s( %s, %s )' % (self.__class__.__name__,
|
||||
str(self._target_keys), str(self._filter_func))
|
||||
|
||||
|
||||
class FieldRemover(Filter):
|
||||
@ -138,7 +137,7 @@ class FieldMerger(Filter):
|
||||
self._merge_func = merge_func
|
||||
|
||||
def process_record(self, record):
|
||||
for to_col,from_cols in self._field_mapping.iteritems():
|
||||
for to_col, from_cols in self._field_mapping.iteritems():
|
||||
values = [record.pop(col, None) for col in from_cols]
|
||||
record[to_col] = self._merge_func(*values)
|
||||
return record
|
||||
@ -204,13 +203,13 @@ class Splitter(Filter):
|
||||
|
||||
# if a dict, use process_record directly
|
||||
if isinstance(subrecord, dict):
|
||||
for filter in filters:
|
||||
subrecord = filter.process_record(subrecord)
|
||||
for filter_ in filters:
|
||||
subrecord = filter_.process_record(subrecord)
|
||||
|
||||
# if a list or tuple, use __call__
|
||||
elif isinstance(subrecord, (list, tuple)):
|
||||
for filter in filters:
|
||||
subrecord = filter(subrecord)
|
||||
for filter_ in filters:
|
||||
subrecord = filter_(subrecord)
|
||||
subrecord = [r for r in subrecord] # unchain generators
|
||||
|
||||
# place back from whence it came
|
||||
@ -232,18 +231,6 @@ class Flattener(Filter):
|
||||
def __init__(self):
|
||||
super(Flattener, self).__init__()
|
||||
|
||||
'''def process_field(self, item):
|
||||
# create a list of dictionaries with concatenated keys
|
||||
retlist = []
|
||||
for subitem in item:
|
||||
newitem = {}
|
||||
for key1,subdict in subitem.iteritems():
|
||||
for key2,value in subdict.iteritems():
|
||||
newitem[key1+'_'+key2] = value
|
||||
retlist.append(newitem)
|
||||
return retlist
|
||||
'''
|
||||
|
||||
def process_record(self, record):
|
||||
return utils.flatten(record)
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
"""
|
||||
|
||||
import string
|
||||
from saucebrush import utils
|
||||
|
||||
class CSVSource(object):
|
||||
""" Saucebrush source for reading from CSV files.
|
||||
@ -25,7 +26,7 @@ class CSVSource(object):
|
||||
import csv
|
||||
self._dictreader = csv.DictReader(csvfile, fieldnames)
|
||||
for _ in xrange(skiprows):
|
||||
self.dictreader.next()
|
||||
self._dictreader.next()
|
||||
|
||||
def __iter__(self):
|
||||
return self._dictreader
|
||||
@ -61,8 +62,8 @@ class FixedWidthFileSource(object):
|
||||
def next(self):
|
||||
line = self._fwfile.next()
|
||||
record = {}
|
||||
for name, range in self._fields_dict.iteritems():
|
||||
record[name] = line[range[0]:range[1]].rstrip(self._fillchars)
|
||||
for name, range_ in self._fields_dict.iteritems():
|
||||
record[name] = line[range_[0]:range_[1]].rstrip(self._fillchars)
|
||||
return record
|
||||
|
||||
|
||||
@ -97,13 +98,14 @@ class HtmlTableSource(object):
|
||||
|
||||
# determine the fieldnames
|
||||
if not fieldnames:
|
||||
self._fieldnames = [td.string for td in self.rows[0].findAll(('td','th'))]
|
||||
self._fieldnames = [td.string
|
||||
for td in self._rows[0].findAll(('td','th'))]
|
||||
else:
|
||||
self._fieldnames = fieldnames
|
||||
|
||||
def process_tr():
|
||||
def process_tr(self):
|
||||
for row in self._rows:
|
||||
strings = [string_dig(td) for td in row.findAll('td')]
|
||||
strings = [utils.string_dig(td) for td in row.findAll('td')]
|
||||
yield dict(zip(self._fieldnames, strings))
|
||||
|
||||
def __iter__(self):
|
||||
@ -122,10 +124,11 @@ class DjangoModelSource(object):
|
||||
settings.py.
|
||||
"""
|
||||
def __init__(self, dj_settings, app_label, model_name):
|
||||
dbmodel = get_django_model(dj_settings, app_label, model_name)
|
||||
dbmodel = utils.get_django_model(dj_settings, app_label, model_name)
|
||||
|
||||
# only get values defined in model (no extra fields from custom manager)
|
||||
self._data = dbmodel.objects.values(*[f.name for f in dbmodel._meta.fields])
|
||||
self._data = dbmodel.objects.values(*[f.name
|
||||
for f in dbmodel._meta.fields])
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._data)
|
||||
|
@ -16,7 +16,7 @@ def get_django_model(dj_settings, app_label, model_name):
|
||||
DATABASE_HOST=dj_settings.DATABASE_HOST,
|
||||
INSTALLED_APPS=dj_settings.INSTALLED_APPS)
|
||||
from django.db.models import get_model
|
||||
dbmodel = get_model(app_label, model_name)
|
||||
return get_model(app_label, model_name)
|
||||
|
||||
|
||||
def string_dig(element, joiner=''):
|
||||
@ -29,7 +29,8 @@ def string_dig(element, joiner=''):
|
||||
if element.string:
|
||||
return element.string
|
||||
else:
|
||||
return joiner.join([string_dig(child) for child in element.findAll(True)])
|
||||
return joiner.join([string_dig(child)
|
||||
for child in element.findAll(True)])
|
||||
|
||||
|
||||
def flatten(item, prefix=''):
|
||||
|
Loading…
Reference in New Issue
Block a user