minor pylint cleanups
This commit is contained in:
parent
8ead922b4c
commit
0bbb1ee4e4
@ -1,18 +1,25 @@
|
|||||||
|
"""
|
||||||
|
Saucebrush is a data loading & manipulation framework written in python.
|
||||||
|
"""
|
||||||
|
|
||||||
import filters, emitters, sources, utils
|
import filters, emitters, sources, utils
|
||||||
|
|
||||||
def run_recipe(source, *filters):
|
def run_recipe(source, *filter_args):
|
||||||
|
""" Process data, taking it from a source and applying any number of filters
|
||||||
|
"""
|
||||||
|
|
||||||
# connect datapath
|
# connect datapath
|
||||||
data = source
|
data = source
|
||||||
for filter in filters:
|
for filter_ in filter_args:
|
||||||
data = filter(data)
|
data = filter_(data)
|
||||||
|
|
||||||
# actually run the data through (causes iterators to actually be called)
|
# actually run the data through (causes iterators to actually be called)
|
||||||
for record in data:
|
for record in data:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# try and call done() on all filters
|
# try and call done() on all filters
|
||||||
for filter in filters:
|
for filter_ in filter_args:
|
||||||
try:
|
try:
|
||||||
filter.done()
|
filter_.done()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass # don't care if there isn't a done method
|
pass # don't care if there isn't a done method
|
||||||
|
@ -3,10 +3,8 @@
|
|||||||
it in some manner.
|
it in some manner.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from exceptions import NotImplementedError
|
|
||||||
from saucebrush.filters import Filter
|
from saucebrush.filters import Filter
|
||||||
|
|
||||||
|
|
||||||
class Emitter(Filter):
|
class Emitter(Filter):
|
||||||
""" ABC for emitters
|
""" ABC for emitters
|
||||||
|
|
||||||
@ -88,10 +86,10 @@ class SqliteEmitter(Emitter):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, dbname, table_name, fieldnames=None):
|
def __init__(self, dbname, table_name, fieldnames=None):
|
||||||
self(SqliteEmitter, self).__init__()
|
super(SqliteEmitter, self).__init__()
|
||||||
import sqlite3
|
import sqlite3
|
||||||
self._conn = sqlite3.connect(dbname)
|
self._conn = sqlite3.connect(dbname)
|
||||||
self._cursor = self.conn.cursor()
|
self._cursor = self._conn.cursor()
|
||||||
self._table_name = table_name
|
self._table_name = table_name
|
||||||
if fieldnames:
|
if fieldnames:
|
||||||
create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name,
|
create = "CREATE TABLE IF NOT EXISTS %s (%s)" % (table_name,
|
||||||
@ -130,10 +128,12 @@ class SqlDumpEmitter(Emitter):
|
|||||||
self._outfile = sys.stderr
|
self._outfile = sys.stderr
|
||||||
else:
|
else:
|
||||||
self._outfile = outfile
|
self._outfile = outfile
|
||||||
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (table_name, '`,`'.join(fieldnames))
|
self._insert_str = "INSERT INTO `%s` (`%s`) VALUES (%%s);\n" % (
|
||||||
|
table_name, '`,`'.join(fieldnames))
|
||||||
|
|
||||||
def quote(self, item):
|
def quote(self, item):
|
||||||
return "'%s'" % item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0')
|
item = item.replace("\\","\\\\").replace("'","\\'").replace(chr(0),'0')
|
||||||
|
return "'%s'" % item
|
||||||
|
|
||||||
def emit_record(self, record):
|
def emit_record(self, record):
|
||||||
quoted_data = [self.quote(record[field]) for field in self._fieldnames]
|
quoted_data = [self.quote(record[field]) for field in self._fieldnames]
|
||||||
|
@ -7,7 +7,6 @@
|
|||||||
(or process_field for FieldFilter).
|
(or process_field for FieldFilter).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from exceptions import NotImplementedError
|
|
||||||
from saucebrush import utils
|
from saucebrush import utils
|
||||||
|
|
||||||
######################
|
######################
|
||||||
@ -99,8 +98,8 @@ class FieldModifier(FieldFilter):
|
|||||||
return self._filter_func(item)
|
return self._filter_func(item)
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return '%s( %s, %s )' % (self.__class__.__name__, str(self._target_keys),
|
return '%s( %s, %s )' % (self.__class__.__name__,
|
||||||
str(self._filter_func))
|
str(self._target_keys), str(self._filter_func))
|
||||||
|
|
||||||
|
|
||||||
class FieldRemover(Filter):
|
class FieldRemover(Filter):
|
||||||
@ -204,13 +203,13 @@ class Splitter(Filter):
|
|||||||
|
|
||||||
# if a dict, use process_record directly
|
# if a dict, use process_record directly
|
||||||
if isinstance(subrecord, dict):
|
if isinstance(subrecord, dict):
|
||||||
for filter in filters:
|
for filter_ in filters:
|
||||||
subrecord = filter.process_record(subrecord)
|
subrecord = filter_.process_record(subrecord)
|
||||||
|
|
||||||
# if a list or tuple, use __call__
|
# if a list or tuple, use __call__
|
||||||
elif isinstance(subrecord, (list, tuple)):
|
elif isinstance(subrecord, (list, tuple)):
|
||||||
for filter in filters:
|
for filter_ in filters:
|
||||||
subrecord = filter(subrecord)
|
subrecord = filter_(subrecord)
|
||||||
subrecord = [r for r in subrecord] # unchain generators
|
subrecord = [r for r in subrecord] # unchain generators
|
||||||
|
|
||||||
# place back from whence it came
|
# place back from whence it came
|
||||||
@ -232,18 +231,6 @@ class Flattener(Filter):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(Flattener, self).__init__()
|
super(Flattener, self).__init__()
|
||||||
|
|
||||||
'''def process_field(self, item):
|
|
||||||
# create a list of dictionaries with concatenated keys
|
|
||||||
retlist = []
|
|
||||||
for subitem in item:
|
|
||||||
newitem = {}
|
|
||||||
for key1,subdict in subitem.iteritems():
|
|
||||||
for key2,value in subdict.iteritems():
|
|
||||||
newitem[key1+'_'+key2] = value
|
|
||||||
retlist.append(newitem)
|
|
||||||
return retlist
|
|
||||||
'''
|
|
||||||
|
|
||||||
def process_record(self, record):
|
def process_record(self, record):
|
||||||
return utils.flatten(record)
|
return utils.flatten(record)
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import string
|
import string
|
||||||
|
from saucebrush import utils
|
||||||
|
|
||||||
class CSVSource(object):
|
class CSVSource(object):
|
||||||
""" Saucebrush source for reading from CSV files.
|
""" Saucebrush source for reading from CSV files.
|
||||||
@ -25,7 +26,7 @@ class CSVSource(object):
|
|||||||
import csv
|
import csv
|
||||||
self._dictreader = csv.DictReader(csvfile, fieldnames)
|
self._dictreader = csv.DictReader(csvfile, fieldnames)
|
||||||
for _ in xrange(skiprows):
|
for _ in xrange(skiprows):
|
||||||
self.dictreader.next()
|
self._dictreader.next()
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return self._dictreader
|
return self._dictreader
|
||||||
@ -61,8 +62,8 @@ class FixedWidthFileSource(object):
|
|||||||
def next(self):
|
def next(self):
|
||||||
line = self._fwfile.next()
|
line = self._fwfile.next()
|
||||||
record = {}
|
record = {}
|
||||||
for name, range in self._fields_dict.iteritems():
|
for name, range_ in self._fields_dict.iteritems():
|
||||||
record[name] = line[range[0]:range[1]].rstrip(self._fillchars)
|
record[name] = line[range_[0]:range_[1]].rstrip(self._fillchars)
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
|
||||||
@ -97,13 +98,14 @@ class HtmlTableSource(object):
|
|||||||
|
|
||||||
# determine the fieldnames
|
# determine the fieldnames
|
||||||
if not fieldnames:
|
if not fieldnames:
|
||||||
self._fieldnames = [td.string for td in self.rows[0].findAll(('td','th'))]
|
self._fieldnames = [td.string
|
||||||
|
for td in self._rows[0].findAll(('td','th'))]
|
||||||
else:
|
else:
|
||||||
self._fieldnames = fieldnames
|
self._fieldnames = fieldnames
|
||||||
|
|
||||||
def process_tr():
|
def process_tr(self):
|
||||||
for row in self._rows:
|
for row in self._rows:
|
||||||
strings = [string_dig(td) for td in row.findAll('td')]
|
strings = [utils.string_dig(td) for td in row.findAll('td')]
|
||||||
yield dict(zip(self._fieldnames, strings))
|
yield dict(zip(self._fieldnames, strings))
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
@ -122,10 +124,11 @@ class DjangoModelSource(object):
|
|||||||
settings.py.
|
settings.py.
|
||||||
"""
|
"""
|
||||||
def __init__(self, dj_settings, app_label, model_name):
|
def __init__(self, dj_settings, app_label, model_name):
|
||||||
dbmodel = get_django_model(dj_settings, app_label, model_name)
|
dbmodel = utils.get_django_model(dj_settings, app_label, model_name)
|
||||||
|
|
||||||
# only get values defined in model (no extra fields from custom manager)
|
# only get values defined in model (no extra fields from custom manager)
|
||||||
self._data = dbmodel.objects.values(*[f.name for f in dbmodel._meta.fields])
|
self._data = dbmodel.objects.values(*[f.name
|
||||||
|
for f in dbmodel._meta.fields])
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return iter(self._data)
|
return iter(self._data)
|
||||||
|
@ -16,7 +16,7 @@ def get_django_model(dj_settings, app_label, model_name):
|
|||||||
DATABASE_HOST=dj_settings.DATABASE_HOST,
|
DATABASE_HOST=dj_settings.DATABASE_HOST,
|
||||||
INSTALLED_APPS=dj_settings.INSTALLED_APPS)
|
INSTALLED_APPS=dj_settings.INSTALLED_APPS)
|
||||||
from django.db.models import get_model
|
from django.db.models import get_model
|
||||||
dbmodel = get_model(app_label, model_name)
|
return get_model(app_label, model_name)
|
||||||
|
|
||||||
|
|
||||||
def string_dig(element, joiner=''):
|
def string_dig(element, joiner=''):
|
||||||
@ -29,7 +29,8 @@ def string_dig(element, joiner=''):
|
|||||||
if element.string:
|
if element.string:
|
||||||
return element.string
|
return element.string
|
||||||
else:
|
else:
|
||||||
return joiner.join([string_dig(child) for child in element.findAll(True)])
|
return joiner.join([string_dig(child)
|
||||||
|
for child in element.findAll(True)])
|
||||||
|
|
||||||
|
|
||||||
def flatten(item, prefix=''):
|
def flatten(item, prefix=''):
|
||||||
|
Loading…
Reference in New Issue
Block a user