added FileSource and JSONSource

This commit is contained in:
Michael Stephens 2010-06-17 17:06:28 -04:00
parent 172af19014
commit ce55ad0fcf

View File

@ -199,3 +199,61 @@ class SqliteSource(object):
def done(self):
    # Called when the source is finished with: closes the connection
    # object stored on the instance as self._conn.
    self._conn.close()
try:
    # Python 2: ``basestring`` covers both ``str`` and ``unicode`` paths.
    _string_types = basestring
except NameError:
    # Python 3: ``basestring`` is gone; ``str`` is the only text type.
    _string_types = str


class FileSource(object):
    """ Base class for sources which read from one or more files.

    Takes as input a file-like, a file path, a list of file-likes,
    or a list of file paths. Paths and file-likes may be mixed within
    one list.

    A "file-like" is any object exposing a ``read`` attribute. Files
    given by path are opened (and closed again) by this class;
    file-likes passed in by the caller are never closed here.

    Inputs, or list elements, that are neither a path nor a file-like
    are silently skipped.

    Subclasses must override ``_process_file`` to turn one open file
    into an iterable of records.
    """

    def __init__(self, input):
        self._input = input

    def __iter__(self):
        """ Yield every record from every file described by the input,
        in the order the files were given.
        """
        # This method would be a lot cleaner with the proposed
        # 'yield from' expression (PEP 380)
        source = self._input
        if self._is_single_source(source):
            for record in self._read_source(source):
                yield record
        elif hasattr(source, '__iter__'):
            # The single-source test above runs first on purpose: strings
            # and file objects can be iterable too and must not land here.
            for el in source:
                for record in self._read_source(el):
                    yield record

    @staticmethod
    def _is_single_source(source):
        """ Return True if *source* is a file path or a file-like. """
        return isinstance(source, _string_types) or hasattr(source, 'read')

    def _read_source(self, source):
        """ Yield the records of one path or file-like; anything else
        yields nothing.
        """
        if isinstance(source, _string_types):
            # We opened it, so we are responsible for closing it.
            with open(source) as f:
                for record in self._process_file(f):
                    yield record
        elif hasattr(source, 'read'):
            for record in self._process_file(source):
                yield record

    def _process_file(self, file):
        """ Turn one open file-like into an iterable of records.

        Raises NotImplementedError unless overridden by a subclass.
        """
        raise NotImplementedError('Descendants of FileSource should implement'
                                  ' a custom _process_file method.')
class JSONSource(FileSource):
    """ Source for reading from JSON files.

    Each file is parsed as a single JSON document. A top-level JSON
    array is flattened one level, so each of its members becomes its
    own record; any other top-level value is yielded as one record.
    """

    def _process_file(self, file):
        """ Parse *file* as JSON and yield the record(s) it contains. """
        import json

        parsed = json.load(file)
        # Treat a non-list document as a one-element batch so that both
        # cases share a single loop.
        batch = parsed if isinstance(parsed, list) else [parsed]
        for item in batch:
            yield item