added FileSource and JSONSource
This commit is contained in:
parent
172af19014
commit
ce55ad0fcf
@ -199,3 +199,61 @@ class SqliteSource(object):
|
||||
|
||||
def done(self):
    """ Close the connection held in ``self._conn``. """
    connection = self._conn
    connection.close()
|
||||
|
||||
|
||||
class FileSource(object):
    """ Base class for sources which read from one or more files.

    Takes as input a file-like, a file path, a list of file-likes,
    or a list of file paths.

    Iterating over the source yields, in order, every record that
    ``_process_file`` produces for each input file.  Paths are opened
    while they are being processed and closed afterwards; file-likes
    supplied by the caller are never closed here.  Inputs (or list
    members) that are neither a path nor a file-like are skipped.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` so
    # that path detection keeps working on Python 3 as well.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, input):
        self._input = input

    def __iter__(self):
        """ Yield the records of every configured file, one at a time. """
        source = self._input

        # File-likes and strings are iterable themselves, so they have to
        # be recognised *before* falling back to "collection of sources".
        if hasattr(source, 'read') or isinstance(source, self._string_types):
            sources = [source]
        elif hasattr(source, '__iter__'):
            sources = source
        else:
            sources = []

        # Explicit loops instead of 'yield from' (PEP 380) keep this
        # usable on Python 2.
        for el in sources:
            for record in self._read_source(el):
                yield record

    def _read_source(self, source):
        """ Yield the records of a single file path or file-like.

        A path is opened (default text mode) and closed once its records
        are exhausted; a file-like is handed to ``_process_file`` as is.
        Anything else yields nothing.
        """
        if isinstance(source, self._string_types):
            with open(source) as f:
                for record in self._process_file(f):
                    yield record
        elif hasattr(source, 'read'):
            for record in self._process_file(source):
                yield record

    def _process_file(self, file):
        """ Return an iterable of records read from the open ``file``.

        Must be overridden by subclasses; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError('Descendants of FileSource should implement'
                                  ' a custom _process_file method.')
|
||||
|
||||
|
||||
class JSONSource(FileSource):
    """ Source that reads its records from JSON files.

    A file whose top-level JSON value is a list contributes one record
    per list member; any other top-level value is yielded whole as a
    single record.
    """

    def _process_file(self, file):
        """ Parse ``file`` as JSON and yield the records it contains. """
        import json

        parsed = json.load(file)

        # Normalise to a list of records: a top-level list is used as is,
        # anything else becomes the only record.
        records = parsed if isinstance(parsed, list) else [parsed]
        for item in records:
            yield item
|
||||
|
Loading…
Reference in New Issue
Block a user