# File-based sources for saucebrush/sources.py: FileSource and JSONSource.

# Python 2 provides ``basestring``; on Python 3 that name is gone, so fall
# back to ``str``/``bytes`` to keep recognising file paths.
try:
    _string_types = (basestring,)
except NameError:
    _string_types = (str, bytes)


class FileSource(object):
    """ Base class for sources which read from one or more files.

        Takes as input a file-like, a file path, a list of file-likes,
        or a list of file paths.

        A "file-like" is any object with a ``read`` attribute.  File
        paths are opened for reading and closed again once their records
        have been consumed; file-likes passed in by the caller are left
        open.  Inputs (or list elements) that are neither a file-like
        nor a path are skipped without error.

        Subclasses must implement ``_process_file(file)``, returning an
        iterable of the records found in one open file.
    """

    def __init__(self, input):
        # ``input`` shadows the builtin, but the name is part of the
        # public signature and is kept for keyword-argument callers.
        self._input = input

    def __iter__(self):
        # This method would be a lot cleaner with the proposed
        # 'yield from' expression (PEP 380)
        source = self._input
        if hasattr(source, 'read') or isinstance(source, _string_types):
            # A single file-like or a single path.
            items = [source]
        elif hasattr(source, '__iter__'):
            # A collection of file-likes and/or paths.
            items = source
        else:
            items = []

        for item in items:
            for record in self._records_from(item):
                yield record

    def _records_from(self, item):
        """ Yield the records of one file-like or one file path. """
        if hasattr(item, 'read'):
            for record in self._process_file(item):
                yield record
        elif isinstance(item, _string_types):
            # Keep the file open until the generator is exhausted.
            with open(item) as f:
                for record in self._process_file(f):
                    yield record

    def _process_file(self, file):
        """ Return an iterable of records read from the open ``file``.

            Raises NotImplementedError; subclasses override this.
        """
        raise NotImplementedError('Descendants of FileSource should implement'
                                  ' a custom _process_file method.')


class JSONSource(FileSource):
    """ Source for reading from JSON files.

        When processing JSON files, if the top-level object is a list, will
        yield each member separately. Otherwise, yields the top-level
        object.
    """

    def _process_file(self, file):
        """ Parse ``file`` as a single JSON document and yield its records. """
        import json

        obj = json.load(file)

        # If the top-level JSON object in the file is a list
        # then yield each element separately; otherwise, yield
        # the top-level object.
        if isinstance(obj, list):
            for record in obj:
                yield record
        else:
            yield obj