diff --git a/design.txt b/design.txt index 807c027..46b6d07 100644 --- a/design.txt +++ b/design.txt @@ -4,12 +4,6 @@ and keeps a local copy up to date depending on user-specified criteria. Data Model ========== -Oyster keeps its data in a MongoDB instance and makes use of GridFS to store the -raw document data. - -In addition to the standard gridfs collections (fs.chunks, fs.files) oyster -uses the following collections: - tracked - metadata for tracked resources _id : internal id _random : a random integer used for sorting diff --git a/oyster/tasks.py b/oyster/tasks.py index e1c8d07..f659aad 100644 --- a/oyster/tasks.py +++ b/oyster/tasks.py @@ -36,36 +36,3 @@ class UpdateTaskScheduler(PeriodicTask): for doc in next_set: UpdateTask.delay(doc['_id']) kernel.db.status.update({}, {'$inc': {'update_queue': 1}}) - - -class ExternalStoreTask(Task): - """ base class for tasks that push documents to an external store - - when overiding be sure to define - external_store - short string describing external store (eg. 's3') - upload_document(self, doc_id, filedata, metadata) - function that uploads the document and returns a unique ID - """ - - # results go straight to database - ignore_result = True - # used as a base class - abstract = True - - def run(self, doc_id, extract_text=lambda x: x): - # get the document - doc = kernel.db.tracked.find_one({'_id': ObjectId(doc_id)}) - filedata = kernel.get_version(doc['url']).read() - text = extract_text(filedata, doc['metadata']) - - # put the document into the data store - result = self.upload_document(doc_id, text, doc['metadata']) - - doc[self.external_store + '_id'] = result - kernel.db.tracked.save(doc, safe=True) - - - def upload_document(self, doc_id, filedata, metadata): - """ abstract method, override on implementations """ - pass