delete obsolete ExternalStoreTask
This commit is contained in:
parent
86ea27c62b
commit
5b01b088d3
@ -4,12 +4,6 @@ and keeps a local copy up to date depending on user-specified criteria.
|
||||
Data Model
|
||||
==========
|
||||
|
||||
Oyster keeps its data in a MongoDB instance and makes use of GridFS to store the
|
||||
raw document data.
|
||||
|
||||
In addition to the standard gridfs collections (fs.chunks, fs.files) oyster
|
||||
uses the following collections:
|
||||
|
||||
tracked - metadata for tracked resources
|
||||
_id : internal id
|
||||
_random : a random integer used for sorting
|
||||
|
@ -36,36 +36,3 @@ class UpdateTaskScheduler(PeriodicTask):
|
||||
for doc in next_set:
|
||||
UpdateTask.delay(doc['_id'])
|
||||
kernel.db.status.update({}, {'$inc': {'update_queue': 1}})
|
||||
|
||||
|
||||
class ExternalStoreTask(Task):
    """ base class for tasks that push documents to an external store

        when overriding be sure to define
            external_store
                short string describing external store (eg. 's3')
            upload_document(self, doc_id, filedata, metadata)
                function that uploads the document and returns a unique ID
    """

    # results go straight to database
    ignore_result = True
    # used as a base class
    abstract = True

    def run(self, doc_id,
            extract_text=lambda filedata, metadata=None: filedata):
        """ upload one tracked document and record the external store's ID

            doc_id
                id of the document in the ``tracked`` collection; it is
                wrapped in ``ObjectId`` for the lookup
            extract_text
                callable invoked as ``extract_text(filedata, metadata)``
                whose return value is what gets uploaded.  The default
                passes ``filedata`` through unchanged; it accepts the
                metadata argument because ``run`` always supplies it (the
                previous one-argument default raised ``TypeError``).
        """
        # get the document
        doc = kernel.db.tracked.find_one({'_id': ObjectId(doc_id)})
        filedata = kernel.get_version(doc['url']).read()
        text = extract_text(filedata, doc['metadata'])

        # put the document into the data store
        result = self.upload_document(doc_id, text, doc['metadata'])

        # remember the ID under '<external_store>_id' on the tracked doc
        doc[self.external_store + '_id'] = result
        kernel.db.tracked.save(doc, safe=True)

    def upload_document(self, doc_id, filedata, metadata):
        """ abstract method, override on implementations

            implementations return the unique ID assigned by the external
            store; this base version does nothing and returns None
        """
        pass
|
||||
|
Loading…
Reference in New Issue
Block a user