delete obsolete ExternalStoreTask
This commit is contained in:
parent
86ea27c62b
commit
5b01b088d3
@ -4,12 +4,6 @@ and keeps a local copy up to date depending on user-specified criteria.
|
|||||||
Data Model
|
Data Model
|
||||||
==========
|
==========
|
||||||
|
|
||||||
Oyster keeps its data in a MongoDB instance and makes use of GridFS to store the
|
|
||||||
raw document data.
|
|
||||||
|
|
||||||
In addition to the standard gridfs collections (fs.chunks, fs.files) oyster
|
|
||||||
uses the following collections:
|
|
||||||
|
|
||||||
tracked - metadata for tracked resources
|
tracked - metadata for tracked resources
|
||||||
_id : internal id
|
_id : internal id
|
||||||
_random : a random integer used for sorting
|
_random : a random integer used for sorting
|
||||||
|
@ -36,36 +36,3 @@ class UpdateTaskScheduler(PeriodicTask):
|
|||||||
for doc in next_set:
|
for doc in next_set:
|
||||||
UpdateTask.delay(doc['_id'])
|
UpdateTask.delay(doc['_id'])
|
||||||
kernel.db.status.update({}, {'$inc': {'update_queue': 1}})
|
kernel.db.status.update({}, {'$inc': {'update_queue': 1}})
|
||||||
|
|
||||||
|
|
||||||
class ExternalStoreTask(Task):
    """ base class for tasks that push documents to an external store

        when overriding be sure to define
            external_store
                short string describing external store (eg. 's3')
            upload_document(self, doc_id, filedata, metadata)
                function that uploads the document and returns a unique ID
    """

    # results go straight to database
    ignore_result = True
    # used as a base class
    abstract = True

    def run(self, doc_id, extract_text=lambda filedata, metadata: filedata):
        """ push the stored version of a tracked document to the external store

            doc_id
                id of the document in the ``tracked`` collection; it is
                wrapped in ObjectId before the lookup
            extract_text
                callable invoked as extract_text(filedata, metadata); its
                return value is what gets uploaded.  The default passes
                filedata through unchanged (the previous one-argument default
                could not be called with the two arguments used below).

            The value returned by upload_document is saved on the tracked
            document under the key ``<external_store>_id``.
        """
        # get the document
        doc = kernel.db.tracked.find_one({'_id': ObjectId(doc_id)})
        filedata = kernel.get_version(doc['url']).read()
        text = extract_text(filedata, doc['metadata'])

        # put the document into the data store
        result = self.upload_document(doc_id, text, doc['metadata'])

        # remember the external id on the tracked document itself
        doc[self.external_store + '_id'] = result
        kernel.db.tracked.save(doc, safe=True)

    def upload_document(self, doc_id, filedata, metadata):
        """ abstract method, override on implementations

            must upload the document and return its unique ID in the
            external store
        """
        # fail loudly instead of returning None, which run() would otherwise
        # persist as the external id
        raise NotImplementedError(
            'upload_document must be overridden by ExternalStoreTask subclasses')
|
|
||||||
|
Loading…
Reference in New Issue
Block a user