From 5b71fa4aea1fae6c6ee825a43aa7fe4ad930f331 Mon Sep 17 00:00:00 2001
From: James Turk
Date: Wed, 5 Oct 2011 15:52:16 -0400
Subject: [PATCH] ExternalStoreTask takes an extract_text argument

---
Review note: run() accepts an optional extract_text callable (identity by
default), applies it to the fetched file data, and hands the RESULT to
upload_document(). The patch as first written passed the extract_text
callable itself and left `text` unused. The blob id on the "index" line
below predates this correction.

 oyster/tasks.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/oyster/tasks.py b/oyster/tasks.py
index 083653d..f69246a 100644
--- a/oyster/tasks.py
+++ b/oyster/tasks.py
@@ -64,13 +64,14 @@ class ExternalStoreTask(Task):
         # one client per process
         self.client = get_configured_client()
 
-    def run(self, doc_id):
+    def run(self, doc_id, extract_text=lambda x: x):
         # get the document
         doc = self.client.db.tracked.find_one({'_id': ObjectId(doc_id)})
         filedata = self.client.get_version(doc['url']).read()
+        text = extract_text(filedata)
 
         # put the document into the data store
-        result = self.upload_document(doc_id, filedata, doc['metadata'])
+        result = self.upload_document(doc_id, text, doc['metadata'])
         doc[self.external_store + '_id'] = result
         self.client.db.tracked.save(doc, safe=True)
 