From 1b3330fa69999c5c8f97902bcf4758407c1fe036 Mon Sep 17 00:00:00 2001
From: James Turk
Date: Fri, 5 Aug 2011 16:46:13 -0400
Subject: [PATCH] fix how status is checked to avoid piling-on

---
 oyster/client.py |    6 ++++++
 oyster/tasks.py  |    4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/oyster/client.py b/oyster/client.py
index ebc83ca..117dbac 100644
--- a/oyster/client.py
+++ b/oyster/client.py
@@ -22,6 +22,11 @@ class Client(object):
                                       size=mongo_log_maxsize)
         except pymongo.errors.CollectionInvalid:
             pass
+
+        # create status document if it doesn't exist
+        if self.db.status.count() == 0:
+            self.db.status.insert({'update_queue': 0})
+
         self._collection_name = 'fs'
         self.fs = gridfs.GridFS(self.db, self._collection_name)
         self.scraper = scrapelib.Scraper(user_agent=user_agent,
@@ -40,6 +45,7 @@ class Client(object):
         self.db.drop_collection('%s.chunks' % self._collection_name)
         self.db.drop_collection('%s.files' % self._collection_name)
         self.db.drop_collection('logs')
+        self.db.drop_collection('status')
 
 
     def log(self, action, url, error=False, **kwargs):
diff --git a/oyster/tasks.py b/oyster/tasks.py
index 79b1197..0de14e8 100644
--- a/oyster/tasks.py
+++ b/oyster/tasks.py
@@ -6,8 +6,6 @@ from celery.execute import send_task
 from oyster.client import Client
 
 client = Client()
-client.db.status.drop()
-client.db.status.insert({'update_queue': 0})
 
 
 @task(ignore_result=True)
@@ -31,7 +29,7 @@ class UpdateTaskScheduler(PeriodicTask):
         # (currently the only way we avoid duplicates)
         # alternate option would be to set a _queued flag on documents
         if client.db.status.find_one()['update_queue']:
-            pass
+            return
 
         next_set = client.get_update_queue()
         for doc in next_set: