From 47d094e919780e106c2a50f302a20b4539a27793 Mon Sep 17 00:00:00 2001 From: James Turk Date: Fri, 5 Aug 2011 19:08:01 -0400 Subject: [PATCH] use configured client --- oyster/client.py | 17 +++++++++++++++++ oyster/conf/__init__.py | 21 +++++++++++++++++++++ oyster/conf/default_settings.py | 14 ++++++++++++++ oyster/tasks.py | 32 ++++++++++++++++++-------------- oyster/web.py | 6 ++---- 5 files changed, 72 insertions(+), 18 deletions(-) create mode 100644 oyster/conf/__init__.py create mode 100644 oyster/conf/default_settings.py diff --git a/oyster/client.py b/oyster/client.py index 117dbac..df39ad5 100644 --- a/oyster/client.py +++ b/oyster/client.py @@ -9,6 +9,23 @@ import gridfs import scrapelib +def get_configured_client(): + from oyster.conf import settings + return Client(mongo_host=settings.MONGO_HOST, + mongo_port=settings.MONGO_PORT, + mongo_db=settings.MONGO_DATABASE, + mongo_log_maxsize=settings.MONGO_LOG_MAXSIZE, + user_agent=settings.USER_AGENT, + rpm=settings.REQUESTS_PER_MINUTE, + timeout=settings.REQUEST_TIMEOUT, + retry_attempts=settings.RETRY_ATTEMPTS, + retry_wait_seconds=settings.RETRY_WAIT_SECONDS) + + + + + + class Client(object): diff --git a/oyster/conf/__init__.py b/oyster/conf/__init__.py new file mode 100644 index 0000000..716b595 --- /dev/null +++ b/oyster/conf/__init__.py @@ -0,0 +1,21 @@ +from oyster.conf import default_settings + +class Settings(object): + def __init__(self): + pass + + def update(self, module): + for setting in dir(module): + if setting.isupper(): + val = getattr(module, setting) + if val is not None: + setattr(self, setting, val) + +settings = Settings() +settings.update(default_settings) + +try: + import oyster_settings + settings.update(oyster_settings) +except ImportError: + pass diff --git a/oyster/conf/default_settings.py b/oyster/conf/default_settings.py new file mode 100644 index 0000000..ddd7b77 --- /dev/null +++ b/oyster/conf/default_settings.py @@ -0,0 +1,14 @@ +# mongodb +MONGO_HOST = 'localhost' +MONGO_PORT = 27017 +MONGO_DATABASE = 'oyster' +MONGO_LOG_MAXSIZE = 100000000 + +# scrapelib +USER_AGENT = 'oyster' +REQUESTS_PER_MINUTE = 300 +REQUEST_TIMEOUT = 0 +RETRY_ATTEMPTS = 0 +RETRY_WAIT_SECONDS = 5 + + diff --git a/oyster/tasks.py b/oyster/tasks.py index 0de14e8..e483639 100644 --- a/oyster/tasks.py +++ b/oyster/tasks.py @@ -1,21 +1,25 @@ -from celery.task import task -from celery.task.base import PeriodicTask -from celery.task.schedules import schedule +from celery.task.base import Task, PeriodicTask from celery.execute import send_task -from oyster.client import Client - -client = Client() +from oyster.client import get_configured_client -@task(ignore_result=True) -def update_task(doc_id): - # maybe fetch doc instead? - doc = client.db.tracked.find_one({'_id': doc_id}) - client.update(doc) - for hook in doc.get('post_update_hooks', []): - send_task(hook, (doc_id,)) - client.db.status.update({}, {'$inc': {'update_queue': -1}}) +class UpdateTask(Task): + # results go straight to database + ignore_result = True + + def __init__(self): + # one client per process + client = get_configured_client() + + + def run(self, doc_id): + # maybe fetch doc instead? + doc = client.db.tracked.find_one({'_id': doc_id}) + client.update(doc) + for hook in doc.get('post_update_hooks', []): + send_task(hook, (doc_id,)) + client.db.status.update({}, {'$inc': {'update_queue': -1}}) class UpdateTaskScheduler(PeriodicTask): diff --git a/oyster/web.py b/oyster/web.py index 1ae9669..aa5a7dd 100644 --- a/oyster/web.py +++ b/oyster/web.py @@ -5,7 +5,7 @@ import functools import flask import pymongo.objectid -from oyster.client import Client +from oyster.client import get_configured_client class JSONEncoder(json.JSONEncoder): @@ -33,14 +33,13 @@ def api_wrapper(template=None): app = flask.Flask('oyster') -client = Client() +client = get_configured_client() @app.route('/') @api_wrapper('index.html') def index(): status = { - 'queue_size': app.work_queue.qsize(), 'tracking': client.db.tracked.count(), 'need_update': client.get_update_queue_size(), 'logs': client.db.logs.find().sort('$natural', -1).limit(20) @@ -52,7 +51,6 @@ def index(): @api_wrapper() def doc_list(): status = { - 'queue_size': app.work_queue.qsize(), 'tracking': client.db.tracked.count(), 'need_update': client.get_update_queue_size(), }