From 981e2cc88fcd7a88c0f3ed8d98024ed73e7232a7 Mon Sep 17 00:00:00 2001 From: James Turk Date: Sat, 10 Mar 2012 11:21:43 -0800 Subject: [PATCH] flake8 fixes: mostly whitespace, a few real bugs --- oyster/conf/__init__.py | 1 + oyster/core.py | 15 +++------------ oyster/scripts/signal.py | 5 +++-- oyster/storage/dummy.py | 5 ----- oyster/storage/gridfs.py | 2 +- oyster/storage/s3.py | 2 +- oyster/tasks.py | 4 +--- oyster/tests/test_kernel.py | 21 +++++++-------------- oyster/tests/test_storage.py | 1 + oyster/web.py | 1 + 10 files changed, 19 insertions(+), 38 deletions(-) diff --git a/oyster/conf/__init__.py b/oyster/conf/__init__.py index 716b595..48ed4da 100644 --- a/oyster/conf/__init__.py +++ b/oyster/conf/__init__.py @@ -1,5 +1,6 @@ from oyster.conf import default_settings + class Settings(object): def __init__(self): pass diff --git a/oyster/core.py b/oyster/core.py index 250689e..86fcb55 100644 --- a/oyster/core.py +++ b/oyster/core.py @@ -2,13 +2,13 @@ import datetime import hashlib import random import sys -import urllib import pymongo import scrapelib from .storage import engines + class Kernel(object): """ oyster's workhorse, handles tracking """ @@ -62,14 +62,12 @@ class Kernel(object): raise ValueError('doc_class %s missing key %s' % (dc_name, key)) - def _wipe(self): """ exists primarily for debug use, wipes entire db """ self.db.drop_collection('tracked') self.db.drop_collection('logs') self.db.drop_collection('status') - def log(self, action, url, error=False, **kwargs): """ add an entry to the oyster log """ kwargs['action'] = action @@ -78,11 +76,9 @@ class Kernel(object): kwargs['timestamp'] = datetime.datetime.utcnow() self.db.logs.insert(kwargs) - def _add_doc_class(self, doc_class, **properties): self.doc_classes[doc_class] = properties - def track_url(self, url, doc_class, id=None, **kwargs): """ Add a URL to the set of tracked URLs, accessible via a given filename. @@ -123,14 +119,12 @@ class Kernel(object): newdoc['_id'] = id return self.db.tracked.insert(newdoc) - def md5_versioning(self, olddata, newdata): """ return True if md5 changed or if file is new """ old_md5 = hashlib.md5(olddata).hexdigest() new_md5 = hashlib.md5(newdata).hexdigest() return old_md5 != new_md5 - def update(self, doc): """ perform update upon a given document @@ -187,7 +181,7 @@ class Kernel(object): c_errors = doc.get('consecutive_errors', 0) doc['consecutive_errors'] = c_errors + 1 if c_errors <= self.retry_attempts: - update_mins = self.retry_wait_minutes * (2**c_errors) + update_mins = self.retry_wait_minutes * (2 ** c_errors) else: # reset error count if all was ok doc['consecutive_errors'] = 0 @@ -203,7 +197,6 @@ class Kernel(object): self.db.tracked.save(doc, safe=True) - def get_update_queue(self): """ Get a list of what needs to be updated. @@ -229,7 +222,6 @@ class Kernel(object): return queue - def get_update_queue_size(self): """ Get the size of the update queue, this should match @@ -240,7 +232,7 @@ class Kernel(object): {'next_update': {'$ne': None}}, {'next_update': {'$lt': datetime.datetime.utcnow()}}, ]}).count() - return new+next + return new + next def get_last_version(self, doc): try: @@ -251,7 +243,6 @@ class Kernel(object): return storage.get(doc['versions'][-1]['storage_key']) - def _get_configured_kernel(): """ factory, gets a connection configured with oyster.conf.settings """ from oyster.conf import settings diff --git a/oyster/scripts/signal.py b/oyster/scripts/signal.py index 7e0b1af..e48c8fb 100644 --- a/oyster/scripts/signal.py +++ b/oyster/scripts/signal.py @@ -3,6 +3,7 @@ import argparse from oyster.core import kernel + def main(): parser = argparse.ArgumentParser( description='do a task for all documents in a doc_class', @@ -14,10 +15,10 @@ def main(): args = parser.parse_args() - docs = kernel.db.tracked.find({'doc_class':args.doc_class}) + docs = kernel.db.tracked.find({'doc_class': args.doc_class}) print '%s docs in %s' % (docs.count(), args.doc_class) - path, func = args.function.rsplit('.',1) + path, func = args.function.rsplit('.', 1) mod = __import__(path, fromlist=[func]) func = getattr(mod, func) diff --git a/oyster/storage/dummy.py b/oyster/storage/dummy.py index 7438cab..ec29929 100644 --- a/oyster/storage/dummy.py +++ b/oyster/storage/dummy.py @@ -1,8 +1,3 @@ -import urllib -import boto -from oyster.conf import settings - - class DummyStorage(object): """ should NOT be used outside of testing """ diff --git a/oyster/storage/gridfs.py b/oyster/storage/gridfs.py index 976d0bc..f33f408 100644 --- a/oyster/storage/gridfs.py +++ b/oyster/storage/gridfs.py @@ -2,8 +2,8 @@ from __future__ import absolute_import import gridfs -class GridFSStorage(object): +class GridFSStorage(object): storage_type = 'gridfs' def __init__(self, kernel): diff --git a/oyster/storage/s3.py b/oyster/storage/s3.py index f5075e4..f77e9a6 100644 --- a/oyster/storage/s3.py +++ b/oyster/storage/s3.py @@ -13,7 +13,7 @@ class S3Storage(object): def put(self, tracked_doc, data, content_type): """ upload the document to S3 """ k = boto.s3.key.Key(self.bucket) - key_name = getattr(settings, AWS_PREFIX, '') + tracked_doc['_id'] + key_name = getattr(settings, 'AWS_PREFIX', '') + tracked_doc['_id'] k.key = key_name headers = {'x-amz-acl': 'public-read', 'Content-Type': content_type} diff --git a/oyster/tasks.py b/oyster/tasks.py index c2737f8..2aef105 100644 --- a/oyster/tasks.py +++ b/oyster/tasks.py @@ -1,8 +1,6 @@ from celery.task.base import Task, PeriodicTask from celery.execute import send_task -from pymongo.objectid import ObjectId - from oyster.core import kernel @@ -15,7 +13,7 @@ class UpdateTask(Task): doc = kernel.db.tracked.find_one({'_id': doc_id}) kernel.update(doc) for task in doc.get('post_update_tasks', []): - send_task(hook, (doc_id,)) + send_task(task, (doc_id,)) kernel.db.status.update({}, {'$inc': {'update_queue': -1}}) # don't sit on a connection kernel.db.connection.end_request() diff --git a/oyster/tests/test_kernel.py b/oyster/tests/test_kernel.py index 1bdd5d1..90a4719 100644 --- a/oyster/tests/test_kernel.py +++ b/oyster/tests/test_kernel.py @@ -3,14 +3,16 @@ import datetime from unittest import TestCase from nose.tools import assert_raises -import pymongo from oyster.core import Kernel + def hook_fired(doc, newdata): doc['hook_fired'] = doc.get('hook_fired', 0) + 1 -RANDOM_URL = 'http://www.random.org/integers/?num=1&min=-1000000000&max=1000000000&col=1&base=10&format=plain&rnd=new' +RANDOM_URL = ('http://www.random.org/integers/?num=1&min=-1000000000&' + 'max=1000000000&col=1&base=10&format=plain&rnd=new') + class KernelTests(TestCase): @@ -20,7 +22,7 @@ class KernelTests(TestCase): 'onchanged': [] }, 'fast-update': - {'update_mins': 1/60., 'storage_engine': 'dummy', + {'update_mins': 1 / 60., 'storage_engine': 'dummy', 'onchanged': [] }, 'one-time': @@ -32,7 +34,8 @@ class KernelTests(TestCase): 'onchanged': [hook_fired] } } - self.kernel = Kernel(mongo_db='oyster_test', retry_wait_minutes=1/60., + self.kernel = Kernel(mongo_db='oyster_test', + retry_wait_minutes=1 / 60., doc_classes=doc_classes) self.kernel._wipe() @@ -53,7 +56,6 @@ class KernelTests(TestCase): # ensure that a bad document class raises an error assert_raises(ValueError, Kernel, doc_classes={'bad-doc': {}}) - def test_log(self): self.kernel.log('action1', 'http://example.com') self.kernel.log('action2', 'http://test.com', error=True, pi=3) @@ -63,7 +65,6 @@ class KernelTests(TestCase): assert x['url'] == 'http://test.com' assert x['pi'] == 3 - def test_track_url(self): # basic insert id1 = self.kernel.track_url('http://example.com', 'default', pi=3) @@ -105,7 +106,6 @@ class KernelTests(TestCase): # logged error assert self.kernel.db.logs.find_one({'error': 'tracking conflict'}) - def test_no_update(self): # update self.kernel.track_url('http://example.com', 'one-time') @@ -122,7 +122,6 @@ class KernelTests(TestCase): assert not self.kernel.md5_versioning('hello!', 'hello!') assert self.kernel.md5_versioning('hello!', 'hey!') - def test_update(self): # get a single document tracked and call update on it self.kernel.track_url('http://example.com', 'default') @@ -154,7 +153,6 @@ class KernelTests(TestCase): # check that logs updated assert self.kernel.db.logs.find({'action': 'update'}).count() == 2 - def test_update_failure(self): # track a non-existent URL self.kernel.track_url('http://not_a_url', 'default') @@ -174,7 +172,6 @@ class KernelTests(TestCase): obj = self.kernel.db.tracked.find_one() assert obj['consecutive_errors'] == 2 - def test_update_onchanged_fire_only_on_change(self): self.kernel.track_url('http://example.com', 'change-hook') obj = self.kernel.db.tracked.find_one() @@ -201,7 +198,6 @@ class KernelTests(TestCase): doc = self.kernel.db.tracked.find_one() assert doc['hook_fired'] == 2 - def test_get_update_queue(self): self.kernel.track_url('never-updates', 'fast-update') self.kernel.track_url('bad-uri', 'fast-update') @@ -229,15 +225,12 @@ class KernelTests(TestCase): queue = self.kernel.get_update_queue() assert len(queue) == 3 - def test_get_update_queue_size(self): self.kernel.track_url('a', 'fast-update') self.kernel.track_url('b', 'fast-update') self.kernel.track_url('c', 'fast-update') a = self.kernel.db.tracked.find_one(dict(url='a')) - b = self.kernel.db.tracked.find_one(dict(url='b')) - c = self.kernel.db.tracked.find_one(dict(url='c')) # size should start at 3 assert self.kernel.get_update_queue_size() == 3 diff --git a/oyster/tests/test_storage.py b/oyster/tests/test_storage.py index 1d19c85..23c7d77 100644 --- a/oyster/tests/test_storage.py +++ b/oyster/tests/test_storage.py @@ -3,6 +3,7 @@ from oyster.storage.s3 import S3Storage from oyster.storage.gridfs import GridFSStorage from oyster.storage.dummy import DummyStorage + def _simple_storage_test(StorageCls): kernel = Kernel(mongo_db='oyster_test') storage = StorageCls(kernel) diff --git a/oyster/web.py b/oyster/web.py index 9f4cef8..56911b5 100644 --- a/oyster/web.py +++ b/oyster/web.py @@ -44,6 +44,7 @@ def api_wrapper(template=None): app = flask.Flask('oyster') + @app.route('/') @api_wrapper('index.html') def index():