diff --git a/oyster/tests/test_client.py b/oyster/tests/test_client.py index 6e153dc..2a8f2ca 100644 --- a/oyster/tests/test_client.py +++ b/oyster/tests/test_client.py @@ -1,5 +1,6 @@ +import datetime from unittest import TestCase - +from nose.tools import assert_raises import pymongo from oyster.client import Client @@ -8,6 +9,8 @@ class ClientTests(TestCase): def setUp(self): self.client = Client() + self.client._wipe() + def test_constructor(self): c = Client('127.0.0.1', 27017, 'testdb', mongo_log_maxsize=5000, @@ -24,12 +27,50 @@ class ClientTests(TestCase): assert c.scraper.retry_wait_seconds == 10 def test_log(self): - self.client._wipe() self.client.log('action1', 'http://example.com') - self.client.log('action2', 'http://test.com', error=True, pi=3.14) + self.client.log('action2', 'http://test.com', error=True, pi=3) assert self.client.db.logs.count() == 2 x = self.client.db.logs.find_one({'error': True}) assert x['action'] == 'action2' assert x['url'] == 'http://test.com' - assert x['pi'] == 3.14 + assert x['pi'] == 3 + + def test_track_url(self): + # basic insert + self.client.track_url('http://example.com', update_mins=30, pi=3) + obj = self.client.db.tracked.find_one() + assert '_random' in obj + assert obj['update_mins'] == 30 + assert obj['metadata'] == {'pi': 3} + + # logging + log = self.client.db.logs.find_one() + assert log['action'] == 'track' + assert log['url'] == 'http://example.com' + + # can't track same URL twice + assert_raises(ValueError, self.client.track_url, 'http://example.com') + + # logged error + assert self.client.db.logs.find_one({'error': 'already tracked'}) + + + def test_md5_versioning(self): + doc = {'url': 'hello.txt'} + self.client.fs.put('hello!', filename='hello.txt') + assert not self.client.md5_versioning(doc, 'hello!') + assert self.client.md5_versioning(doc, 'hey!') + + + def test_update(self): + self.client.track_url('http://google.com', update_mins=60) + obj = self.client.db.tracked.find_one() + self.client.update(obj) + + # check that metadata has been updated + newobj = self.client.db.tracked.find_one() + assert (newobj['last_update'] + + datetime.timedelta(minutes=newobj['update_mins']) == + newobj['next_update']) + assert newobj['consecutive_errors'] == 0 diff --git a/oyster/web.py b/oyster/web.py index 5071fa7..8fba4d5 100644 --- a/oyster/web.py +++ b/oyster/web.py @@ -55,7 +55,7 @@ def log_view(): prev_offset = max(offset - size, 0) next_offset = offset + size logs = client.db.logs.find().sort('$natural', -1).skip(offset).limit(size) - return dict(logs=logs, prev_offset=prev_offset, + return dict(logs=list(logs), prev_offset=prev_offset, next_offset=next_offset, offset=offset)