allow id to be set manually

This commit is contained in:
James Turk 2012-02-21 15:36:04 -05:00
parent bbcd7a3018
commit 9ef3a82d75
2 changed files with 25 additions and 6 deletions

View File

@@ -83,7 +83,7 @@ class Kernel(object):
         self.doc_classes[doc_class] = properties
-    def track_url(self, url, doc_class, **kwargs):
+    def track_url(self, url, doc_class, id=None, **kwargs):
         """
         Add a URL to the set of tracked URLs, accessible via a given filename.
@@ -99,8 +99,11 @@ class Kernel(object):
         # if data is already tracked and this is just a duplicate call
         # return the original object
         if tracked:
+            # only check id if id was passed in
+            id_matches = (tracked['_id'] == id) if id else True
             if (tracked['metadata'] == kwargs and
-                tracked['doc_class'] == doc_class):
+                tracked['doc_class'] == doc_class and
+                id_matches):
                 return tracked['_id']
             else:
                 self.log('track', url=url, error='tracking conflict')
@@ -108,9 +111,13 @@ class Kernel(object):
                                  'metadata' % url)
         self.log('track', url=url)
-        return self.db.tracked.insert(dict(url=url, doc_class=doc_class,
-                                           _random=random.randint(0, sys.maxint),
-                                           versions=[], metadata=kwargs))
+        newdoc = dict(url=url, doc_class=doc_class,
+                      _random=random.randint(0, sys.maxint),
+                      versions=[], metadata=kwargs)
+        if id:
+            newdoc['_id'] = id
+        return self.db.tracked.insert(newdoc)
     def md5_versioning(self, olddata, newdata):

View File

@@ -78,12 +78,24 @@ class KernelTests(TestCase):
         id2 = self.kernel.track_url('http://example.com', 'default', pi=3)
         assert id1 == id2
-        # can't track same URL twice with different metadata
+        # test setting id
+        out = self.kernel.track_url('http://example.com/2', 'default',
+                                    'fixed-id')
+        assert out == 'fixed-id'
+        # can't track same URL twice with different id
+        assert_raises(ValueError, self.kernel.track_url, 'http://example.com',
+                      'default', 'hard-coded-id')
+        # logged error
+        assert self.kernel.db.logs.find_one({'error': 'tracking conflict'})
+        # ... with different metadata
         assert_raises(ValueError, self.kernel.track_url, 'http://example.com',
                       'default')
         # logged error
         assert self.kernel.db.logs.find_one({'error': 'tracking conflict'})
+        # ... different doc class
         assert_raises(ValueError, self.kernel.track_url, 'http://example.com',
                       'special-doc-class', pi=3)
         # logged error