new logging

This commit is contained in:
James Turk 2012-05-14 16:40:13 -04:00
parent 5e668b3b21
commit cec4bdc333
4 changed files with 22 additions and 16 deletions

View File

@ -72,10 +72,10 @@ class Kernel(object):
self.db.drop_collection('logs')
self.db.drop_collection('status')
def log(self, action, url, error=False, **kwargs):
def log(self, action, doc_id, error=False, **kwargs):
""" add an entry to the oyster log """
kwargs['action'] = action
kwargs['url'] = url
kwargs['doc_id'] = doc_id
kwargs['error'] = error
kwargs['timestamp'] = datetime.datetime.utcnow()
self.db.logs.insert(kwargs)
@ -96,7 +96,7 @@ class Kernel(object):
"""
if doc_class not in self.doc_classes:
error = 'unregistered doc_class %s' % doc_class
self.log('track', url=url, error=error)
self.log('track', id, url=url, error=error)
raise ValueError(error)
# try and find an existing version of this document
@ -123,10 +123,10 @@ class Kernel(object):
tracked['url'],
tracked['doc_class'],
url, doc_class))
self.log('track', url=url, error=error)
self.log('track', id, url=url, error=error)
raise ValueError(error)
self.log('track', url=url)
self.log('track', id, url=url)
newdoc = dict(url=url, doc_class=doc_class,
_random=random.randint(0, sys.maxint),
@ -209,7 +209,8 @@ class Kernel(object):
else:
doc['next_update'] = None
self.log('update', url=url, new_doc=new_version, error=error)
self.log('update', doc['_id'], url=url, new_doc=new_version,
error=error)
self.db.tracked.save(doc, safe=True)

View File

@ -10,14 +10,19 @@ es = ES(settings.ELASTICSEARCH_HOST)
class ElasticSearchPush(Task):
# results go straight to elasticsearch
ignore_result = True
action = 'elasticsearch'
def run(self, doc_id):
doc = kernel.db.tracked.find_one({'_id': doc_id})
text = kernel.extract_text(doc)
self.get_logger().debug('adding {0} to ElasticSearch'.format(doc_id))
try:
text = kernel.extract_text(doc)
es.index(dict(doc['metadata'], text=text),
settings.ELASTICSEARCH_INDEX,
settings.ELASTICSEARCH_DOC_TYPE,
id=doc_id)
kernel.log(self.action, doc_id, error=False)
es.index(dict(doc['metadata'], text=text),
settings.ELASTICSEARCH_INDEX,
settings.ELASTICSEARCH_DOC_TYPE,
id=doc_id)
except Exception as e:
kernel.log(self.action, doc_id, error=True, exception=str(e))

View File

@ -1,6 +1,6 @@
<tr{% if log.error %} class="error" {% endif %}>
<td>{{log.action}}</td>
<td>{{log.url}}</td>
<td>{{log.doc_id}} - {{log.url}}</td>
<td>{{log.timestamp.strftime("%Y-%m-%d %H:%M:%S")}}</td>
<td>{% if log.error %}{{log.error}}{% endif %}</td>
</tr>

View File

@ -56,12 +56,12 @@ class KernelTests(TestCase):
assert_raises(ValueError, Kernel, doc_classes={'bad-doc': {}})
def test_log(self):
self.kernel.log('action1', 'http://example.com')
self.kernel.log('action2', 'http://test.com', error=True, pi=3)
self.kernel.log('action1', 'example')
self.kernel.log('action2', 'test', error=True, pi=3)
assert self.kernel.db.logs.count() == 2
x = self.kernel.db.logs.find_one({'error': True})
assert x['action'] == 'action2'
assert x['url'] == 'http://test.com'
assert x['doc_id'] == 'test'
assert x['pi'] == 3
def test_track_url(self):