logging improvements, tracked view

This commit is contained in:
James Turk 2011-08-01 16:48:36 -04:00
parent b1d9151b08
commit 482a0ea1ae
2 changed files with 38 additions and 11 deletions

View File

@ -43,12 +43,15 @@ class Client(object):
self.db.drop_collection('%s.files' % self._collection_name) self.db.drop_collection('%s.files' % self._collection_name)
self.db.drop_collection('logs') self.db.drop_collection('logs')
def log(self, action, url, error=False, **kwargs):
    """Record one event in the ``logs`` collection.

    ``action`` and ``url`` identify the event and ``error`` flags a
    failure.  Any extra keyword arguments are stored as additional fields
    on the inserted document, together with a UTC ``timestamp``.
    """
    kwargs.update({
        'action': action,
        'url': url,
        'error': error,
        'timestamp': datetime.datetime.utcnow(),
    })
    self.db.logs.insert(kwargs)
def track_url(self, url, versioning='md5', update_mins=60*24, def track_url(self, url, versioning='md5', update_mins=60*24,
**kwargs): **kwargs):
""" """
@ -107,14 +110,14 @@ class Client(object):
self.fs.put(data, filename=doc['url'], mimetype=content_type, self.fs.put(data, filename=doc['url'], mimetype=content_type,
**doc['metadata']) **doc['metadata'])
# _last_update/_next_update are separate from question of versioning # last_update/next_update are separate from question of versioning
doc['_last_update'] = datetime.datetime.utcnow() doc['last_update'] = datetime.datetime.utcnow()
doc['_next_update'] = (doc['_last_update'] + doc['next_update'] = (doc['last_update'] +
datetime.timedelta(minutes=doc['update_mins'])) datetime.timedelta(minutes=doc['update_mins']))
if error: if error:
doc['_consecutive_errors'] = doc.get('_consecutive_errors', 0) + 1 doc['consecutive_errors'] = doc.get('consecutive_errors', 0) + 1
else: else:
doc['_consecutive_errors'] = 0 doc['consecutive_errors'] = 0
self.log('update', url=url, new_doc=do_put, error=error) self.log('update', url=url, new_doc=do_put, error=error)
@ -141,15 +144,15 @@ class Client(object):
# results are always sorted by random to avoid piling on single server # results are always sorted by random to avoid piling on single server
# first we try to update anything that we've never retrieved # first we try to update anything that we've never retrieved
new = self.db.tracked.find({'_next_update': new = self.db.tracked.find({'next_update':
{'$exists': False}}).sort('_random') {'$exists': False}}).sort('_random')
if max: if max:
new = new.limit(max) new = new.limit(max)
queue = list(new) queue = list(new)
# pull the rest from those for which _next_update is in the past # pull the rest from those for which next_update is in the past
next = self.db.tracked.find({'_next_update': next = self.db.tracked.find({'next_update':
{'$lt': datetime.datetime.utcnow()}}).sort('_random') {'$lt': datetime.datetime.utcnow()}}).sort('_random')
if max: if max:
max -= len(queue) max -= len(queue)

View File

@ -1,6 +1,22 @@
import json
import datetime
import flask import flask
import pymongo.objectid
from oyster.client import Client from oyster.client import Client
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that also understands datetimes and Mongo ObjectIds."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Datetimes are rendered with ``isoformat()``, ObjectIds with
        ``str()``; anything else is handed to the base encoder.
        """
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        if isinstance(obj, pymongo.objectid.ObjectId):
            return str(obj)
        return super(JSONEncoder, self).default(obj)
app = flask.Flask('oyster') app = flask.Flask('oyster')
client = Client() client = Client()
@ -11,7 +27,8 @@ def doc_list():
'tracking': client.db.tracked.count(), 'tracking': client.db.tracked.count(),
'need_update': client.get_update_queue_size(), 'need_update': client.get_update_queue_size(),
} }
return flask.jsonify(**status) return json.dumps(status)
@app.route('/log/') @app.route('/log/')
def log_view(): def log_view():
@ -19,6 +36,12 @@ def log_view():
return flask.render_template('logs.html', logs=logs) return flask.render_template('logs.html', logs=logs)
@app.route('/tracked/<path:url>')
def tracked_view(url):
    """Return the tracked document for ``url`` as a JSON string.

    Responds with 404 when no tracked document has that URL, instead of
    serializing ``None`` into a ``null`` body with a 200 status.
    """
    doc = client.db.tracked.find_one({'url': url})
    if doc is None:
        # find_one yields None when nothing matches the query
        flask.abort(404)
    # JSONEncoder covers the datetime and ObjectId values json rejects
    return json.dumps(doc, cls=JSONEncoder)
@app.route('/doc/<path:url>/<version>') @app.route('/doc/<path:url>/<version>')
def show_doc(url, version): def show_doc(url, version):
if version == 'latest': if version == 'latest':
@ -28,5 +51,6 @@ def show_doc(url, version):
resp.headers['content-type'] = doc.mimetype resp.headers['content-type'] = doc.mimetype
return resp return resp
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True) app.run(debug=True)