# NOTE(review): This is a git unified diff covering oyster/client.py and
# oyster/web.py. Its line breaks and indentation have been collapsed; the
# patch text below is left exactly as found. Text ahead of the first
# "diff --git" header is skipped by patch tools, so these notes do not
# change the patch itself.
#
# oyster/client.py
#   * Client.log() gains a `url` parameter (positional, placed before
#     `error`) and stores it in the log record as kwargs['url']. The one
#     call visible in this patch passes url=url; other callers are not
#     visible here -- verify they supply url.
#   * Bookkeeping keys written on tracked documents lose their leading
#     underscore: _last_update -> last_update, _next_update -> next_update,
#     _consecutive_errors -> consecutive_errors. Both update-queue queries
#     are switched to 'next_update'; the sort key '_random' is unchanged.
#   * NOTE(review): a document that only carries '_next_update' would match
#     {'next_update': {'$exists': False}} and be queued as never retrieved --
#     confirm whether existing data needs migrating.
#
# oyster/web.py
#   * Adds a json.JSONEncoder subclass that renders datetime.datetime via
#     isoformat() and pymongo ObjectId via str(), deferring every other type
#     to the base class.
#   * NOTE(review): the import path pymongo.objectid may not exist in the
#     installed PyMongo version -- confirm (bson.objectid is the usual home).
#   * doc_list() now returns json.dumps(status) instead of
#     flask.jsonify(**status). NOTE(review): presumably this changes the
#     response Content-Type -- confirm that clients do not depend on it.
#   * Adds tracked_view(url), which fetches one tracked document by 'url'
#     and returns it serialised with the new encoder.
#   * NOTE(review): the route strings '/tracked/' and '/doc//' contain no URL
#     variables although the views take (url) and (url, version); the '<...>'
#     placeholders were presumably stripped together with the newlines --
#     confirm against the repository before relying on this text.
diff --git a/oyster/client.py b/oyster/client.py index a09bc79..b7cfc27 100644 --- a/oyster/client.py +++ b/oyster/client.py @@ -43,12 +43,15 @@ class Client(object): self.db.drop_collection('%s.files' % self._collection_name) self.db.drop_collection('logs') - def log(self, action, error=False, **kwargs): + + def log(self, action, url, error=False, **kwargs): kwargs['action'] = action + kwargs['url'] = url kwargs['error'] = error kwargs['timestamp'] = datetime.datetime.utcnow() self.db.logs.insert(kwargs) + + def track_url(self, url, versioning='md5', update_mins=60*24, **kwargs): """ @@ -107,14 +110,14 @@ class Client(object): self.fs.put(data, filename=doc['url'], mimetype=content_type, **doc['metadata']) - # _last_update/_next_update are separate from question of versioning - doc['_last_update'] = datetime.datetime.utcnow() - doc['_next_update'] = (doc['_last_update'] + - datetime.timedelta(minutes=doc['update_mins'])) + # last_update/next_update are separate from question of versioning + doc['last_update'] = datetime.datetime.utcnow() + doc['next_update'] = (doc['last_update'] + + datetime.timedelta(minutes=doc['update_mins'])) if error: - doc['_consecutive_errors'] = doc.get('_consecutive_errors', 0) + 1 + doc['consecutive_errors'] = doc.get('consecutive_errors', 0) + 1 else: - doc['_consecutive_errors'] = 0 + doc['consecutive_errors'] = 0 self.log('update', url=url, new_doc=do_put, error=error) @@ -141,15 +144,15 @@ class Client(object): # results are always sorted by random to avoid piling on single server # first we try to update anything that we've never retrieved - new = self.db.tracked.find({'_next_update': + new = self.db.tracked.find({'next_update': {'$exists': False}}).sort('_random') if max: new = new.limit(max) queue = list(new) - # pull the rest from those for which _next_update is in the past - next = self.db.tracked.find({'_next_update': + # pull the rest from those for which next_update is in the past + next = self.db.tracked.find({'next_update': 
{'$lt': datetime.datetime.utcnow()}}).sort('_random') if max: max -= len(queue) diff --git a/oyster/web.py b/oyster/web.py index 9db54e2..ee89838 100644 --- a/oyster/web.py +++ b/oyster/web.py @@ -1,6 +1,22 @@ +import json +import datetime + import flask +import pymongo.objectid + from oyster.client import Client + +class JSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, datetime.datetime): + return obj.isoformat() + elif isinstance(obj, pymongo.objectid.ObjectId): + return str(obj) + else: + return super(JSONEncoder, self).default(obj) + + app = flask.Flask('oyster') client = Client() @@ -11,7 +27,8 @@ def doc_list(): 'tracking': client.db.tracked.count(), 'need_update': client.get_update_queue_size(), } - return flask.jsonify(**status) + return json.dumps(status) + @app.route('/log/') def log_view(): @@ -19,6 +36,12 @@ def log_view(): return flask.render_template('logs.html', logs=logs) +@app.route('/tracked/') +def tracked_view(url): + doc = client.db.tracked.find_one({'url': url}) + return json.dumps(doc, cls=JSONEncoder) + + @app.route('/doc//') def show_doc(url, version): if version == 'latest': @@ -28,5 +51,6 @@ def show_doc(url, version): resp.headers['content-type'] = doc.mimetype return resp + if __name__ == '__main__': app.run(debug=True)