error handling
This commit is contained in:
parent
b7ab8d0ce4
commit
2342b4aeef
@ -2,6 +2,7 @@ import datetime
|
|||||||
import hashlib
|
import hashlib
|
||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
|
import urllib
|
||||||
|
|
||||||
import pymongo
|
import pymongo
|
||||||
import gridfs
|
import gridfs
|
||||||
@ -64,16 +65,24 @@ class Client(object):
|
|||||||
|
|
||||||
|
|
||||||
def update(self, doc):
|
def update(self, doc):
|
||||||
# assume we're going to do the put
|
|
||||||
do_put = True
|
do_put = True
|
||||||
|
error = False
|
||||||
|
|
||||||
# update strategies could be implemented here as well
|
# update strategies could be implemented here as well
|
||||||
data = self.scraper.urlopen(doc['url'])
|
try:
|
||||||
|
data = self.scraper.urlopen(urllib.quote(doc['url']))
|
||||||
|
content_type = data.response.headers['content-type']
|
||||||
|
except scrapelib.HTTPError:
|
||||||
|
# TODO: log error
|
||||||
|
do_put = False
|
||||||
|
error = True
|
||||||
|
|
||||||
# versioning is a concept for future use, but here's how it can work:
|
# versioning is a concept for future use, but here's how it can work:
|
||||||
# versioning functions take doc & data, and return True if data is
|
# versioning functions take doc & data, and return True if data is
|
||||||
# different, since they have access to doc, they can also modify
|
# different, since they have access to doc, they can also modify
|
||||||
# certain attributes as needed
|
# certain attributes as needed
|
||||||
|
|
||||||
|
if do_put:
|
||||||
if doc['versioning'] == 'md5':
|
if doc['versioning'] == 'md5':
|
||||||
do_put = self.md5_versioning(doc, data)
|
do_put = self.md5_versioning(doc, data)
|
||||||
else:
|
else:
|
||||||
@ -81,12 +90,18 @@ class Client(object):
|
|||||||
doc['versioning'])
|
doc['versioning'])
|
||||||
|
|
||||||
if do_put:
|
if do_put:
|
||||||
self.fs.put(data, filename=doc['url'], **doc['metadata'])
|
self.fs.put(data, filename=doc['url'], mimetype=content_type,
|
||||||
|
**doc['metadata'])
|
||||||
|
|
||||||
# _last_update/_next_update are separate from question of versioning
|
# _last_update/_next_update are separate from question of versioning
|
||||||
doc['_last_update'] = datetime.datetime.utcnow()
|
doc['_last_update'] = datetime.datetime.utcnow()
|
||||||
doc['_next_update'] = (doc['_last_update'] +
|
doc['_next_update'] = (doc['_last_update'] +
|
||||||
datetime.timedelta(minutes=doc['update_mins']))
|
datetime.timedelta(minutes=doc['update_mins']))
|
||||||
|
if error:
|
||||||
|
doc['_consecutive_errors'] = doc.get('_consecutive_errors', 0) + 1
|
||||||
|
else:
|
||||||
|
doc['_consecutive_errors'] = 0
|
||||||
|
|
||||||
self.db.tracked.save(doc, safe=True)
|
self.db.tracked.save(doc, safe=True)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user