Set a default request timeout and return the tracked document's _id
This commit is contained in:
parent
28450782ee
commit
1047ca8927
@ -28,8 +28,8 @@ class Client(object):
|
|||||||
|
|
||||||
def __init__(self, mongo_host='localhost', mongo_port=27017,
|
def __init__(self, mongo_host='localhost', mongo_port=27017,
|
||||||
mongo_db='oyster', mongo_log_maxsize=100000000,
|
mongo_db='oyster', mongo_log_maxsize=100000000,
|
||||||
user_agent='oyster', rpm=600, timeout=None,
|
user_agent='oyster', rpm=60, timeout=300,
|
||||||
retry_attempts=100, retry_wait_minutes=60):
|
retry_attempts=3, retry_wait_minutes=60):
|
||||||
|
|
||||||
# set up a capped log if it doesn't exist
|
# set up a capped log if it doesn't exist
|
||||||
self.db = pymongo.Connection(mongo_host, mongo_port)[mongo_db]
|
self.db = pymongo.Connection(mongo_host, mongo_port)[mongo_db]
|
||||||
@ -86,7 +86,7 @@ class Client(object):
|
|||||||
raise ValueError('%s is already tracked' % url)
|
raise ValueError('%s is already tracked' % url)
|
||||||
|
|
||||||
self.log('track', url=url)
|
self.log('track', url=url)
|
||||||
self.db.tracked.insert(dict(url=url, versioning=versioning,
|
return self.db.tracked.insert(dict(url=url, versioning=versioning,
|
||||||
update_mins=update_mins,
|
update_mins=update_mins,
|
||||||
_random=random.randint(0, sys.maxint),
|
_random=random.randint(0, sys.maxint),
|
||||||
metadata=kwargs))
|
metadata=kwargs))
|
||||||
|
@ -6,7 +6,7 @@ MONGO_LOG_MAXSIZE = 100000000
|
|||||||
|
|
||||||
# scrapelib
|
# scrapelib
|
||||||
USER_AGENT = 'oyster'
|
USER_AGENT = 'oyster'
|
||||||
REQUESTS_PER_MINUTE = 300
|
REQUESTS_PER_MINUTE = 60
|
||||||
REQUEST_TIMEOUT = 300
|
REQUEST_TIMEOUT = 300
|
||||||
|
|
||||||
# other
|
# other
|
||||||
|
Loading…
Reference in New Issue
Block a user