scrapelib config options
This commit is contained in:
parent
454130f5ea
commit
bc8438a106
@ -11,14 +11,22 @@ import scrapelib
|
|||||||
class Client(object):
|
class Client(object):
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, host='localhost', port=27017,
|
def __init__(self, mongo_host='localhost', mongo_port=27017,
|
||||||
database='oyster', collection='fs'):
|
mongo_db='oyster', gridfs_collection='fs',
|
||||||
self.db = pymongo.Connection(host, port)[database]
|
user_agent='oyster', rpm=600, follow_robots=False,
|
||||||
self.fs = gridfs.GridFS(self.db, collection)
|
raise_errors=True, timeout=None, retry_attempts=0,
|
||||||
self._collection_name = collection
|
retry_wait_seconds=5):
|
||||||
# TODO: add some scrapelib config options
|
self.db = pymongo.Connection(mongo_host, mongo_port)[mongo_db]
|
||||||
self.scraper = scrapelib.Scraper()
|
self.fs = gridfs.GridFS(self.db, gridfs_collection)
|
||||||
|
self._collection_name = gridfs_collection
|
||||||
|
self.scraper = scrapelib.Scraper(user_agent=user_agent,
|
||||||
|
requests_per_minute=rpm,
|
||||||
|
follow_robots=follow_robots,
|
||||||
|
raise_errors=raise_errors,
|
||||||
|
timeout=timeout,
|
||||||
|
retry_attempts=retry_attempts,
|
||||||
|
retry_wait_seconds=retry_wait_seconds
|
||||||
|
)
|
||||||
|
|
||||||
def _wipe(self):
|
def _wipe(self):
|
||||||
""" exists primarily for debug use, wipes entire db """
|
""" exists primarily for debug use, wipes entire db """
|
||||||
|
Loading…
Reference in New Issue
Block a user