scrapelib config options

This commit is contained in:
James Turk 2011-07-28 16:20:30 -04:00
parent 454130f5ea
commit bc8438a106

View File

@ -11,14 +11,22 @@ import scrapelib
class Client(object): class Client(object):
def __init__(self, mongo_host='localhost', mongo_port=27017,
             mongo_db='oyster', gridfs_collection='fs',
             user_agent='oyster', rpm=600, follow_robots=False,
             raise_errors=True, timeout=None, retry_attempts=0,
             retry_wait_seconds=5):
    """Open the MongoDB/GridFS handles and build the scrapelib scraper.

    MongoDB options:
        mongo_host, mongo_port -- passed to ``pymongo.Connection``.
        mongo_db               -- name of the database selected on that
                                  connection (stored as ``self.db``).
        gridfs_collection      -- collection name handed to
                                  ``gridfs.GridFS``; also remembered in
                                  ``self._collection_name``.

    Scraper options (each forwarded as a keyword argument to
    ``scrapelib.Scraper``; see the scrapelib documentation for their
    exact semantics):
        user_agent         -- forwarded as ``user_agent``.
        rpm                -- forwarded as ``requests_per_minute``.
        follow_robots      -- forwarded as ``follow_robots``.
        raise_errors       -- forwarded as ``raise_errors``.
        timeout            -- forwarded as ``timeout``.
        retry_attempts     -- forwarded as ``retry_attempts``.
        retry_wait_seconds -- forwarded as ``retry_wait_seconds``.
    """
    self.db = pymongo.Connection(mongo_host, mongo_port)[mongo_db]
    self.fs = gridfs.GridFS(self.db, gridfs_collection)
    self._collection_name = gridfs_collection

    # Forward the caller-supplied values rather than repeating the
    # defaults as literals; otherwise every scraper option except
    # user_agent and rpm would be silently ignored.
    self.scraper = scrapelib.Scraper(
        user_agent=user_agent,
        requests_per_minute=rpm,
        follow_robots=follow_robots,
        raise_errors=raise_errors,
        timeout=timeout,
        retry_attempts=retry_attempts,
        retry_wait_seconds=retry_wait_seconds,
    )
def _wipe(self): def _wipe(self):
""" exists primarily for debug use, wipes entire db """ """ exists primarily for debug use, wipes entire db """