scrapelib config options
This commit is contained in:
		
							parent
							
								
									454130f5ea
								
							
						
					
					
						commit
						bc8438a106
					
				
					 1 changed files with 16 additions and 8 deletions
				
			
		| 
						 | 
					@ -11,14 +11,22 @@ import scrapelib
 | 
				
			||||||
class Client(object):
 | 
					class Client(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, host='localhost', port=27017,
 | 
					    def __init__(self, mongo_host='localhost', mongo_port=27017,
 | 
				
			||||||
                 database='oyster', collection='fs'):
 | 
					                 mongo_db='oyster', gridfs_collection='fs',
 | 
				
			||||||
        self.db = pymongo.Connection(host, port)[database]
 | 
					                 user_agent='oyster', rpm=600, follow_robots=False,
 | 
				
			||||||
        self.fs = gridfs.GridFS(self.db, collection)
 | 
					                 raise_errors=True, timeout=None, retry_attempts=0,
 | 
				
			||||||
        self._collection_name = collection
 | 
					                 retry_wait_seconds=5):
 | 
				
			||||||
        # TODO: add some scrapelib config options
 | 
					        self.db = pymongo.Connection(mongo_host, mongo_port)[mongo_db]
 | 
				
			||||||
        self.scraper = scrapelib.Scraper()
 | 
					        self.fs = gridfs.GridFS(self.db, gridfs_collection)
 | 
				
			||||||
 | 
					        self._collection_name = gridfs_collection
 | 
				
			||||||
 | 
					        self.scraper = scrapelib.Scraper(user_agent=user_agent,
 | 
				
			||||||
 | 
					                                         requests_per_minute=rpm,
 | 
				
			||||||
 | 
					                                         follow_robots=False,
 | 
				
			||||||
 | 
					                                         raise_errors=True,
 | 
				
			||||||
 | 
					                                         timeout=None,
 | 
				
			||||||
 | 
					                                         retry_attempts=0,
 | 
				
			||||||
 | 
					                                         retry_wait_seconds=5
 | 
				
			||||||
 | 
					                                        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _wipe(self):
 | 
					    def _wipe(self):
 | 
				
			||||||
        """ exists primarily for debug use, wipes entire db """
 | 
					        """ exists primarily for debug use, wipes entire db """
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue