S3 storage
This commit is contained in:
parent
d66ebdd74a
commit
ac5bf809da
@ -8,6 +8,8 @@ import pymongo
|
|||||||
import scrapelib
|
import scrapelib
|
||||||
|
|
||||||
from .storage.gridfs import GridFSStorage
|
from .storage.gridfs import GridFSStorage
|
||||||
|
from .storage.s3 import S3Storage
|
||||||
|
|
||||||
|
|
||||||
class Kernel(object):
|
class Kernel(object):
|
||||||
""" oyster's workhorse, handles tracking """
|
""" oyster's workhorse, handles tracking """
|
||||||
@ -29,7 +31,7 @@ class Kernel(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
# create storage class
|
# create storage class
|
||||||
self.storage = GridFSStorage(self)
|
self.storage = S3Storage(self)
|
||||||
|
|
||||||
# create status document if it doesn't exist
|
# create status document if it doesn't exist
|
||||||
if self.db.status.count() == 0:
|
if self.db.status.count() == 0:
|
||||||
|
28
oyster/storage/s3.py
Normal file
28
oyster/storage/s3.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
import urllib
|
||||||
|
import boto
|
||||||
|
from oyster.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
class S3Storage(object):
    """ storage backend that keeps document contents in an Amazon S3 bucket

        Credentials and the bucket name are read from ``settings``
        (AWS_KEY, AWS_SECRET and AWS_BUCKET).
    """
    storage_type = 's3'

    def __init__(self, kernel):
        """ open the S3 connection; ``kernel`` is accepted but not used """
        self.s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)
        # bucket handle, resolved on the first upload and reused afterwards
        self._bucket = None

    def _get_bucket(self):
        """ return the configured bucket, creating it on first use """
        if self._bucket is None:
            # create_bucket costs a request to S3, so only issue it once per
            # storage instance instead of once per uploaded document
            self._bucket = self.s3conn.create_bucket(settings.AWS_BUCKET)
        return self._bucket

    def put(self, tracked_doc, data, content_type):
        """ upload the document to S3

            The object is stored under ``tracked_doc['_id']`` with a
            public-read ACL and the given ``content_type``.  Returns the
            URL the object is stored under.
        """
        # format the id once so the key name and the URL cannot diverge
        key_name = '%s' % (tracked_doc['_id'],)
        # asking the bucket for the key avoids relying on boto.s3.key having
        # been imported as a side effect of connect_s3
        k = self._get_bucket().new_key(key_name)
        headers = {'x-amz-acl': 'public-read',
                   'Content-Type': content_type}
        k.set_contents_from_string(data, headers=headers)
        # can also set metadata if we want, useful?

        url = 'http://%s.s3.amazonaws.com/%s' % (settings.AWS_BUCKET,
                                                 key_name)
        return url

    def get(self, id):
        """ fetch and return the contents found at ``id``

            ``id`` is handed straight to ``urllib.urlopen``, so it is
            expected to be a URL such as the one returned by ``put``.
        """
        # could use get_contents_as_string, any advantages?
        response = urllib.urlopen(id)
        try:
            return response.read()
        finally:
            # release the HTTP connection even if the read fails
            response.close()
|
@ -3,7 +3,6 @@ from celery.execute import send_task
|
|||||||
|
|
||||||
from pymongo.objectid import ObjectId
|
from pymongo.objectid import ObjectId
|
||||||
|
|
||||||
from oyster.conf import settings
|
|
||||||
from oyster.core import kernel
|
from oyster.core import kernel
|
||||||
|
|
||||||
|
|
||||||
@ -70,21 +69,3 @@ class ExternalStoreTask(Task):
|
|||||||
def upload_document(self, doc_id, filedata, metadata):
    """ abstract method, override on implementations

        The base implementation takes ``doc_id``, ``filedata`` and
        ``metadata``, does nothing with them and returns None.
    """
|
||||||
|
|
||||||
|
|
||||||
class S3StoreTask(ExternalStoreTask):
    """ external-store task that uploads documents to Amazon S3 """
    external_store = 's3'

    # imported in the class body, so boto is also reachable as ``self.boto``
    import boto
    # connection object created when the class is defined and shared by
    # every instance of the task
    s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)

    def upload_document(self, doc_id, filedata, metadata):
        """ upload the document to S3

            Stores ``filedata`` under the key ``doc_id`` with a public-read
            ACL and returns the URL of the stored object.  ``metadata`` is
            accepted to match the base-class signature but is not used.
        """
        bucket = self.s3conn.create_bucket(settings.AWS_BUCKET)
        # the boto.s3 package has no ``Key`` attribute (the class lives in
        # boto.s3.key), so ask the bucket for the key instead
        k = bucket.new_key(doc_id)
        # send the ACL with the upload rather than in a second set_acl()
        # request, so the object is never stored without it
        k.set_contents_from_string(filedata,
                                   headers={'x-amz-acl': 'public-read'})

        url = 'http://%s.s3.amazonaws.com/%s' % (settings.AWS_BUCKET, doc_id)
        return url
|
|
||||||
|
Loading…
Reference in New Issue
Block a user