diff --git a/oyster/core.py b/oyster/core.py
index 43f2698..6193eb3 100644
--- a/oyster/core.py
+++ b/oyster/core.py
@@ -8,6 +8,8 @@ import pymongo
 import scrapelib
 
 from .storage.gridfs import GridFSStorage
+from .storage.s3 import S3Storage
+
 
 class Kernel(object):
     """ oyster's workhorse, handles tracking """
@@ -29,7 +31,7 @@ class Kernel(object):
             pass
 
         # create storage class
-        self.storage = GridFSStorage(self)
+        self.storage = S3Storage(self)
 
         # create status document if it doesn't exist
         if self.db.status.count() == 0:
diff --git a/oyster/storage/s3.py b/oyster/storage/s3.py
new file mode 100644
index 0000000..71e62aa
--- /dev/null
+++ b/oyster/storage/s3.py
@@ -0,0 +1,41 @@
+import urllib
+
+import boto
+from boto.s3.key import Key
+
+from oyster.conf import settings
+
+
+class S3Storage(object):
+    """ storage backend that keeps documents in an S3 bucket """
+    storage_type = 's3'
+
+    def __init__(self, kernel):
+        # kernel is not used by this backend, Kernel passes itself in
+        # when it constructs its storage class
+        self.s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)
+
+    def put(self, tracked_doc, data, content_type):
+        """ upload the document to S3, return the URL it was stored at """
+        bucket = self.s3conn.create_bucket(settings.AWS_BUCKET)
+        k = Key(bucket)
+        k.key = tracked_doc['_id']
+        # ACL and content type are sent as headers on the upload request
+        headers = {'x-amz-acl': 'public-read',
+                   'Content-Type': content_type}
+        k.set_contents_from_string(data, headers=headers)
+        # can also set metadata if we want, useful?
+
+        url = 'http://%s.s3.amazonaws.com/%s' % (settings.AWS_BUCKET,
+                                                 tracked_doc['_id'])
+        return url
+
+    def get(self, id):
+        """ return the body found at id, which is opened as a URL """
+        # could use get_contents_as_string, any advantages?
+        resp = urllib.urlopen(id)
+        try:
+            return resp.read()
+        finally:
+            # release the connection even if the read fails
+            resp.close()
diff --git a/oyster/tasks.py b/oyster/tasks.py
index 5afa509..e1c8d07 100644
--- a/oyster/tasks.py
+++ b/oyster/tasks.py
@@ -3,7 +3,6 @@ from celery.execute import send_task
 
 from pymongo.objectid import ObjectId
 
-from oyster.conf import settings
 from oyster.core import kernel
 
 
@@ -70,21 +69,3 @@ class ExternalStoreTask(Task):
     def upload_document(self, doc_id, filedata, metadata):
         """ abstract method, override on implementations """
         pass
-
-
-class S3StoreTask(ExternalStoreTask):
-    external_store = 's3'
-
-    import boto
-    s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)
-
-    def upload_document(self, doc_id, filedata, metadata):
-        """ upload the document to S3 """
-        bucket = self.s3conn.create_bucket(settings.AWS_BUCKET)
-        k = self.boto.s3.Key(bucket)
-        k.key = doc_id
-        k.set_contents_from_string(filedata)
-        k.set_acl('public-read')
-
-        url = 'http://%s.s3.amazonaws.com/%s' % (settings.AWS_BUCKET, doc_id)
-        return url