S3 storage

This commit is contained in:
James Turk 2012-02-14 17:24:55 -05:00
parent d66ebdd74a
commit ac5bf809da
3 changed files with 31 additions and 20 deletions

View File

@ -8,6 +8,8 @@ import pymongo
import scrapelib
from .storage.gridfs import GridFSStorage
from .storage.s3 import S3Storage
class Kernel(object):
""" oyster's workhorse, handles tracking """
@ -29,7 +31,7 @@ class Kernel(object):
pass
# create storage class
self.storage = GridFSStorage(self)
self.storage = S3Storage(self)
# create status document if it doesn't exist
if self.db.status.count() == 0:

28
oyster/storage/s3.py Normal file
View File

@ -0,0 +1,28 @@
import urllib
import boto
from oyster.conf import settings
class S3Storage(object):
    """ storage backend that keeps documents in an Amazon S3 bucket

        Credentials and the bucket name are read from oyster's settings
        (AWS_KEY, AWS_SECRET and AWS_BUCKET).
    """

    storage_type = 's3'

    def __init__(self, kernel):
        """ open the S3 connection

            kernel is accepted so every storage class can be constructed
            the same way; this backend does not use it.
        """
        self.s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)
        # bucket handles by name, filled lazily so that create_bucket is
        # not re-issued on every upload
        self._bucket_cache = {}

    def _get_bucket(self, name):
        """ return the bucket called name, asking S3 for it only once """
        # tolerate instances that were built without running __init__
        cache = self.__dict__.setdefault('_bucket_cache', {})
        if name not in cache:
            cache[name] = self.s3conn.create_bucket(name)
        return cache[name]

    def put(self, tracked_doc, data, content_type):
        """ upload the document to S3

            tracked_doc  -- mapping whose '_id' value is used as the S3 key
            data         -- string contents to store
            content_type -- value sent as the object's Content-Type header

            The object is uploaded with a public-read ACL.  Returns the
            http URL the object is stored under.
        """
        # imported explicitly instead of relying on connect_s3() having
        # loaded the boto.s3.key submodule as a side effect
        from boto.s3.key import Key

        bucket_name = settings.AWS_BUCKET
        doc_id = tracked_doc['_id']

        k = Key(self._get_bucket(bucket_name))
        k.key = doc_id
        headers = {'x-amz-acl': 'public-read',
                   'Content-Type': content_type}
        k.set_contents_from_string(data, headers=headers)
        # can also set metadata if we want, useful?

        url = 'http://%s.s3.amazonaws.com/%s' % (bucket_name, doc_id)
        return url

    def get(self, id):
        """ fetch a stored document and return its contents

            id is handed straight to urllib.urlopen, so it has to be a URL
            (presumably the one returned by put -- confirm against callers).
        """
        # could use get_contents_as_string, any advantages?
        response = urllib.urlopen(id)
        try:
            return response.read()
        finally:
            # always release the HTTP handle, even if read() fails
            response.close()

View File

@ -3,7 +3,6 @@ from celery.execute import send_task
from pymongo.objectid import ObjectId
from oyster.conf import settings
from oyster.core import kernel
@ -70,21 +69,3 @@ class ExternalStoreTask(Task):
def upload_document(self, doc_id, filedata, metadata):
    """ abstract hook for storing one document, override on implementations

        This base version does nothing and returns None.
    """
    return None
class S3StoreTask(ExternalStoreTask):
    """ external store task that uploads documents to Amazon S3

        Credentials and the bucket name are read from oyster's settings
        (AWS_KEY, AWS_SECRET and AWS_BUCKET).
    """

    external_store = 's3'

    # importing inside the class body binds ``boto`` as a class attribute
    import boto
    # NOTE: the connection is opened once, when this class body executes
    s3conn = boto.connect_s3(settings.AWS_KEY, settings.AWS_SECRET)

    def upload_document(self, doc_id, filedata, metadata):
        """ upload the document to S3

            doc_id   -- used as the S3 key and as the last URL component
            filedata -- string contents to store
            metadata -- accepted for the ExternalStoreTask interface,
                        not used by this implementation

            The object is made public-read.  Returns the http URL the
            object is stored under.
        """
        # Key is defined in the boto.s3.key submodule; it is not an
        # attribute of the boto.s3 package, so import it explicitly
        from boto.s3.key import Key

        bucket = self.s3conn.create_bucket(settings.AWS_BUCKET)
        k = Key(bucket)
        k.key = doc_id
        k.set_contents_from_string(filedata)
        k.set_acl('public-read')

        url = 'http://%s.s3.amazonaws.com/%s' % (settings.AWS_BUCKET, doc_id)
        return url