Commit 3c093bdb authored by Antony Chazapis

Implement basic functionality plus some extras

The API is based on the Apr. 15, 2011 release of the OpenStack Object Storage API v1.
The implementation is broken up into two layers - frontend (API) and backend (data and metadata handling).
The API is documented in the wiki. The following list is copied here for reference.

List of differences from the OOS API and clarifications:
* Authentication is done by another system. The token is used in the same way, but it is obtained differently. The top level GET request is kept compatible with the OOS API and allows for guest/testing operations.
* Support for X-Account-Meta-* style headers at the account level. Use POST to update.
* Support for X-Container-Meta-* style headers at the container level. Can be set when creating via PUT. Use POST to update.
* Some processing is done in the variable part of all X-*-Meta-* headers. If it includes underscores, they will be converted to dashes and the first letter of all intra-dash strings will be capitalized.
* All metadata replies, at all levels, include latest modification information.
* At all levels, a GET request may use If-Modified-Since and If-Unmodified-Since headers.
* A GET reply for a level will include all headers of the corresponding HEAD request.
* To avoid conflicts between objects and virtual directory markers in container listings, it is recommended that object names do not end with the delimiter used.
* The Accept header may be used in requests instead of the format parameter to specify the desired reply format. The parameter overrides the header.
* Container/object lists use a 200 return code if the reply is of type json/xml. The reply will include an empty json/xml.
* Container/object lists include all associated metadata if the reply is of type json/xml. Some names are kept to their OOS API equivalents for compatibility.
* In headers, dates are formatted according to RFC 1123. In extended information listings, dates are formatted according to ISO 8601.
* Object headers allowed, in addition to X-Object-Meta-*: Content-Encoding
* Object MOVE support.

Fixes #445
Fixes #447
parent c0a83a65
#
# Copyright (c) 2011 Greek Research and Technology Network
#
def camelCase(s):
    """Return `s` with its first character converted to lower case.

    The rest of the string is left untouched. An empty string is returned
    unchanged instead of raising IndexError.
    """
    # Slicing (rather than indexing) keeps the empty string safe.
    return s[:1].lower() + s[1:]
class Fault(Exception):
    """Base class for API faults.

    Stores `message`, `details` and `name` both as exception args and as
    attributes. When no name is given, it is derived from the class name
    through camelCase().
    """

    def __init__(self, message='', details='', name=''):
        super(Fault, self).__init__(message, details, name)
        self.message, self.details = message, details
        if not name:
            # Fall back to the (camel-cased) name of the concrete class.
            name = camelCase(type(self).__name__)
        self.name = name
class NotModified(Fault):
    """Fault carrying status code 304."""
    code = 304
class BadRequest(Fault):
    """Fault carrying status code 400."""
    code = 400
class Unauthorized(Fault):
    """Fault carrying status code 401."""
    code = 401
class ResizeNotAllowed(Fault):
    """Fault carrying status code 403."""
    code = 403
class ItemNotFound(Fault):
    """Fault carrying status code 404."""
    code = 404
class Conflict(Fault):
    """Fault carrying status code 409."""
    code = 409
class LengthRequired(Fault):
    """Fault carrying status code 411."""
    code = 411
class PreconditionFailed(Fault):
    """Fault carrying status code 412."""
    code = 412
class RangeNotSatisfiable(Fault):
    """Fault carrying status code 416."""
    code = 416
class UnprocessableEntity(Fault):
    """Fault carrying status code 422."""
    code = 422
class ServiceUnavailable(Fault):
    """Fault carrying status code 503."""
    code = 503
def camelCase(s):
    """Return `s` with its first character converted to lower case.

    The rest of the string is left untouched. An empty string is returned
    unchanged instead of raising IndexError.
    """
    # Slicing (rather than indexing) keeps the empty string safe.
    return s[:1].lower() + s[1:]
class Fault(Exception):
    """Base class for API faults.

    Stores `message`, `details` and `name` both as exception args and as
    attributes. When no name is given, it is derived from the class name
    through camelCase().
    """

    def __init__(self, message='', details='', name=''):
        super(Fault, self).__init__(message, details, name)
        self.message, self.details = message, details
        if not name:
            # Fall back to the (camel-cased) name of the concrete class.
            name = camelCase(type(self).__name__)
        self.name = name
class NotModified(Fault):
    """Fault carrying status code 304."""
    code = 304
class BadRequest(Fault):
    """Fault carrying status code 400."""
    code = 400
class Unauthorized(Fault):
    """Fault carrying status code 401."""
    code = 401
class ResizeNotAllowed(Fault):
    """Fault carrying status code 403."""
    code = 403
class ItemNotFound(Fault):
    """Fault carrying status code 404."""
    code = 404
class Conflict(Fault):
    """Fault carrying status code 409."""
    code = 409
class LengthRequired(Fault):
    """Fault carrying status code 411."""
    code = 411
class PreconditionFailed(Fault):
    """Fault carrying status code 412."""
    code = 412
class RangeNotSatisfiable(Fault):
    """Fault carrying status code 416."""
    code = 416
class UnprocessableEntity(Fault):
    """Fault carrying status code 422."""
    code = 422
class ServiceUnavailable(Fault):
    """Fault carrying status code 503."""
    code = 503
This diff is collapsed.
......@@ -4,9 +4,9 @@
<account name="{{ account }}">
{% for container in containers %}
<container>
<name>{{ container.name }}</name>
<count>{{ container.count }}</count>
<bytes>{{ container.bytes }}</bytes>
{% for key, value in container.items %}
<{{ key }}>{{ value }}</{{ key }}>
{% endfor %}
</container>
{% endfor %}
</account>
......
......@@ -3,13 +3,15 @@
<container name="{{ container }}">
{% for object in objects %}
{% if object.subdir %}
<subdir name="{{ object.subdir }}" />
{% else %}
<object>
<name>{{ object.name }}</name>
<hash>{{ object.hash }}</hash>
<bytes>{{ object.bytes }}</bytes>
<content_type>{{ object.content_type }}</content_type>
<last_modified>{{ object.last_modified }}</last_modified>
{% for key, value in object.items %}
<{{ key }}>{{ value }}</{{ key }}>
{% endfor %}
</object>
{% endif %}
{% endfor %}
</container>
{% endspaceless %}
# coding=utf-8
# Manual smoke-test script (Python 2) that drives the API through the
# cloudfiles client: it authenticates, lists and creates containers, then
# creates, reads back and deletes one object with non-ASCII names/metadata.
import cloudfiles
# Connect to a server expected on 127.0.0.1:8000 with fixed test credentials.
conn = cloudfiles.get_connection('jsmith', '1234567890', authurl = 'http://127.0.0.1:8000/v1')
print 'Authenticated. Token: %s' % conn.token
print 'Container count: %d Total bytes: %d' % conn.get_info()
# Create a container whose name mixes ASCII and Greek characters.
container = 'asdf κοντέινερ'
conn.create_container(container)
# List containers as objects; names are encoded to UTF-8 before printing.
containers = conn.get_all_containers()
print 'Found: %d containers' % len(containers)
for container in containers:
    print container.name.encode('utf-8')
# List containers again through list_containers_info() and print each entry.
containers = conn.list_containers_info()
for container in containers:
    print container
# `container` was rebound by the loops above, so reset it to the test name.
container = 'asdf κοντέινερ'
conn.create_container(container)
cont = conn.get_container(container)
print 'Got container %s.' % container
print 'Object count: %s Total bytes: %s' % (cont.object_count, cont.size_used)
# Print and delete every object currently listed in the container.
objects = cont.list_objects()
print 'Found: %d objects' % len(objects)
for object in objects:
    print object
    cont.delete_object(object)
# Create an object with a Greek name and both ASCII and Greek metadata keys.
object = 'δοκιμαστικό object'
obj = cont.create_object(object)
obj.content_type = 'text/plain'
obj.metadata['blah'] = 'αθδσηκφ'
obj.metadata['δοκ'] = 'αθδσηκφ'
obj.write('asdfasdfasdf')
# Bare attribute access; the value is discarded.
obj.metadata
print ''
print 'OBJECT'
print 'Name: %s' % obj.name
print 'Content Type: %s' % obj.content_type
print 'Size: %s' % obj.size
print 'Last Modified: %s' % obj.last_modified
print 'Container: %s' % obj.container
print 'Metadata: %s' % obj.metadata
print 'Metadata: %s' % obj.metadata.get('blah')
from urllib import unquote
# Fetch the object again and print what comes back.
obj = cont.get_object(object)
data = obj.read()
print ''
print 'OBJECT'
print 'Name: %s' % obj.name
print 'Content Type: %s' % obj.content_type
print 'Size: %s' % obj.size
print 'Last Modified: %s' % obj.last_modified
print 'Container: %s' % obj.container
print 'Metadata: %s' % obj.metadata
# Metadata values/keys are passed through unquote() here, presumably
# because they come back URL-quoted -- TODO confirm against the client.
print 'Metadata: %s' % unquote(obj.metadata.get('blah'))
# NOTE(review): indexes keys()[1]; relies on dict ordering to pick a key.
print 'Metadata: %s' % unquote(obj.metadata.keys()[1])
print 'Data: %s' % data
# Clean up the object and the container created above.
cont.delete_object(object)
conn.delete_container(container)
File mode changed from 100755 to 100644
#
# Copyright (c) 2011 Greek Research and Technology Network
#
from django.conf.urls.defaults import *
# TODO: This only works when in this order.
# Every group is a non-greedy '.+?' anchored by '$', so the account pattern
# also matches paths that contain '/'. The most specific (three-segment)
# pattern therefore has to be listed before the two- and one-segment ones.
# The empty path is handled by 'authenticate'.
urlpatterns = patterns('pithos.api.functions',
    (r'^$', 'authenticate'),
    (r'^(?P<v_account>.+?)/(?P<v_container>.+?)/(?P<v_object>.+?)$', 'object_demux'),
    (r'^(?P<v_account>.+?)/(?P<v_container>.+?)$', 'container_demux'),
    (r'^(?P<v_account>.+?)$', 'account_demux')
)
from django.conf.urls.defaults import *
# TODO: This only works when in this order.
# Every group is a non-greedy '.+?' anchored by '$', so the account pattern
# also matches paths that contain '/'. The most specific (three-segment)
# pattern therefore has to be listed before the two- and one-segment ones.
# The empty path is handled by 'top_demux'.
urlpatterns = patterns('pithos.api.functions',
    (r'^$', 'top_demux'),
    (r'^(?P<v_account>.+?)/(?P<v_container>.+?)/(?P<v_object>.+?)$', 'object_demux'),
    (r'^(?P<v_account>.+?)/(?P<v_container>.+?)$', 'container_demux'),
    (r'^(?P<v_account>.+?)$', 'account_demux')
)
#
# Copyright (c) 2011 Greek Research and Technology Network
#
from functools import wraps
from time import time
from wsgiref.handlers import format_date_time
from django.conf import settings
from django.http import HttpResponse
from pithos.api.faults import Fault, BadRequest, ServiceUnavailable
import datetime
import logging
logger = logging.getLogger(__name__)
def format_meta_key(k):
    """
    Normalize a metadata key: underscores become dashes and every
    dash-separated word is capitalized (first letter upper, rest lower).
    """
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
def get_meta(request, prefix):
    """
    Collect the request headers whose name starts with `prefix` into a dict.
    Keys are rewritten with format_meta_key(); values are kept as they are.
    """
    # request.META stores headers as HTTP_<NAME> with dashes as underscores.
    wanted = 'HTTP_' + prefix.upper().replace('-', '_')
    meta = {}
    for key, value in request.META.iteritems():
        if key.startswith(wanted):
            # Drop the leading 'HTTP_' (5 characters) before reformatting.
            meta[format_meta_key(key[5:])] = value
    return meta
def get_range(request):
    """
    Parse a byte range of the form 'bytes=<offset>-<length>'.
    Either returns None, or an (offset, length) tuple.
    If no offset is defined offset equals 0.
    If no length is defined length is None.

    The value is read from the 'range' query parameter (request.GET).
    NOTE(review): this function was described as parsing a Range *header*,
    yet it reads the query string -- confirm which source is intended.
    """
    value = request.GET.get('range')
    if not value:
        return None
    value = value.replace(' ', '')
    if not value.startswith('bytes='):
        return None
    # Strip the 'bytes=' unit before splitting; otherwise the first part
    # is 'bytes=<n>' and can never be converted to an integer.
    parts = value[len('bytes='):].split('-')
    if len(parts) != 2:
        return None
    offset, length = parts
    if not offset and not length:
        return None
    try:
        offset = int(offset) if offset else 0
        length = int(length) if length else None
    except ValueError:
        return None
    return (offset, length)
def update_response_headers(request, response):
    """
    Set the response Content-Type from request.serialization ('xml', 'json',
    anything else means plain text). When settings.TEST is on, also set a
    Date header with the current time.
    """
    content_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    response['Content-Type'] = content_types.get(request.serialization,
                                                 'text/plain; charset=UTF-8')
    if settings.TEST:
        response['Date'] = format_date_time(time())
def render_fault(request, fault):
    """
    Build a body-less response whose status is the fault's code, with the
    standard response headers applied.
    """
    fault_response = HttpResponse(status=fault.code)
    update_response_headers(request, fault_response)
    return fault_response
def request_serialization(request, format_allowed=False):
    """
    Return the serialization format requested.
    Valid formats are 'text' and 'json', 'xml' if `format_allowed` is True.

    The 'format' query parameter wins over the Accept header; the first
    recognized Accept entry is used and parameters after ';' are ignored.
    """
    if not format_allowed:
        return 'text'

    requested = request.GET.get('format')
    for known in ('json', 'xml'):
        if requested == known:
            return known

    by_media_type = (
        ('text/plain', 'text'),
        ('application/json', 'json'),
        ('application/xml', 'xml'),
        ('text/xml', 'xml'),
    )
    for entry in request.META.get('HTTP_ACCEPT', '').split(','):
        media_type = entry.strip().partition(';')[0]
        for candidate, serialization in by_media_type:
            if media_type == candidate:
                return serialization
    return 'text'
def api_method(http_method = None, format_allowed = False):
    """
    Decorator function for views that implement an API method.

    The wrapped view is only called when the request method matches
    `http_method` (if given) and the container/object names are within
    their size limits. Faults raised along the way are rendered with
    render_fault(); any other exception is logged and rendered as a
    ServiceUnavailable fault. `format_allowed` is forwarded to
    request_serialization().
    """
    def decorator(func):
        @wraps(func)
        def wrapper(request, *args, **kwargs):
            # render_fault() reads request.serialization, so it must exist
            # even when a fault is raised before the format is worked out.
            request.serialization = 'text'
            try:
                # Fill in custom request variables.
                request.serialization = request_serialization(request, format_allowed)

                if http_method and request.method != http_method:
                    raise BadRequest('Method not allowed.')

                # The names may arrive positionally as (account, container,
                # object) or, with named URL groups, as the keyword
                # arguments v_container / v_object.
                container = kwargs.get('v_container', args[1] if len(args) > 1 else None)
                obj = kwargs.get('v_object', args[2] if len(args) > 2 else None)
                if container is not None and len(container) > 256:
                    raise BadRequest('Container name too large.')
                if obj is not None and len(obj) > 1024:
                    raise BadRequest('Object name too large.')

                # TODO: Authenticate.
                request.user = "test"

                response = func(request, *args, **kwargs)
                update_response_headers(request, response)
                return response
            except Fault as fault:
                return render_fault(request, fault)
            except Exception as e:
                # Exception (not BaseException) lets SystemExit and
                # KeyboardInterrupt propagate instead of becoming a 503.
                logger.exception('Unexpected error: %s', e)
                fault = ServiceUnavailable('Unexpected error')
                return render_fault(request, fault)
        return wrapper
    return decorator
from functools import wraps
from time import time
from traceback import format_exc
from wsgiref.handlers import format_date_time
from django.conf import settings
from django.http import HttpResponse
from django.utils.http import http_date
from pithos.api.compat import parse_http_date_safe
from pithos.api.faults import (Fault, NotModified, BadRequest, ItemNotFound, PreconditionFailed,
ServiceUnavailable)
from pithos.backends import backend
import datetime
import logging
logger = logging.getLogger(__name__)
def printable_meta_dict(d):
    """Format a meta dictionary for printing out json/xml.

    Convert all keys to lower case and replace dashes with underscores.
    Change 'modified' key from backend to 'last_modified' and format date
    as ISO 8601 (local time, as produced by datetime.fromtimestamp()).

    Returns a new dictionary; `d` itself is left untouched, so callers can
    keep using its 'modified' entry afterwards.
    """
    printable = dict((k.lower().replace('-', '_'), v)
                     for k, v in d.items() if k != 'modified')
    if 'modified' in d:
        modified = datetime.datetime.fromtimestamp(int(d['modified']))
        printable['last_modified'] = modified.isoformat()
    return printable
def format_meta_key(k):
    """Turn underscores into dashes and capitalize each dash-separated word"""
    words = k.replace('_', '-').split('-')
    return '-'.join(word.capitalize() for word in words)
def get_meta_prefix(request, prefix):
    """Collect request headers starting with prefix; keys go through format_meta_key()"""
    # request.META stores headers as HTTP_<NAME> with dashes as underscores.
    wanted = 'HTTP_' + prefix.upper().replace('-', '_')
    meta = {}
    for key, value in request.META.iteritems():
        if key.startswith(wanted):
            # Drop the leading 'HTTP_' (5 characters) before reformatting.
            meta[format_meta_key(key[5:])] = value
    return meta
def get_account_meta(request):
    """Return the X-Account-Meta-* headers of an account request as a dict"""
    return get_meta_prefix(request, 'X-Account-Meta-')
def put_account_meta(response, meta):
    """Copy account metadata into the response headers

    'count' and 'bytes' are required; 'modified' (a timestamp) is optional.
    X-Account-Meta-* entries are copied with key and value UTF-8 encoded.
    """
    response['X-Account-Container-Count'] = meta['count']
    response['X-Account-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        timestamp = int(meta['modified'])
        response['Last-Modified'] = http_date(timestamp)
    for name, value in meta.items():
        if name.startswith('X-Account-Meta-'):
            response[name.encode('utf-8')] = value.encode('utf-8')
def get_container_meta(request):
    """Return the X-Container-Meta-* headers of a container request as a dict"""
    return get_meta_prefix(request, 'X-Container-Meta-')
def put_container_meta(response, meta):
    """Copy container metadata into the response headers

    'count' and 'bytes' are required; 'modified' (a timestamp) is optional.
    X-Container-Meta-* entries are copied with key and value UTF-8 encoded.
    """
    response['X-Container-Object-Count'] = meta['count']
    response['X-Container-Bytes-Used'] = meta['bytes']
    if 'modified' in meta:
        timestamp = int(meta['modified'])
        response['Last-Modified'] = http_date(timestamp)
    for name, value in meta.items():
        if name.startswith('X-Container-Meta-'):
            response[name.encode('utf-8')] = value.encode('utf-8')
def get_object_meta(request):
    """Return the metadata carried by an object request

    Includes the X-Object-Meta-* headers plus Content-Type, Content-Encoding
    and X-Object-Manifest whenever those are present and non-empty.
    """
    meta = get_meta_prefix(request, 'X-Object-Meta-')
    # (key in request.META, key in the resulting meta dict)
    extra_headers = (
        ('CONTENT_TYPE', 'Content-Type'),
        ('HTTP_CONTENT_ENCODING', 'Content-Encoding'),
        ('HTTP_X_OBJECT_MANIFEST', 'X-Object-Manifest'),
    )
    for environ_key, meta_key in extra_headers:
        value = request.META.get(environ_key)
        if value:
            meta[meta_key] = value
    return meta
def put_object_meta(response, meta):
    """Copy object metadata into the response headers

    'hash', 'bytes' and 'modified' are required. Content-Type falls back to
    application/octet-stream. X-Object-Meta-* entries are copied with key
    and value UTF-8 encoded; Content-Encoding and X-Object-Manifest are
    copied as they are when present.
    """
    content_type = meta.get('Content-Type', 'application/octet-stream')
    modified = int(meta['modified'])

    response['ETag'] = meta['hash']
    response['Content-Length'] = meta['bytes']
    response['Content-Type'] = content_type
    response['Last-Modified'] = http_date(modified)
    for name, value in meta.items():
        if name.startswith('X-Object-Meta-'):
            response[name.encode('utf-8')] = value.encode('utf-8')
    for header in ('Content-Encoding', 'X-Object-Manifest'):
        if header in meta:
            response[header] = meta[header]
def validate_modification_preconditions(request, meta):
    """Enforce If-Modified-Since / If-Unmodified-Since against meta['modified']

    Raises NotModified when the stored timestamp is not newer than
    If-Modified-Since, and PreconditionFailed when it is newer than
    If-Unmodified-Since. Headers that are missing or that
    parse_http_date_safe() turns into None are ignored, as is a meta dict
    without a 'modified' entry.
    """
    since = request.META.get('HTTP_IF_MODIFIED_SINCE')
    if since is not None:
        since = parse_http_date_safe(since)
        if since is not None and 'modified' in meta:
            if int(meta['modified']) <= since:
                raise NotModified('Object has not been modified')

    until = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
    if until is not None:
        until = parse_http_date_safe(until)
        if until is not None and 'modified' in meta:
            if int(meta['modified']) > until:
                raise PreconditionFailed('Object has been modified')
def _split_object_path(path, error_message):
    """Return (container, name) for a '/container/name' string or a 2-tuple"""
    if isinstance(path, tuple) and len(path) == 2:
        return path
    if isinstance(path, str):
        parts = path.split('/')
        # A valid path starts with '/', so the first part must be empty.
        if len(parts) >= 3 and parts[0] == '':
            return parts[1], '/'.join(parts[2:])
    raise BadRequest(error_message)


def copy_or_move_object(request, src_path, dest_path, move=False):
    """Copy or move an object

    `src_path` and `dest_path` are either '/container/object' strings (the
    object part may itself contain '/') or (container, object) tuples. The
    metadata found in the request is passed on to the backend.

    Raises BadRequest for a path that is neither of those shapes, and
    ItemNotFound when the backend call raises NameError.
    """
    # Anything that is not a valid path is rejected here. Leaving the names
    # unset would raise UnboundLocalError (a NameError) further down and be
    # reported as a misleading ItemNotFound.
    src_container, src_name = _split_object_path(
        src_path, 'Invalid X-Copy-From or X-Move-From header')
    dest_container, dest_name = _split_object_path(
        dest_path, 'Invalid Destination header')

    meta = get_object_meta(request)
    try:
        if move:
            backend.move_object(request.user, src_container, src_name, dest_container, dest_name, meta)
        else:
            backend.copy_object(request.user, src_container, src_name, dest_container, dest_name, meta)
    except NameError:
        # Presumably the backend signals a missing container/object with
        # NameError -- TODO confirm against the backend.
        raise ItemNotFound('Container or object does not exist')
def get_range(request):
    """Parse the Range header ('bytes=first-last') of the request

    Returns None when the header is absent or not understood, otherwise an
    (offset, length) tuple:
      'bytes=first-last' -> (first, last - first + 1), None if first > last
      'bytes=first-'     -> (first, None)
      'bytes=-count'     -> (-count, None), an offset from the end
    """
    header = request.META.get('HTTP_RANGE', '').replace(' ', '')
    if not header.startswith('bytes='):
        return None
    bounds = header[6:].split('-')
    if len(bounds) != 2:
        return None
    first, last = bounds
    if not first and not last:
        return None

    try:
        if not first:
            # Suffix form: only the number of trailing bytes is given.
            return (-int(last), None)
        start = int(first)
        if not last:
            return (start, None)
        end = int(last)
    except ValueError:
        return None

    if start > end:
        return None
    return (start, end - start + 1)
def raw_input_socket(request):
    """Return the object to read the rest of the request body from

    For 'WSGIServer*' servers, or when no SERVER_SOFTWARE is reported but
    the environ carries 'wsgi.input', that stream is returned. For
    'mod_python*' the underlying request._req is returned. Anything else
    raises ServiceUnavailable.
    """
    software = request.META.get('SERVER_SOFTWARE')
    if software:
        if software.startswith('WSGIServer'):
            return request.environ['wsgi.input']
        if software.startswith('mod_python'):
            return request._req
    elif 'wsgi.input' in request.environ:
        return request.environ['wsgi.input']
    raise ServiceUnavailable('Unknown server software')
MAX_UPLOAD_SIZE = 10 * (1024 * 1024) # 10MB

def socket_read_iterator(sock, length=-1, blocksize=4096):
    """Return a maximum of blocksize data read from the socket in each iteration

    Read up to 'length'. If no 'length' is defined (negative value), will
    attempt a chunked read: each chunk starts with a hexadecimal size line
    (anything after ';' is ignored) and is followed by CRLF; a zero-sized
    chunk ends the transfer.
    The maximum amount of data read in chunked mode is controlled by
    MAX_UPLOAD_SIZE.

    Raises BadRequest for an unparsable chunk size, or when the socket runs
    out of data before the announced amount has been read.
    """
    if length < 0: # Chunked transfers
        # Count from zero; the negative 'length' is only a mode marker.
        received = 0
        while received < MAX_UPLOAD_SIZE:
            chunk_length = sock.readline()
            pos = chunk_length.find(';')
            if pos >= 0:
                chunk_length = chunk_length[:pos]
            try:
                chunk_length = int(chunk_length, 16)
            except (TypeError, ValueError):
                raise BadRequest('Bad chunk size') # TODO: Change to something more appropriate.
            if chunk_length == 0:
                return
            while chunk_length > 0:
                data = sock.read(min(chunk_length, blocksize))
                if not data:
                    # An empty read means EOF; without this check the loop
                    # would spin forever yielding empty blocks.
                    raise BadRequest('Incomplete request body')
                chunk_length -= len(data)
                received += len(data)
                yield data
            data = sock.read(2) # CRLF
        # TODO: Raise something to note that maximum size is reached.
    else:
        if length > MAX_UPLOAD_SIZE:
            # TODO: Raise something to note that maximum size is reached.
            pass
        while length > 0:
            data = sock.read(min(length, blocksize))
            if not data:
                # Premature EOF: stop instead of looping forever.
                raise BadRequest('Incomplete request body')
            length -= len(data)
            yield data
def update_response_headers(request, response):
    """Set Content-Type from request.serialization, plus Date when settings.TEST is on

    'xml' and 'json' map to their application/* types; anything else is
    served as plain text. All types are declared as UTF-8.
    """
    content_types = {
        'xml': 'application/xml; charset=UTF-8',
        'json': 'application/json; charset=UTF-8',
    }
    response['Content-Type'] = content_types.get(request.serialization,
                                                 'text/plain; charset=UTF-8')
    if settings.TEST:
        response['Date'] = format_date_time(time())
def render_fault(request, fault):
    """Render a fault as a plain-text response with the fault's status code

    The body is the fault message followed by its details, one per line.
    When settings.DEBUG or settings.TEST is on, the details are replaced
    with the traceback of the exception currently being handled.
    """
    if settings.DEBUG or settings.TEST:
        # format_exc() formats the exception being handled; its only
        # positional parameter is a frame limit, not the exception.
        fault.details = format_exc()
    # Faults are always reported as text, whatever format was requested.
    request.serialization = 'text'
    data = '\n'.join((fault.message, fault.details)) + '\n'
    response = HttpResponse(data, status=fault.code)
    update_response_headers(request, response)
    return response
def request_serialization(request, format_allowed=False):
"""Return the serialization format requested
Valid formats are 'text' and 'json', 'xml' if 'format_allowed' is True.
"""
if not format_allowed:
return 'text'
format = request.GET.get('format')
if format == 'json':
return 'json'