Commit 8319ae38 authored by Sofia Papagiannaki

reorganize psend components: move HashMap & smart_upload in lib

parent 2aaff3f7
# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
import hashlib
def file_read_iterator(fp, size=1024):
    """Yield successive chunks read from `fp`, each at most `size` long.

    `fp` only needs a ``read(size)`` method.  Iteration ends at the first
    empty (falsy) read result, which is not yielded.
    """
    chunk = fp.read(size)
    while chunk:
        yield chunk
        chunk = fp.read(size)
class HashMap(list):
    """List of per-block digests of a file, plus a combined top hash.

    The file named `f` is read in `blocksize`-sized blocks; each block is
    hashed with the hashlib algorithm named `blockhash` after its trailing
    NUL bytes are stripped, and the raw digests are stored in order as the
    items of this list.
    """

    def __init__(self, f, blocksize, blockhash):
        """Hash the file at path `f` block by block.

        f         -- path of the file to read
        blocksize -- number of bytes per block
        blockhash -- algorithm name accepted by ``hashlib.new``
        """
        super(HashMap, self).__init__()
        self.blocksize = blocksize
        self.blockhash = blockhash
        self.load(f)

    def _hash_raw(self, v):
        """Return the raw digest of the byte string `v`."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block `v` with trailing NUL bytes removed."""
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Return the single digest that summarises all block digests.

        No blocks: the digest of the empty string.  One block: that block's
        digest.  Otherwise the digests are padded with all-zero entries up
        to the next power of two and combined pairwise, level by level,
        until one digest remains.
        """
        if not self:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self[0]

        level = list(self)
        width = 2
        while width < len(level):
            width *= 2
        # Pad with zero "digests" so every node has a sibling.
        level += [b'\x00' * len(level[0])] * (width - len(level))

        while len(level) > 1:
            level = [self._hash_raw(level[i] + level[i + 1])
                     for i in range(0, len(level), 2)]
        return level[0]

    def load(self, f):
        """Append the digest of every block of the file at path `f`."""
        # Binary mode: the blocks are hashed as raw bytes, so no newline
        # translation or text decoding may be applied while reading.
        with open(f, 'rb') as fp:
            for block in iter(lambda: fp.read(self.blocksize), b''):
                self.append(self._hash_block(block))
\ No newline at end of file
# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
from hashmap import HashMap
from binascii import hexlify, unhexlify
from cStringIO import StringIO
from lib.client import Fault
import os
import sys
def smart_upload(client, file, blocksize, blockhash):
    """Upload `file` to the 'pithos' container, sending only missing blocks.

    client    -- object providing ``create_object_by_hashmap`` and
                 ``create_object``
    file      -- path of the local file; its base name is the object name
    blocksize -- number of bytes per block
    blockhash -- hashlib algorithm name used for the block digests

    The object is first created from its hashmap alone.  If that succeeds
    nothing else is sent.  If it fails with a ``Fault`` whose status is 409,
    ``fault.data`` is read as newline-separated hex digests (presumably the
    blocks the server lacks -- confirm against the server API); each of
    those blocks is uploaded and the hashmap creation is retried.  Any
    other ``Fault`` is re-raised.
    """
    dest_container = 'pithos'
    dest_object = os.path.basename(file)
    size = os.path.getsize(file)
    # Hash the file that was passed in, not whatever sys.argv[1] happens to be.
    hashes = HashMap(file, blocksize, blockhash)
    hashmap = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}

    try:
        client.create_object_by_hashmap(dest_container, dest_object, hashmap)
    except Fault as fault:
        if fault.status != 409:
            raise
        # Read the payload inside the handler: the exception name must not
        # be relied upon once the except clause has finished.
        missing = fault.data.split('\n')
    else:
        return

    # Everything from the first empty entry onwards is discarded.
    if '' in missing:
        del missing[missing.index(''):]

    # Binary mode so that offsets and block contents match what was hashed.
    with open(file, 'rb') as fp:
        for hexhash in missing:
            offset = hashes.index(unhexlify(hexhash)) * blocksize
            fp.seek(offset)
            block = fp.read(blocksize)
            client.create_object(dest_container, '.upload', StringIO(block))

    client.create_object_by_hashmap(dest_container, dest_object, hashmap)
\ No newline at end of file
#!/usr/bin/env python
import os
import hashlib
import sys
import os
from binascii import hexlify, unhexlify
from cStringIO import StringIO
from lib.client import Pithos_Client, Fault
from lib.client import Pithos_Client
from lib.util import get_user, get_auth, get_server, get_api
from lib.transfer import smart_upload
# XXX Get these from container...
# Number of bytes per block when reading, hashing and uploading (4 MiB).
BLOCK_SIZE = 4 * 1024 * 1024
# Algorithm name handed to hashlib.new() for the block digests.
BLOCK_HASH = 'sha256'
def file_read_iterator(fp, size=1024):
    """Generate the contents of `fp` in pieces of at most `size`.

    Calls ``fp.read(size)`` repeatedly and yields each non-empty result;
    the first empty (falsy) result ends the iteration.
    """
    piece = fp.read(size)
    while piece:
        yield piece
        piece = fp.read(size)
class HashMap(list):
    """List of per-block digests of a file, plus a combined top hash.

    The file is read in fixed-size blocks; each block is hashed after its
    trailing NUL bytes are stripped, and the raw digests are stored in
    order as the items of this list.
    """

    def __init__(self, f, blocksize=None, blockhash=None):
        """Hash the file at path `f` block by block.

        f         -- path of the file to read
        blocksize -- number of bytes per block; defaults to the module-level
                     BLOCK_SIZE
        blockhash -- algorithm name accepted by ``hashlib.new``; defaults to
                     the module-level BLOCK_HASH
        """
        super(HashMap, self).__init__()
        self.blocksize = BLOCK_SIZE if blocksize is None else blocksize
        self.blockhash = BLOCK_HASH if blockhash is None else blockhash
        self.load(f)

    def _hash_raw(self, v):
        """Return the raw digest of the byte string `v`."""
        h = hashlib.new(self.blockhash)
        h.update(v)
        return h.digest()

    def _hash_block(self, v):
        """Return the digest of block `v` with trailing NUL bytes removed."""
        return self._hash_raw(v.rstrip(b'\x00'))

    def hash(self):
        """Return the single digest that summarises all block digests.

        No blocks: the digest of the empty string.  One block: that block's
        digest.  Otherwise the digests are padded with all-zero entries up
        to the next power of two and combined pairwise, level by level,
        until one digest remains.
        """
        if not self:
            return self._hash_raw(b'')
        if len(self) == 1:
            return self[0]

        level = list(self)
        width = 2
        while width < len(level):
            width *= 2
        # Pad with zero "digests" so every node has a sibling.
        level += [b'\x00' * len(level[0])] * (width - len(level))

        while len(level) > 1:
            level = [self._hash_raw(level[i] + level[i + 1])
                     for i in range(0, len(level), 2)]
        return level[0]

    def load(self, f):
        """Append the digest of every block of the file at path `f`."""
        # Binary mode: the blocks are hashed as raw bytes, so no newline
        # translation or text decoding may be applied while reading.
        with open(f, 'rb') as fp:
            for block in iter(lambda: fp.read(self.blocksize), b''):
                self.append(self._hash_block(block))
def smart_upload(client, file):
    """Upload `file` to the 'pithos' container, sending only missing blocks.

    client -- object providing ``create_object_by_hashmap`` and
              ``create_object``
    file   -- path of the local file; its base name is the object name

    The object is first created from its hashmap alone.  If that succeeds
    nothing else is sent.  If it fails with a ``Fault`` whose status is 409,
    ``fault.data`` is read as newline-separated hex digests (presumably the
    blocks the server lacks -- confirm against the server API); each of
    those blocks is uploaded and the hashmap creation is retried.  Any
    other ``Fault`` is re-raised.
    """
    dest_container = 'pithos'
    dest_object = os.path.basename(file)
    size = os.path.getsize(file)
    # Hash the file that was passed in, not whatever sys.argv[1] happens to be.
    hashes = HashMap(file)
    hashmap = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}

    try:
        client.create_object_by_hashmap(dest_container, dest_object, hashmap)
    except Fault as fault:
        if fault.status != 409:
            raise
        # Read the payload inside the handler: the exception name must not
        # be relied upon once the except clause has finished.
        missing = fault.data.split('\n')
    else:
        return

    # Everything from the first empty entry onwards is discarded.
    if '' in missing:
        del missing[missing.index(''):]

    # Binary mode so that offsets and block contents match the raw file.
    with open(file, 'rb') as fp:
        for hexhash in missing:
            offset = hashes.index(unhexlify(hexhash)) * BLOCK_SIZE
            fp.seek(offset)
            block = fp.read(BLOCK_SIZE)
            client.create_object(dest_container, '.upload', StringIO(block))

    client.create_object_by_hashmap(dest_container, dest_object, hashmap)
if __name__ == '__main__':
    # Exactly one argument is expected, and it must name an existing file.
    if not (len(sys.argv) == 2 and os.path.isfile(sys.argv[1])):
        print('syntax: %s <file>' % sys.argv[0])
        sys.exit(1)

    client = Pithos_Client(get_server(), get_auth(), get_user())
    # NOTE(review): two consecutive uploads with different arities -- this
    # looks like the old and the new call of the same change; confirm which
    # smart_upload is in scope and keep only the matching call.
    smart_upload(client, sys.argv[1])
    smart_upload(client, sys.argv[1], BLOCK_SIZE, BLOCK_HASH)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment