Commit d64ea483 authored by Sofia Papagiannaki

Migration Tools: Progress II

Refs #1171
parent 08831612
#!/usr/bin/env python
# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
from sqlalchemy import create_engine
from sqlalchemy import Table, MetaData
from pithos.backends.modular import ModularBackend
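
# Shared base for the migration tools: it holds the SQLAlchemy engine,
# schema MetaData and connection that the concrete migrations reuse;
# subclasses are expected to override execute().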
class Migration(object):
    def __init__(self, db):
        self.engine = create_engine(db)
        self.metadata = MetaData(self.engine)
        #self.engine.echo = True
        self.conn = self.engine.connect()

    def execute(self):
        pass
\ No newline at end of file
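
For orientation, concrete migrations subclass this base (apparently shipped as lib/migrate.py, given the later `from lib.migrate import Migration` imports) and override execute(). A minimal sketch of the intended usage; the class name, table name and connection URL below are illustrative, not part of the commit:

    from sqlalchemy import Table
    from lib.migrate import Migration

    class ExampleMigration(Migration):
        def __init__(self, db):
            Migration.__init__(self, db)
            # reflect a legacy table; 'example' is a hypothetical name
            self.example = Table('example', self.metadata, autoload=True)

        def execute(self):
            for row in self.conn.execute(self.example.select()).fetchall():
                print '#', row

    if __name__ == '__main__':
        ExampleMigration('postgresql://user:password@host/gss').execute()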
@@ -32,6 +32,7 @@
 # or implied, of GRNET S.A.
 import os
+import types
 from hashmap import HashMap
 from binascii import hexlify, unhexlify
@@ -39,7 +40,7 @@ from cStringIO import StringIO
 from client import Fault
 
-def upload(client, file, container, prefix):
+def upload(client, file, container, prefix, name=None):
     meta = client.retrieve_container_metadata(container)
     blocksize = int(meta['x-container-block-size'])
@@ -50,7 +51,8 @@ def upload(client, file, container, prefix):
     hashes.load(file)
     map = {'bytes': size, 'hashes': [hexlify(x) for x in hashes]}
-    object = prefix + os.path.split(file)[-1]
+    objectname = name if name else os.path.split(file)[-1]
+    object = prefix + objectname
     try:
         client.create_object_by_hashmap(container, object, map)
     except Fault, fault:
@@ -59,7 +61,11 @@ def upload(client, file, container, prefix):
     else:
         return
 
-    missing = fault.data.split('\n')
+    if type(fault.data) == types.StringType:
+        missing = fault.data.split('\n')
+    elif type(fault.data) == types.ListType:
+        missing = fault.data
     if '' in missing:
         del missing[missing.index(''):]
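
Two changes to upload() here: the new optional name argument decouples the stored object name from the local file's basename, and the 409 handler now accepts fault.data either as a newline-separated string or as a ready-made list of missing block hashes. A usage sketch for the former; the client, paths and names are illustrative:

    # stored as 'docs/readme.txt' instead of 'docs/data.bin'
    upload(client, '/tmp/data.bin', 'pithos', 'docs/', name='readme.txt')
    # with name omitted it falls back to the basename: 'docs/data.bin'
    upload(client, '/tmp/data.bin', 'pithos', 'docs/')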
@@ -43,20 +43,7 @@ from pithos import settings
 from pithos.backends.modular import ModularBackend
 from lib.hashmap import HashMap
-
-class Migration(object):
-    def __init__(self, db):
-        self.engine = create_engine(db)
-        self.metadata = MetaData(self.engine)
-        #self.engine.echo = True
-        self.conn = self.engine.connect()
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
-
-    def execute(self):
-        pass
+from lib.migrate import Migration
 
 class DataMigration(Migration):
     def __init__(self, db):
@@ -91,7 +78,13 @@ class DataMigration(Migration):
         blockhash = self.backend.hash_algorithm
 
         # Loop for all available files.
-        for path in ['README', 'store', 'test']:
+        filebody = Table('filebody', self.metadata, autoload=True)
+        s = select([filebody.c.storedfilepath])
+        rp = self.conn.execute(s)
+        paths = rp.fetchall()
+        rp.close()
+        for path in paths:
             map = HashMap(blocksize, blockhash)
             map.load(path)
             hash = hexlify(map.hash())
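
One caveat with the new query: fetchall() on a SQLAlchemy result returns row tuples, not bare strings, so each path above is a one-column row. If HashMap.load() expects a plain filesystem path, the loop would need to unpack it, e.g.:

    for (path,) in paths:  # unpack the single storedfilepath column
        map = HashMap(blocksize, blockhash)
        map.load(path)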
@@ -33,154 +33,140 @@
 # interpreted as representing official policies, either expressed
 # or implied, of GRNET S.A.
 
 from sqlalchemy import create_engine
-from sqlalchemy import Table, MetaData
+from sqlalchemy import Table
 from sqlalchemy.sql import select
 
-from pithos.api.util import hashmap_hash, get_container_headers
-from pithos.backends.lib.hashfiler import Blocker, Mapper
+from binascii import hexlify
+from pithos.backends.lib.hashfiler import Blocker
 from pithos.aai.models import PithosUser
 
 from django.conf import settings
 
 from pithos.backends.modular import ModularBackend
+from lib.transfer import upload
+from lib.hashmap import HashMap
+from lib.client import Fault
+from lib.migrate import Migration
 
 import json
 import base64
 import os
-class Migration(object):
-    def __init__(self, db):
-        self.engine = create_engine(db)
-        self.metadata = MetaData(self.engine)
-        #self.engine.echo = True
-        self.conn = self.engine.connect()
-
-    def execute(self):
-        pass
-
-class UserMigration(Migration):
-    def __init__(self, db):
-        Migration.__init__(self, db)
-        self.gss_users = Table('gss_user', self.metadata, autoload=True)
-
-    def execute(self):
-        s = self.gss_users.select()
-        users = self.conn.execute(s).fetchall()
-        l = []
-        for u in users:
-            user = PithosUser()
-            user.pk = u['id']
-            user.uniq = u['username']
-            user.realname = u['name']
-            user.is_admin = False
-            user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
-            user.auth_token = base64.b64encode(u['authtoken'])
-            user.auth_token_created = u['creationdate']
-            user.auth_token_expires = u['authtokenexpirydate']
-            user.created = u['creationdate']
-            user.updated = u['modificationdate']
-            print '#', user
-            user.save(update_timestamps=False)
-
 class DataMigration(Migration):
     def __init__(self, db, path, block_size, hash_algorithm):
         Migration.__init__(self, db)
         params = {'blocksize': block_size,
                   'blockpath': os.path.join(path + '/blocks'),
                   'hashtype': hash_algorithm}
         self.blocker = Blocker(**params)
         params = {'mappath': os.path.join(path + '/maps'),
                   'namelen': self.blocker.hashlen}
         self.mapper = Mapper(**params)
 
     def execute(self):
         filebody = Table('filebody', self.metadata, autoload=True)
         s = select([filebody.c.id, filebody.c.storedfilepath])
         rp = self.conn.execute(s)
         while True:
             t = rp.fetchone()
             if not t:
                 break
             id, path = t
             print '#', id, path
             hashlist = self.blocker.block_stor_file(open(path))[1]
             self.mapper.map_stor(id, hashlist)
         rp.close()
 
+import sys
+import hashlib
 
 class ObjectMigration(DataMigration):
     def __init__(self, db, path, block_size, hash_algorithm):
         DataMigration.__init__(self, db, path, block_size, hash_algorithm)
-        options = getattr(settings, 'BACKEND', None)[1]
-        self.backend = ModularBackend(*options)
+        self.wrapper = ClientWrapper()
 
     def create_default_containers(self):
         users = PithosUser.objects.all()
         for u in users:
             print '#', u.uniq
             try:
-                self.backend.put_container(u.uniq, u.uniq, 'pithos', {})
-                self.backend.put_container(u.uniq, u.uniq, 'trash', {})
+                self.wrapper.create_container('pithos', u.uniq)
+                self.wrapper.create_container('trash', u.uniq)
             except NameError, e:
                 pass
 
-    def create_directory_markers(self, parent_id=None, path=None):
+    def get_path(self, child_id):
         folderTable = Table('folder', self.metadata, autoload=True)
         userTable = Table('gss_user', self.metadata, autoload=True)
-        s = select([folderTable.c.id, folderTable.c.name, userTable.c.username])
-        s = s.where(folderTable.c.parent_id == parent_id)
-        s = s.where(folderTable.c.owner_id == userTable.c.id)
+        s = select([folderTable.c.parent_id, folderTable.c.name])
+        s = s.where(folderTable.c.id == child_id)
         rp = self.conn.execute(s)
-        while True:
-            t = rp.fetchone()
-            if not t:
-                path = None
-                break
-            id, name, uuniq = t[0], t[1], t[2]
-            #print id, name, uuniq
-            if parent_id:
-                obj = '%s/%s' %(path, name) if path else name
-                print '#', obj
-                self.backend.update_object_hashmap(uuniq, uuniq, 'pithos', obj, 0, [])
-            else:
-                obj = ''
-            self.create_directory_markers(id, path=obj)
-        rp.close()
-        path = None
+        parent_id, foldername = rp.fetchone()
+        if not parent_id:
+            return ''
+        else:
+            return '%s/%s' %(self.get_path(parent_id), foldername)
 
-    def execute(self):
+    def create_objects(self):
+        fileheader = Table('fileheader', self.metadata, autoload=True)
         filebody = Table('filebody', self.metadata, autoload=True)
-        s = select([filebody.c.id])
+        folder = Table('folder', self.metadata, autoload=True)
+        gss_user = Table('gss_user', self.metadata, autoload=True)
+        j = filebody.join(fileheader, filebody.c.header_id == fileheader.c.id)
+        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
+        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
+        s = select([gss_user.c.username, fileheader.c.folder_id, fileheader.c.name,
+                    filebody.c.storedfilepath], from_obj=j)
         rp = self.conn.execute(s)
-        while True:
-            id = rp.fetchone()
-            if not id:
-                break
-            meta = {}
-            hashlist = self.mapper.map_retr(id)
-            #hashmap = d['hashes']
-            #size = int(d['bytes'])
-            #meta.update({'hash': hashmap_hash(request, hashmap)})
-            #version_id = backend.update_object_hashmap(request.user, v_account,
-            #                                           v_container, v_object,
-            #                                           size, hashmap)
-        rp.close()
+        objects = rp.fetchall()
+        for username, folderid, filename, filepath in objects:
+            path = self.get_path(folderid)[1:]
+            obj = ''
+            #create directory markers
+            for f in path.split('/'):
+                obj = '%s/%s' %(obj, f) if obj else f
+                try:
+                    self.wrapper.create_directory_marker('pithos', obj, username)
+                except NameError, e:
+                    pass
+            self.wrapper.set_account(username)
+            print '#', username, path, filename
+            prefix = '%s/' %path if path else ''
+            upload(self.wrapper, filepath, 'pithos', prefix, filename)
 
+class ClientWrapper(object):
+    """Wraps client methods used by transfer.upload()
+       to ModularBackend methods"""
+
+    def __init__(self):
+        options = getattr(settings, 'BACKEND', None)[1]
+        self.backend = ModularBackend(*options)
+        self.block_size = self.backend.block_size
+        self.block_hash = self.backend.hash_algorithm
+
+    def set_account(self, account):
+        self.account = account
+
+    def create_container(self, container, account=None, **meta):
+        self.backend.put_container(account, account, container, meta)
+
+    def create_directory_marker(self, container, object, account=None):
+        md5 = hashlib.md5()
+        meta = {'Content-Type':'application/directory',
+                'hash': md5.hexdigest().lower()}
+        self.backend.update_object_hashmap(account, account, container, object, 0, [], meta)
+
+    def create_object_by_hashmap(self, container, object, map):
+        hashmap = HashMap(self.block_size, self.block_hash)
+        for hash in map['hashes']:
+            hashmap.append(hash)
+        meta = {'hash':hexlify(hashmap.hash())}
+        size = map['bytes']
+        try:
+            args = [self.account, self.account, container, object, size, map['hashes'], meta]
+            self.backend.update_object_hashmap(*args)
+        except IndexError, ie:
+            fault = Fault(ie.data, 409)
+            raise fault
+
+    def create_object(self, container, object, f):
+        hashmap = HashMap(self.block_size, self.block_hash)
+        hashmap.load(f)
+        map = [hexlify(x) for x in hashmap]
+        meta = {'hash':hashmap.hash()}
+        size = hashmap.size
+        self.backend.update_object_hashmap(self.account, self.account, container, object, size, hashmap, meta)
+
+    def retrieve_container_metadata(self, container):
+        return {'x-container-block-size':self.block_size,
+                'x-container-block-hash':self.block_hash}
+
 if __name__ == "__main__":
     db = ''
-    t = UserMigration(db)
-    t.execute()
 
     basepath = options = getattr(settings, 'PROJECT_PATH', None)
     params = {'db':db,
               'path':os.path.join(basepath, 'data/pithos/'),
               'block_size':(4 * 1024 * 1024),
               'hash_algorithm':'sha256'}
-    dt = DataMigration(**params)
-    dt.execute()
     ot = ObjectMigration(**params)
     ot.create_default_containers()
-    ot.create_directory_markers()
+    ot.create_objects()
\ No newline at end of file
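
A note on the new get_path()/create_objects() pair: the recursion stops at the root folder by returning an empty string (the root's own name is dropped), so every non-root result carries a leading slash, which create_objects() strips with [1:]. A small trace under hypothetical folder rows:

    # assumed rows in 'folder':
    #   (id=2, parent_id=None, name='home')  <- root; name is dropped
    #   (id=3, parent_id=2,    name='docs')
    ot.get_path(3)      # -> '/docs'
    ot.get_path(3)[1:]  # -> 'docs', the prefix used for the upload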
#!/usr/bin/env python
# Copyright 2011 GRNET S.A. All rights reserved.
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
#
# 1. Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# 2. Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.
from lib.migrate import Migration
from sqlalchemy import Table
from pithos.aai.models import PithosUser
import base64
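
# Copies each row of the legacy gss_user table into a PithosUser,
# carrying over ids, auth tokens and timestamps unchanged
# (update_timestamps=False preserves the original dates on save).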
class UserMigration(Migration):
    def __init__(self, db):
        Migration.__init__(self, db)
        self.gss_users = Table('gss_user', self.metadata, autoload=True)

    def execute(self):
        s = self.gss_users.select()
        users = self.conn.execute(s).fetchall()
        l = []
        for u in users:
            user = PithosUser()
            user.pk = u['id']
            user.uniq = u['username']
            user.realname = u['name']
            user.is_admin = False
            user.affiliation = u['homeorganization'] if u['homeorganization'] else ''
            user.auth_token = base64.b64encode(u['authtoken'])
            user.auth_token_created = u['creationdate']
            user.auth_token_expires = u['authtokenexpirydate']
            user.created = u['creationdate']
            user.updated = u['modificationdate']
            print '#', user
            user.save(update_timestamps=False)
if __name__ == "__main__":
    db = 'postgresql://gss:m0ust@rda@62.217.112.56/pithos'
    m = UserMigration(db)
    m.execute()
\ No newline at end of file