#!/usr/bin/env python
# Copyright (C) 2010-2014 GRNET S.A.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
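"""Migrate file and folder metadata from a legacy GSS database into Pithos.

The script walks the old GSS tables (gss_user, folder, fileheader, filebody,
filetag and the *_permission tables), recreates the corresponding containers,
nodes, versions, tags and sharing permissions through the Pithos backend, and
writes an auxiliary 'fixdates.sql' file with UPDATE statements that restore
the original modification times on the migrated versions.
"""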
from sqlalchemy import Table
from sqlalchemy.sql import select, and_
from binascii import hexlify
from pithos.backends.lib.hashfiler import Blocker
from pithos.backends.lib.sqlalchemy import Node
from django.conf import settings
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE
from pithos.tools.lib.transfer import upload
from pithos.tools.lib.hashmap import HashMap
from pithos.tools.lib.client import Fault
from migrate import Migration, Cache
from calendar import timegm
from decimal import Decimal
from collections import defaultdict
import json
import os
import sys
import hashlib
import mimetypes
import time
import datetime
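
# Column offsets into the rows returned by the folder/gss_user join used in
# build_path() below (the 'folder' columns followed by gss_user.username).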
(ID, CREATIONDATE, MODIFICATIONDATE, DELETED, ICON, NAME, VERSION,
 CREATEDBY_ID, MODIFIEDBY_ID, OWNER_ID, PARENT_ID, READFORALL, SHARED,
 USER) = range(14)


class ObjectMigration(Migration):
    def __init__(self, old_db, db, f):
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        self.f = f  # handle of the 'fixdates.sql' output file

    def create_node(self, username, container, object):
        node = self.backend.node.node_lookup(object)
        if not node:
            parent_path = '%s/%s' % (username, container)
            parent_node = self.backend.node.node_lookup(parent_path)
            if not parent_node:
                raise Exception('Missing node')
            node = self.backend.node.node_create(parent_node, object)
        return node

    def create_history(self, header_id, node_id, deleted=False):
        i = 0
        map = HashMap(self.backend.block_size, self.backend.hash_algorithm)
        v = []
        stored_versions = self.backend.node.node_get_versions(node_id, ['mtime'])
        stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0])
                                 for elem in stored_versions]
        for t, rowcount in self.retrieve_node_versions(header_id):
            size, modified_by, filepath, mimetype, mdate = t
            if mdate in stored_versions_mtime:
                continue  # version already migrated
            cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL
            cluster = cluster if not deleted else CLUSTER_DELETED
            hash = self.cache.get(filepath)
            if hash is None:
                raise Exception("Missing hash")
            args = node_id, hash, size, modified_by, cluster, mimetype, mdate
            v.append(self.create_version(*args))
            i += 1
        return v

    def create_version(self, node_id, hash, size, modified_by, cluster,
                       mimetype, mdate):
        args = (node_id, hash, size, None, modified_by, cluster)
        serial = self.backend.node.version_create(*args)[0]
        meta = {'hash': hash,
                'content-type': mimetype}
        self.backend.node.attribute_set(serial,
                                        ((k, v) for k, v in meta.iteritems()))
        # The new version gets the current time as mtime; emit an UPDATE
        # statement that restores the original modification date.
        timestamp = timegm(mdate.timetuple())
        microseconds = mdate.time().microsecond
        values = timestamp, microseconds, serial
        self.f.write("update versions set mtime='%d.%06d' where serial=%s;\n"
                     % values)
        return serial

    def create_tags(self, header_id, node_id, vserials):
        tags = self.retrieve_tags(header_id)
        if not tags:
            return
        for v in vserials:
            self.backend.node.attribute_set(v, (('X-Object-Meta-Tag', tags),))

    def create_permissions(self, fid, path, owner, is_folder=True):
        fpath, fpermissions = self.backend.permissions.access_inherit(path)
        permissions = self.retrieve_permissions(fid, is_folder)
        keys = ('read', 'write')
        if not fpermissions:
            for k in keys:
                if owner in permissions[k]:
                    permissions[k].remove(owner)
            self.backend.permissions.access_set(path, permissions)
        else:
            common_p = {}
            for k in keys:
                if owner in permissions[k]:
                    permissions[k].remove(owner)
                common = set(fpermissions[k]).intersection(set(permissions[k]))
                common_p[k] = list(common)
            # keep only the common permissions
            # trade-off for securing access only to explicitly authorized users
            self.backend.permissions.access_set(fpath, common_p)

    def create_objects(self):
        for t in self.retrieve_current_nodes():
            (username, headerid, folderid, filename, deleted, filepath,
             mimetype, public, owner_id) = t
            containers = ['pithos', 'trash']
            for c in containers:
                # create container if it does not exist
                try:
                    self.backend._lookup_container(username, c)
                except NameError:
                    self.backend.put_container(username, username, c)
            container = 'pithos' if not deleted else 'trash'
            path = self.build_path(folderid)
            # create node
            object = '%s/%s' % (username, container)
            object = ('%s/%s/%s' % (object, path, filename) if path
                      else '%s/%s' % (object, filename))
            args = username, container, object
            nodeid = self.create_node(*args)
            # create node history
            vserials = self.create_history(headerid, nodeid, deleted)
            # set object tags
            self.create_tags(headerid, nodeid, vserials)
            # set object's publicity
            if public:
                self.backend.permissions.public_set(
                    object,
                    self.backend.public_url_security,
                    self.backend.public_url_alphabet)
            # set object's permissions
            self.create_permissions(headerid, object, username,
                                    is_folder=False)

    def build_path(self, child_id):
        folder = Table('folder', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = folder.join(user, folder.c.owner_id == user.c.id)
        s = select([folder, user.c.username], from_obj=j)
        s = s.where(folder.c.id == child_id)
        s = s.order_by(folder.c.modificationdate)
        rp = self.conn.execute(s)
        t = rp.fetchone()
        # directories carry no content: use the hash of the empty string
        md5 = hashlib.md5()
        hash = md5.hexdigest().lower()
        size = 0
        if not t[PARENT_ID]:
            return ''
        else:
            container_path = t[USER]
            container_path += '/trash' if t[DELETED] else '/pithos'
            parent_node = self.backend.node.node_lookup(container_path)
            if not parent_node:
                raise Exception('Missing node:', container_path)
            parent_path = self.build_path(t[PARENT_ID])
            path = ('%s/%s/%s' % (container_path, parent_path, t[NAME])
                    if parent_path else '%s/%s' % (container_path, t[NAME]))
            node = self.backend.node.node_lookup(path)
            if not node:
                node = self.backend.node.node_create(parent_node, path)
                if not node:
                    raise Exception('Unable to create node:', path)
                # create versions for the new folder node
                v = self.create_version(node, hash, size, t[USER],
                                        CLUSTER_NORMAL,
                                        'application/directory',
                                        t[CREATIONDATE])
                if t[CREATIONDATE] != t[MODIFICATIONDATE]:
                    self.backend.node.version_recluster(v, CLUSTER_HISTORY)
                    self.create_version(node, hash, size, t[USER],
                                        CLUSTER_NORMAL,
                                        'application/directory',
                                        t[MODIFICATIONDATE])
                # set permissions
                self.create_permissions(t[ID], path, t[USER], is_folder=True)
            return ('%s/%s' % (parent_path, t[NAME]) if parent_path
                    else t[NAME])

    def retrieve_current_nodes(self):
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
                    fileheader.c.name, fileheader.c.deleted,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)
        rp = self.conn.execute(s)
        object = rp.fetchone()
        while object:
            yield object
            object = rp.fetchone()
        rp.close()

    def retrieve_node_versions(self, header_id):
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        version = rp.fetchone()
        while version:
            yield version, rp.rowcount
            version = rp.fetchone()
        rp.close()

    def retrieve_tags(self, header_id):
        filetag = Table('filetag', self.metadata, autoload=True)
        s = select([filetag.c.tag], filetag.c.fileid == header_id)
        rp = self.conn.execute(s)
        tags = rp.fetchall() if rp.returns_rows else []
        tags = [elem[0] for elem in tags]
        rp.close()
        return ','.join(tags) if tags else ''

    def retrieve_permissions(self, id, is_folder=True):
        if is_folder:
            ftable = Table('folder_permission', self.metadata, autoload=True)
        else:
            ftable = Table('fileheader_permission', self.metadata, autoload=True)
        permission = Table('permission', self.metadata, autoload=True)
        group = Table('gss_group', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
        j1 = j.join(group, group.c.id == permission.c.group_id)
        j2 = j.join(user, user.c.id == permission.c.user_id)
        permissions = defaultdict(list)

        def _get_permissions(action='read', get_groups=True):
            if get_groups:
                col, j = group.c.name, j1
                cond2 = permission.c.group_id != None
            else:
                col, j = user.c.username, j2
                cond2 = permission.c.user_id != None
            s = select([col], from_obj=j)
            if is_folder:
                s = s.where(ftable.c.folder_id == id)
            else:
                s = s.where(ftable.c.fileheader_id == id)
            if action == 'read':
                cond1 = permission.c.read == True
            else:
                cond1 = permission.c.write == True
            s = s.where(and_(cond1, cond2))
            print '>', s, s.compile().params
            rp = self.conn.execute(s)
            permissions[action].extend([e[0] for e in rp.fetchall()])
            rp.close()

        # get object read groups
        _get_permissions(action='read', get_groups=True)
        # get object read users
        _get_permissions(action='read', get_groups=False)
        # get object write groups
        _get_permissions(action='write', get_groups=True)
        # get object write users
        _get_permissions(action='write', get_groups=False)

        return permissions


if __name__ == "__main__":
    # Fill in the connection strings before running: old_db points at the
    # legacy GSS database, db at the migration cache database used by Cache.
    old_db = ''
    db = ''

    f = open('fixdates.sql', 'w')
    ot = ObjectMigration(old_db, db, f)
    ot.create_objects()
    f.close()
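
# A minimal usage sketch, assuming Migration and Cache accept standard
# database connection URLs (the values below are illustrative placeholders,
# not defaults shipped with this tool):
#
#     old_db = 'postgresql://gss:secret@localhost/gss'
#     db = 'sqlite:///migration_cache.db'
#     f = open('fixdates.sql', 'w')
#     ObjectMigration(old_db, db, f).create_objects()
#     f.close()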