migrate-db
#!/usr/bin/env python

# Copyright (C) 2010-2014 GRNET S.A.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from sqlalchemy import Table
from sqlalchemy.sql import select, and_

from binascii import hexlify

from pithos.backends.lib.hashfiler import Blocker
from pithos.backends.lib.sqlalchemy import Node

from django.conf import settings

from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE

from pithos.tools.lib.transfer import upload
from pithos.tools.lib.hashmap import HashMap
from pithos.tools.lib.client import Fault

from migrate import Migration, Cache

from calendar import timegm
from decimal import Decimal
from collections import defaultdict

import json
import os
import sys
import hashlib
import mimetypes
import time
import datetime

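# Column indices for the rows returned by the folder/gss_user join used in
# build_path() below.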
(ID, CREATIONDATE, MODIFICATIONDATE, DELETED, ICON, NAME, VERSION, CREATEDBY_ID, MODIFIEDBY_ID, OWNER_ID, PARENT_ID, READFORALL, SHARED, USER) = range(14)

class ObjectMigration(Migration):
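    """Migrate file objects from the old service database (fileheader,
    filebody, folder and gss_user tables) into the Pithos backend,
    recreating each object's version history, tags and sharing permissions.
    """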
    def __init__(self, old_db, db, f):
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        self.f = f
    
    def create_node(self, username, container, object):
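        """Return the node id for the given object path, creating the node
        under its account/container parent node if it does not exist."""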
        node = self.backend.node.node_lookup(object)
        if not node:
            parent_path = '%s/%s' %(username, container)
            parent_node = self.backend.node.node_lookup(parent_path)
            if not parent_node:
                raise Exception('Missing node')
            node = self.backend.node.node_create(parent_node, object)
        return node
    
    def create_history(self, header_id, node_id, deleted=False):
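        """Create versions for every body row of the file that is not already
        stored, marking all but the latest as CLUSTER_HISTORY (all of them as
        CLUSTER_DELETED if the file is trashed). Return the new serials."""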
        i = 0
        v = []
        stored_versions = self.backend.node.node_get_versions(node_id, ['mtime'])
        stored_versions_mtime = [datetime.datetime.utcfromtimestamp(elem[0]) for elem in stored_versions]
        for t, rowcount in self.retrieve_node_versions(header_id):
            size, modified_by, filepath, mimetype, mdate = t
            if mdate in stored_versions_mtime:
                continue
            cluster = CLUSTER_HISTORY if i < rowcount - 1 else CLUSTER_NORMAL
            cluster = cluster if not deleted else CLUSTER_DELETED
            hash = self.cache.get(filepath)
            if hash is None:
                raise Exception('Missing hash')
            args = node_id, hash, size, modified_by, cluster, mimetype, mdate
            v.append(self.create_version(*args))
            i += 1
        return v
    
    def create_version(self, node_id, hash, size, modified_by, cluster, mimetype, mdate):
        """Create a new version of the node in the given cluster, store its
        hash and content-type attributes and emit an SQL statement that
        restores the original modification time."""
        args = (node_id, hash, size, None, modified_by, cluster)
        serial = self.backend.node.version_create(*args)[0]
        meta = {'hash': hash,
                'content-type': mimetype}
        self.backend.node.attribute_set(serial, ((k, v) for k, v in meta.iteritems()))
        timestamp = timegm(mdate.timetuple())
        microseconds = mdate.time().microsecond
        values = timestamp, microseconds, serial
        #zero-pad the microseconds so the emitted literal is a valid decimal
        self.f.write('update versions set mtime=\'%10d.%06d\' where serial=%s;\n' % values)
        return serial
    
    def create_tags(self, header_id, node_id, vserials):
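        """Store the file's legacy tags, if any, as a comma-separated
        X-Object-Meta-Tag attribute on every migrated version."""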
        tags = self.retrieve_tags(header_id)
        if not tags:
            return
        for v in vserials:
            self.backend.node.attribute_set(v, (('X-Object-Meta-Tag', tags),))
    
    def create_permissions(self, fid, path, owner, is_folder=True):
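        """Transfer read/write permissions from the old database, dropping the
        owner from both lists. If the path already inherits permissions from
        an ancestor, keep only the intersection of the inherited and migrated
        permissions (see the trade-off comment below)."""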
        fpath, fpermissions = self.backend.permissions.access_inherit(path)
        permissions = self.retrieve_permissions(fid, is_folder)
        if not fpermissions:
            keys = ('read', 'write')
            for k in keys:
                if owner in permissions[k]:
                    permissions[k].remove(owner)
            self.backend.permissions.access_set(path, permissions)
        else:
            keys = ('read', 'write')
            common_p = {}
            for k in keys:
                if owner in permissions[k]:
                    permissions[k].remove(owner)
                common = set(fpermissions[k]).intersection(set(permissions[k]))
                common_p[k] = list(common)
            #keep only the common permissions
            #trade off for securing access only to explicitly authorized users
            self.backend.permissions.access_set(fpath, common_p)
    
    def create_objects(self):
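        """Migrate every current file: ensure the 'pithos' and 'trash'
        containers exist, create the object node and its version history,
        then apply tags, public status and permissions."""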
        for t in self.retrieve_current_nodes():
            username, headerid, folderid, filename, deleted, filepath, mimetype, public, owner_id = t
            containers = ['pithos', 'trash']
            
            for c in containers:
                #create container if it does not exist
                try:
                    self.backend._lookup_container(username, c)
                except NameError:
                    self.backend.put_container(username, username, c)
            
            container = 'pithos' if not deleted else 'trash'
            path = self.build_path(folderid)
            #create node
            object = '%s/%s' %(username, container)
            object = '%s/%s/%s' %(object, path, filename) if path else '%s/%s' %(object, filename)
            args = username, container, object
            nodeid = self.create_node(*args)
            #create node history 
            vserials = self.create_history(headerid, nodeid, deleted)
            #set object tags
            self.create_tags(headerid, nodeid, vserials)
            #set object's publicity
            if public:
                self.backend.permissions.public_set(
                    object,
                    self.backend.public_url_security,
                    self.backend.public_url_alphabet
                )
            #set object's permissions
            self.create_permissions(headerid, object, username, is_folder=False)
    
    def build_path(self, child_id):
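        """Return the folder's path relative to its container ('' for a
        top-level folder), creating directory nodes, versions and permissions
        for any folders that have not been migrated yet."""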
        folder = Table('folder', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = folder.join(user, folder.c.owner_id == user.c.id)
        s = select([folder, user.c.username], from_obj=j)
        s = s.where(folder.c.id == child_id)
        s = s.order_by(folder.c.modificationdate)
        rp = self.conn.execute(s)
        t = rp.fetchone()
        #directory versions are stored as empty objects: zero size and the
        #hash of no content
        md5 = hashlib.md5()
        hash = md5.hexdigest().lower()
        size = 0
        if not t[PARENT_ID]:
            return ''
        else:
            container_path = t[USER]
            container_path += '/trash' if t[DELETED] else '/pithos'
            parent_node = self.backend.node.node_lookup(container_path)
            if not parent_node:
                raise Exception('Missing node:', container_path)
            parent_path = self.build_path(t[PARENT_ID])
            path = '%s/%s/%s' %(container_path, parent_path, t[NAME]) if parent_path else '%s/%s' %(container_path, t[NAME])
            node = self.backend.node.node_lookup(path)
            if not node:
                node = self.backend.node.node_create(parent_node, path)
                if not node:
                    raise Exception('Unable to create node:', path)
                
                #create versions
                v = self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE])
                if t[CREATIONDATE] != t[MODIFICATIONDATE]:
                    self.backend.node.version_recluster(v, CLUSTER_HISTORY)
                    self.create_version(node, hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE])
                
                #set permissions
                self.create_permissions(t[ID], path, t[USER], is_folder=True)
            return '%s/%s' %(parent_path, t[NAME]) if parent_path else t[NAME]
    
    def retrieve_current_nodes(self):
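        """Yield one row per file in the old database: username, header id,
        folder id, name, deleted flag, stored file path, mimetype, public
        (readforall) flag and owner id."""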
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username,  fileheader.c.id, fileheader.c.folder_id,
                    fileheader.c.name,  fileheader.c.deleted,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)
        rp = self.conn.execute(s)
        object = rp.fetchone()
        while object:
            yield object
            object = rp.fetchone()
        rp.close()
    
    def retrieve_node_versions(self, header_id):
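        """Yield (row, total_rows) for every body version of the given file
        header, ordered by version; each row holds size, modifier username,
        stored file path, mimetype and modification date."""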
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        version = rp.fetchone()
        while version:
            yield version, rp.rowcount
            version = rp.fetchone()
        rp.close()
    
    def retrieve_tags(self, header_id):
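        """Return the file's tags as a comma-separated string ('' if none)."""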
        filetag = Table('filetag', self.metadata, autoload=True)
        s = select([filetag.c.tag], filetag.c.fileid == header_id)
        rp = self.conn.execute(s)
        tags = rp.fetchall() if rp.returns_rows else []
        tags = [elem[0] for elem in tags]
        rp.close()
        return ','.join(tags) if tags else ''
    
    def retrieve_permissions(self, id, is_folder=True):
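        """Return a dict mapping 'read' and 'write' to the lists of group
        names and usernames that hold the corresponding permission on the
        folder or file with the given id."""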
        if is_folder:
            ftable = Table('folder_permission', self.metadata, autoload=True)
        else:
            ftable = Table('fileheader_permission', self.metadata, autoload=True)
        permission = Table('permission', self.metadata, autoload=True)
        group = Table('gss_group', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
        j1 = j.join(group, group.c.id == permission.c.group_id)
        j2 = j.join(user, user.c.id == permission.c.user_id)
        
        permissions = defaultdict(list)
        
        def _get_permissions(action='read', get_groups=True):
            if get_groups:
                col, j = group.c.name, j1
                cond2 = permission.c.group_id != None
            else:
                col, j = user.c.username, j2
                cond2 = permission.c.user_id != None
            s = select([col], from_obj=j)
            if is_folder:
                s = s.where(ftable.c.folder_id == id)
            else:
                s = s.where(ftable.c.fileheader_id == id)
            if action == 'read':
                cond1 = permission.c.read == True
            else:
                cond1 = permission.c.write == True
            s = s.where(and_(cond1, cond2))
            print '>', s, s.compile().params
            rp = self.conn.execute(s)
            permissions[action].extend([e[0] for e in rp.fetchall()])
            rp.close()
        
        #get object read groups
        _get_permissions(action='read', get_groups=True)
        
        #get object read users
        _get_permissions(action='read', get_groups=False)
        
        #get object write groups
        _get_permissions(action='write', get_groups=True)
        
        #get object write users
        _get_permissions(action='write', get_groups=False)
        
        return permissions
    
if __name__ == "__main__":
    old_db = ''
    db = ''
    
    f = open('fixdates.sql', 'w')
    ot = ObjectMigration(old_db, db, f)
    ot.create_objects()
    f.close()