#!/usr/bin/env python

# Copyright 2011-2012 GRNET S.A. All rights reserved.
# 
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the following
# conditions are met:
# 
#   1. Redistributions of source code must retain the above
#      copyright notice, this list of conditions and the following
#      disclaimer.
# 
#   2. Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# 
# The views and conclusions contained in the software and
# documentation are those of the authors and should not be
# interpreted as representing official policies, either expressed
# or implied, of GRNET S.A.

from sqlalchemy import Table
37
from sqlalchemy.sql import select, and_
38 39 40 41

from binascii import hexlify

from pithos.backends.lib.hashfiler import Blocker
42
from pithos.backends.lib.sqlalchemy import Node
43 44 45

from django.conf import settings

46
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
47
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE
48

49 50 51 52 53 54
from pithos.lib.transfer import upload
from pithos.lib.hashmap import HashMap
from pithos.lib.client import Fault

from migrate import Migration, Cache

55
from calendar import timegm
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
56 57
from decimal import Decimal
from collections import defaultdict
58 59 60 61 62

import json
import os
import sys
import hashlib
63
import mimetypes
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
64 65 66 67
import time
import datetime

# Positional indices used to read fields out of a fetched row by position
# instead of by column name.
# NOTE(review): presumably these mirror the column order of the old `folder`
# table followed by one extra joined `username` column (USER) -- confirm
# against the old database schema.
(
    ID,
    CREATIONDATE,
    MODIFICATIONDATE,
    DELETED,
    ICON,
    NAME,
    VERSION,
    CREATEDBY_ID,
    MODIFIEDBY_ID,
    OWNER_ID,
    PARENT_ID,
    READFORALL,
    SHARED,
    USER,
) = range(14)
68

69
class ObjectMigration(Migration):
    """Copy files, folders, tags and permissions from the old database
    into the Pithos backend.

    The class relies on ``self.backend``, ``self.metadata`` and ``self.conn``,
    none of which are set here.
    NOTE(review): presumably they are provided by ``Migration.__init__`` --
    confirm against the ``migrate`` module.
    """

    def __init__(self, old_db, db, f):
        """Set up the migration.

        :param old_db: forwarded unchanged to ``Migration.__init__``.
        :param db: forwarded unchanged to ``Cache``.
        :param f: writable file-like object that receives the SQL statements
                  emitted by :meth:`create_version`.
        """
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # Keep the output handle on the instance so that create_version()
        # does not depend on a module-level global that happens to be
        # called ``f``.
        self.f = f

    def create_node(self, username, container, object):
        """Return the node for the path ``object``, creating it if needed.

        A new node is created as a child of the ``username/container`` node.

        :raises Exception: if the container node does not exist.
        """
        node = self.backend.node.node_lookup(object)
        if node:
            return node

        parent_path = '%s/%s' % (username, container)
        parent_node = self.backend.node.node_lookup(parent_path)
        if not parent_node:
            raise Exception('Missing node')
        return self.backend.node.node_create(parent_node, object)

    def create_history(self, header_id, node_id, deleted=False):
        """Create one backend version per old file body of ``header_id``.

        Old versions whose modification date matches the mtime of a version
        already stored for ``node_id`` are skipped.  Every version goes to
        CLUSTER_DELETED when ``deleted`` is true; otherwise the last version
        (in old version order) goes to CLUSTER_NORMAL and all the others to
        CLUSTER_HISTORY.

        :returns: list with the serials of the versions created by this call.
        :raises Exception: if the cache has no hash for a stored file path.
        """
        stored_versions = self.backend.node.node_get_versions(node_id, ['mtime'])
        stored_mtimes = [datetime.datetime.utcfromtimestamp(elem[0])
                         for elem in stored_versions]

        serials = []
        versions = self.retrieve_node_versions(header_id)
        # The cluster depends on the position of the version in the old
        # history, not on how many versions were created so far: counting
        # only created versions would push the newest version into
        # CLUSTER_HISTORY whenever earlier versions are skipped.
        for position, (row, rowcount) in enumerate(versions):
            size, modified_by, filepath, mimetype, mdate = row
            if mdate in stored_mtimes:
                continue

            if deleted:
                cluster = CLUSTER_DELETED
            elif position < rowcount - 1:
                cluster = CLUSTER_HISTORY
            else:
                cluster = CLUSTER_NORMAL

            content_hash = self.cache.get(filepath)
            if content_hash is None:
                raise Exception("Missing hash")

            serials.append(self.create_version(node_id, content_hash, size,
                                               modified_by, cluster, mimetype,
                                               mdate))
        return serials

    def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate):
        """Create a version of ``node_id`` and record its original mtime.

        The version gets the ``hash`` and ``content-type`` attributes.  The
        modification time is not set through the backend; instead an SQL
        ``update`` statement for it is written to the output file given to
        the constructor.

        :param mdate: ``datetime`` of the modification; converted with
                      ``calendar.timegm``, i.e. interpreted as UTC.
        :returns: the serial of the new version.
        """
        # NOTE(review): the fourth positional argument (None) is presumably
        # the source version -- confirm against node.version_create().
        serial = self.backend.node.version_create(
            node_id, hash, size, None, modyfied_by, cluster)[0]

        meta = {'hash': hash, 'content-type': mimetype}
        self.backend.node.attribute_set(serial, meta.items())

        timestamp = timegm(mdate.timetuple())
        # Microseconds must be zero-padded: with a space-padded field 123us
        # would be rendered as "<seconds>.   123", which is not a number.
        values = (timestamp, mdate.microsecond, serial)
        self.f.write("update versions set mtime='%d.%06d' where serial=%s;" % values)
        return serial

    def create_tags(self, header_id, node_id, vserials):
        """Attach the tags of old file ``header_id`` to every given version.

        The tags are stored as one comma separated ``X-Object-Meta-Tag``
        attribute.  Nothing is written when the file has no tags.
        ``node_id`` is not used.
        """
        tags = self.retrieve_tags(header_id)
        if not tags:
            return
        for serial in vserials:
            self.backend.node.attribute_set(serial, (('X-Object-Meta-Tag', tags),))

    def create_permissions(self, fid, path, owner, is_folder=True):
        """Migrate the read/write permissions of an old file or folder.

        The owner is dropped from the migrated lists.  If no permissions are
        inherited for ``path`` the migrated ones are set on ``path``;
        otherwise the path the permissions are inherited from is narrowed
        down to the entries common to both sets.
        """
        fpath, fpermissions = self.backend.permissions.access_inherit(path)
        permissions = self.retrieve_permissions(fid, is_folder)

        actions = ('read', 'write')
        for action in actions:
            if owner in permissions[action]:
                permissions[action].remove(owner)

        if not fpermissions:
            self.backend.permissions.access_set(path, permissions)
            return

        # keep only the common permissions
        # trade off for securing access only to explicitly authorized users
        common_p = {}
        for action in actions:
            common = set(fpermissions[action]).intersection(permissions[action])
            common_p[action] = list(common)
        self.backend.permissions.access_set(fpath, common_p)

    def create_objects(self):
        """Migrate every current file of the old database.

        For each file: make sure the owner has the ``pithos`` and ``trash``
        containers, create the folder nodes leading to the file, create the
        file node and its version history, then copy tags, publicity and
        permissions.  Deleted files go to ``trash``, all others to ``pithos``.
        """
        for row in self.retrieve_current_nodes():
            (username, headerid, folderid, filename, deleted,
             filepath, mimetype, public, owner_id) = row

            for c in ('pithos', 'trash'):
                # create container if it does not exist
                # NOTE(review): a missing container is detected through
                # NameError, as in the original code -- confirm this is what
                # backend._lookup_container raises.
                try:
                    self.backend._lookup_container(username, c)
                except NameError:
                    self.backend.put_container(username, username, c)

            container = 'trash' if deleted else 'pithos'
            path = self.build_path(folderid)

            # create node
            if path:
                object_path = '%s/%s/%s/%s' % (username, container, path, filename)
            else:
                object_path = '%s/%s/%s' % (username, container, filename)
            nodeid = self.create_node(username, container, object_path)

            # create node history
            vserials = self.create_history(headerid, nodeid, deleted)
            # set object tags
            self.create_tags(headerid, nodeid, vserials)
            # set object's publicity
            if public:
                self.backend.permissions.public_set(object_path)
            # set object's permissions
            self.create_permissions(headerid, object_path, username, is_folder=False)

    def build_path(self, child_id):
        """Return the container-relative path of folder ``child_id``.

        Walks up the folder hierarchy recursively.  Every folder on the way
        that has no node yet gets a node, one or two ``application/directory``
        versions (creation and, if different, modification date) and its
        permissions.  A folder without a parent yields ``''``.

        :raises Exception: if the folder row, the container node or a newly
                           created node is missing.
        """
        folder = Table('folder', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = folder.join(user, folder.c.owner_id == user.c.id)
        s = select([folder, user.c.username], from_obj=j)
        s = s.where(folder.c.id == child_id)

        rp = self.conn.execute(s)
        try:
            row = rp.fetchone()
            # Copy the values out so the row stays usable after close().
            t = tuple(row) if row is not None else None
        finally:
            # This method recurses; release the cursor before going deeper.
            rp.close()
        if t is None:
            raise Exception('Missing folder:', child_id)

        if not t[PARENT_ID]:
            return ''

        container_path = '%s/%s' % (t[USER], 'trash' if t[DELETED] else 'pithos')
        parent_node = self.backend.node.node_lookup(container_path)
        if not parent_node:
            raise Exception('Missing node:', container_path)

        parent_path = self.build_path(t[PARENT_ID])
        if parent_path:
            relative_path = '%s/%s' % (parent_path, t[NAME])
        else:
            relative_path = t[NAME]
        path = '%s/%s' % (container_path, relative_path)

        node = self.backend.node.node_lookup(path)
        if not node:
            node = self.backend.node.node_create(parent_node, path)
            if not node:
                raise Exception('Unable to create node:', path)

            # create versions; a folder is stored with the digest of empty
            # content and size 0
            empty_hash = hashlib.md5().hexdigest().lower()
            v = self.create_version(node, empty_hash, 0, t[USER],
                                    CLUSTER_NORMAL, 'application/directory',
                                    t[CREATIONDATE])
            if t[CREATIONDATE] != t[MODIFICATIONDATE]:
                self.backend.node.version_recluster(v, CLUSTER_HISTORY)
                self.create_version(node, empty_hash, 0, t[USER],
                                    CLUSTER_NORMAL, 'application/directory',
                                    t[MODIFICATIONDATE])

            # set permissions
            self.create_permissions(t[ID], path, t[USER], is_folder=True)

        return relative_path

    def retrieve_current_nodes(self):
        """Yield one row per old file header, joined with its current body.

        Each row holds: username, file header id, folder id, file name,
        deleted flag, stored file path, mimetype, readforall flag, owner id.
        """
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)

        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username, fileheader.c.id, fileheader.c.folder_id,
                    fileheader.c.name, fileheader.c.deleted,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)

        rp = self.conn.execute(s)
        try:
            row = rp.fetchone()
            while row:
                yield row
                row = rp.fetchone()
        finally:
            # Also runs when the consumer stops early or raises.
            rp.close()

    def retrieve_node_versions(self, header_id):
        """Yield ``(row, total)`` for every body of file ``header_id``.

        Rows are ordered by version and hold: file size, username of the
        modifier, stored file path, mimetype, modification date.  ``total``
        is the number of rows and is the same for every yielded pair.
        """
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)

        rp = self.conn.execute(s)
        try:
            versions = rp.fetchall()
        finally:
            rp.close()

        # The row count reported by the driver is not dependable for SELECT
        # statements (it may be -1), so count the fetched rows instead.
        total = len(versions)
        for version in versions:
            yield version, total

    def retrieve_tags(self, header_id):
        """Return the tags of file ``header_id`` joined with commas.

        Returns ``''`` when the file has no tags.
        """
        filetag = Table('filetag', self.metadata, autoload=True)
        s = select([filetag.c.tag], filetag.c.fileid == header_id)

        rp = self.conn.execute(s)
        try:
            rows = rp.fetchall() if rp.returns_rows else []
            tags = [elem[0] for elem in rows]
        finally:
            rp.close()
        return ','.join(tags)

    def retrieve_permissions(self, id, is_folder=True):
        """Return the old permissions of a folder or file.

        :param id: id of the folder (``is_folder`` true) or file header.
        :returns: ``defaultdict(list)`` with the keys ``'read'`` and
                  ``'write'``; each list holds group names followed by
                  usernames.
        """
        if is_folder:
            ftable = Table('folder_permission', self.metadata, autoload=True)
            target_col = ftable.c.folder_id
        else:
            ftable = Table('fileheader_permission', self.metadata, autoload=True)
            target_col = ftable.c.fileheader_id
        permission = Table('permission', self.metadata, autoload=True)
        group = Table('gss_group', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)

        j = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
        group_join = j.join(group, group.c.id == permission.c.group_id)
        user_join = j.join(user, user.c.id == permission.c.user_id)

        permissions = defaultdict(list)

        def _collect(action, get_groups):
            """Append the group names or usernames granted ``action``."""
            # The comparisons below build SQL expressions; ``!= None`` and
            # ``== True`` are intentional and must not become ``is``/``not``.
            if get_groups:
                col, from_obj = group.c.name, group_join
                is_set = permission.c.group_id != None
            else:
                col, from_obj = user.c.username, user_join
                is_set = permission.c.user_id != None
            if action == 'read':
                granted = permission.c.read == True
            else:
                granted = permission.c.write == True

            s = select([col], from_obj=from_obj)
            s = s.where(target_col == id)
            s = s.where(and_(granted, is_set))
            # Debug trace kept from the original; written so that it is valid
            # and prints the same text on both Python 2 and Python 3.
            print('> %s %s' % (s, s.compile().params))

            rp = self.conn.execute(s)
            try:
                permissions[action].extend([e[0] for e in rp.fetchall()])
            finally:
                rp.close()

        for action in ('read', 'write'):
            # groups first, then users
            _collect(action, get_groups=True)
            _collect(action, get_groups=False)

        return permissions
    
307
if __name__ == "__main__":
    # Both values are placeholders and are handed to ObjectMigration as is.
    # NOTE(review): presumably database connection strings that must be
    # filled in before running -- confirm against Migration and Cache.
    old_db = ''
    db = ''

    # `f` intentionally stays a module-level name: the version-creation code
    # may refer to it as a global when writing the SQL statements.
    f = open('fixdates.sql', 'w')
    try:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
    finally:
        # Flush and close the SQL file even if the migration fails half way.
        f.close()
315
    
316