migrate-db 12.5 KB
Newer Older
1 2
#!/usr/bin/env python

Vangelis Koukis's avatar
Vangelis Koukis committed
3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (C) 2010-2014 GRNET S.A.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 18

from sqlalchemy import Table
19
from sqlalchemy.sql import select, and_
20 21 22 23

from binascii import hexlify

from pithos.backends.lib.hashfiler import Blocker
24
from pithos.backends.lib.sqlalchemy import Node
25 26 27

from django.conf import settings

28
from pithos.backends.modular import CLUSTER_NORMAL, CLUSTER_HISTORY, CLUSTER_DELETED
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
29
from pithos.backends.lib.sqlalchemy.node import Node, ROOTNODE
30

Antony Chazapis's avatar
Antony Chazapis committed
31 32 33
from pithos.tools.lib.transfer import upload
from pithos.tools.lib.hashmap import HashMap
from pithos.tools.lib.client import Fault
34 35 36

from migrate import Migration, Cache

37
from calendar import timegm
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
38 39
from decimal import Decimal
from collections import defaultdict
40 41 42 43 44

import json
import os
import sys
import hashlib
45
import mimetypes
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
46 47 48 49
import time
import datetime

# Positional indexes into the rows fetched by ObjectMigration.build_path,
# whose query selects the columns of the legacy 'folder' table followed by
# the owner's username (hence USER comes last).
# NOTE(review): the order presumably mirrors the column order of the old
# 'folder' table -- confirm against the legacy schema.
(
    ID,
    CREATIONDATE,
    MODIFICATIONDATE,
    DELETED,
    ICON,
    NAME,
    VERSION,
    CREATEDBY_ID,
    MODIFIEDBY_ID,
    OWNER_ID,
    PARENT_ID,
    READFORALL,
    SHARED,
    USER,
) = range(14)
50

51
class ObjectMigration(Migration):
52
    def __init__(self, old_db, db, f):
        """Set up a migration that reads from ``old_db``.

        old_db -- handed unchanged to ``Migration.__init__``
        db     -- handed to ``Cache``; the resulting cache is what
                  ``create_history`` queries to map a stored file path
                  to its hash
        f      -- writable file-like object; presumably the destination
                  of the generated SQL statements
        """
        Migration.__init__(self, old_db)
        self.cache = Cache(db)
        # Keep the output handle on the instance instead of silently
        # dropping the argument.
        # NOTE(review): create_version writes through a module-level name
        # ``f`` rather than through this attribute.
        self.f = f
55
    
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
56 57 58 59 60 61 62 63 64
    def create_node(self, username, container, object):
        """Return the node stored under the path ``object``, creating it
        when it does not exist yet.

        An existing node is returned as is.  Otherwise the node is created
        with the node of ``<username>/<container>`` as its parent.

        Raises Exception('Missing node') when that parent cannot be looked
        up.
        """
        nodes = self.backend.node
        existing = nodes.node_lookup(object)
        if existing:
            return existing

        container_node = nodes.node_lookup('%s/%s' % (username, container))
        if not container_node:
            raise Exception('Missing node')
        return nodes.node_create(container_node, object)
65
    
66
    def create_history(self, header_id, node_id, deleted=False):
        """Recreate the version history of one legacy file under ``node_id``.

        Walks the rows yielded by ``retrieve_node_versions(header_id)`` and
        creates a backend version for every row whose modification date is
        not already among the mtimes of the node's stored versions.

        Cluster selection: when ``deleted`` is true every version goes to
        CLUSTER_DELETED; otherwise the last row goes to CLUSTER_NORMAL and
        all earlier rows to CLUSTER_HISTORY.

        Returns the list of serials of the versions created by this call.
        Raises Exception("Missing hash") when the cache has no hash for a
        row's stored file path.
        """
        created = []
        stored = self.backend.node.node_get_versions(node_id, ['mtime'])
        # A set keeps the "already migrated?" test cheap for long histories.
        stored_mtimes = set(
            datetime.datetime.utcfromtimestamp(row[0]) for row in stored)
        # The position has to advance for skipped rows too: it is compared
        # with the total row count, so counting only the versions actually
        # created would file the newest version under CLUSTER_HISTORY
        # whenever an older one had been migrated before.
        for position, (t, rowcount) in enumerate(
                self.retrieve_node_versions(header_id)):
            size, modyfied_by, filepath, mimetype, mdate = t
            if mdate in stored_mtimes:
                continue
            if deleted:
                cluster = CLUSTER_DELETED
            elif position < rowcount - 1:
                cluster = CLUSTER_HISTORY
            else:
                cluster = CLUSTER_NORMAL
            hash = self.cache.get(filepath)
            if hash is None:
                raise Exception("Missing hash")
            created.append(self.create_version(
                node_id, hash, size, modyfied_by, cluster, mimetype, mdate))
        return created
    
    def create_version(self, node_id, hash, size, modyfied_by, cluster, mimetype, mdate):
        """Create one backend version of ``node_id`` and emit its mtime fix.

        The version is created with ``backend.node.version_create`` and
        gets the attributes 'hash' and 'content-type'.  An SQL UPDATE that
        sets the version's mtime to the legacy date ``mdate`` (formatted as
        ``<seconds>.<microseconds>``) is written to the output file.

        mdate -- a datetime; ``timetuple()`` and ``microsecond`` are read

        Returns the serial of the new version.
        """
        serial = self.backend.node.version_create(
            node_id, hash, size, None, modyfied_by, cluster)[0]
        meta = {'hash': hash, 'content-type': mimetype}
        # items() rather than the Python-2-only iteritems().
        self.backend.node.attribute_set(serial, tuple(meta.items()))
        timestamp = timegm(mdate.timetuple())
        values = (timestamp, mdate.microsecond, serial)
        # The fraction must be zero padded to six digits: '%6d' rendered
        # 42 microseconds as '.    42' (space padded), which is not the
        # number 0.000042.  The seconds are written without padding.
        # NOTE(review): ``f`` is resolved as a module-level name here (the
        # script's __main__ section binds it), not as the argument that
        # was given to __init__.
        f.write('update versions set mtime=\'%d.%06d\' where serial=%s;' % values)
        return serial
    
    def create_tags(self, header_id, node_id, vserials):
        """Attach the legacy tags of file ``header_id`` to the given versions.

        ``retrieve_tags`` supplies the tags as one string, which is stored
        as the 'X-Object-Meta-Tag' attribute of every serial in
        ``vserials``.  Nothing is written when there are no tags.
        ``node_id`` is accepted but not used.
        """
        tags = self.retrieve_tags(header_id)
        if tags:
            attributes = (('X-Object-Meta-Tag', tags),)
            for serial in vserials:
                self.backend.node.attribute_set(serial, attributes)
104
    
105 106 107 108
    def create_permissions(self, fid, path, owner, is_folder=True):
        """Store the legacy permissions of a folder or file in the backend.

        fid       -- legacy id, passed to ``retrieve_permissions``
        path      -- backend path of the object
        owner     -- removed from the 'read' and 'write' lists before
                     anything is stored
        is_folder -- passed to ``retrieve_permissions``

        When ``access_inherit(path)`` reports no permissions, the legacy
        ones are set on ``path``.  Otherwise only the users/groups present
        both in the reported and in the legacy permissions are kept, and
        they are set on the path returned by ``access_inherit``.
        """
        acl = self.backend.permissions
        inherited_path, inherited = acl.access_inherit(path)
        granted = self.retrieve_permissions(fid, is_folder)

        actions = ('read', 'write')
        # Both outcomes below start by dropping the owner from the lists.
        for action in actions:
            members = granted[action]
            if owner in members:
                members.remove(owner)

        if not inherited:
            acl.access_set(path, granted)
            return

        #keep only the common permissions
        #trade off for securing access only to explicitly authorized users
        shared = {}
        for action in actions:
            both = set(inherited[action]).intersection(set(granted[action]))
            shared[action] = list(both)
        acl.access_set(inherited_path, shared)
125
    
126
    def create_objects(self):
        """Migrate every row of ``retrieve_current_nodes`` into the backend.

        For each row this
          * makes sure the user's 'pithos' and 'trash' containers exist
            (checked once per username),
          * resolves the folder path with ``build_path``,
          * looks up or creates the object's node under 'trash' when the
            row is flagged deleted and under 'pithos' otherwise,
          * recreates the version history and the tags,
          * makes the object public when the row's readforall flag is set,
          * stores the object's permissions.
        """
        containers = ('pithos', 'trash')
        # Usernames whose containers were already verified; rows of the
        # same user do not need the backend round trips again.
        ensured = set()
        for t in self.retrieve_current_nodes():
            (username, headerid, folderid, filename, deleted,
             filepath, mimetype, public, owner_id) = t

            if username not in ensured:
                for c in containers:
                    #create container if it does not exist
                    try:
                        self.backend._lookup_container(username, c)
                    except NameError:
                        self.backend.put_container(username, username, c)
                ensured.add(username)

            container = 'trash' if deleted else 'pithos'
            path = self.build_path(folderid)

            #create node
            prefix = '%s/%s' % (username, container)
            if path:
                object_path = '%s/%s/%s' % (prefix, path, filename)
            else:
                object_path = '%s/%s' % (prefix, filename)
            nodeid = self.create_node(username, container, object_path)
            #create node history
            vserials = self.create_history(headerid, nodeid, deleted)
            #set object tags
            self.create_tags(headerid, nodeid, vserials)
            #set object's publicity
            if public:
                self.backend.permissions.public_set(
                    object_path,
                    self.backend.public_url_security,
                    self.backend.public_url_alphabet
                )
            #set object's permissions
            self.create_permissions(headerid, object_path, username, is_folder=False)
158
    
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
159 160 161 162 163 164 165
    def build_path(self, child_id):
        """Return the path of legacy folder ``child_id`` relative to its
        container, creating backend nodes for the folders on the way.

        The folder row is fetched together with its owner's username.  A
        folder without a parent yields ''.  For any other folder the parent
        path is resolved recursively and, unless a node already exists at
        ``<username>/<pithos|trash>/<relative path>``, one is created with
          * an 'application/directory' version dated at the folder's
            creation date, plus a second one dated at its modification
            date when the two dates differ (the first is then moved to
            CLUSTER_HISTORY), and
          * the folder's permissions.

        Raises Exception when the folder row or the container node is
        missing, or when the node cannot be created.
        """
        folder = Table('folder', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        j = folder.join(user, folder.c.owner_id == user.c.id)
        s = select([folder, user.c.username], from_obj=j)
        s = s.where(folder.c.id == child_id)
        # No ORDER BY: the query filters on the folder id, and an
        # order_by() whose return value is discarded has no effect anyway
        # because select constructs are generative.
        rp = self.conn.execute(s)
        try:
            t = rp.fetchone()
        finally:
            # Release the result before recursing into the parent folder.
            rp.close()
        if t is None:
            raise Exception('Missing folder:', child_id)
        if not t[PARENT_ID]:
            return ''

        container_path = t[USER]
        container_path += '/trash' if t[DELETED] else '/pithos'
        parent_node = self.backend.node.node_lookup(container_path)
        if not parent_node:
            raise Exception('Missing node:', container_path)
        parent_path = self.build_path(t[PARENT_ID])
        relative_path = '%s/%s' % (parent_path, t[NAME]) if parent_path else t[NAME]
        path = '%s/%s' % (container_path, relative_path)
        node = self.backend.node.node_lookup(path)
        if not node:
            node = self.backend.node.node_create(parent_node, path)
            if not node:
                raise Exception('Unable to create node:', path)

            #create versions
            # Folder versions carry the md5 of empty content and size 0.
            empty_hash = hashlib.md5().hexdigest().lower()
            size = 0
            v = self.create_version(node, empty_hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[CREATIONDATE])
            if t[CREATIONDATE] != t[MODIFICATIONDATE]:
                self.backend.node.version_recluster(v, CLUSTER_HISTORY)
                self.create_version(node, empty_hash, size, t[USER], CLUSTER_NORMAL, 'application/directory', t[MODIFICATIONDATE])

            #set permissions
            self.create_permissions(t[ID], path, t[USER], is_folder=True)
        return relative_path
196 197
    
    def retrieve_current_nodes(self):
        """Yield one row per legacy file, joined with its current body,
        its folder and its owner.

        Each row holds, in order: username, fileheader id, folder id,
        file name, deleted flag, stored file path, mimetype, readforall
        flag and owner id.
        """
        fileheader = Table('fileheader', self.metadata, autoload=True)
        filebody = Table('filebody', self.metadata, autoload=True)
        folder = Table('folder', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(fileheader, filebody.c.id == fileheader.c.currentbody_id)
        j = j.join(folder, fileheader.c.folder_id == folder.c.id)
        j = j.join(gss_user, fileheader.c.owner_id == gss_user.c.id)
        s = select([gss_user.c.username,  fileheader.c.id, fileheader.c.folder_id,
                    fileheader.c.name,  fileheader.c.deleted,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    fileheader.c.readforall, fileheader.c.owner_id], from_obj=j)
        rp = self.conn.execute(s)
        # try/finally so the result is released even when the consumer
        # stops iterating early or an error is raised while a row is
        # being processed.
        try:
            row = rp.fetchone()
            while row:
                yield row
                row = rp.fetchone()
        finally:
            rp.close()
215
    
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
    def retrieve_node_versions(self, header_id):
        """Yield ``(row, total)`` for every body of file ``header_id``,
        ordered by version.

        ``row`` holds file size, username of the modifier, stored file
        path, mimetype and modification date; ``total`` is the number of
        rows the query returned.
        """
        filebody = Table('filebody', self.metadata, autoload=True)
        gss_user = Table('gss_user', self.metadata, autoload=True)
        j = filebody.join(gss_user, filebody.c.modifiedby_id == gss_user.c.id)
        s = select([filebody.c.filesize, gss_user.c.username,
                    filebody.c.storedfilepath, filebody.c.mimetype,
                    filebody.c.modificationdate], from_obj=j)
        s = s.where(filebody.c.header_id == header_id)
        s = s.order_by(filebody.c.version)
        rp = self.conn.execute(s)
        try:
            versions = rp.fetchall()
        finally:
            rp.close()
        # Count the fetched rows instead of trusting rp.rowcount: for a
        # SELECT the value depends on the database driver and may not be
        # the number of rows at all.
        total = len(versions)
        for version in versions:
            yield version, total
231
    
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
232
    def retrieve_tags(self, header_id):
        """Return the tags of file ``header_id`` joined with commas.

        The tags are read from the 'filetag' table; the result is '' when
        the file has none.
        """
        filetag = Table('filetag', self.metadata, autoload=True)
        query = select([filetag.c.tag], filetag.c.fileid == header_id)
        result = self.conn.execute(query)
        rows = []
        if result.returns_rows:
            rows = result.fetchall()
        names = [row[0] for row in rows]
        result.close()
        # Joining an empty list already gives ''.
        return ','.join(names)
240
    
Sofia Papagiannaki's avatar
Sofia Papagiannaki committed
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
    def retrieve_permissions(self, id, is_folder=True):
        """Collect the legacy read/write grants of a folder or a file.

        id        -- id of the folder when ``is_folder`` is true, otherwise
                     of the file header
        is_folder -- selects 'folder_permission' or
                     'fileheader_permission' as the linking table

        Returns a ``defaultdict(list)`` holding, under 'read' and under
        'write', the matching group names followed by the matching
        usernames.
        """
        link_name = 'folder_permission' if is_folder else 'fileheader_permission'
        ftable = Table(link_name, self.metadata, autoload=True)
        permission = Table('permission', self.metadata, autoload=True)
        group = Table('gss_group', self.metadata, autoload=True)
        user = Table('gss_user', self.metadata, autoload=True)
        linked = ftable.join(permission, ftable.c.permissions_id == permission.c.id)
        with_groups = linked.join(group, group.c.id == permission.c.group_id)
        with_users = linked.join(user, user.c.id == permission.c.user_id)

        permissions = defaultdict(list)

        def collect(action, get_groups):
            """Append the group names (or usernames) granted ``action``."""
            # The ``!= None`` / ``== True`` comparisons build SQL
            # expressions; they must not be turned into ``is`` tests.
            if get_groups:
                name_col, source = group.c.name, with_groups
                has_grantee = permission.c.group_id != None
            else:
                name_col, source = user.c.username, with_users
                has_grantee = permission.c.user_id != None
            target_col = ftable.c.folder_id if is_folder else ftable.c.fileheader_id
            flag_col = permission.c.read if action == 'read' else permission.c.write
            query = select([name_col], from_obj=source)
            query = query.where(target_col == id)
            query = query.where(and_(flag_col == True, has_grantee))
            # Debug trace of the statement and its bound parameters.
            sys.stdout.write('> %s %s\n' % (query, query.compile().params))
            result = self.conn.execute(query)
            permissions[action].extend([e[0] for e in result.fetchall()])
            result.close()

        for action in ('read', 'write'):
            #get object groups for this action
            collect(action, True)
            #get object users for this action
            collect(action, False)

        return permissions
    
293
if __name__ == "__main__":
    # Passed to Migration.__init__ and to Cache respectively; left empty
    # here (presumably database connection strings to be filled in before
    # running -- confirm).
    old_db = ''
    db = ''

    # The handle has to stay bound to the module-level name ``f``:
    # ObjectMigration.create_version writes its SQL through that global.
    # The with-block closes the file even when the migration fails
    # part-way through.
    with open('fixdates.sql', 'w') as f:
        ot = ObjectMigration(old_db, db, f)
        ot.create_objects()
301
    
302