# pithos_client.py
# Copyright (C) 2015 GRNET S.A.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

16 17 18 19 20 21
from functools import wraps
import time
import os
import datetime
import threading
import logging
22
import re
23

24
from agkyra.syncer import utils, common, messaging
25 26 27 28 29 30 31
from agkyra.syncer.file_client import FileClient
from agkyra.syncer.setup import ClientError
from agkyra.syncer.database import transaction

logger = logging.getLogger(__name__)


Giorgos Korfiatis's avatar
Giorgos Korfiatis committed
32
def heartbeat_event(settings, heartbeat, objname):
    """Start a background thread that keeps an object's heartbeat fresh.

    The thread repeatedly rewrites the entry stored under *objname* in
    *heartbeat*, keeping its ``ident`` and replacing its ``tstamp`` with
    ``utils.time_stamp()``, until the returned event is set.

    :param settings: object providing ``action_max_wait``; the pause between
        beats starts at 0.2 and grows by 20% per round, capped at half of
        that value.
    :param heartbeat: store exposing a ``lock()`` context manager that
        yields the underlying mapping.
    :param objname: key of the heartbeat entry to refresh; the entry is
        expected to exist already (this is asserted on every beat).
    :returns: a ``threading.Event``; set it to stop the beating thread.
    """
    event = threading.Event()
    max_interval = settings.action_max_wait / 2.0

    def set_log():
        # Refresh the timestamp while holding the heartbeat lock.
        with heartbeat.lock() as hb:
            beat = hb.get(objname)
            assert beat is not None
            new_beat = {"ident": beat["ident"],
                        "tstamp": utils.time_stamp()}
            hb[objname] = new_beat
            logger.debug("HEARTBEAT '%s' %s" % (objname, new_beat))

    def go():
        interval = 0.2
        while not event.is_set():
            set_log()
            # Wait on the event rather than sleeping blindly: the thread
            # then wakes up and exits as soon as the caller sets the event,
            # instead of lingering for up to max_interval seconds.
            event.wait(interval)
            interval = min(1.2 * interval, max_interval)

    thread = threading.Thread(target=go)
    thread.start()
    return event


def give_heartbeat(f):
    """Decorator: keep a heartbeat running while the wrapped call executes.

    The first positional argument of the wrapped callable must expose the
    ``objname``, ``heartbeat`` and ``settings`` attributes.  The heartbeat is
    started before the call and stopped when the call returns or raises.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        handle = args[0]
        name, beats, conf = handle.objname, handle.heartbeat, handle.settings
        stop_beating = heartbeat_event(conf, beats, name)
        try:
            result = f(*args, **kwargs)
        finally:
            # Always stop the beating thread, even if the call failed.
            stop_beating.set()
        return result
    return wrapper


Giorgos Korfiatis's avatar
Giorgos Korfiatis committed
73 74 75 76 77 78 79 80 81 82 83 84 85
def handle_client_errors(f):
    """Decorator: translate ``ClientError`` into the syncer's own errors.

    A ``ClientError`` with status 412 (precondition failed) is re-raised as
    ``common.CollisionError``; any other ``ClientError`` is re-raised as
    ``common.SyncError``.  Other exceptions and normal results pass through.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        try:
            result = f(*args, **kwargs)
        except ClientError as err:
            precondition_failed = err.status == 412
            # TODO handle other cases, too
            error_class = (common.CollisionError if precondition_failed
                           else common.SyncError)
            raise error_class(err)
        return result
    return wrapper


86 87
class PithosSourceHandle(object):
    """Source-side handle that downloads one Pithos object into the cache.

    It is built from the syncer settings and the source state of the object.
    The state is refreshed (and persisted) when the download shows that the
    object's metadata differ from what the state records.
    """

    def __init__(self, settings, source_state):
        """Keep references to the settings fields this handle works with.

        :param settings: syncer settings; provides the endpoint, the cache
            locations, the database getter and the heartbeat store.
        :param source_state: state of the object to fetch; its ``objname``
            names the object and its ``info`` holds the last known metadata.
        """
        self.SIGNATURE = "PithosSourceHandle"
        self.settings = settings
        self.endpoint = settings.endpoint
        self.cache_fetch_name = settings.cache_fetch_name
        self.cache_fetch_path = settings.cache_fetch_path
        self.cache_path = settings.cache_path
        self.get_db = settings.get_db
        self.source_state = source_state
        self.objname = source_state.objname
        self.heartbeat = settings.heartbeat

    @transaction()
    def register_fetch_name(self, filename):
        """Pick a cache name for *filename* and record it in the database.

        The name is the hash of *filename* followed by the current time in
        whole seconds since the epoch; it is also kept in
        ``self.fetch_name``.

        :returns: the cache name joined onto ``self.cache_path``.
        """
        db = self.get_db()
        # strftime("%s") is a C library extension that is not available on
        # every platform; time.time() gives the same epoch seconds portably.
        f = utils.hash_string(filename) + "_" + str(int(time.time()))
        fetch_name = utils.join_path(self.cache_fetch_name, f)
        self.fetch_name = fetch_name
        db.insert_cachename(fetch_name, self.SIGNATURE, filename)
        return utils.join_path(self.cache_path, fetch_name)

    @handle_client_errors
    @give_heartbeat
    def send_file(self, sync_state):
        """Download the object to a fresh cache file and return its path.

        The source state is brought up to date with the metadata seen during
        the download.  If the object is gone upstream (404) the cache file
        is removed; if it is a directory, the file is replaced by an empty
        directory.  The path is returned in every case.

        :param sync_state: not used by this implementation.
        :returns: the path registered for the fetched object.
        :raises ClientError: on any download error other than 404 (the
            ``handle_client_errors`` decorator translates it further).
        """
        fetched_fspath = self.register_fetch_name(self.objname)
        headers = dict()
        with open(fetched_fspath, mode='wb+') as fil:
            try:
                logger.info("Downloading object: '%s', to: '%s'" %
                            (self.objname, fetched_fspath))
                self.endpoint.download_object(
                    self.objname,
                    fil,
                    headers=headers)
            except ClientError as e:
                if e.status != 404:
                    # Bare raise keeps the original traceback intact.
                    raise
                # The object no longer exists upstream.
                actual_info = {}
            else:
                actual_etag = headers["x-object-hash"]
                actual_type = (common.T_DIR if object_isdir(headers)
                               else common.T_FILE)
                actual_info = {"pithos_etag": actual_etag,
                               "pithos_type": actual_type}
            self.check_update_source_state(actual_info)
        if not actual_info:
            logger.info("Downloading object: '%s', object is gone."
                        % self.objname)
            os.unlink(fetched_fspath)
        elif actual_info["pithos_type"] == common.T_DIR:
            logger.info("Downloading object: '%s', object is dir."
                        % self.objname)
            # Swap the (empty) downloaded file for a directory.
            os.unlink(fetched_fspath)
            os.mkdir(fetched_fspath)
        return fetched_fspath

    @transaction()
    def update_state(self, state):
        """Store *state* in the database."""
        db = self.get_db()
        db.put_state(state)

    def check_update_source_state(self, actual_info):
        """Replace the source state if *actual_info* differs from its info.

        When they differ, a warning is logged, a new state carrying
        *actual_info* is stored in the database and becomes
        ``self.source_state``.
        """
        if actual_info != self.source_state.info:
            logger.warning("Actual info differs in %s for object: '%s'; "
                           "updating..." % (self.SIGNATURE, self.objname))
            new_state = self.source_state.set(info=actual_info)
            self.update_state(new_state)
            self.source_state = new_state

    def get_synced_state(self):
        """Return the current (possibly refreshed) source state."""
        return self.source_state

    def unstage_file(self):
        """Do nothing; this handle has nothing to unstage."""
        pass
163

164 165 166 167 168
# Suffix of the temporary name an object is moved to before it is deleted
# (see PithosTargetHandle.mk_del_name).
STAGED_FOR_DELETION_SUFFIX = ".pithos_staged_for_deletion"
# The suffix is escaped so that its leading dot only matches a literal dot;
# unescaped, it would match any character in that position.
exclude_staged_regex = ".*" + re.escape(STAGED_FOR_DELETION_SUFFIX) + "$"
exclude_pattern = re.compile(exclude_staged_regex)


169 170 171 172 173
class PithosTargetHandle(object):
    """Target-side handle that applies a local change to one Pithos object."""

    def __init__(self, settings, target_state):
        """Keep the settings, endpoint and target state for later calls.

        :param settings: syncer settings; provides the endpoint, the
            heartbeat store and (used by ``pull``) the messager.
        :param target_state: state of the object to write; its ``objname``
            names the object.
        """
        self.settings = settings
        self.endpoint = settings.endpoint
        self.target_state = target_state
        self.target_objname = target_state.objname
        self.objname = target_state.objname
        self.heartbeat = settings.heartbeat

    def mk_del_name(self, name, etag):
        """Return the temporary name *name* is moved to before deletion."""
        return "%s.%s%s" % (name, etag, STAGED_FOR_DELETION_SUFFIX)

    def safe_object_del(self, objname, etag):
        """Delete *objname* only if its etag still matches *etag*.

        The object is first moved, conditionally on *etag*, to a temporary
        name in the same container, and that temporary object is then
        deleted.  A 404 on the move is tolerated (the object may have been
        moved already) and the delete is still attempted.

        :raises ClientError: if the move fails with any status other than
            404, or if the delete fails.
        """
        container = self.endpoint.container
        del_name = self.mk_del_name(objname, etag)
        logger.info("Moving temporarily to '%s'" % del_name)
        try:
            self.endpoint.object_move(
                objname,
                destination='/%s/%s' % (container, del_name),
                if_etag_match=etag)
        except ClientError as e:
            if e.status != 404:
                # The move failed (e.g. 412 on an etag mismatch), so nothing
                # was staged.  Do not attempt the delete: an error raised by
                # it would replace this one and hide it from the caller.
                raise
            logger.warning("'%s' not found; already moved?" % objname)
        self.endpoint.del_object(del_name)
        logger.info("Deleted tmp '%s'" % del_name)

    def directory_put(self, objname, etag):
        """Create *objname* as an empty directory object.

        The request is conditional on *etag*.

        :returns: the endpoint's response to the put request.
        """
        r = self.endpoint.object_put(
            objname,
            content_type='application/directory',
            content_length=0,
            if_etag_match=etag)
        return r

    @handle_client_errors
    @give_heartbeat
    def pull(self, source_handle, sync_state):
        """Apply the change described by *source_handle* to the object.

        Depending on the source, the object is deleted, created as a
        directory, or uploaded from ``source_handle.staged_path``.  Every
        request is conditional on the etag recorded in *sync_state*.

        :param source_handle: handle of the local source; must provide
            ``info_is_deleted_or_unhandled()``, ``info_is_dir()`` and
            ``staged_path``.
        :param sync_state: last synced state; its ``info`` may hold the
            ``pithos_etag`` to match against.
        :returns: the target state updated with the resulting live info.
        :raises common.CollisionError: if the server answers 412; a
            ``CollisionMessage`` is also put on the settings' messager.
        """
        # assert isinstance(source_handle, LocalfsSourceHandle)
        info = sync_state.info
        etag = info.get("pithos_etag")
        try:
            if source_handle.info_is_deleted_or_unhandled():
                # Nothing to delete if the object was never synced.
                if etag is not None:
                    logger.info("Deleting object '%s'" % self.target_objname)
                    self.safe_object_del(self.target_objname, etag)
                live_info = {}
            elif source_handle.info_is_dir():
                logger.info("Creating dir '%s'" % self.target_objname)
                r = self.directory_put(self.target_objname, etag)
                synced_etag = r.headers["etag"]
                live_info = {"pithos_etag": synced_etag,
                             "pithos_type": common.T_DIR}
            else:
                with open(source_handle.staged_path, mode="rb") as fil:
                    r = self.endpoint.upload_object(
                        self.target_objname,
                        fil,
                        if_etag_match=etag)
                    synced_etag = r["etag"]
                live_info = {"pithos_etag": synced_etag,
                             "pithos_type": common.T_FILE}
            return self.target_state.set(info=live_info)
        except ClientError as e:
            if e.status == 412:  # Precondition failed
                msg = messaging.CollisionMessage(
                    objname=self.target_objname, etag=etag, logger=logger)
                self.settings.messager.put(msg)
                raise common.CollisionError(e)
            else:
                raise
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260


def object_isdir(obj):
    """Tell whether the metadata mapping *obj* describes a directory.

    The content type is read from the ``content_type`` key, falling back to
    ``content-type`` (a ``KeyError`` propagates if neither is present).  The
    object counts as a directory when that value contains
    ``application/directory`` or ``application/folder``.
    """
    try:
        declared = obj["content_type"]
    except KeyError:
        declared = obj["content-type"]
    for marker in ('application/directory', 'application/folder'):
        if marker in declared:
            return True
    return False


# Keys of the info dictionaries that describe an object's state on Pithos:
# the object's type (file or directory) and its etag (content hash).
PITHOS_TYPE = "pithos_type"
PITHOS_ETAG = "pithos_etag"


class PithosFileClient(FileClient):
    """File client for the Pithos side of the syncer.

    It lists and probes objects through the settings' endpoint and keeps a
    thread-safe cache of probe candidates, keyed by object name.
    """

    def __init__(self, settings):
        """Copy the connection details from *settings* and reset the caches.

        :param settings: syncer settings; provides the auth details, the
            container, the database getter and the endpoint.
        """
        self.settings = settings
        self.SIGNATURE = "PithosFileClient"
        self.auth_url = settings.auth_url
        self.auth_token = settings.auth_token
        self.container = settings.container
        self.get_db = settings.get_db
        self.endpoint = settings.endpoint
        # Latest "last_modified" value seen so far; this initial value
        # compares lower than any real timestamp string.
        self.last_modification = "0000-00-00"
        self.probe_candidates = utils.ThreadSafeDict()

    def remove_candidates(self, objnames, ident):
        """Drop the cached candidates in *objnames* that carry *ident*.

        Names that are not cached, or whose cached entry carries another
        ident, are left alone.
        """
        with self.probe_candidates.lock() as d:
            for objname in objnames:
                try:
                    cached = d.pop(objname)
                    if cached["ident"] != ident:
                        # Claimed by another probe: put it back.
                        d[objname] = cached
                except KeyError:
                    pass

    def list_candidate_files(self, forced=False):
        """Return the names of the currently cached probe candidates.

        :param forced: if true, list the objects upstream first and merge
            the result into the cache.
        :returns: a list of object names.
        """
        with self.probe_candidates.lock() as d:
            if forced:
                candidates = self.get_pithos_candidates()
                d.update(candidates)
            # Take a snapshot while the lock is held, so that callers never
            # get a live view of the dictionary to use outside the lock.
            return list(d.keys())

    def get_pithos_candidates(self, last_modified=None):
        """List the objects upstream and build the probe candidates.

        ``self.last_modification`` is advanced to the latest
        ``last_modified`` value seen in the listing.

        :param last_modified: if given, only objects modified after this
            value are included; otherwise all listed objects are.
        :returns: a dict mapping object names to ``{"ident": None, "info":
            <live info>}``.  Names that the database lists as non-deleted
            but that are missing upstream are included with an empty info.
            If the listing fails with a ``ClientError``, the error is logged
            and an empty dict is returned.
        """
        try:
            objects = self.endpoint.list_objects()
        except ClientError as e:
            logger.error(e)
            return {}
        self.objects = objects
        upstream_all = {}
        for obj in objects:
            name = obj["name"]
            upstream_all[name] = {
                "ident": None,
                "info": self.get_object_live_info(obj)
            }
            obj_last_modified = obj["last_modified"]
            if obj_last_modified > self.last_modification:
                self.last_modification = obj_last_modified
        upstream_all_names = set(upstream_all.keys())
        if last_modified is not None:
            candidates = dict(
                (obj["name"], upstream_all[obj["name"]])
                for obj in objects
                if obj["last_modified"] > last_modified)
        else:
            candidates = upstream_all

        # Objects known to the database that are no longer listed upstream.
        non_deleted_in_db = set(self.list_non_deleted_files())
        newly_deleted_names = non_deleted_in_db.difference(upstream_all_names)
        logger.debug("newly_deleted %s" % newly_deleted_names)
        newly_deleted = dict((name, {"ident": None, "info": {}})
                             for name in newly_deleted_names)

        candidates.update(newly_deleted)
        logger.info("Candidates since %s: %s" %
                    (last_modified, candidates))
        return candidates

    @transaction()
    def list_non_deleted_files(self):
        """Return what the database lists as non-deleted for this client."""
        db = self.get_db()
        return db.list_non_deleted_files(self.SIGNATURE)

    def notifier(self):
        """Start a daemon thread that polls upstream for new candidates.

        Each round fetches the candidates modified since
        ``self.last_modification``, merges them into the candidate cache and
        then sleeps for ``settings.pithos_list_interval``.

        :returns: whatever ``utils.start_daemon`` returns for the thread.
        """
        interval = self.settings.pithos_list_interval

        class PollPithosThread(utils.StoppableThread):
            # "this" is the thread; "self" stays bound to the file client.
            def run_body(this):
                candidates = self.get_pithos_candidates(
                    last_modified=self.last_modification)
                with self.probe_candidates.lock() as d:
                    d.update(candidates)
                time.sleep(interval)
        return utils.start_daemon(PollPithosThread)

    def get_object(self, objname):
        """Return the endpoint's info for *objname*, or None on a 404.

        :raises ClientError: on any error other than 404.
        """
        try:
            return self.endpoint.get_object_info(objname)
        except ClientError as e:
            if e.status == 404:
                return None
            # Bare raise keeps the original traceback intact.
            raise

    def get_object_live_info(self, obj):
        """Build the live info dict for the object metadata *obj*.

        :param obj: metadata mapping of an object, or None if there is no
            such object.
        :returns: ``{}`` for None; otherwise a dict with the object's hash
            (read from ``x-object-hash`` or ``x_object_hash``) and its type.
        """
        if obj is None:
            return {}
        p_type = common.T_DIR if object_isdir(obj) else common.T_FILE
        obj_hash = obj.get("x-object-hash")
        if obj_hash is None:
            obj_hash = obj.get("x_object_hash")
        return {PITHOS_ETAG: obj_hash,
                PITHOS_TYPE: p_type,
                }

    def probe_file(self, objname, old_state, ref_state, ident):
        """Compare the recorded state of *objname* with its live info.

        The live info is taken from the candidate cache when the object is
        cached there (the cached entry is then tagged with *ident*);
        otherwise it is requested from the endpoint.  Names staged for
        deletion are ignored.

        :param ref_state: not used by this implementation.
        :returns: *old_state* updated with the live info if that differs
            from the recorded info; None otherwise.
        """
        info = old_state.info
        with self.probe_candidates.lock() as d:
            try:
                cached = d[objname]
                cached_info = cached["info"]
                cached["ident"] = ident
            except KeyError:
                cached_info = None
        if exclude_pattern.match(objname):
            logger.warning("Ignoring probe archive: %s, object: '%s'" %
                           (old_state.archive, objname))
            return None
        if cached_info is None:
            obj = self.get_object(objname)
            live_info = self.get_object_live_info(obj)
        else:
            live_info = cached_info
        if info != live_info:
            live_state = old_state.set(info=live_info)
            return live_state
        return None

    def stage_file(self, source_state):
        """Return a ``PithosSourceHandle`` for *source_state*."""
        return PithosSourceHandle(self.settings, source_state)

    def prepare_target(self, target_state):
        """Return a ``PithosTargetHandle`` for *target_state*."""
        return PithosTargetHandle(self.settings, target_state)