#
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Functions used by the node daemon"""


import os
import os.path
import shutil
import time
import stat
import errno
import re
import subprocess
33
import random
34
import logging
35
import tempfile
36
37
import zlib
import base64
Iustin Pop's avatar
Iustin Pop committed
38
39
40
41
42
43
44
45

from ganeti import errors
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti import constants
from ganeti import bdev
from ganeti import objects
46
from ganeti import ssconf
Iustin Pop's avatar
Iustin Pop committed
47
48


Michael Hanselmann's avatar
Michael Hanselmann committed
49
def _GetConfig():
  """Simple wrapper to return a SimpleStore.

  @rtype: L{ssconf.SimpleStore}
  @return: a SimpleStore instance

  """
  return ssconf.SimpleStore()
Michael Hanselmann's avatar
Michael Hanselmann committed
57
58


59
def _GetSshRunner(cluster_name):
  """Simple wrapper to return an SshRunner.

  @type cluster_name: str
  @param cluster_name: the cluster name, which is needed
      by the SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: an SshRunner instance

  """
  return ssh.SshRunner(cluster_name)
70
71


72
73
74
75
76
77
78
79
80
def _Decompress(data):
  """Unpacks data compressed by the RPC client.

  @type data: list or tuple
  @param data: Data sent by RPC client, as an (encoding, content) pair
  @rtype: str
  @return: Decompressed data
  @raise AssertionError: if C{data} is not a two-element list/tuple or
      the encoding is not one of the known RPC encodings

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2
  (encoding, content) = data
  if encoding == constants.RPC_ENCODING_NONE:
    # payload was sent verbatim
    return content
  elif encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    # payload is base64 text wrapping a zlib stream
    return zlib.decompress(base64.b64decode(content))
  else:
    raise AssertionError("Unknown data encoding")


92
def _CleanDirectory(path, exclude=None):
93
94
  """Removes all regular files in a directory.

Iustin Pop's avatar
Iustin Pop committed
95
96
  @type path: str
  @param path: the directory to clean
97
  @type exclude: list
Iustin Pop's avatar
Iustin Pop committed
98
99
  @param exclude: list of files to be excluded, defaults
      to the empty list
100
101

  """
102
103
  if not os.path.isdir(path):
    return
104
105
106
107
108
  if exclude is None:
    exclude = []
  else:
    # Normalize excluded paths
    exclude = [os.path.normpath(i) for i in exclude]
109

110
  for rel_name in utils.ListVisibleFiles(path):
111
112
113
    full_name = os.path.normpath(os.path.join(path, rel_name))
    if full_name in exclude:
      continue
114
115
116
117
    if os.path.isfile(full_name) and not os.path.islink(full_name):
      utils.RemoveFile(full_name)


118
def JobQueuePurge():
  """Removes job queue files and archived jobs.

  The job queue lock file is kept; everything else in the queue
  directory and in the archive directory is removed via
  L{_CleanDirectory}.

  @rtype: None

  """
  _CleanDirectory(constants.QUEUE_DIR, exclude=[constants.JOB_QUEUE_LOCK_FILE])
  _CleanDirectory(constants.JOB_QUEUE_ARCHIVE_DIR)


128
129
130
131
132
133
134
def GetMasterInfo():
  """Returns master information.

  This is a utility function to compute master information, either
  for consumption here or from the node daemon.

  @rtype: tuple
  @return: (master_netdev, master_ip, master_name) if we have a good
      configuration, otherwise (None, None, None)

  """
  try:
    cfg = _GetConfig()
    master_netdev = cfg.GetMasterNetdev()
    master_ip = cfg.GetMasterIP()
    master_node = cfg.GetMasterNode()
  except errors.ConfigurationError:
    # the traceback is recorded by logging.exception
    logging.exception("Cluster configuration incomplete")
    return (None, None, None)
  return (master_netdev, master_ip, master_node)
148
149


150
def StartMaster(start_daemons):
  """Activate local node as master node.

  The function will always try to activate the IP address of the master
  (unless someone else has it). It will also start the master daemons,
  based on the start_daemons parameter.

  @type start_daemons: boolean
  @param start_daemons: whether to also start the master
      daemons (ganeti-masterd and ganeti-rapi)
  @rtype: boolean
  @return: C{True} if all steps succeeded; C{False} if the master
      network device is not known, the master IP is held by another
      host, or any of the executed commands (except arping) failed

  """
  ok = True
  master_netdev, master_ip, _ = GetMasterInfo()
  if not master_netdev:
    return False

  if utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
    if utils.OwnIpAddress(master_ip):
      # we already have the ip:
      logging.debug("Already started")
    else:
      logging.error("Someone else has the master ip, not activating")
      ok = False
  else:
    result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
                           "dev", master_netdev, "label",
                           "%s:0" % master_netdev])
    if result.failed:
      logging.error("Can't activate master IP: %s", result.output)
      ok = False

    # we'll ignore the exit code of arping
    utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
                  "-s", master_ip, master_ip])

  # and now start the master and rapi daemons
  if start_daemons:
    for daemon in 'ganeti-masterd', 'ganeti-rapi':
      result = utils.RunCmd([daemon])
      if result.failed:
        logging.error("Can't start daemon %s: %s", daemon, result.output)
        ok = False
  return ok
Iustin Pop's avatar
Iustin Pop committed
195
196


197
def StopMaster(stop_daemons):
  """Deactivate this node as master.

  The function will always try to deactivate the IP address of the
  master. It will also stop the master daemons depending on the
  stop_daemons parameter.

  @type stop_daemons: boolean
  @param stop_daemons: whether to also stop the master daemons
      (ganeti-masterd and ganeti-rapi)
  @rtype: boolean
  @return: C{False} if the master network device is not known,
      C{True} otherwise (a failure to remove the IP is only logged)

  """
  master_netdev, master_ip, _ = GetMasterInfo()
  if not master_netdev:
    return False

  result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
                         "dev", master_netdev])
  if result.failed:
    logging.error("Can't remove the master IP, error: %s", result.output)
    # but otherwise ignore the failure

  if stop_daemons:
    # stop/kill the rapi and the master daemon
    for daemon in constants.RAPI_PID, constants.MASTERD_PID:
      utils.KillProcess(utils.ReadPidFile(utils.DaemonPidFileName(daemon)))

  return True


Iustin Pop's avatar
Iustin Pop committed
228
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
229
  """Joins this node to the cluster.
Iustin Pop's avatar
Iustin Pop committed
230

231
232
233
234
  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file
Iustin Pop's avatar
Iustin Pop committed
235

Iustin Pop's avatar
Iustin Pop committed
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
  @type dsa: str
  @param dsa: the DSA private key to write
  @type dsapub: str
  @param dsapub: the DSA public key to write
  @type rsa: str
  @param rsa: the RSA private key to write
  @type rsapub: str
  @param rsapub: the RSA public key to write
  @type sshkey: str
  @param sshkey: the SSH private key to write
  @type sshpub: str
  @param sshpub: the SSH public key to write
  @rtype: boolean
  @return: the success of the operation

251
  """
252
253
254
255
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
256
  for name, content, mode in sshd_keys:
257
    utils.WriteFile(name, data=content, mode=mode)
Iustin Pop's avatar
Iustin Pop committed
258

259
260
261
262
  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
                                                    mkdir=True)
  except errors.OpExecError, err:
263
    logging.exception("Error while processing user ssh files")
264
    return False
Iustin Pop's avatar
Iustin Pop committed
265

266
267
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)
Iustin Pop's avatar
Iustin Pop committed
268

269
  utils.AddAuthorizedKey(auth_keys, sshpub)
Iustin Pop's avatar
Iustin Pop committed
270

271
  utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
Iustin Pop's avatar
Iustin Pop committed
272
273
274
275
276

  return True


def LeaveCluster():
  """Cleans up and remove the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster: the data directory and the job queue are purged,
  the node's public key is removed from the authorized keys file and
  the user's ssh key pair is deleted.

  If processing is successful, then it raises an
  L{errors.QuitGanetiException} which is used as a special case to
  shutdown the node daemon.

  If the user's ssh files cannot be determined, the error is logged
  and the function returns without raising.

  @raise errors.QuitGanetiException: on successful completion

  """
  _CleanDirectory(constants.DATA_DIR)
  JobQueuePurge()

  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
  except errors.OpExecError:
    logging.exception("Error while processing ssh files")
    return

  f = open(pub_key, 'r')
  try:
    # at most 8192 bytes of the public key file are read
    utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
  finally:
    f.close()

  utils.RemoveFile(priv_key)
  utils.RemoveFile(pub_key)

  # Return a reassuring string to the caller, and quit
  raise errors.QuitGanetiException(False, 'Shutdown scheduled')

Iustin Pop's avatar
Iustin Pop committed
308

309
def GetNodeInfo(vgname, hypervisor_type):
  """Gives back a hash with different information about the node.

  @type vgname: C{string}
  @param vgname: the name of the volume group to ask for disk space information
  @type hypervisor_type: C{str}
  @param hypervisor_type: the name of the hypervisor to ask for
      memory information
  @rtype: C{dict}
  @return: dictionary with the following keys:
      - vg_size is the size of the configured volume group in MiB
      - vg_free is the free size of the volume group in MiB
      - memory_dom0 is the memory allocated for domain0 in MiB
      - memory_free is the currently available (free) ram in MiB
      - memory_total is the total number of ram in MiB
      - bootid is the content of /proc/sys/kernel/random/boot_id

      The memory_* keys are whatever the hypervisor's GetNodeInfo()
      reports; they are absent if it returns C{None}.

  """
  outputarray = {}
  vginfo = _GetVGInfo(vgname)
  outputarray['vg_size'] = vginfo['vg_size']
  outputarray['vg_free'] = vginfo['vg_free']

  hyper = hypervisor.GetHypervisor(hypervisor_type)
  hyp_info = hyper.GetNodeInfo()
  if hyp_info is not None:
    outputarray.update(hyp_info)

  f = open("/proc/sys/kernel/random/boot_id", 'r')
  try:
    outputarray["bootid"] = f.read(128).rstrip("\n")
  finally:
    f.close()

  return outputarray


345
def VerifyNode(what, cluster_name):
  """Verify the status of the local node.

  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).

  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
  @type cluster_name: str
  @param cluster_name: the cluster name, passed to the ssh runner
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks

  """
  result = {}

  if constants.NV_HYPERVISOR in what:
    result[constants.NV_HYPERVISOR] = tmp = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
      tmp[hv_name] = hypervisor.GetHypervisor(hv_name).Verify()

  if constants.NV_FILELIST in what:
    result[constants.NV_FILELIST] = utils.FingerprintFiles(
      what[constants.NV_FILELIST])

  if constants.NV_NODELIST in what:
    result[constants.NV_NODELIST] = tmp = {}
    # shuffle a private copy so that the caller's list is not reordered
    node_list = list(what[constants.NV_NODELIST])
    random.shuffle(node_list)
    for node in node_list:
      success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
      if not success:
        # only failures are reported
        tmp[node] = message

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
    # find our own addresses, which are used as source for the pings
    for name, pip, sip in what[constants.NV_NODENETTEST]:
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
    else:
      port = utils.GetNodeDaemonPort()
      for name, pip, sip in what[constants.NV_NODENETTEST]:
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          # the secondary is only tested when it differs from the primary
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_LVLIST in what:
    result[constants.NV_LVLIST] = GetVolumeList(what[constants.NV_LVLIST])

  if constants.NV_INSTANCELIST in what:
    result[constants.NV_INSTANCELIST] = GetInstanceList(
      what[constants.NV_INSTANCELIST])

  if constants.NV_VGLIST in what:
    result[constants.NV_VGLIST] = ListVolumeGroups()

  if constants.NV_VERSION in what:
    result[constants.NV_VERSION] = constants.PROTOCOL_VERSION

  if constants.NV_HVINFO in what:
    hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
    result[constants.NV_HVINFO] = hyper.GetNodeInfo()

  if constants.NV_DRBDLIST in what:
    try:
      used_minors = bdev.DRBD8.GetUsedDevs().keys()
    except errors.BlockDeviceError:
      # same exception class as raised by the block device code elsewhere
      # in this module
      logging.warning("Can't get used minors list", exc_info=True)
      used_minors = []
    result[constants.NV_DRBDLIST] = used_minors

  return result


def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  @type vg_name: str
  @param vg_name: the volume group whose LVs we should list
  @rtype: dict
  @return:
      dictionary of all partitions (key) with value being a tuple of
      their size (in MiB, as the string printed by lvs), inactive and
      online status::

        {'test1': ('20.06', True, True)}

      in case of errors, a string is returned with the error
      details.

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return result.output

  # name|size|attr, with an optional trailing separator; the attribute
  # field is accepted with six or more characters since only positions
  # 4 and 5 are inspected below
  valid_line_re = re.compile(r"^ *([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")
  for line in result.stdout.splitlines():
    line = line.strip()
    match = valid_line_re.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    name, size, attr = match.groups()
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
Iustin Pop's avatar
Iustin Pop committed
485
486
487


def ListVolumeGroups():
  """List the volume groups and their size.

  This is a thin wrapper over L{utils.ListVolumeGroups}.

  @rtype: dict
  @return: dictionary with keys volume name and values the
      size of the volume

  """
  return utils.ListVolumeGroups()


498
499
500
def NodeVolumes():
  """List all volumes on this node.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    In case of errors, we return an empty list and log the
    error.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    logging.error("Failed to list logical volumes, lvs output: %s",
                  result.output)
    return []

  def parse_dev(dev):
    # keep only the part before the first '(' of the devices field
    if '(' in dev:
      return dev.split('(')[0]
    else:
      return dev

  def map_line(line):
    # line is the list of '|'-separated fields, in --options order
    return {
      'name': line[0].strip(),
      'size': line[1].strip(),
      'dev': parse_dev(line[2].strip()),
      'vg': line[3].strip(),
    }

  # lines with fewer than four fields are skipped
  return [map_line(line.split('|')) for line in result.stdout.splitlines()
          if line.count('|') >= 3]
541
542


Iustin Pop's avatar
Iustin Pop committed
543
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @type bridges_list: list
  @param bridges_list: the names of the bridges to check
  @rtype: boolean
  @return: C{True} if all of them exist (also for an empty list),
      C{False} otherwise

  """
  for bridge in bridges_list:
    if not utils.BridgeExists(bridge):
      # stop at the first missing bridge
      return False

  return True


557
def GetInstanceList(hypervisor_list):
  """Provides a list of instances.

  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information

  @rtype: list
  @return: a list of all running instances on the current node
    - instance1.example.com
    - instance2.example.com
  @raise errors.HypervisorError: re-raised (after logging) if any of
      the hypervisors fails to enumerate its instances

  """
  results = []
  for hname in hypervisor_list:
    try:
      names = hypervisor.GetHypervisor(hname).ListInstances()
      results.extend(names)
    except errors.HypervisorError:
      logging.exception("Error enumerating instances for hypevisor %s", hname)
      # FIXME: should we somehow not propagate this to the master?
      raise

  return results
Iustin Pop's avatar
Iustin Pop committed
580
581


582
def GetInstanceInfo(instance, hname):
  """Gives back the information about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance

  @rtype: dict
  @return: dictionary with the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)

      The dictionary is empty if the hypervisor returns no
      information for the instance.

  """
  output = {}

  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
  if iinfo is not None:
    # positions 2, 4 and 5 of the hypervisor tuple
    output['memory'] = iinfo[2]
    output['state'] = iinfo[4]
    output['time'] = iinfo[5]

  return output


608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
def GetInstanceMigratable(instance):
  """Tells whether an instance can be migrated.

  The instance must be listed by its hypervisor, and each of its disks
  must have a block device symlink in place.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked.

  @rtype: tuple
  @return: tuple of (result, description) where:
      - result: whether the instance can be migrated or not
      - description: a description of the issue, if relevant

  """
  running = hypervisor.GetHypervisor(instance.hypervisor).ListInstances()
  if instance.name not in running:
    return (False, 'not running')

  # only the disk index matters here, not the disk object itself
  for idx, _ in enumerate(instance.disks):
    if not os.path.islink(_GetBlockDevSymlinkPath(instance.name, idx)):
      return (False, 'not restarted since ganeti 1.2.5')

  return (True, '')


632
def GetAllInstancesInfo(hypervisor_list):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus
  @raise errors.HypervisorError: if the same instance name is reported
      more than once with differing data

  """
  output = {}

  for hname in hypervisor_list:
    iinfo = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
    if iinfo:
      # the instance id reported by the hypervisor is not used
      for name, _, memory, vcpus, state, times in iinfo:
        value = {
          'memory': memory,
          'vcpus': vcpus,
          'state': state,
          'time': times,
          }
        if name in output and output[name] != value:
          raise errors.HypervisorError("Instance %s running duplicate"
                                       " with different parameters" % name)
        output[name] = value

  return output


670
def AddOSToInstance(instance):
  """Add an OS to an instance.

  The OS create script is run with its output redirected to a log file
  under the OS log directory.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @rtype: tuple
  @return: (success, message); on failure the message contains the
      failure reason and the last lines of the log file

  """
  inst_os = OSFromDisk(instance.os)

  create_env = OSEnvironment(instance)

  logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                     instance.name, int(time.time()))

  result = utils.RunCmd([inst_os.create_script], env=create_env,
                        cwd=inst_os.path, output=logfile)
  if result.failed:
    logging.error("os create command '%s' returned error: %s, logfile: %s,"
                  " output: %s", result.cmd, result.fail_reason, logfile,
                  result.output)
    # escape the log lines before embedding them in the message
    lines = [val.encode("string_escape")
             for val in utils.TailFile(logfile, lines=20)]
    return (False, "OS create script failed (%s), last lines in the"
            " log file:\n%s" % (result.fail_reason, "\n".join(lines)))

  return (True, "Successfully installed")
698
699


700
def RunRenameInstance(instance, old_name):
  """Run the OS rename script for an instance.

  The script is run with the instance's OS environment plus
  C{OLD_INSTANCE_NAME}, with its output redirected to a log file under
  the OS log directory.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be renamed
  @type old_name: string
  @param old_name: previous instance name
  @rtype: boolean
  @return: the success of the operation

  """
  inst_os = OSFromDisk(instance.os)

  rename_env = OSEnvironment(instance)
  rename_env['OLD_INSTANCE_NAME'] = old_name

  logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
                                           old_name,
                                           instance.name, int(time.time()))

  result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                        cwd=inst_os.path, output=logfile)

  if result.failed:
    logging.error("os rename command '%s' returned error: %s output: %s",
                  result.cmd, result.fail_reason, result.output)
    return False

  return True


def _GetVGInfo(vg_name):
  """Get information about the volume group.

  @type vg_name: str
  @param vg_name: the volume group which we query
  @rtype: dict
  @return:
    A dictionary with the following keys:
      - C{vg_size} is the total size of the volume group in MiB
      - C{vg_free} is the free size of the volume group in MiB
      - C{pv_count} are the number of physical disks in that VG

    If an error occurs during gathering of data, we return the same dict
    with keys all set to None.

  """
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
    logging.error("volume group %s not present", vg_name)
    return retdic
  valarr = retval.stdout.strip().rstrip(':').split(':')
  if len(valarr) == 3:
    try:
      # sizes are rounded to whole numbers; retdic is only replaced if
      # all three fields parse
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
    except ValueError:
      logging.exception("Fail to parse vgs output")
  else:
    logging.error("vgs output has the wrong number of fields (expected"
                  " three): %s", str(valarr))
  return retdic


771
772
773
774
775
776
def _GetBlockDevSymlinkPath(instance_name, idx):
  """Compute the path of the symlink for one disk of an instance.

  @param instance_name: the name of the instance
  @param idx: the disk index
  @return: the path C{<instance_name>:<idx>} under the disk links
      directory

  """
  link_basename = "%s:%d" % (instance_name, idx)
  return os.path.join(constants.DISK_LINKS_DIR, link_basename)


def _SymlinkBlockDev(instance_name, device_path, idx):
777
778
779
780
781
782
  """Set up symlinks to a instance's block device.

  This is an auxiliary function run when an instance is start (on the primary
  node) or when an instance is migrated (on the target node).


783
784
785
786
  @param instance_name: the name of the target instance
  @param device_path: path of the physical block device, on the node
  @param idx: the disk index
  @return: absolute path to the disk's symlink
787
788

  """
789
  link_name = _GetBlockDevSymlinkPath(instance_name, idx)
790
791
  try:
    os.symlink(device_path, link_name)
792
793
  except OSError, err:
    if err.errno == errno.EEXIST:
794
795
796
797
798
799
800
801
802
803
      if (not os.path.islink(link_name) or
          os.readlink(link_name) != device_path):
        os.remove(link_name)
        os.symlink(device_path, link_name)
    else:
      raise

  return link_name


804
def _RemoveBlockDevLinks(instance_name, disks):
Iustin Pop's avatar
Iustin Pop committed
805
806
807
  """Remove the block device symlinks belonging to the given instance.

  """
808
809
810
  for idx, disk in enumerate(disks):
    link_name = _GetBlockDevSymlinkPath(instance_name, idx)
    if os.path.islink(link_name):
Iustin Pop's avatar
Iustin Pop committed
811
      try:
812
813
814
        os.remove(link_name)
      except OSError:
        logging.exception("Can't remove symlink '%s'", link_name)
Iustin Pop's avatar
Iustin Pop committed
815
816


817
def _GatherAndLinkBlockDevs(instance):
  """Open an instance's block devices and symlink them.

  This is run on the primary node at instance startup. The block
  devices must be already assembled.

  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should open and link
  @rtype: list
  @return: list of (disk_object, symlink_path) pairs, in disk order
  @raise errors.BlockDeviceError: if a disk's device cannot be found
      or its symlink cannot be created

  """
  linked_disks = []
  for disk_index, disk in enumerate(instance.disks):
    found_dev = _RecursiveFindBD(disk)
    if found_dev is None:
      raise errors.BlockDeviceError("Block device '%s' is not set up." %
                                    str(disk))
    found_dev.Open()

    try:
      symlink_path = _SymlinkBlockDev(instance.name, found_dev.dev_path,
                                      disk_index)
    except OSError as err:
      # convert to the error type callers of this function handle
      raise errors.BlockDeviceError("Cannot create block device symlink: %s" %
                                    err.strerror)

    linked_disks.append((disk, symlink_path))

  return linked_disks


def StartInstance(instance, extra_args):
  """Start an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @param extra_args: passed unchanged to the hypervisor's
      C{StartInstance} call
  @rtype: tuple
  @return: a tuple of (success, msg) where:
      - success is a boolean; it is also True if the instance was
        already running
      - msg is a string describing the outcome

  """
  running_instances = GetInstanceList([instance.hypervisor])

  if instance.name in running_instances:
    return (True, "Already running")

  try:
    block_devices = _GatherAndLinkBlockDevs(instance)
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    hyper.StartInstance(instance, block_devices, extra_args)
  except errors.BlockDeviceError as err:
    logging.exception("Failed to start instance")
    # the failure may have happened after links for earlier disks were
    # already created; don't leave them behind for a non-running instance
    _RemoveBlockDevLinks(instance.name, instance.disks)
    return (False, "Block device error: %s" % str(err))
  except errors.HypervisorError as err:
    logging.exception("Failed to start instance")
    _RemoveBlockDevLinks(instance.name, instance.disks)
    return (False, "Hypervisor error: %s" % str(err))

  return (True, "Instance started successfully")
Iustin Pop's avatar
Iustin Pop committed
874
875
876
877
878


def ShutdownInstance(instance):
  """Shut an instance down.

  A normal stop is requested first; if the instance is still listed by
  the hypervisor after the polling period, a forced stop is issued. On
  success the instance's block device symlinks are removed.

  @note: this functions uses polling with a hardcoded timeout.

  @type instance: L{objects.Instance}
  @param instance: the instance object
  @rtype: boolean
  @return: whether the shutdown was successful or not; an instance
      that is not running counts as success

  """
  hv_name = instance.hypervisor
  running_instances = GetInstanceList([hv_name])

  if instance.name not in running_instances:
    return True

  hyper = hypervisor.GetHypervisor(hv_name)
  try:
    hyper.StopInstance(instance)
  except errors.HypervisorError:
    # log with traceback, like the forced-stop failure below
    logging.exception("Failed to stop instance")
    return False

  # poll after 1 second and then every 10 seconds, for a bit under
  # two minutes in total (11 checks)
  time.sleep(1)
  for dummy in range(11):
    if instance.name not in GetInstanceList([hv_name]):
      break
    time.sleep(10)
  else:
    # the shutdown did not succeed
    logging.error("shutdown of '%s' unsuccessful, using destroy",
                  instance.name)

    try:
      hyper.StopInstance(instance, force=True)
    except errors.HypervisorError:
      logging.exception("Failed to stop instance")
      return False

    time.sleep(1)
    if instance.name in GetInstanceList([hv_name]):
      logging.error("could not shutdown instance '%s' even by destroy",
                    instance.name)
      return False

  _RemoveBlockDevLinks(instance.name, instance.disks)

  return True


928
929
930
def RebootInstance(instance, reboot_type, extra_args):
  """Reboot an instance.

  @type instance: L{objects.Instance}
  @param instance: the instance object to reboot
  @type reboot_type: str
  @param reboot_type: the type of reboot, one the following
    constants:
      - L{constants.INSTANCE_REBOOT_SOFT}: only reboot the
        instance OS, do not recreate the VM
      - L{constants.INSTANCE_REBOOT_HARD}: tear down and
        restart the VM (at the hypervisor level)
      - any other reboot type is not accepted here
  @param extra_args: passed to L{StartInstance} for a hard reboot
  @rtype: boolean
  @return: the success of the operation
  @raise errors.ParameterError: if C{reboot_type} is neither of the
      two accepted values

  """
  running_instances = GetInstanceList([instance.hypervisor])

  if instance.name not in running_instances:
    logging.error("Cannot reboot instance that is not running")
    return False

  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  if reboot_type == constants.INSTANCE_REBOOT_SOFT:
    try:
      hyper.RebootInstance(instance)
    except errors.HypervisorError:
      logging.exception("Failed to soft reboot instance")
      return False
  elif reboot_type == constants.INSTANCE_REBOOT_HARD:
    try:
      # both helpers report failure through their return value, so it
      # must be checked; otherwise a failed reboot looks successful
      if not ShutdownInstance(instance):
        logging.error("Failed to hard reboot instance: shutdown failed")
        return False
      started, start_msg = StartInstance(instance, extra_args)
      if not started:
        logging.error("Failed to hard reboot instance: %s", start_msg)
        return False
    except errors.HypervisorError:
      logging.exception("Failed to hard reboot instance")
      return False
  else:
    raise errors.ParameterError("reboot_type invalid")

  return True


973
974
975
def MigrateInstance(instance, target, live):
  """Migrates an instance to another node.

  @type instance: L{objects.Instance}
  @param instance: the instance definition
  @type target: string
  @param target: the target node name
  @type live: boolean
  @param live: whether the migration should be done live or not (the
      interpretation of this parameter is left to the hypervisor)
  @rtype: tuple
  @return: a tuple of (success, msg) where:
      - success is a boolean denoting the success/failure of the operation
      - msg is a string with details in case of failure

  """
  hv = hypervisor.GetHypervisor(instance.hypervisor)

  try:
    hv.MigrateInstance(instance.name, target, live)
  except errors.HypervisorError as err:
    failure = "Failed to migrate instance"
    logging.exception(failure)
    return (False, "%s: %s" % (failure, err))
  else:
    return (True, "Migration successfull")


1000
def CreateBlockDevice(disk, size, owner, on_primary, info):
  """Creates a block device for an instance.

  The children of the disk are assembled first, then the device itself
  is created and, where applicable, assembled and opened.

  @type disk: L{objects.Disk}
  @param disk: the object describing the disk we should create
  @type size: int
  @param size: the size of the physical underlying device, in MiB
  @type owner: str
  @param owner: the name of the instance for which disk is created,
      used for device cache data
  @type on_primary: boolean
  @param on_primary:  indicates if it is the primary node or not
  @type info: string
  @param info: string that will be sent to the physical device
      creation, used for example to set (LVM) tags on LVs

  @rtype: tuple
  @return: a tuple of (status, payload): on success C{True} and the
      new unique_id of the device (this can sometime be computed only
      after creation); on failure C{False} and an error message

  """
  child_devices = []
  for child in disk.children or []:
    child_dev = _RecursiveAssembleBD(child, owner, on_primary)
    if on_primary or disk.AssembleOnSecondary():
      # we need the children open in case the device itself has to
      # be assembled
      child_dev.Open()
    child_devices.append(child_dev)

  try:
    new_dev = bdev.Create(disk.dev_type, disk.physical_id, child_devices,
                          size)
  except errors.GenericError as err:
    return False, "Can't create block device: %s" % str(err)

  if on_primary or disk.AssembleOnSecondary():
    if not new_dev.Assemble():
      failure = "Can't assemble device after creation, very unusual event"
      logging.error(failure)
      return False, failure
    new_dev.SetSyncSpeed(constants.SYNC_SPEED)
    if on_primary or disk.OpenOnSecondary():
      new_dev.Open(force=True)
    DevCacheManager.UpdateCache(new_dev.dev_path, owner,
                                on_primary, disk.iv_name)

  new_dev.SetInfo(info)

  return True, new_dev.unique_id
Iustin Pop's avatar
Iustin Pop committed
1051
1052
1053
1054
1055


def RemoveBlockDevice(disk):
  """Remove a block device and, recursively, its children.

  @note: This is intended to be called recursively.

  A device that cannot be found or attached to counts as already
  removed. Children are only processed while every removal so far has
  succeeded.

  @type disk: L{objects.Disk}
  @param disk: the disk object we should remove
  @rtype: boolean
  @return: the success of the operation

  """
  try:
    device = _RecursiveFindBD(disk)
  except errors.BlockDeviceError:
    # probably can't attach
    logging.info("Can't attach to device %s in remove", disk)
    device = None

  if device is None:
    success = True
  else:
    # remember the path before removal, for the cache cleanup
    dev_path = device.dev_path
    success = device.Remove()
    if success:
      DevCacheManager.RemoveCache(dev_path)

  for child in disk.children or []:
    if not success:
      # after the first failure the remaining children are left alone
      break
    success = RemoveBlockDevice(child)

  return success


1083
def _RecursiveAssembleBD(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is run on the primary and secondary nodes for an instance.

  @note: this function is called recursively.

  Children are assembled first. A child that fails to assemble is
  recorded as C{None} as long as the number of failed children stays
  within what C{disk.ChildrenNeeded()} allows (a value of -1 means no
  child may fail); beyond that the child's error is re-raised.

  @type disk: L{objects.Disk}
  @param disk: the disk we try to assemble
  @type owner: str
  @param owner: the name of the instance which owns the disk
  @type as_primary: boolean
  @param as_primary: if we should make the block device
      read/write

  @return: the assembled device, or C{True} in case no device
      was assembled on this node
  @raise errors.BlockDeviceError: in case there is an error
      during the activation of the children or the device
      itself

  """
  child_devs = []
  if disk.children:
    needed = disk.ChildrenNeeded()
    if needed == -1:
      max_missing = 0 # no child is allowed to be missing
    else:
      max_missing = len(disk.children) - needed
    for child in disk.children:
      try:
        child_dev = _RecursiveAssembleBD(child, owner, as_primary)
      except errors.BlockDeviceError as err:
        if child_devs.count(None) >= max_missing:
          raise
        child_dev = None
        logging.debug("Error in child activation: %s", str(err))
      child_devs.append(child_dev)

  if not (as_primary or disk.AssembleOnSecondary()):
    # nothing to assemble for this disk on this node
    return True

  device = bdev.Assemble(disk.dev_type, disk.physical_id, child_devs)
  device.SetSyncSpeed(constants.SYNC_SPEED)
  if as_primary or disk.OpenOnSecondary():
    device.Open()
  DevCacheManager.UpdateCache(device.dev_path, owner,
                              as_primary, disk.iv_name)
  return device


1136
def AssembleBlockDevice(disk, owner, as_primary):
  """Activate a block device for an instance.

  This is a wrapper over _RecursiveAssembleBD which converts an
  assembled device into its path.

  @type disk: L{objects.Disk}
  @param disk: the disk we try to assemble
  @type owner: str
  @param owner: the name of the instance which owns the disk
  @type as_primary: boolean
  @param as_primary: passed on to L{_RecursiveAssembleBD}
  @rtype: str or boolean
  @return: the device's C{dev_path} when a L{bdev.BlockDev} was
      assembled, otherwise the unchanged result of
      L{_RecursiveAssembleBD}

  """
  assembled = _RecursiveAssembleBD(disk, owner, as_primary)
  if not isinstance(assembled, bdev.BlockDev):
    return assembled
  return assembled.dev_path


def ShutdownBlockDevice(disk):
  """Shut down a block device.

Iustin Pop's avatar
Iustin Pop committed
1155
1156
1157
  First, if the device is assembled (Attach() is successfull), then
  the device is shutdown. Then the children of the device are
  shutdown.
Iustin Pop's avatar
Iustin Pop committed
1158
1159
1160
1161
1162

  This function is called recursively. Note that we don't cache the
  children or such, as oppossed to assemble, shutdown of different
  devices doesn't require that the upper device was active.

Iustin Pop's avatar
Iustin Pop committed
1163
1164
1165
1166
1167
1168
  @type disk: L{objects.Disk}
  @param disk: the description of the disk we should
      shutdown
  @rtype: boolean
  @return: the success of the operation

Iustin Pop's avatar
Iustin Pop committed
1169
1170
1171
  """
  r_dev = _RecursiveFindBD(disk)
  if r_dev is not None:
Iustin Pop's avatar