backend.py 87.6 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
#
Iustin Pop's avatar
Iustin Pop committed
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


22 23 24 25
"""Functions used by the node daemon

@var _ALLOWED_UPLOAD_FILES: denotes which files are accepted in
     the L{UploadFile} function
@var _ALLOWED_CLEAN_DIRS: denotes which directories are accepted
     in the L{_CleanDirectory} function

"""
Iustin Pop's avatar
Iustin Pop committed
30

Iustin Pop's avatar
Iustin Pop committed
31 32 33 34 35 36
# pylint: disable-msg=E1103

# E1103: %s %r has no %r member (but some types could not be
# inferred), because the _TryOSFromDisk returns either (True, os_obj)
# or (False, "string") which confuses pylint

Iustin Pop's avatar
Iustin Pop committed
37 38 39 40 41 42 43 44

import os
import os.path
import shutil
import time
import stat
import errno
import re
45
import random
46
import logging
47
import tempfile
48 49
import zlib
import base64
Iustin Pop's avatar
Iustin Pop committed
50 51 52 53 54 55 56 57

from ganeti import errors
from ganeti import utils
from ganeti import ssh
from ganeti import hypervisor
from ganeti import constants
from ganeti import bdev
from ganeti import objects
58
from ganeti import ssconf
Iustin Pop's avatar
Iustin Pop committed
59 60


61
# File read by GetNodeInfo to fill in the "bootid" entry of its result
_BOOT_ID_PATH = "/proc/sys/kernel/random/boot_id"
62 63 64 65 66
# Whitelist of directories that _CleanDirectory accepts as its target;
# any other path makes it fail the RPC instead of removing files
_ALLOWED_CLEAN_DIRS = frozenset([
  constants.DATA_DIR,
  constants.JOB_QUEUE_ARCHIVE_DIR,
  constants.QUEUE_DIR,
  ])
67 68


69 70 71 72 73 74 75
class RPCFail(Exception):
  """Exception signalling that an RPC call has failed.

  The argument passed at construction time is the error message.

  """

76

77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
def _Fail(msg, *args, **kwargs):
  """Log an error and then raise an RPCFail exception.

  This is a shortcut for logging the error and aborting the current
  call with the same message.

  @type msg: string
  @param msg: the text of the exception; if positional arguments are
      given, it is used as a format string for them
  @keyword log: if present and false, the message is not logged
  @keyword exc: if true, the message is logged via
      C{logging.exception} instead of C{logging.error}
  @raise RPCFail

  """
  if args:
    msg = msg % args

  # logging is on by default; only an explicit false "log" disables it
  if kwargs.get("log", True):
    if kwargs.get("exc"):
      log_fn = logging.exception
    else:
      log_fn = logging.error
    log_fn(msg)

  raise RPCFail(msg)


Michael Hanselmann's avatar
Michael Hanselmann committed
100
def _GetConfig():
  """Create and return a new SimpleStore object.

  @rtype: L{ssconf.SimpleStore}
  @return: a freshly constructed SimpleStore instance

  """
  store = ssconf.SimpleStore()
  return store
Michael Hanselmann's avatar
Michael Hanselmann committed
108 109


110
def _GetSshRunner(cluster_name):
  """Create and return a new SshRunner object.

  @type cluster_name: str
  @param cluster_name: the cluster name, handed over to the
      SshRunner constructor
  @rtype: L{ssh.SshRunner}
  @return: a freshly constructed SshRunner instance

  """
  runner = ssh.SshRunner(cluster_name)
  return runner
121 122


123 124 125 126 127 128 129 130 131
def _Decompress(data):
  """Decode a payload that was packed by the RPC client.

  @type data: list or tuple
  @param data: two-element sequence of (encoding, content)
  @rtype: str
  @return: the content itself for the "none" encoding, or the
      base64-decoded and zlib-decompressed content
  @raise AssertionError: if the input is malformed or the encoding
      is not one of the known ones

  """
  assert isinstance(data, (list, tuple))
  assert len(data) == 2

  encoding, content = data

  if encoding == constants.RPC_ENCODING_NONE:
    return content

  if encoding == constants.RPC_ENCODING_ZLIB_BASE64:
    compressed = base64.b64decode(content)
    return zlib.decompress(compressed)

  raise AssertionError("Unknown data encoding")


143
def _CleanDirectory(path, exclude=None):
  """Delete the regular files found directly inside a directory.

  Symbolic links and anything that is not a regular file are left
  alone. A path that is not an existing directory is silently ignored.

  @type path: str
  @param path: the directory to clean; must be one of the entries
      in L{_ALLOWED_CLEAN_DIRS}
  @type exclude: list
  @param exclude: list of file paths to keep, defaults to the
      empty list
  @raise RPCFail: if the path is not an allowed clean target

  """
  if path not in _ALLOWED_CLEAN_DIRS:
    _Fail("Path passed to _CleanDirectory not in allowed clean targets: '%s'",
          path)

  if not os.path.isdir(path):
    return

  if exclude is None:
    keep = []
  else:
    # normalise, so that the comparison with the joined names works
    keep = [os.path.normpath(entry) for entry in exclude]

  for entry_name in utils.ListVisibleFiles(path):
    candidate = utils.PathJoin(path, entry_name)
    if candidate in keep:
      continue
    if not os.path.isfile(candidate) or os.path.islink(candidate):
      continue
    utils.RemoveFile(candidate)


173 174 175 176 177 178
def _BuildUploadFileList():
  """Compute the set of files which are accepted for upload.

  Kept as a separate function so that the result is computed a single
  time, when the module is imported.

  @rtype: frozenset
  @return: the fixed cluster files plus the ancillary files of every
      hypervisor type listed in C{constants.HYPER_TYPES}

  """
  files = set([
    constants.CLUSTER_CONF_FILE,
    constants.ETC_HOSTS,
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.VNC_PASSWORD_FILE,
    constants.RAPI_CERT_FILE,
    constants.RAPI_USERS_FILE,
    constants.CONFD_HMAC_KEY,
    ])

  for hv_name in constants.HYPER_TYPES:
    ancillary = hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()
    files.update(ancillary)

  return frozenset(files)
194 195 196 197 198


# Evaluated once, when this module is imported
_ALLOWED_UPLOAD_FILES = _BuildUploadFileList()


199
def JobQueuePurge():
  """Remove the job queue files and the archived jobs.

  The job queue lock file is kept in place.

  @rtype: None
  @raise RPCFail: propagated from L{_CleanDirectory}

  """
  preserved = [constants.JOB_QUEUE_LOCK_FILE]
  _CleanDirectory(constants.QUEUE_DIR, exclude=preserved)
  _CleanDirectory(constants.JOB_QUEUE_ARCHIVE_DIR)


210 211 212 213 214 215 216
def GetMasterInfo():
  """Returns master information.

  This is an utility function to compute master information, either
  for consumption here or from the node daemon.

  @rtype: tuple
217
  @return: master_netdev, master_ip, master_name
218
  @raise RPCFail: in case of errors
219 220 221

  """
  try:
Michael Hanselmann's avatar
Michael Hanselmann committed
222 223 224 225
    cfg = _GetConfig()
    master_netdev = cfg.GetMasterNetdev()
    master_ip = cfg.GetMasterIP()
    master_node = cfg.GetMasterNode()
226
  except errors.ConfigurationError, err:
Iustin Pop's avatar
Iustin Pop committed
227
    _Fail("Cluster configuration incomplete: %s", err, exc=True)
228
  return (master_netdev, master_ip, master_node)
229 230


231
def StartMaster(start_daemons, no_voting):
  """Activate the local node as master node.

  The master IP address is brought up on this node unless something
  already answers on it. Depending on the start_daemons parameter the
  master daemons are started as well. All problems are collected and
  reported together at the end.

  @type start_daemons: boolean
  @param start_daemons: whether to also start the master
      daemons (ganeti-masterd and ganeti-rapi)
  @type no_voting: boolean
  @param no_voting: whether to start ganeti-masterd without a node vote
      (if start_daemons is True), but still non-interactively
  @rtype: None
  @raise RPCFail: if any of the steps reported an error

  """
  # GetMasterInfo will raise an exception if not able to return data
  master_netdev, master_ip, _ = GetMasterInfo()

  problems = []

  if not utils.TcpPing(master_ip, constants.DEFAULT_NODED_PORT):
    # nothing answers on the master IP, so configure it here
    add_result = utils.RunCmd(["ip", "address", "add", "%s/32" % master_ip,
                               "dev", master_netdev, "label",
                               "%s:0" % master_netdev])
    if add_result.failed:
      msg = "Can't activate master IP: %s" % add_result.output
      logging.error(msg)
      problems.append(msg)

    # the exit code of arping is deliberately ignored
    utils.RunCmd(["arping", "-q", "-U", "-c 3", "-I", master_netdev,
                  "-s", master_ip, master_ip])
  elif utils.OwnIpAddress(master_ip):
    # we already have the ip:
    logging.debug("Master IP already configured, doing nothing")
  else:
    msg = "Someone else has the master ip, not activating"
    logging.error(msg)
    problems.append(msg)

  # and now start the master and rapi daemons
  if start_daemons:
    if no_voting:
      masterd_args = "--no-voting --yes-do-it"
    else:
      masterd_args = ""

    start_result = utils.RunCmd([constants.DAEMON_UTIL, "start-master"],
                                env={"EXTRA_MASTERD_ARGS": masterd_args})
    if start_result.failed:
      msg = "Can't start Ganeti master: %s" % start_result.output
      logging.error(msg)
      problems.append(msg)

  if problems:
    _Fail("; ".join(problems))
291

Iustin Pop's avatar
Iustin Pop committed
292

293
def StopMaster(stop_daemons):
  """Deactivate this node as master.

  The master IP address is always removed from this node; the master
  daemons are stopped only if the stop_daemons parameter asks for it.
  Failures of either step are logged but not reported to the caller.

  @type stop_daemons: boolean
  @param stop_daemons: whether to also stop the master daemons
      (ganeti-masterd and ganeti-rapi)
  @rtype: None

  """
  # TODO: log and report back to the caller the error failures; we
  # need to decide in which case we fail the RPC for this

  # GetMasterInfo will raise an exception if not able to return data
  master_netdev, master_ip, _ = GetMasterInfo()

  del_result = utils.RunCmd(["ip", "address", "del", "%s/32" % master_ip,
                             "dev", master_netdev])
  if del_result.failed:
    # logged only, the failure is otherwise ignored
    logging.error("Can't remove the master IP, error: %s", del_result.output)

  if not stop_daemons:
    return

  stop_result = utils.RunCmd([constants.DAEMON_UTIL, "stop-master"])
  if stop_result.failed:
    logging.error("Could not stop Ganeti master, command %s had exitcode %s"
                  " and error %s",
                  stop_result.cmd, stop_result.exit_code, stop_result.output)
Iustin Pop's avatar
Iustin Pop committed
324 325


Iustin Pop's avatar
Iustin Pop committed
326
def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
327
  """Joins this node to the cluster.
Iustin Pop's avatar
Iustin Pop committed
328

329 330 331 332
  This does the following:
      - updates the hostkeys of the machine (rsa and dsa)
      - adds the ssh private key to the user
      - adds the ssh public key to the users' authorized_keys file
Iustin Pop's avatar
Iustin Pop committed
333

Iustin Pop's avatar
Iustin Pop committed
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
  @type dsa: str
  @param dsa: the DSA private key to write
  @type dsapub: str
  @param dsapub: the DSA public key to write
  @type rsa: str
  @param rsa: the RSA private key to write
  @type rsapub: str
  @param rsapub: the RSA public key to write
  @type sshkey: str
  @param sshkey: the SSH private key to write
  @type sshpub: str
  @param sshpub: the SSH public key to write
  @rtype: boolean
  @return: the success of the operation

349
  """
350 351 352 353
  sshd_keys =  [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
                (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
                (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
                (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
354
  for name, content, mode in sshd_keys:
355
    utils.WriteFile(name, data=content, mode=mode)
Iustin Pop's avatar
Iustin Pop committed
356

357 358 359 360
  try:
    priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
                                                    mkdir=True)
  except errors.OpExecError, err:
361
    _Fail("Error while processing user ssh files: %s", err, exc=True)
Iustin Pop's avatar
Iustin Pop committed
362

363 364
  for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
    utils.WriteFile(name, data=content, mode=0600)
Iustin Pop's avatar
Iustin Pop committed
365

366
  utils.AddAuthorizedKey(auth_keys, sshpub)
Iustin Pop's avatar
Iustin Pop committed
367

368 369 370 371
  result = utils.RunCmd([constants.DAEMON_UTIL, "reload-ssh-keys"])
  if result.failed:
    _Fail("Unable to reload SSH keys (command %r, exit code %s, output %r)",
          result.cmd, result.exit_code, result.output)
Iustin Pop's avatar
Iustin Pop committed
372 373


374
def LeaveCluster(modify_ssh_setup):
  """Cleans up and remove the current node.

  This function cleans up and prepares the current node to be removed
  from the cluster: the data directory and the job queue are purged,
  optionally the ssh setup of the user is undone, the cluster secrets
  are deleted and the confd daemon is stopped.

  The cleanup of the ssh files, of the secrets and the stopping of
  confd are best-effort: errors are logged and processing continues.

  At the end it always raises an L{errors.QuitGanetiException}, which
  is used as a special case to shutdown the node daemon.

  @type modify_ssh_setup: boolean
  @param modify_ssh_setup: whether to remove the user's ssh key pair
      and its entry in the authorized keys file
  @raise errors.QuitGanetiException: always, once the cleanup is done

  """
  _CleanDirectory(constants.DATA_DIR)
  JobQueuePurge()

  if modify_ssh_setup:
    try:
      priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)

      utils.RemoveAuthorizedKey(auth_keys, utils.ReadFile(pub_key))

      utils.RemoveFile(priv_key)
      utils.RemoveFile(pub_key)
    except errors.OpExecError:
      logging.exception("Error while processing ssh files")

  # Each secret is removed in its own try block, so that a failure on
  # one file does not leave the remaining secrets behind
  secrets = (
    constants.CONFD_HMAC_KEY,
    constants.RAPI_CERT_FILE,
    constants.NODED_CERT_FILE,
    )
  for secret_file in secrets:
    try:
      utils.RemoveFile(secret_file)
    except: # pylint: disable-msg=W0702
      logging.exception("Error while removing cluster secrets")

  result = utils.RunCmd([constants.DAEMON_UTIL, "stop", constants.CONFD])
  if result.failed:
    logging.error("Command %s failed with exitcode %s and error %s",
                  result.cmd, result.exit_code, result.output)

  # Raise a custom exception (handled in ganeti-noded)
  raise errors.QuitGanetiException(True, 'Shutdown scheduled')
415

Iustin Pop's avatar
Iustin Pop committed
416

417
def GetNodeInfo(vgname, hypervisor_type):
  """Gives back a hash with different information about the node.

  @type vgname: C{string}
  @param vgname: the name of the volume group to ask for disk space information
  @type hypervisor_type: C{str}
  @param hypervisor_type: the name of the hypervisor to ask for
      memory information
  @rtype: C{dict}
  @return: dictionary with the following keys:
      - vg_size is the size of the configured volume group in MiB
      - vg_free is the free size of the volume group in MiB
      - memory_dom0 is the memory allocated for domain0 in MiB
      - memory_free is the currently available (free) ram in MiB
      - memory_total is the total number of ram in MiB
      - bootid is the content of the kernel boot id file

    The memory keys are whatever the hypervisor's GetNodeInfo call
    returns; if it returns None they are absent.

  """
  info = {}

  vg_data = _GetVGInfo(vgname)
  for key in ('vg_size', 'vg_free'):
    info[key] = vg_data[key]

  hv_data = hypervisor.GetHypervisor(hypervisor_type).GetNodeInfo()
  if hv_data is not None:
    info.update(hv_data)

  boot_id = utils.ReadFile(_BOOT_ID_PATH, size=128)
  info["bootid"] = boot_id.rstrip("\n")

  return info
Iustin Pop's avatar
Iustin Pop committed
447 448


449
def VerifyNode(what, cluster_name):
Iustin Pop's avatar
Iustin Pop committed
450 451
  """Verify the status of the local node.

452 453 454 455 456 457 458 459 460
  Based on the input L{what} parameter, various checks are done on the
  local node.

  If the I{filelist} key is present, this list of
  files is checksummed and the file/checksum pairs are returned.

  If the I{nodelist} key is present, we check that we have
  connectivity via ssh with the target nodes (and check the hostname
  report).
Iustin Pop's avatar
Iustin Pop committed
461

462 463 464 465 466 467 468 469 470 471 472
  If the I{node-net-test} key is present, we check that we have
  connectivity to the given nodes via both primary IP and, if
  applicable, secondary IPs.

  @type what: C{dict}
  @param what: a dictionary of things to check:
      - filelist: list of files for which to compute checksums
      - nodelist: list of nodes we should check ssh communication with
      - node-net-test: list of nodes we should check node daemon port
        connectivity with
      - hypervisor: list with hypervisors to run the verify for
Iustin Pop's avatar
Iustin Pop committed
473 474 475
  @rtype: dict
  @return: a dictionary with the same keys as the input dict, and
      values representing the result of the checks
Iustin Pop's avatar
Iustin Pop committed
476 477 478 479

  """
  result = {}

480 481 482
  if constants.NV_HYPERVISOR in what:
    result[constants.NV_HYPERVISOR] = tmp = {}
    for hv_name in what[constants.NV_HYPERVISOR]:
483 484 485 486 487
      try:
        val = hypervisor.GetHypervisor(hv_name).Verify()
      except errors.HypervisorError, err:
        val = "Error while checking hypervisor: %s" % str(err)
      tmp[hv_name] = val
488 489 490 491 492 493 494 495 496

  if constants.NV_FILELIST in what:
    result[constants.NV_FILELIST] = utils.FingerprintFiles(
      what[constants.NV_FILELIST])

  if constants.NV_NODELIST in what:
    result[constants.NV_NODELIST] = tmp = {}
    random.shuffle(what[constants.NV_NODELIST])
    for node in what[constants.NV_NODELIST]:
497
      success, message = _GetSshRunner(cluster_name).VerifyNodeHostname(node)
Iustin Pop's avatar
Iustin Pop committed
498
      if not success:
499 500 501 502
        tmp[node] = message

  if constants.NV_NODENETTEST in what:
    result[constants.NV_NODENETTEST] = tmp = {}
503 504
    my_name = utils.HostInfo().name
    my_pip = my_sip = None
505
    for name, pip, sip in what[constants.NV_NODENETTEST]:
506 507 508 509 510
      if name == my_name:
        my_pip = pip
        my_sip = sip
        break
    if not my_pip:
511 512
      tmp[my_name] = ("Can't find my own primary/secondary IP"
                      " in the node list")
513
    else:
514
      port = utils.GetDaemonPort(constants.NODED)
515
      for name, pip, sip in what[constants.NV_NODENETTEST]:
516 517 518 519 520 521 522
        fail = []
        if not utils.TcpPing(pip, port, source=my_pip):
          fail.append("primary")
        if sip != pip:
          if not utils.TcpPing(sip, port, source=my_sip):
            fail.append("secondary")
        if fail:
523 524 525 526
          tmp[name] = ("failure using the %s interface(s)" %
                       " and ".join(fail))

  if constants.NV_LVLIST in what:
527 528 529 530 531
    try:
      val = GetVolumeList(what[constants.NV_LVLIST])
    except RPCFail, err:
      val = str(err)
    result[constants.NV_LVLIST] = val
532 533

  if constants.NV_INSTANCELIST in what:
534 535 536 537 538 539
    # GetInstanceList can fail
    try:
      val = GetInstanceList(what[constants.NV_INSTANCELIST])
    except RPCFail, err:
      val = str(err)
    result[constants.NV_INSTANCELIST] = val
540 541

  if constants.NV_VGLIST in what:
542
    result[constants.NV_VGLIST] = utils.ListVolumeGroups()
543

544 545 546 547 548
  if constants.NV_PVLIST in what:
    result[constants.NV_PVLIST] = \
      bdev.LogicalVolume.GetPVInfo(what[constants.NV_PVLIST],
                                   filter_allocatable=False)

549
  if constants.NV_VERSION in what:
550 551
    result[constants.NV_VERSION] = (constants.PROTOCOL_VERSION,
                                    constants.RELEASE_VERSION)
552 553 554 555

  if constants.NV_HVINFO in what:
    hyper = hypervisor.GetHypervisor(what[constants.NV_HVINFO])
    result[constants.NV_HVINFO] = hyper.GetNodeInfo()
556

557 558 559
  if constants.NV_DRBDLIST in what:
    try:
      used_minors = bdev.DRBD8.GetUsedDevs().keys()
560
    except errors.BlockDeviceError, err:
561
      logging.warning("Can't get used minors list", exc_info=True)
562
      used_minors = str(err)
563 564
    result[constants.NV_DRBDLIST] = used_minors

565 566 567 568 569 570 571 572 573 574 575
  if constants.NV_NODESETUP in what:
    result[constants.NV_NODESETUP] = tmpr = []
    if not os.path.isdir("/sys/block") or not os.path.isdir("/sys/class/net"):
      tmpr.append("The sysfs filesytem doesn't seem to be mounted"
                  " under /sys, missing required directories /sys/block"
                  " and /sys/class/net")
    if (not os.path.isdir("/proc/sys") or
        not os.path.isfile("/proc/sysrq-trigger")):
      tmpr.append("The procfs filesystem doesn't seem to be mounted"
                  " under /proc, missing required directory /proc/sys and"
                  " the file /proc/sysrq-trigger")
576 577 578 579

  if constants.NV_TIME in what:
    result[constants.NV_TIME] = utils.SplitTime(time.time())

580
  return result
Iustin Pop's avatar
Iustin Pop committed
581 582 583 584 585


def GetVolumeList(vg_name):
  """Compute list of logical volumes and their size.

  @type vg_name: str
  @param vg_name: the volume group whose LVs we should list
  @rtype: dict
  @return:
      dictionary of all partitions (key) with value being a tuple of
      their size (in MiB, as the string printed by lvs), inactive and
      online status::

        {'test1': ('20.06', True, True)}

      Virtual volumes are not included, and lines of the lvs output
      which cannot be parsed are logged and skipped.
  @raise RPCFail: if the lvs command fails

  """
  lvs = {}
  sep = '|'
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=%s" % sep,
                         "-olv_name,lv_size,lv_attr", vg_name])
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s", result.output)

  # The lv_attr column is matched as "six or more" characters: only the
  # first six flags are inspected below, and requiring exactly six would
  # reject every line as soon as lvs prints additional attribute flags
  valid_line_re = re.compile(r"^ *([^|]+)\|([0-9.]+)\|([^|]{6,})\|?$")
  for line in result.stdout.splitlines():
    line = line.strip()
    match = valid_line_re.match(line)
    if not match:
      logging.error("Invalid line returned from lvs output: '%s'", line)
      continue
    name, size, attr = match.groups()
    if attr[0] == 'v':
      # we don't want to report such (virtual) volumes as existing,
      # since they don't really hold data
      continue
    inactive = attr[4] == '-'
    online = attr[5] == 'o'
    lvs[name] = (size, inactive, online)

  return lvs
Iustin Pop's avatar
Iustin Pop committed
625 626 627


def ListVolumeGroups():
  """List the volume groups and their size.

  Thin wrapper over C{utils.ListVolumeGroups}.

  @rtype: dict
  @return: dictionary with keys volume name and values the
      size of the volume

  """
  vg_sizes = utils.ListVolumeGroups()
  return vg_sizes
Iustin Pop's avatar
Iustin Pop committed
636 637


638 639 640
def NodeVolumes():
  """List all volumes on this node.

  @rtype: list
  @return:
    A list of dictionaries, each having four keys:
      - name: the logical volume name,
      - size: the size of the logical volume
      - dev: the physical device on which the LV lives
      - vg: the volume group to which it belongs

    Lines of the lvs output which do not have enough fields are
    logged and skipped.

    Note that since a logical volume can live on multiple physical
    volumes, the resulting list might include a logical volume
    multiple times.
  @raise RPCFail: if the lvs command fails

  """
  result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
                         "--separator=|",
                         "--options=lv_name,lv_size,devices,vg_name"])
  if result.failed:
    _Fail("Failed to list logical volumes, lvs output: %s",
          result.output)

  volumes = []
  for raw_line in result.stdout.splitlines():
    if raw_line.count('|') < 3:
      logging.warning("Strange line in the output from lvs: '%s'", raw_line)
      continue

    fields = [field.strip() for field in raw_line.split('|')]
    # the devices column is a comma-separated list of entries; only the
    # part before the opening parenthesis of each entry is kept
    for dev_entry in fields[2].split(","):
      volumes.append({
        'name': fields[0],
        'size': fields[1],
        'dev': dev_entry.split('(')[0],
        'vg': fields[3],
        })

  return volumes
682 683


Iustin Pop's avatar
Iustin Pop committed
684
def BridgesExist(bridges_list):
  """Check if a list of bridges exist on the current node.

  @type bridges_list: list
  @param bridges_list: the names of the bridges to look for
  @rtype: None
  @raise RPCFail: if at least one of the bridges is missing; the
      message lists all the missing ones

  """
  missing = [bridge for bridge in bridges_list
             if not utils.BridgeExists(bridge)]

  if missing:
    _Fail("Missing bridges %s", utils.CommaJoin(missing))
698

Iustin Pop's avatar
Iustin Pop committed
699

700
def GetInstanceList(hypervisor_list):
Alexander Schreiber's avatar
Alexander Schreiber committed
701
  """Provides a list of instances.
Iustin Pop's avatar
Iustin Pop committed
702

703 704 705 706 707
  @type hypervisor_list: list
  @param hypervisor_list: the list of hypervisors to query information

  @rtype: list
  @return: a list of all running instances on the current node
Iustin Pop's avatar
Iustin Pop committed
708 709
    - instance1.example.com
    - instance2.example.com
Iustin Pop's avatar
Iustin Pop committed
710

711
  """
712 713 714 715 716 717
  results = []
  for hname in hypervisor_list:
    try:
      names = hypervisor.GetHypervisor(hname).ListInstances()
      results.extend(names)
    except errors.HypervisorError, err:
718 719
      _Fail("Error enumerating instances (hypervisor %s): %s",
            hname, err, exc=True)
Iustin Pop's avatar
Iustin Pop committed
720

721
  return results
Iustin Pop's avatar
Iustin Pop committed
722 723


724
def GetInstanceInfo(instance, hname):
  """Gives back the information about an instance as a dictionary.

  @type instance: string
  @param instance: the instance name
  @type hname: string
  @param hname: the hypervisor type of the instance

  @rtype: dict
  @return: an empty dictionary if the hypervisor returns no data for
      the instance, otherwise a dictionary with the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)

  """
  iinfo = hypervisor.GetHypervisor(hname).GetInstanceInfo(instance)
  if iinfo is None:
    return {}

  # positions 2, 4 and 5 of the hypervisor data are the ones exported
  return {
    'memory': iinfo[2],
    'state': iinfo[4],
    'time': iinfo[5],
    }
Iustin Pop's avatar
Iustin Pop committed
748 749


750 751 752 753 754 755 756 757 758 759 760 761 762
def GetInstanceMigratable(instance):
  """Checks whether an instance can be migrated.

  The instance must be listed by its hypervisor, and the block device
  symlink of each of its disks must exist.

  @type instance: L{objects.Instance}
  @param instance: object representing the instance to be checked.

  @rtype: None
  @raise RPCFail: if the instance is not running or one of the disk
      symlinks is missing

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  iname = instance.name
  if iname not in hyper.ListInstances():
    _Fail("Instance %s is not running", iname)

  for idx, _ in enumerate(instance.disks):
    symlink = _GetBlockDevSymlinkPath(iname, idx)
    if not os.path.islink(symlink):
      _Fail("Instance %s was not restarted since ganeti 1.2.5", iname)
771 772


773
def GetAllInstancesInfo(hypervisor_list):
  """Gather data about all instances.

  This is the equivalent of L{GetInstanceInfo}, except that it
  computes data for all instances at once, thus being faster if one
  needs data about more than one instance.

  @type hypervisor_list: list
  @param hypervisor_list: list of hypervisors to query for instance data

  @rtype: dict
  @return: dictionary of instance: data, with data having the following keys:
      - memory: memory size of instance (int)
      - state: xen state of instance (string)
      - time: cpu time of instance (float)
      - vcpus: the number of vcpus
  @raise RPCFail: if the same instance name is reported more than once
      with different memory or vcpus values

  """
  collected = {}

  for hname in hypervisor_list:
    hv_instances = hypervisor.GetHypervisor(hname).GetAllInstancesInfo()
    if not hv_instances:
      continue

    for name, _, memory, vcpus, state, times in hv_instances:
      entry = {
        'memory': memory,
        'vcpus': vcpus,
        'state': state,
        'time': times,
        }
      if name in collected:
        # we only check static parameters, like memory and vcpus,
        # and not state and time which can change between the
        # invocations of the different hypervisors
        previous = collected[name]
        for key in ('memory', 'vcpus'):
          if entry[key] != previous[key]:
            _Fail("Instance %s is running twice"
                  " with different parameters", name)
      collected[name] = entry

  return collected
Iustin Pop's avatar
Iustin Pop committed
814 815


Iustin Pop's avatar
Iustin Pop committed
816 817 818 819 820 821 822 823 824 825 826 827 828 829
def _InstanceLogName(kind, os_name, instance):
  """Build the path of the OS log file for an instance operation.

  The file name has the form C{kind-os_name-instance-timestamp.log} and
  is placed under C{constants.LOG_OS_DIR}. Both the OS name and the
  instance name are taken as plain strings, because not every caller
  has an instance object carrying them.

  @type kind: string
  @param kind: the operation type (e.g. add, import, etc.)
  @type os_name: string
  @param os_name: the os name
  @type instance: string
  @param instance: the name of the instance being imported/added/etc.

  """
  name_parts = (kind, os_name, instance, utils.TimestampForFilename())
  log_basename = "%s-%s-%s-%s.log" % name_parts
  return utils.PathJoin(constants.LOG_OS_DIR, log_basename)


835
def InstanceOsAdd(instance, reinstall, debug):
  """Install an OS on an instance by running the OS create script.

  The script runs from the OS definition directory with its output
  redirected to a per-operation log file. If the script fails, the
  failure is logged and the last 20 lines of that log file are handed
  to L{_Fail} as part of the error message.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS is to be installed
  @type reinstall: boolean
  @param reinstall: whether this is an instance reinstall
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None

  """
  os_obj = OSFromDisk(instance.os)

  script_env = OSEnvironment(instance, os_obj, debug)
  if reinstall:
    # tell the create script that it runs as part of a reinstall
    script_env['INSTANCE_REINSTALL'] = "1"

  log_path = _InstanceLogName("add", instance.os, instance.name)

  result = utils.RunCmd([os_obj.create_script], env=script_env,
                        cwd=os_obj.path, output=log_path)
  if not result.failed:
    return

  logging.error("os create command '%s' returned error: %s, logfile: %s,"
                " output: %s", result.cmd, result.fail_reason, log_path,
                result.output)
  log_tail = "\n".join([utils.SafeEncode(line)
                        for line in utils.TailFile(log_path, lines=20)])
  # log=False: the failure has already been logged above
  _Fail("OS create script failed (%s), last lines in the"
        " log file:\n%s", result.fail_reason, log_tail, log=False)
865 866


867
def RunRenameInstance(instance, old_name, debug):
  """Run the OS rename script for an instance.

  The script runs from the OS definition directory, with the previous
  name exported as C{OLD_INSTANCE_NAME} and its output redirected to a
  per-operation log file. If the script fails, the failure is logged
  and the last 20 lines of that log file are handed to L{_Fail} as part
  of the error message.

  @type instance: L{objects.Instance}
  @param instance: Instance whose OS rename script is to be run; its
      C{name} is used together with C{old_name} in the log file name
  @type old_name: string
  @param old_name: previous instance name
  @type debug: integer
  @param debug: debug level, passed to the OS scripts
  @rtype: None

  """
  inst_os = OSFromDisk(instance.os)

  rename_env = OSEnvironment(instance, inst_os, debug)
  rename_env['OLD_INSTANCE_NAME'] = old_name

  logfile = _InstanceLogName("rename", instance.os,
                             "%s-%s" % (old_name, instance.name))

  result = utils.RunCmd([inst_os.rename_script], env=rename_env,
                        cwd=inst_os.path, output=logfile)

  if result.failed:
    # name the right script and include the logfile path, in line with
    # the message logged by InstanceOsAdd
    logging.error("os rename command '%s' returned error: %s, logfile: %s,"
                  " output: %s", result.cmd, result.fail_reason, logfile,
                  result.output)
    lines = [utils.SafeEncode(val)
             for val in utils.TailFile(logfile, lines=20)]
    # log=False: the failure has already been logged above
    _Fail("OS rename script failed (%s), last lines in the"
          " log file:\n%s", result.fail_reason, "\n".join(lines), log=False)
Iustin Pop's avatar
Iustin Pop committed
898 899 900


def _GetVGInfo(vg_name):
Michael Hanselmann's avatar
Michael Hanselmann committed
901
  """Get information about the volume group.
Iustin Pop's avatar
Iustin Pop committed
902

Iustin Pop's avatar
Iustin Pop committed
903 904 905 906 907 908 909 910
  @type vg_name: str
  @param vg_name: the volume group which we query
  @rtype: dict
  @return:
    A dictionary with the following keys:
      - C{vg_size} is the total size of the volume group in MiB
      - C{vg_free} is the free size of the volume group in MiB
      - C{pv_count} are the number of physical disks in that VG
Iustin Pop's avatar
Iustin Pop committed
911

Iustin Pop's avatar
Iustin Pop committed
912 913
    If an error occurs during gathering of data, we return the same dict
    with keys all set to None.
914

Iustin Pop's avatar
Iustin Pop committed
915
  """
916 917
  retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])

Iustin Pop's avatar
Iustin Pop committed
918 919 920 921
  retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
                         "--nosuffix", "--units=m", "--separator=:", vg_name])

  if retval.failed:
922
    logging.error("volume group %s not present", vg_name)
923
    return retdic
Iustin Pop's avatar
Iustin Pop committed
924
  valarr = retval.stdout.strip().rstrip(':').split(':')
925 926 927 928 929 930 931
  if len(valarr) == 3:
    try:
      retdic = {
        "vg_size": int(round(float(valarr[0]), 0)),
        "vg_free": int(round(float(valarr[1]), 0)),
        "pv_count": int(valarr[2]),
        }
932
    except (TypeError, ValueError), err:
Iustin Pop's avatar
Iustin Pop committed
933
      logging.exception("Fail to parse vgs output: %s", err)
934
  else:
935 936
    logging.error("vgs output has the wrong number of fields (expected"
                  " three): %s", str(valarr))
Iustin Pop's avatar
Iustin Pop committed
937 938 939
  return retdic


940
def _GetBlockDevSymlinkPath(instance_name, idx):
  """Compute the symlink path for one disk of an instance.

  The link is named C{instance_name:idx} and lives under
  C{constants.DISK_LINKS_DIR}.

  @type instance_name: string
  @param instance_name: the name of the instance
  @type idx: integer
  @param idx: the index of the disk, formatted with C{%d}
  @rtype: string
  @return: the result of joining the links directory and the link name

  """
  links_dir = constants.DISK_LINKS_DIR
  link_name = "%s:%d" % (instance_name, idx)
  return utils.PathJoin(links_dir, link_name)