#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Configuration management for Ganeti

This module provides the interface to the Ganeti cluster configuration.

The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.

Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.

"""

34
# pylint: disable=R0904
35 36
# R0904: Too many public methods

37
import copy
Iustin Pop's avatar
Iustin Pop committed
38 39
import os
import random
40
import logging
41
import time
42
import threading
43
import itertools
Iustin Pop's avatar
Iustin Pop committed
44 45 46 47

from ganeti import errors
from ganeti import utils
from ganeti import constants
48
import ganeti.wconfd as wc
Iustin Pop's avatar
Iustin Pop committed
49
from ganeti import objects
50
from ganeti import serializer
Balazs Lecz's avatar
Balazs Lecz committed
51
from ganeti import uidpool
52
from ganeti import netutils
53
from ganeti import runtime
54
from ganeti import pathutils
55
from ganeti import network
56 57


58 59 60 61 62 63 64 65 66 67 68 69 70 71
def GetWConfdContext(ec_id, livelock):
  """Prepare a context for communication with WConfd.

  WConfd needs to know the identity of each caller to properly manage locks and
  detect job death. This helper function prepares the identity object given a
  job ID (optional) and a livelock file.

  @type ec_id: int, or None
  @param ec_id: the job ID or None, if the caller isn't a job
  @type livelock: L{ganeti.utils.livelock.LiveLock}
  @param livelock: a livelock object holding the lockfile needed for WConfd
  @rtype: tuple
  @return: the WConfd context, a tuple of (identity, livelock path, PID of
    this process); the identity is C{ec_id} or, when that is None, the name
    of the current thread

  """
  if ec_id is None:
    # callers that are not jobs are identified by the name of their thread
    identity = threading.current_thread().name
  else:
    identity = ec_id
  return (identity, livelock.GetPath(), os.getpid())
78 79


80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
def GetConfig(ec_id, livelock, **kwargs):
  """Build a L{ConfigWriter} instance that is connected to WConfd.

  The WConfd context is derived from the given job ID and livelock, a new
  WConfd client is created, and both are handed to the ConfigWriter
  constructor together with any extra keyword arguments.

  @type ec_id: int, or None
  @param ec_id: the job ID or None, if the caller isn't a job
  @type livelock: L{ganeti.utils.livelock.LiveLock}
  @param livelock: a livelock object holding the lockfile needed for WConfd
  @type kwargs: dict
  @param kwargs: Any additional arguments for the ConfigWriter constructor;
    C{wconfdcontext} and C{wconfd} entries are overwritten
  @rtype: L{ConfigWriter}
  @return: the newly constructed ConfigWriter

  """
  wconfd_context = GetWConfdContext(ec_id, livelock)
  wconfd_client = wc.Client()
  kwargs.update(wconfdcontext=wconfd_context, wconfd=wconfd_client)
  return ConfigWriter(**kwargs)


def _ConfigSync(shared=0):
  """Configuration synchronization decorator.

  The decorated method runs inside the context manager returned by the
  C{GetConfigManager(shared)} method of its first positional argument, and
  the call and its result are logged at debug level.

  @param shared: value passed on unchanged to C{GetConfigManager}
  @return: a decorator for methods whose first argument provides
    C{GetConfigManager}

  """
  import functools

  def wrap(fn):
    # keep the name and docstring of the decorated method visible
    @functools.wraps(fn)
    def sync_function(*args, **kwargs):
      with args[0].GetConfigManager(shared):
        # arguments are passed to the logger unformatted, so that they are
        # only converted to strings when debug logging is actually enabled
        logging.debug("ConfigWriter.%s(%s, %s)", fn.__name__, args, kwargs)
        result = fn(*args, **kwargs)
        logging.debug("ConfigWriter.%s(...) returned '%s'",
                      fn.__name__, result)
        return result
    return sync_function
  return wrap

# Job ID used for resource management at configuration upgrade time
# NOTE(review): presumably stands in for a real job ID when reserving
# resources during an upgrade; confirm against the code using it
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"


Michael Hanselmann's avatar
Michael Hanselmann committed
120
def _ValidateConfig(data):
  """Verifies that a configuration dict looks valid.

  This only verifies the version of the configuration.

  @type data: dict
  @param data: the configuration; only its C{version} entry is looked at
  @raise errors.ConfigVersionMismatch: if the version differs from what
      we expect

  """
  found_version = data['version']
  if found_version != constants.CONFIG_VERSION:
    raise errors.ConfigVersionMismatch(constants.CONFIG_VERSION,
                                       found_version)
Iustin Pop's avatar
Iustin Pop committed
132

133

Guido Trotter's avatar
Guido Trotter committed
134 135 136 137 138 139 140 141 142 143 144
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  @ivar _ec_reserved: dict mapping an execution context ID to the set of
      resources reserved by that execution context

  """
  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Check whether a resource is reserved by any execution context.

    @param resource: the resource to look for
    @rtype: boolean
    @return: True if some execution context holds the resource

    """
    return any(resource in holder_reserved
               for holder_reserved in self._ec_reserved.values())

  def Reserve(self, ec_id, resource):
    """Reserve a resource for an execution context.

    @param ec_id: the execution context the resource is reserved for
    @param resource: the resource to reserve
    @raise errors.ReservationError: if the resource is already reserved,
        no matter by which execution context

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget all reservations of an execution context.

    Unknown execution contexts are silently ignored.

    """
    self._ec_reserved.pop(ec_id, None)

  def GetReserved(self):
    """Return the resources reserved by all execution contexts.

    @rtype: set
    @return: a new set holding every reserved resource

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def GetECReserved(self, ec_id):
    """Return the resources reserved by one execution context.

    Used when you want to retrieve all reservations for a specific
    execution context. E.g. when committing reserved IPs for a specific
    network.

    @rtype: set
    @return: a new set; empty if the execution context holds nothing

    """
    return set(self._ec_reserved.get(ec_id, ()))

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type and reserve it.

    @param existing: iterable of resources that are already in use and
        hence must not be returned
    @type generate_one_fn: callable
    @param generate_one_fn: function called without arguments to produce
        one candidate resource; a None result is treated as a failed attempt
    @param ec_id: the execution context the new resource is reserved for
    @return: the newly generated and reserved resource
    @raise errors.ConfigurationError: if no unused resource was produced
        within the allowed number of attempts

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    # bounded number of attempts; the "else" branch only runs when the loop
    # finished without finding an unused resource
    for _ in range(retries):
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource


200
def _MatchNameComponentIgnoreCase(short_name, names):
  """Wrapper around L{utils.text.MatchNameComponent}.

  Calls the wrapped function with C{case_sensitive=False} and returns its
  result unchanged.

  @param short_name: passed on as the name to look up
  @param names: passed on as the names to search in

  """
  return utils.MatchNameComponent(short_name, names, case_sensitive=False)


207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
def _CheckInstanceDiskIvNames(disks):
  """Find disks whose C{iv_name} does not match their list position.

  The disk at index N is expected to be named C{disk/N}.

  @type disks: list of L{objects.Disk}
  @param disks: List of disks
  @rtype: list of tuples; (int, string, string)
  @return: List of wrongly named disks, each tuple contains disk index,
    expected and actual name

  """
  # (index, expected name, actual name) for every disk, in list order
  candidates = ((position, "disk/%s" % position, dsk.iv_name)
                for (position, dsk) in enumerate(disks))
  return [(position, wanted, actual)
          for (position, wanted, actual) in candidates
          if actual != wanted]

226

227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
class ConfigManager(object):
  """Context manager opening the configuration on entry and closing it on exit.

  On exit the configuration writer is asked to save the configuration only
  if it was not opened in shared mode and the managed block did not raise.

  """
  def __init__(self, config_writer, shared=False):
    """Remember the configuration writer and the requested access mode.

    @type config_writer: L{ConfigWriter}
    @param config_writer: the object whose configuration is opened and closed
    @param shared: passed to the writer when opening the configuration; a
        true value also prevents saving on exit

    """
    assert isinstance(config_writer, ConfigWriter), \
           "invalid argument: Not a ConfigWriter"
    self._shared = shared
    self._config_writer = config_writer

  def __enter__(self):
    """Open the configuration; on failure try to close it and re-raise.

    """
    writer = self._config_writer
    try:
      writer._OpenConfig(self._shared) # pylint: disable=W0212
    except Exception:
      logging.debug("Opening configuration failed")
      # best-effort cleanup, a failure here must not hide the re-raise below
      try:
        writer._CloseConfig(False) # pylint: disable=W0212
      except Exception: # pylint: disable=W0703
        logging.debug("Closing configuration failed as well")
      raise

  def __exit__(self, exc_type, exc_value, traceback):
    """Close the configuration, saving it after a successful write operation.

    @return: always False, so exceptions raised in the block are propagated

    """
    failed = exc_type is not None
    if failed:
      logging.debug("Configuration operation failed,"
                    " the changes will not be saved")
    # save only for an exclusive (write) operation that succeeded
    save = not (self._shared or failed)
    # pylint: disable=W0212
    self._config_writer._CloseConfig(save)
    return False


258
class ConfigWriter(object):
  """The interface to the cluster configuration.

  WARNING: The class is no longer thread-safe!
  Each thread must construct a separate instance.

  @ivar _temporary_lvs: reservation manager for temporary LVs
  @ivar _all_rms: a list of all temporary reservation managers

  """
268
  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
               accept_foreign=False, wconfdcontext=None, wconfd=None):
    """Initialize the object's state; no configuration is loaded here.

    @param cfg_file: path of the configuration file; if None,
        L{pathutils.CLUSTER_CONF_FILE} is used
    @param offline: stored as C{_offline}
    @param _getents: stored as C{_getents}; defaults to L{runtime.GetEnts}
    @param accept_foreign: stored as C{_accept_foreign}
    @param wconfdcontext: the WConfd context, as returned by
        L{_GetWConfdContext}
    @param wconfd: stored as C{_wconfd}

    """
    self.write_count = 0
    self._config_data = None
    self._SetConfigData(None)
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = pathutils.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file
    self._getents = _getents
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    self._temporary_lvs = TemporaryReservationManager()
    self._temporary_ips = TemporaryReservationManager()
    self._all_rms = [self._temporary_ids, self._temporary_macs,
                     self._temporary_secrets, self._temporary_lvs,
                     self._temporary_ips]
    # Note: in order to prevent errors when resolving our name later,
    # we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = netutils.Hostname.GetSysName()
    self._cfg_id = None
    self._wconfdcontext = wconfdcontext
    self._wconfd = wconfd
    self._accept_foreign = accept_foreign
    self._lock_count = 0
    self._lock_current_shared = None
Iustin Pop's avatar
Iustin Pop committed
299

300 301 302
  def _ConfigData(self):
    """Return the configuration data object currently held (may be None).

    """
    return self._config_data

303 304 305 306 307 308
  def _SetConfigData(self, cfg):
    """Replace the configuration data object held by this instance.

    @param cfg: the new configuration data, or None

    """
    self._config_data = cfg

  def _GetWConfdContext(self):
    """Return the WConfd context passed to the constructor.

    """
    return self._wconfdcontext

Iustin Pop's avatar
Iustin Pop committed
309 310 311 312 313 314
  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    @rtype: boolean
    @return: True if the default cluster configuration file
        (L{pathutils.CLUSTER_CONF_FILE}) exists

    """
    return os.path.exists(pathutils.CLUSTER_CONF_FILE)
Iustin Pop's avatar
Iustin Pop committed
316

317
  @_ConfigSync(shared=1)
318 319 320
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.

    @type node: L{objects.Node}
    @param node: The node we want to know the params for
    @return: A dict with the filled in node params

    """
    nodegroup = self._UnlockedGetNodeGroup(node.group)
    return self._ConfigData().cluster.FillND(node, nodegroup)
328

329
  @_ConfigSync(shared=1)
330 331 332 333 334 335 336 337
  def GetNdGroupParams(self, nodegroup):
    """Get the node groups params populated with cluster defaults.

    @type nodegroup: L{objects.NodeGroup}
    @param nodegroup: The node group we want to know the params for
    @return: A dict with the filled in node group params

    """
    return self._ConfigData().cluster.FillNDGroup(nodegroup)
339

340
  @_ConfigSync(shared=1)
341 342 343 344 345 346 347 348 349 350
  def GetInstanceDiskParams(self, instance):
    """Get the disk params populated with inherit chain.

    The parameters are those of the node group that the instance's primary
    node belongs to.

    @type instance: L{objects.Instance}
    @param instance: The instance we want to know the params for
    @return: A dict with the filled in disk params

    """
    node = self._UnlockedGetNodeInfo(instance.primary_node)
    nodegroup = self._UnlockedGetNodeGroup(node.group)
    return self._UnlockedGetGroupDiskParams(nodegroup)

353
  @_ConfigSync(shared=1)
354 355 356
  def GetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain.

    This is just a wrapper over L{_UnlockedGetGroupDiskParams}.

    @type group: L{objects.NodeGroup}
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
    return self._UnlockedGetGroupDiskParams(group)

  def _UnlockedGetGroupDiskParams(self, group):
    """Get the disk params populated with inherit chain down to node-group.

367
    @type group: L{objects.NodeGroup}
368 369 370 371
    @param group: The group we want to know the params for
    @return: A dict with the filled in disk params

    """
372 373 374
    data = self._ConfigData().cluster.SimpleFillDP(group.diskparams)
    assert isinstance(data, dict), "Not a dictionary: " + str(data)
    return data
375

376
  def _UnlockedGetNetworkMACPrefix(self, net_uuid):
377 378 379 380
    """Return the network mac prefix if it exists or the cluster level default.

    """
    prefix = None
381
    if net_uuid:
382 383 384
      nobj = self._UnlockedGetNetwork(net_uuid)
      if nobj.mac_prefix:
        prefix = nobj.mac_prefix
385 386 387 388 389 390 391 392 393 394

    return prefix

  def _GenerateOneMAC(self, prefix=None):
    """Return a function that randomly generates a MAC suffic
       and appends it to the given prefix. If prefix is not given get
       the cluster level default.

    """
    if not prefix:
395
      prefix = self._ConfigData().cluster.mac_prefix
396 397 398 399 400 401 402 403 404 405

    def GenMac():
      byte1 = random.randrange(0, 256)
      byte2 = random.randrange(0, 256)
      byte3 = random.randrange(0, 256)
      mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
      return mac

    return GenMac

406
  @_ConfigSync(shared=1)
407
  def GenerateMAC(self, net_uuid, ec_id):
    """Generate a MAC for an instance.

    The new MAC does not collide with the MACs of the current instances nor
    with MACs that are temporarily reserved, and it is itself reserved for
    the given execution context.

    @param net_uuid: UUID of the network whose MAC prefix should be used;
        if empty or if the network has no prefix, the cluster level default
        prefix is used
    @param ec_id: the execution context to reserve the MAC for
    @return: the generated MAC address

    """
    existing = self._AllMACs()
    prefix = self._UnlockedGetNetworkMACPrefix(net_uuid)
    gen_mac = self._GenerateOneMAC(prefix)
    # use the MAC reservation manager, the same one L{ReserveMAC} uses, so
    # that generated and explicitly reserved MACs are checked against each
    # other
    return self._temporary_macs.Generate(existing, gen_mac, ec_id)
Iustin Pop's avatar
Iustin Pop committed
417

418
  @_ConfigSync(shared=1)
419 420
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    @param mac: the MAC address to reserve
    @param ec_id: the execution context to reserve the MAC for
    @raise errors.ReservationError: if the MAC is used by an instance in the
        configuration or is already temporarily reserved

    """
    all_macs = self._AllMACs()
    if mac in all_macs:
      raise errors.ReservationError("mac already in use")
    else:
      self._temporary_macs.Reserve(ec_id, mac)
431

432 433 434 435 436 437 438 439 440 441 442 443 444 445 446
  def _UnlockedCommitTemporaryIps(self, ec_id):
    """Apply the pending IP address actions of an execution context.

    Every (action, address, network UUID) entry held for the execution
    context in the temporary IP reservations is committed to the address
    pool of its network.

    @param ec_id: the execution context whose reservations are committed

    """
    pending = self._temporary_ips.GetECReserved(ec_id)
    for (ip_action, ip_address, network_uuid) in pending:
      self._UnlockedCommitIp(ip_action, network_uuid, ip_address)

  def _UnlockedCommitIp(self, action, net_uuid, address):
    """Commit a reserved IP address to an IP pool.

    Depending on the action, the IP address is marked as reserved in the
    network's IP pool or released from it. Any other action is ignored.

    @param action: L{constants.RESERVE_ACTION} or
        L{constants.RELEASE_ACTION}
    @param net_uuid: UUID of the network owning the address pool
    @param address: the IP address to reserve or release

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    if action == constants.RESERVE_ACTION:
      pool.Reserve(address)
    elif action == constants.RELEASE_ACTION:
      pool.Release(address)

  def _UnlockedReleaseIp(self, net_uuid, address, ec_id):
    """Give a specific IP address back to an IP pool.

    The pool itself is not touched here: a release action for the address
    is only recorded in the temporary IP reservations of the execution
    context.

    @param net_uuid: UUID of the network the address belongs to
    @param address: the IP address to release
    @param ec_id: the execution context recording the release

    """
    self._temporary_ips.Reserve(ec_id,
                                (constants.RELEASE_ACTION, address, net_uuid))
461

462
  @_ConfigSync(shared=1)
463
  def ReleaseIp(self, net_uuid, address, ec_id):
    """Give a specified IP address back to an IP pool.

    This is just a wrapper around _UnlockedReleaseIp; nothing is done if no
    network is given.

    @param net_uuid: UUID of the network the address belongs to; if empty,
        the call is a no-op
    @param address: the IP address to release
    @param ec_id: the execution context recording the release

    """
    if net_uuid:
      self._UnlockedReleaseIp(net_uuid, address, ec_id)
471

472
  @_ConfigSync(shared=1)
473
  def GenerateIp(self, net_uuid, ec_id):
    """Find a free IPv4 address for an instance.

    The address is taken from the address pool of the given network and a
    reserve action for it is recorded in the temporary IP reservations of
    the execution context.

    @param net_uuid: UUID of the network to take the address from
    @param ec_id: the execution context to reserve the address for
    @return: the generated IP address
    @raise errors.ReservationError: if the pool cannot provide a free
        address

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)

    def gen_one():
      try:
        ip = pool.GenerateFree()
      except errors.AddressPoolError:
        raise errors.ReservationError("Cannot generate IP. Network is full")
      return (constants.RESERVE_ACTION, ip, net_uuid)

    # reservations are (action, address, network UUID) tuples
    _, address, _ = self._temporary_ips.Generate([], gen_one, ec_id)
    return address

490
  def _UnlockedReserveIp(self, net_uuid, address, ec_id, check=True):
    """Reserve a given IPv4 address for use by an instance.

    @param net_uuid: UUID of the network the address belongs to
    @param address: the IP address to reserve
    @param ec_id: the execution context to reserve the address for
    @type check: boolean
    @param check: whether an externally reserved address is rejected
    @raise errors.ReservationError: if the address is not part of the
        network, is already in use, or (with C{check}) is externally
        reserved

    """
    nobj = self._UnlockedGetNetwork(net_uuid)
    pool = network.AddressPool(nobj)
    try:
      isreserved = pool.IsReserved(address)
      isextreserved = pool.IsReserved(address, external=True)
    except errors.AddressPoolError:
      raise errors.ReservationError("IP address not in network")
    if isreserved:
      raise errors.ReservationError("IP address already in use")
    if check and isextreserved:
      raise errors.ReservationError("IP is externally reserved")

    return self._temporary_ips.Reserve(ec_id,
                                       (constants.RESERVE_ACTION,
                                        address, net_uuid))
509

510
  @_ConfigSync(shared=1)
511
  def ReserveIp(self, net_uuid, address, ec_id, check=True):
    """Reserve a given IPv4 address for use by an instance.

    This is just a wrapper around _UnlockedReserveIp; nothing is done if no
    network is given.

    @param net_uuid: UUID of the network the address belongs to; if empty,
        the call is a no-op
    @param address: the IP address to reserve
    @param ec_id: the execution context to reserve the address for
    @type check: boolean
    @param check: whether an externally reserved address is rejected

    """
    if net_uuid:
      return self._UnlockedReserveIp(net_uuid, address, ec_id, check)
517

518
  @_ConfigSync(shared=1)
519 520 521 522 523 524 525 526 527 528 529
  def ReserveLV(self, lv_name, ec_id):
    """Reserve an VG/LV pair for an instance.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve
    @param ec_id: the execution context to reserve the name for
    @raise errors.ReservationError: if the name is used by an instance in
        the configuration or is already temporarily reserved

    """
    all_lvs = self._AllLVs()
    if lv_name in all_lvs:
      raise errors.ReservationError("LV already in use")
    else:
      self._temporary_lvs.Reserve(ec_id, lv_name)
531

532
  @_ConfigSync(shared=1)
533
  def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret.

    This checks the current disks for duplicates.

    @param ec_id: the execution context to reserve the secret for
    @return: the generated secret

    """
    return self._temporary_secrets.Generate(self._AllDRBDSecrets(),
                                            utils.GenerateSecret,
                                            ec_id)
Michael Hanselmann's avatar
Michael Hanselmann committed
542

543
  def _AllLVs(self):
544 545 546 547
    """Compute the list of all LVs.

    """
    lvnames = set()
548
    for instance in self._ConfigData().instances.values():
549 550 551 552 553
      node_data = instance.MapLVsByNode()
      for lv_list in node_data.values():
        lvnames.update(lv_list)
    return lvnames

554
  def _AllDisks(self):
555
    """Compute the list of all Disks (recursively, including children).
556 557

    """
558 559 560 561 562 563 564 565 566 567
    def DiskAndAllChildren(disk):
      """Returns a list containing the given disk and all of his children.

      """
      disks = [disk]
      if disk.children:
        for child_disk in disk.children:
          disks.extend(DiskAndAllChildren(child_disk))
      return disks

568
    disks = []
569
    for instance in self._ConfigData().instances.values():
570 571
      for disk in instance.disks:
        disks.extend(DiskAndAllChildren(disk))
572 573 574 575 576 577 578
    return disks

  def _AllNICs(self):
    """Compute the list of all NICs.

    """
    nics = []
579
    for instance in self._ConfigData().instances.values():
580 581 582
      nics.extend(instance.nics)
    return nics

583 584 585 586 587 588 589 590 591 592 593
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    Besides the UUIDs of all objects that have one, this covers the LV
    names and the keys under which instances and nodes are stored.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    existing = set()
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    existing.update(self._AllLVs())
    existing.update(self._ConfigData().instances.keys())
    existing.update(self._ConfigData().nodes.keys())
    existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
    return existing

601
  def _GenerateUniqueID(self, ec_id):
    """Generate an unique UUID.

    This checks the current node, instances and disk names for
    duplicates.

    @param ec_id: the execution context to reserve the ID for
    @rtype: string
    @return: the unique id

    """
    # temporarily reserved IDs are left out here, they are taken into
    # account by the reservation manager itself
    existing = self._AllIDs(include_temporary=False)
    return self._temporary_ids.Generate(existing, utils.NewUUID, ec_id)
613

614
  @_ConfigSync(shared=1)
615
  def GenerateUniqueID(self, ec_id):
    """Generate an unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to
    @rtype: string
    @return: the unique id

    """
    return self._GenerateUniqueID(ec_id)
625

Iustin Pop's avatar
Iustin Pop committed
626 627 628
  def _AllMACs(self):
    """Return all MACs present in the config.

Iustin Pop's avatar
Iustin Pop committed
629 630 631
    @rtype: list
    @return: the list of all MACs

Iustin Pop's avatar
Iustin Pop committed
632 633
    """
    result = []
634
    for instance in self._ConfigData().instances.values():
Iustin Pop's avatar
Iustin Pop committed
635 636 637 638 639
      for nic in instance.nics:
        result.append(nic.mac)

    return result

640 641 642
  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def helper(disk, result):
      """Recursively gather secrets from this disk."""
      if disk.dev_type == constants.DT_DRBD8:
        # the secret is the sixth element of a DRBD8 logical ID
        result.append(disk.logical_id[5])
      if disk.children:
        for child in disk.children:
          helper(child, result)

    result = []
    for instance in self._ConfigData().instances.values():
      for disk in instance.disks:
        helper(disk, result)

    return result

662
  def _CheckDiskIDs(self, disk, l_ids):
663 664 665 666 667 668 669 670 671 672 673
    """Compute duplicate disk IDs

    @type disk: L{objects.Disk}
    @param disk: the disk at which to start searching
    @type l_ids: list
    @param l_ids: list of current logical ids
    @rtype: list
    @return: a list of error messages

    """
    result = []
674 675 676 677 678
    if disk.logical_id is not None:
      if disk.logical_id in l_ids:
        result.append("duplicate logical id %s" % str(disk.logical_id))
      else:
        l_ids.append(disk.logical_id)
679 680 681

    if disk.children:
      for child in disk.children:
682
        result.extend(self._CheckDiskIDs(child, l_ids))
683 684
    return result

685
  def _UnlockedVerifyConfig(self):
686 687
    """Verify function.

688 689 690 691
    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

Iustin Pop's avatar
Iustin Pop committed
692
    """
693
    # pylint: disable=R0914
Iustin Pop's avatar
Iustin Pop committed
694 695
    result = []
    seen_macs = []
696
    ports = {}
697
    data = self._ConfigData()
698
    cluster = data.cluster
699
    seen_lids = []
700 701

    # global cluster checks
702
    if not cluster.enabled_hypervisors:
703
      result.append("enabled hypervisors list doesn't have any entries")
704
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
705 706
    if invalid_hvs:
      result.append("enabled hypervisors contains invalid entries: %s" %
707
                    utils.CommaJoin(invalid_hvs))
708 709
    missing_hvp = (set(cluster.enabled_hypervisors) -
                   set(cluster.hvparams.keys()))
710 711 712
    if missing_hvp:
      result.append("hypervisor parameters missing for the enabled"
                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))
713

714 715 716 717 718 719 720 721
    if not cluster.enabled_disk_templates:
      result.append("enabled disk templates list doesn't have any entries")
    invalid_disk_templates = set(cluster.enabled_disk_templates) \
                               - constants.DISK_TEMPLATES
    if invalid_disk_templates:
      result.append("enabled disk templates list contains invalid entries:"
                    " %s" % utils.CommaJoin(invalid_disk_templates))

722
    if cluster.master_node not in data.nodes:
723
      result.append("cluster has invalid primary node '%s'" %
724
                    cluster.master_node)
725

726 727 728 729 730 731 732 733 734 735 736 737
    def _helper(owner, attr, value, template):
      try:
        utils.ForceDictType(value, template)
      except errors.GenericError, err:
        result.append("%s has invalid %s: %s" % (owner, attr, err))

    def _helper_nic(owner, params):
      try:
        objects.NIC.CheckParameterSyntax(params)
      except errors.ConfigurationError, err:
        result.append("%s has invalid nicparams: %s" % (owner, err))

738
    def _helper_ipolicy(owner, ipolicy, iscluster):
739
      try:
740
        objects.InstancePolicy.CheckParameterSyntax(ipolicy, iscluster)
741 742
      except errors.ConfigurationError, err:
        result.append("%s has invalid instance policy: %s" % (owner, err))
743 744
      for key, value in ipolicy.items():
        if key == constants.ISPECS_MINMAX:
745 746
          for k in range(len(value)):
            _helper_ispecs(owner, "ipolicy/%s[%s]" % (key, k), value[k])
747 748 749
        elif key == constants.ISPECS_STD:
          _helper(owner, "ipolicy/" + key, value,
                  constants.ISPECS_PARAMETER_TYPES)
750 751
        else:
          # FIXME: assuming list type
752 753
          if key in constants.IPOLICY_PARAMETERS:
            exp_type = float
754 755
            # if the value is int, it can be converted into float
            convertible_types = [int]
756 757
          else:
            exp_type = list
758 759 760 761 762 763 764 765
            convertible_types = []
          # Try to convert from allowed types, if necessary.
          if any(isinstance(value, ct) for ct in convertible_types):
            try:
              value = exp_type(value)
              ipolicy[key] = value
            except ValueError:
              pass
766
          if not isinstance(value, exp_type):
767
            result.append("%s has invalid instance policy: for %s,"
768 769
                          " expecting %s, got %s" %
                          (owner, key, exp_type.__name__, type(value)))
770

771 772 773 774 775
    def _helper_ispecs(owner, parentkey, params):
      for (key, value) in params.items():
        fullkey = "/".join([parentkey, key])
        _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)

776 777 778 779 780 781 782 783
    # check cluster parameters
    _helper("cluster", "beparams", cluster.SimpleFillBE({}),
            constants.BES_PARAMETER_TYPES)
    _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
            constants.NICS_PARAMETER_TYPES)
    _helper_nic("cluster", cluster.SimpleFillNIC({}))
    _helper("cluster", "ndparams", cluster.SimpleFillND({}),
            constants.NDS_PARAMETER_TYPES)
784
    _helper_ipolicy("cluster", cluster.ipolicy, True)
785

786 787 788 789 790 791
    for disk_template in cluster.diskparams:
      if disk_template not in constants.DTS_HAVE_ACCESS:
        continue

      access = cluster.diskparams[disk_template].get(constants.LDP_ACCESS,
                                                     constants.DISK_KERNELSPACE)
792 793 794
      if access not in constants.DISK_VALID_ACCESS_MODES:
        result.append(
          "Invalid value of '%s:%s': '%s' (expected one of %s)" % (
795
            disk_template, constants.LDP_ACCESS, access,
796 797 798 799
            utils.CommaJoin(constants.DISK_VALID_ACCESS_MODES)
          )
        )

800
    # per-instance checks
801 802 803 804 805
    for instance_uuid in data.instances:
      instance = data.instances[instance_uuid]
      if instance.uuid != instance_uuid:
        result.append("instance '%s' is indexed by wrong UUID '%s'" %
                      (instance.name, instance_uuid))
Iustin Pop's avatar
Iustin Pop committed
806
      if instance.primary_node not in data.nodes:
807
        result.append("instance '%s' has invalid primary node '%s'" %
808
                      (instance.name, instance.primary_node))
Iustin Pop's avatar
Iustin Pop committed
809 810
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
811
          result.append("instance '%s' has invalid secondary node '%s'" %
812
                        (instance.name, snode))
Iustin Pop's avatar
Iustin Pop committed
813 814
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
815
          result.append("instance '%s' has NIC %d mac %s duplicate" %
816
                        (instance.name, idx, nic.mac))
Iustin Pop's avatar
Iustin Pop committed
817 818
        else:
          seen_macs.append(nic.mac)
819 820 821 822 823 824 825
        if nic.nicparams:
          filled = cluster.SimpleFillNIC(nic.nicparams)
          owner = "instance %s nic %d" % (instance.name, idx)
          _helper(owner, "nicparams",
                  filled, constants.NICS_PARAMETER_TYPES)
          _helper_nic(owner, filled)

826 827 828
      # disk template checks
      if not instance.disk_template in data.cluster.enabled_disk_templates:
        result.append("instance '%s' uses the disabled disk template '%s'." %
829
                      (instance.name, instance.disk_template))
830

831 832 833 834
      # parameter checks
      if instance.beparams:
        _helper("instance %s" % instance.name, "beparams",
                cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)
835 836

      # gather the drbd ports for duplicate checks
837
      for (idx, dsk) in enumerate(instance.disks):
Helga Velroyen's avatar
Helga Velroyen committed
838
        if dsk.dev_type in constants.DTS_DRBD:
839 840 841
          tcp_port = dsk.logical_id[2]
          if tcp_port not in ports:
            ports[tcp_port] = []
842
          ports[tcp_port].append((instance.name, "drbd disk %s" % idx))
843 844 845 846 847 848 849
      # gather network port reservation
      net_port = getattr(instance, "network_port", None)
      if net_port is not None:
        if net_port not in ports:
          ports[net_port] = []
        ports[net_port].append((instance.name, "network port"))

850 851 852 853
      # instance disk verify
      for idx, disk in enumerate(instance.disks):
        result.extend(["instance '%s' disk %d error: %s" %
                       (instance.name, idx, msg) for msg in disk.Verify()])
854
        result.extend(self._CheckDiskIDs(disk, seen_lids))
855

856 857 858 859 860 861 862 863 864
      wrong_names = _CheckInstanceDiskIvNames(instance.disks)
      if wrong_names:
        tmp = "; ".join(("name of disk %s should be '%s', but is '%s'" %
                         (idx, exp_name, actual_name))
                        for (idx, exp_name, actual_name) in wrong_names)

        result.append("Instance '%s' has wrongly named disks: %s" %
                      (instance.name, tmp))

865
    # cluster-wide pool of free ports
866
    for free_port in cluster.tcpudp_port_pool:
867 868 869 870 871 872 873 874 875 876
      if free_port not in ports:
        ports[free_port] = []
      ports[free_port].append(("cluster", "port marked as free"))

    # compute tcp/udp duplicate ports
    keys = ports.keys()
    keys.sort()
    for pnum in keys:
      pdata = ports[pnum]
      if len(pdata) > 1:
877
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
878 879 880 881
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

    # highest used tcp port check
    if keys:
882
      if keys[-1] > cluster.highest_used_port:
883
        result.append("Highest used port mismatch, saved %s, computed %s" %
884
                      (cluster.highest_used_port, keys[-1]))
885

886
    if not data.nodes[cluster.master_node].master_candidate:
887 888
      result.append("Master node is not a master candidate")

889
    # master candidate checks
890
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
891 892 893
    if mc_now < mc_max:
      result.append("Not enough master candidates: actual %d, target %d" %
                    (mc_now, mc_max))
894

895
    # node checks
Thomas Thrainer's avatar
Thomas Thrainer committed
896 897 898 899
    for node_uuid, node in data.nodes.items():
      if node.uuid != node_uuid:
        result.append("Node '%s' is indexed by wrong UUID '%s'" %
                      (node.name, node_uuid))
900 901 902
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
        result.append("Node %s state is invalid: master_candidate=%s,"
                      " drain=%s, offline=%s" %
903
                      (node.name, node.master_candidate, node.drained,
904
                       node.offline))
905 906 907 908 909 910 911
      if node.group not in data.nodegroups:
        result.append("Node '%s' has invalid group '%s'" %
                      (node.name, node.group))
      else:
        _helper("node %s" % node.name, "ndparams",
                cluster.FillND(node, data.nodegroups[node.group]),
                constants.NDS_PARAMETER_TYPES)
912 913 914 915
      used_globals = constants.NDC_GLOBALS.intersection(node.ndparams)
      if used_globals:
        result.append("Node '%s' has some global parameters set: %s" %
                      (node.name, utils.CommaJoin(used_globals)))
916

917
    # nodegroups checks
918
    nodegroups_names = set()
919 920 921
    for nodegroup_uuid in data.nodegroups:
      nodegroup = data.nodegroups[nodegroup_uuid]
      if nodegroup.uuid != nodegroup_uuid:
922
        result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
923
                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
924
      if utils.UUID_RE.match(nodegroup.name.lower()):
925
        result.append("node group '%s' (uuid: '%s') has uuid-like name" %
926
                      (nodegroup.name, nodegroup.uuid))
927
      if nodegroup.name in nodegroups_names:
928
        result.append("duplicate node group name '%s'" % nodegroup.name)
929 930
      else:
        nodegroups_names.add(nodegroup.name)
931
      group_name = "group %s" % nodegroup.name
932 933
      _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy),
                      False)
934
      if nodegroup.ndparams:
935
        _helper(group_name, "ndparams",
936 937 938
                cluster.SimpleFillND(nodegroup.ndparams),
                constants.NDS_PARAMETER_TYPES)

939
    # drbd minors check
940
    _, duplicates = self._UnlockedComputeDRBDMap()
941 942 943 944
    for node, minor, instance_a, instance_b in duplicates:
      result.append("DRBD minor %d on node %s is assigned twice to instances"
                    " %s and %s" % (minor, node, instance_a, instance_b))

945
    # IP checks
946
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
947 948 949 950 951
    ips = {}

    def _AddIpAddress(ip, name):
      """Record C{name} as a user of the address key C{ip}.

      The name is appended to the list stored under C{ip} in the enclosing
      C{ips} dictionary; the list is created the first time a key is seen.

      """
      if ip not in ips:
        ips[ip] = []
      ips[ip].append(name)

952
    _AddIpAddress(cluster.master_ip, "cluster_ip")
953 954

    for node in data.nodes.values():
955
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
956
      if node.secondary_ip != node.primary_ip:
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

    for instance in data.instances.values():
      for idx, nic in enumerate(instance.nics):
        if nic.ip is None:
          continue

        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
        nic_mode = nicparams[constants.NIC_MODE]
        nic_link = nicparams[constants.NIC_LINK]

        if nic_mode == constants.NIC_MODE_BRIDGED:
          link = "bridge:%s" % nic_link
        elif nic_mode == constants.NIC_MODE_ROUTED:
          link = "route:%s" % nic_link
        else:
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

975
        _AddIpAddress("%s/%s/%s" % (link, nic.ip, nic.network),