#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Configuration management for Ganeti

24
This module provides the interface to the Ganeti cluster configuration.
Iustin Pop's avatar
Iustin Pop committed
25

26 27
The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.
Iustin Pop's avatar
Iustin Pop committed
28

29 30
Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.
Iustin Pop's avatar
Iustin Pop committed
31 32 33

"""

34
# pylint: disable=R0904
35 36
# R0904: Too many public methods

Iustin Pop's avatar
Iustin Pop committed
37 38
import os
import random
39
import logging
40
import time
41
import itertools
Iustin Pop's avatar
Iustin Pop committed
42 43

from ganeti import errors
44
from ganeti import locking
Iustin Pop's avatar
Iustin Pop committed
45 46 47 48
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
49
from ganeti import serializer
Balazs Lecz's avatar
Balazs Lecz committed
50
from ganeti import uidpool
51
from ganeti import netutils
52
from ganeti import runtime
53 54


55
# Module-level lock serializing all configuration access; the
# ssynchronized decorators below acquire it shared (shared=1) for the
# read accessors and exclusively for the mutators.
_config_lock = locking.SharedLock("ConfigWriter")

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
59

60

Michael Hanselmann's avatar
Michael Hanselmann committed
61
def _ValidateConfig(data):
  """Verifies that a configuration objects looks valid.

  This only verifies the version of the configuration.

  @raise errors.ConfigVersionMismatch: if the version differs from what
      we expect

  """
  expected = constants.CONFIG_VERSION
  if data.version != expected:
    raise errors.ConfigVersionMismatch(expected, data.version)
Iustin Pop's avatar
Iustin Pop committed
72

73

Guido Trotter's avatar
Guido Trotter committed
74 75 76 77 78 79 80 81 82 83 84
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  """
  def __init__(self):
    # maps execution context (job) id -> set of resources it reserved
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Check whether the given resource is reserved by any holder."""
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource on behalf of the given execution context.

    @raise errors.ReservationError: if the resource is already reserved

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource '%s'"
                                    % str(resource))
    if ec_id not in self._ec_reserved:
      self._ec_reserved[ec_id] = set([resource])
    else:
      self._ec_reserved[ec_id].add(resource)

  def DropECReservations(self, ec_id):
    """Drop all reservations held by the given execution context."""
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    """Return the set of all currently reserved resources."""
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    @param existing: collection of already-used resources to avoid
    @param generate_one_fn: no-argument callable returning a candidate
        resource, or None for an unusable candidate
    @param ec_id: execution context id to register the reservation under
    @return: the newly generated (and now reserved) resource
    @raise errors.ConfigurationError: if no free resource could be
        generated within the retry budget

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      # FIX: the counter was never decremented, so this loop could spin
      # forever (and the while/else raise below was unreachable) when
      # generate_one_fn kept returning used or None resources
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource


129
def _MatchNameComponentIgnoreCase(short_name, names):
  """Wrapper around L{utils.text.MatchNameComponent}.

  Performs a case-insensitive match of C{short_name} against C{names}.

  """
  return utils.MatchNameComponent(short_name, names, case_sensitive=False)


Iustin Pop's avatar
Iustin Pop committed
136
class ConfigWriter:
  """The interface to the cluster configuration.

  Read accessors and mutators for the cluster configuration; access is
  serialized through the module-level C{_config_lock} (shared for reads,
  exclusive for writes).

  @ivar _temporary_lvs: reservation manager for temporary LVs
  @ivar _all_rms: a list of all temporary reservation managers

  """
143 144
  def __init__(self, cfg_file=None, offline=False, _getents=runtime.GetEnts,
               accept_foreign=False):
    """Initialize the writer and immediately load the configuration.

    @param cfg_file: configuration file path; None selects
        L{constants.CLUSTER_CONF_FILE}
    @param offline: stored as C{_offline}; its semantics are handled by
        the config read/write code (not visible here)
    @param _getents: entity lookup callable (default
        L{runtime.GetEnts}), overridable for testing
    @param accept_foreign: forwarded to C{_OpenConfig}

    """
    self.write_count = 0            # number of config writes performed
    self._lock = _config_lock       # module-level configuration lock
    self._config_data = None        # will hold the parsed configuration
    self._offline = offline
    if cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    else:
      self._cfg_file = cfg_file
    self._getents = _getents
    # reservation managers for resources handed out to jobs before the
    # corresponding config change is committed
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}      # (node, minor) -> instance name
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    self._temporary_lvs = TemporaryReservationManager()
    self._all_rms = [self._temporary_ids, self._temporary_macs,
                     self._temporary_secrets, self._temporary_lvs]
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = netutils.Hostname.GetSysName()
    self._last_cluster_serial = -1
    self._cfg_id = None
    self._context = None
    self._OpenConfig(accept_foreign)
Iustin Pop's avatar
Iustin Pop committed
170

171 172 173 174 175 176 177 178 179 180 181 182
  def _GetRpc(self, address_list):
    """Returns RPC runner for configuration.

    @param address_list: node addresses forwarded to L{rpc.ConfigRunner}
    @return: an RPC runner bound to the context set via L{SetContext}

    """
    return rpc.ConfigRunner(self._context, address_list)

  def SetContext(self, context):
    """Sets Ganeti context.

    The context is stored for later use by L{_GetRpc} when building RPC
    runners.

    """
    self._context = context

Iustin Pop's avatar
Iustin Pop committed
183 184 185 186 187 188 189 190
  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Check if the cluster is configured.

    @rtype: boolean
    @return: True if the cluster configuration file exists on this node

    """
    return os.path.exists(constants.CLUSTER_CONF_FILE)

191 192 193 194 195 196 197 198 199 200 201
  def _GenerateOneMAC(self):
    """Generate one mac address

    """
    prefix = self._config_data.cluster.mac_prefix
    byte1 = random.randrange(0, 256)
    byte2 = random.randrange(0, 256)
    byte3 = random.randrange(0, 256)
    mac = "%s:%02x:%02x:%02x" % (prefix, byte1, byte2, byte3)
    return mac

202 203 204 205
  @locking.ssynchronized(_config_lock, shared=1)
  def GetNdParams(self, node):
    """Get the node params populated with cluster defaults.

    @type node: L{objects.Node}
    @param node: The node we want to know the params for
    @return: A dict with the filled in node params

    """
    group = self._UnlockedGetNodeGroup(node.group)
    return self._config_data.cluster.FillND(node, group)

214
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self, ec_id):
    """Generate a MAC for an instance.

    This should check the current instances for duplicates.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the MAC for
    @return: the generated MAC address

    """
    existing = self._AllMACs()
    # FIX: register the reservation with the MAC reservation manager,
    # not the generic ID manager, so that ReserveMAC (which checks
    # _temporary_macs) sees MACs generated by other in-flight jobs
    return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
Iustin Pop's avatar
Iustin Pop committed
223

224
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    """
    if mac in self._AllMACs():
      raise errors.ReservationError("mac already in use")
    self._temporary_macs.Reserve(ec_id, mac)
237

238 239 240 241 242 243 244 245 246 247 248 249
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveLV(self, lv_name, ec_id):
    """Reserve an VG/LV pair for an instance.

    @type lv_name: string
    @param lv_name: the logical volume name to reserve

    """
    if lv_name in self._AllLVs():
      raise errors.ReservationError("LV already in use")
    self._temporary_lvs.Reserve(ec_id, lv_name)
251

252
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self, ec_id):
    """Generate a DRBD secret.

    This checks the current disks for duplicates.

    """
    existing = self._AllDRBDSecrets()
    return self._temporary_secrets.Generate(existing, utils.GenerateSecret,
                                            ec_id)
Michael Hanselmann's avatar
Michael Hanselmann committed
262

263
  def _AllLVs(self):
264 265 266 267 268 269 270 271 272 273
    """Compute the list of all LVs.

    """
    lvnames = set()
    for instance in self._config_data.instances.values():
      node_data = instance.MapLVsByNode()
      for lv_list in node_data.values():
        lvnames.update(lv_list)
    return lvnames

274 275 276 277 278 279 280 281 282 283 284
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    existing = set()
    if include_temporary:
      existing.update(self._temporary_ids.GetReserved())
    existing.update(self._AllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
    existing.update(i.uuid for i in self._AllUUIDObjects() if i.uuid)
    return existing

292
  def _GenerateUniqueID(self, ec_id):
    """Generate an unique UUID.

    This checks the current node, instances and disk names for
    duplicates.

    @rtype: string
    @return: the unique id

    """
    taken = self._AllIDs(include_temporary=False)
    return self._temporary_ids.Generate(taken, utils.NewUUID, ec_id)
304

305
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate an unique ID.

    This is just a wrapper over the unlocked version.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to
    @rtype: string
    @return: the generated unique id

    """
    return self._GenerateUniqueID(ec_id)
316

Iustin Pop's avatar
Iustin Pop committed
317 318 319
  def _AllMACs(self):
    """Return all MACs present in the config.

Iustin Pop's avatar
Iustin Pop committed
320 321 322
    @rtype: list
    @return: the list of all MACs

Iustin Pop's avatar
Iustin Pop committed
323 324 325 326 327 328 329 330
    """
    result = []
    for instance in self._config_data.instances.values():
      for nic in instance.nics:
        result.append(nic.mac)

    return result

331 332 333
  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def _Gather(disk, accum):
      """Recursively gather secrets from this disk (pre-order)."""
      if disk.dev_type == constants.DT_DRBD8:
        accum.append(disk.logical_id[5])
      for child in (disk.children or []):
        _Gather(child, accum)

    secrets = []
    for instance in self._config_data.instances.values():
      for disk in instance.disks:
        _Gather(disk, secrets)

    return secrets

353 354 355 356 357 358 359 360 361 362 363 364 365 366
  def _CheckDiskIDs(self, disk, l_ids, p_ids):
    """Compute duplicate disk IDs

    @type disk: L{objects.Disk}
    @param disk: the disk at which to start searching
    @type l_ids: list
    @param l_ids: list of current logical ids
    @type p_ids: list
    @param p_ids: list of current physical ids
    @rtype: list
    @return: a list of error messages

    """
    result = []
367 368 369 370 371 372 373 374 375 376
    if disk.logical_id is not None:
      if disk.logical_id in l_ids:
        result.append("duplicate logical id %s" % str(disk.logical_id))
      else:
        l_ids.append(disk.logical_id)
    if disk.physical_id is not None:
      if disk.physical_id in p_ids:
        result.append("duplicate physical id %s" % str(disk.physical_id))
      else:
        p_ids.append(disk.physical_id)
377 378 379 380 381 382

    if disk.children:
      for child in disk.children:
        result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
    return result

383
  def _UnlockedVerifyConfig(self):
    """Verify function.

    Runs a series of consistency checks over the in-memory configuration
    (cluster parameters, instances, nodes, node groups, DRBD minors and
    IP addresses) without taking the config lock; errors are collected
    and returned rather than raised.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    # pylint: disable=R0914
    result = []
    seen_macs = []
    ports = {}
    data = self._config_data
    cluster = data.cluster
    seen_lids = []
    seen_pids = []

    # global cluster checks
    if not cluster.enabled_hypervisors:
      result.append("enabled hypervisors list doesn't have any entries")
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
    if invalid_hvs:
      result.append("enabled hypervisors contains invalid entries: %s" %
                    invalid_hvs)
    missing_hvp = (set(cluster.enabled_hypervisors) -
                   set(cluster.hvparams.keys()))
    if missing_hvp:
      result.append("hypervisor parameters missing for the enabled"
                    " hypervisor(s) %s" % utils.CommaJoin(missing_hvp))

    if cluster.master_node not in data.nodes:
      result.append("cluster has invalid primary node '%s'" %
                    cluster.master_node)

    # the helpers below close over ``result`` and append error messages
    # instead of raising
    def _helper(owner, attr, value, template):
      try:
        utils.ForceDictType(value, template)
      except errors.GenericError, err:
        result.append("%s has invalid %s: %s" % (owner, attr, err))

    def _helper_nic(owner, params):
      try:
        objects.NIC.CheckParameterSyntax(params)
      except errors.ConfigurationError, err:
        result.append("%s has invalid nicparams: %s" % (owner, err))

    def _helper_ipolicy(owner, params):
      try:
        objects.InstancePolicy.CheckParameterSyntax(params)
      except errors.ConfigurationError, err:
        result.append("%s has invalid instance policy: %s" % (owner, err))

    def _helper_ispecs(owner, params):
      for key, value in params.items():
        if key in constants.IPOLICY_ISPECS:
          fullkey = "ipolicy/" + key
          _helper(owner, fullkey, value, constants.ISPECS_PARAMETER_TYPES)
        else:
          # FIXME: assuming list type
          if not isinstance(value, list):
            result.append("%s has invalid instance policy: for %s,"
                          " expecting list, got %s" %
                          (owner, key, type(value)))

    # check cluster parameters
    _helper("cluster", "beparams", cluster.SimpleFillBE({}),
            constants.BES_PARAMETER_TYPES)
    _helper("cluster", "nicparams", cluster.SimpleFillNIC({}),
            constants.NICS_PARAMETER_TYPES)
    _helper_nic("cluster", cluster.SimpleFillNIC({}))
    _helper("cluster", "ndparams", cluster.SimpleFillND({}),
            constants.NDS_PARAMETER_TYPES)
    _helper_ipolicy("cluster", cluster.SimpleFillIPolicy({}))
    _helper_ispecs("cluster", cluster.SimpleFillIPolicy({}))

    # per-instance checks
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.name != instance_name:
        result.append("instance '%s' is indexed by wrong name '%s'" %
                      (instance.name, instance_name))
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.append(nic.mac)
        if nic.nicparams:
          filled = cluster.SimpleFillNIC(nic.nicparams)
          owner = "instance %s nic %d" % (instance.name, idx)
          _helper(owner, "nicparams",
                  filled, constants.NICS_PARAMETER_TYPES)
          _helper_nic(owner, filled)

      # parameter checks
      if instance.beparams:
        _helper("instance %s" % instance.name, "beparams",
                cluster.FillBE(instance), constants.BES_PARAMETER_TYPES)

      # gather the drbd ports for duplicate checks
      for dsk in instance.disks:
        if dsk.dev_type in constants.LDS_DRBD:
          tcp_port = dsk.logical_id[2]
          if tcp_port not in ports:
            ports[tcp_port] = []
          ports[tcp_port].append((instance.name, "drbd disk %s" % dsk.iv_name))
      # gather network port reservation
      net_port = getattr(instance, "network_port", None)
      if net_port is not None:
        if net_port not in ports:
          ports[net_port] = []
        ports[net_port].append((instance.name, "network port"))

      # instance disk verify
      for idx, disk in enumerate(instance.disks):
        result.extend(["instance '%s' disk %d error: %s" %
                       (instance.name, idx, msg) for msg in disk.Verify()])
        result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

    # cluster-wide pool of free ports
    for free_port in cluster.tcpudp_port_pool:
      if free_port not in ports:
        ports[free_port] = []
      ports[free_port].append(("cluster", "port marked as free"))

    # compute tcp/udp duplicate ports
    keys = ports.keys()
    keys.sort()
    for pnum in keys:
      pdata = ports[pnum]
      if len(pdata) > 1:
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

    # highest used tcp port check
    if keys:
      if keys[-1] > cluster.highest_used_port:
        result.append("Highest used port mismatch, saved %s, computed %s" %
                      (cluster.highest_used_port, keys[-1]))

    if not data.nodes[cluster.master_node].master_candidate:
      result.append("Master node is not a master candidate")

    # master candidate checks
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
    if mc_now < mc_max:
      result.append("Not enough master candidates: actual %d, target %d" %
                    (mc_now, mc_max))

    # node checks
    for node_name, node in data.nodes.items():
      if node.name != node_name:
        result.append("Node '%s' is indexed by wrong name '%s'" %
                      (node.name, node_name))
      # a node may be at most one of master_candidate/drained/offline
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
        result.append("Node %s state is invalid: master_candidate=%s,"
                      " drain=%s, offline=%s" %
                      (node.name, node.master_candidate, node.drained,
                       node.offline))
      if node.group not in data.nodegroups:
        result.append("Node '%s' has invalid group '%s'" %
                      (node.name, node.group))
      else:
        _helper("node %s" % node.name, "ndparams",
                cluster.FillND(node, data.nodegroups[node.group]),
                constants.NDS_PARAMETER_TYPES)

    # nodegroups checks
    nodegroups_names = set()
    for nodegroup_uuid in data.nodegroups:
      nodegroup = data.nodegroups[nodegroup_uuid]
      if nodegroup.uuid != nodegroup_uuid:
        result.append("node group '%s' (uuid: '%s') indexed by wrong uuid '%s'"
                      % (nodegroup.name, nodegroup.uuid, nodegroup_uuid))
      if utils.UUID_RE.match(nodegroup.name.lower()):
        result.append("node group '%s' (uuid: '%s') has uuid-like name" %
                      (nodegroup.name, nodegroup.uuid))
      if nodegroup.name in nodegroups_names:
        result.append("duplicate node group name '%s'" % nodegroup.name)
      else:
        nodegroups_names.add(nodegroup.name)
      group_name = "group %s" % nodegroup.name
      _helper_ipolicy(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
      _helper_ispecs(group_name, cluster.SimpleFillIPolicy(nodegroup.ipolicy))
      if nodegroup.ndparams:
        _helper(group_name, "ndparams",
                cluster.SimpleFillND(nodegroup.ndparams),
                constants.NDS_PARAMETER_TYPES)

    # drbd minors check
    _, duplicates = self._UnlockedComputeDRBDMap()
    for node, minor, instance_a, instance_b in duplicates:
      result.append("DRBD minor %d on node %s is assigned twice to instances"
                    " %s and %s" % (minor, node, instance_a, instance_b))

    # IP checks
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
    ips = {}

    def _AddIpAddress(ip, name):
      ips.setdefault(ip, []).append(name)

    _AddIpAddress(cluster.master_ip, "cluster_ip")

    for node in data.nodes.values():
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
      if node.secondary_ip != node.primary_ip:
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

    for instance in data.instances.values():
      for idx, nic in enumerate(instance.nics):
        if nic.ip is None:
          continue

        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
        nic_mode = nicparams[constants.NIC_MODE]
        nic_link = nicparams[constants.NIC_LINK]

        # IPs are only compared within the same link, hence the prefix
        if nic_mode == constants.NIC_MODE_BRIDGED:
          link = "bridge:%s" % nic_link
        elif nic_mode == constants.NIC_MODE_ROUTED:
          link = "route:%s" % nic_link
        else:
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

        _AddIpAddress("%s/%s" % (link, nic.ip),
                      "instance:%s/nic:%d" % (instance.name, idx))

    for ip, owners in ips.items():
      if len(owners) > 1:
        result.append("IP address %s is used by multiple owners: %s" %
                      (ip, utils.CommaJoin(owners)))

    return result

625 626 627 628 629 630 631 632 633 634 635 636 637
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify function.

    This is just a wrapper over L{_UnlockedVerifyConfig}, acquiring the
    config lock in shared mode.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    return self._UnlockedVerifyConfig()

638
  def _UnlockedSetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when the only the top device is passed to the remote
    node.

    This function is for internal use, when the config lock is already held.

    @type disk: L{objects.Disk}
    @param disk: the disk whose physical_id is (re)computed in place
    @param node_name: the node from whose point of view the disk is
        configured; for DRBD disks it must be one of the two peers
    @raise errors.ConfigurationError: if the node is not part of the
        DRBD pair, or a peer node cannot be found

    """
    # update the children first, so the whole tree is consistent when
    # the top device is handed to the node
    if disk.children:
      for child in disk.children:
        self._UnlockedSetDiskID(child, node_name)

    if disk.logical_id is None and disk.physical_id is not None:
      # nothing to convert from; keep the existing physical_id
      return
    if disk.dev_type == constants.LD_DRBD8:
      pnode, snode, port, pminor, sminor, secret = disk.logical_id
      if node_name not in (pnode, snode):
        raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                        node_name)
      pnode_info = self._UnlockedGetNodeInfo(pnode)
      snode_info = self._UnlockedGetNodeInfo(snode)
      if pnode_info is None or snode_info is None:
        raise errors.ConfigurationError("Can't find primary or secondary node"
                                        " for %s" % str(disk))
      # physical_id is built relative to node_name, as
      # (local_ip, port, remote_ip, port, local_minor, secret)
      p_data = (pnode_info.secondary_ip, port)
      s_data = (snode_info.secondary_ip, port)
      if pnode == node_name:
        disk.physical_id = p_data + s_data + (pminor, secret)
      else: # it must be secondary, we tested above
        disk.physical_id = s_data + p_data + (sminor, secret)
    else:
      # non-DRBD devices need no conversion
      disk.physical_id = disk.logical_id
    return

676 677 678 679 680 681 682 683 684 685 686 687 688 689
  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration.

    The routine descends down and updates its children also, because
    this helps when the only the top device is passed to the remote
    node.

    This is just a locked wrapper over L{_UnlockedSetDiskID}.

    """
    return self._UnlockedSetDiskID(disk, node_name)

  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    @type port: int
    @param port: the port to add to the pool
    @raise errors.ProgrammerError: if the argument is not an integer

    """
    if not isinstance(port, int):
      raise errors.ProgrammerError("Invalid type passed for port")

    self._config_data.cluster.tcpudp_port_pool.add(port)
    self._WriteConfig()

700
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    @return: a copy of the cluster's TCP/UDP port pool

    """
    return self._config_data.cluster.tcpudp_port_pool.copy()
706

707
  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port).

    """
    cluster = self._config_data.cluster
    # If there are TCP/IP ports configured, we use them first.
    if cluster.tcpudp_port_pool:
      port = cluster.tcpudp_port_pool.pop()
    else:
      port = cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = port

    self._WriteConfig()
    return port

730
  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    @rtype: (dict, list)
    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty list), and a list of duplicates; if the duplicates
        list is not empty, the configuration is corrupted and its caller
        should raise an exception

    """
    def _AppendUsedPorts(instance_name, disk, used):
      """Record this disk's minors (recursively) into C{used}.

      @return: list of (node, minor, instance, instance) duplicates

      """
      duplicates = []
      if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
        node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
        for node, port in ((node_a, minor_a), (node_b, minor_b)):
          assert node in used, ("Node '%s' of instance '%s' not found"
                                " in node list" % (node, instance_name))
          if port in used[node]:
            duplicates.append((node, port, instance_name, used[node][port]))
          else:
            used[node][port] = instance_name
      if disk.children:
        for child in disk.children:
          duplicates.extend(_AppendUsedPorts(instance_name, child, used))
      return duplicates

    duplicates = []
    # start with an empty minor map for every known node
    my_dict = dict((node, {}) for node in self._config_data.nodes)
    for instance in self._config_data.instances.itervalues():
      for disk in instance.disks:
        duplicates.extend(_AppendUsedPorts(instance.name, disk, my_dict))
    # also account for minors reserved (but not yet committed) by jobs
    for (node, minor), instance in self._temporary_drbds.iteritems():
      if minor in my_dict[node] and my_dict[node][minor] != instance:
        duplicates.append((node, minor, instance, my_dict[node][minor]))
      else:
        my_dict[node][minor] = instance
    return my_dict, duplicates
768

769 770 771 772 773 774 775 776 777 778 779
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    This is just a wrapper over L{_UnlockedComputeDRBDMap}.

    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty list).
    @raise errors.ConfigurationError: if duplicate minors are detected

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    return d_map
785

786 787 788 789 790 791 792 793 794
  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, nodes, instance):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes.

    @type nodes: list of strings
    @param nodes: the nodes on which we allocate minors; a node may
        appear more than once to get several minors on it
    @type instance: string
    @param instance: the instance for which we allocate minors
    @rtype: list of ints
    @return: the allocated minors, in the same order as the passed nodes

    """
    assert isinstance(instance, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if duplicates:
      # duplicates mean the configuration is corrupted; refuse to allocate
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(duplicates))
    result = []
    for nname in nodes:
      ndata = d_map[nname]
      if not ndata:
        # no minors used, we can start at 0
        result.append(0)
        ndata[0] = instance
        self._temporary_drbds[(nname, 0)] = instance
        continue
      # use sorted() instead of the manual keys()/sort() two-step
      keys = sorted(ndata)
      ffree = utils.FirstFree(keys)
      if ffree is None:
        # return the next minor
        # TODO: implement high-limit check
        minor = keys[-1] + 1
      else:
        minor = ffree
      # double-check minor against current instances
      assert minor not in d_map[nname], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nname, d_map[nname][minor]))
      ndata[minor] = instance
      # double-check minor against reservation
      r_key = (nname, minor)
      assert r_key not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nname, self._temporary_drbds[r_key]))
      self._temporary_drbds[r_key] = instance
      result.append(minor)
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  nodes, result)
    return result

842
  def _UnlockedReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    assert isinstance(instance, basestring), \
           "Invalid argument passed to ReleaseDRBDMinors"
    # collect the keys first, then drop them, so we never mutate the
    # dictionary while scanning it
    stale = [key for (key, name) in self._temporary_drbds.items()
             if name == instance]
    for key in stale:
      del self._temporary_drbds[key]

856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    This function is just a wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    # the unlocked helper does the actual work; this only takes the lock
    self._UnlockedReleaseDRBDMinors(instance)

873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915
  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Return the version of the cluster configuration.

    @return: Config version

    """
    version = self._config_data.version
    return version

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Return the name of this cluster.

    @return: Cluster name

    """
    cluster = self._config_data.cluster
    return cluster.cluster_name

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Return the hostname of this cluster's master node.

    @return: Master hostname

    """
    cluster = self._config_data.cluster
    return cluster.master_node

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Return the IP address of this cluster's master node.

    @return: Master IP

    """
    cluster = self._config_data.cluster
    return cluster.master_ip

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Return the network device used by the cluster master.

    @return: the master network device name

    """
    cluster = self._config_data.cluster
    return cluster.master_netdev

916 917 918 919 920 921 922
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetmask(self):
    """Return the netmask of this cluster's master node.

    @return: the master netmask

    """
    cluster = self._config_data.cluster
    return cluster.master_netmask

923 924 925 926 927 928 929
  @locking.ssynchronized(_config_lock, shared=1)
  def GetUseExternalMipScript(self):
    """Get flag representing whether to use the external master IP setup script.

    @return: the use_external_mip_script cluster flag

    """
    cluster = self._config_data.cluster
    return cluster.use_external_mip_script

930 931 932 933 934 935 936
  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Return the file storage directory of this cluster.

    @return: the file storage dir

    """
    cluster = self._config_data.cluster
    return cluster.file_storage_dir

937 938 939 940 941 942 943
  @locking.ssynchronized(_config_lock, shared=1)
  def GetSharedFileStorageDir(self):
    """Return the shared file storage directory of this cluster.

    @return: the shared file storage dir

    """
    cluster = self._config_data.cluster
    return cluster.shared_file_storage_dir

944 945 946 947 948
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Get the hypervisor type for this cluster.

    @return: the first enabled hypervisor type

    """
    # the first entry in enabled_hypervisors is treated as the cluster default
    enabled = self._config_data.cluster.enabled_hypervisors
    return enabled[0]
950

951
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    @rtype: string
    @return: the rsa hostkey

    """
    cluster = self._config_data.cluster
    return cluster.rsahostkeypub

961 962 963 964 965 966 967
  @locking.ssynchronized(_config_lock, shared=1)
  def GetDefaultIAllocator(self):
    """Return the default instance allocator of this cluster.

    @return: the default_iallocator cluster attribute

    """
    cluster = self._config_data.cluster
    return cluster.default_iallocator

968 969 970 971 972 973 974 975 976
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPrimaryIPFamily(self):
    """Return the cluster's primary ip family.

    @return: primary ip family

    """
    cluster = self._config_data.cluster
    return cluster.primary_ip_family

977 978 979 980
  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetworkParameters(self):
    """Get network parameters of the master node.

    @rtype: L{objects.MasterNetworkParameters}
    @return: network parameters of the master node

    """
    cluster = self._config_data.cluster
    # bundle the master-related cluster attributes into one object
    return objects.MasterNetworkParameters(
      name=cluster.master_node,
      ip=cluster.master_ip,
      netmask=cluster.master_netmask,
      netdev=cluster.master_netdev,
      ip_family=cluster.primary_ip_family)

994 995 996 997
  @locking.ssynchronized(_config_lock)
  def AddNodeGroup(self, group, ec_id, check_uuid=True):
    """Add a node group to the configuration.

    This method calls group.UpgradeConfig() to fill any missing attributes
    according to their default values.

    @type group: L{objects.NodeGroup}
    @param group: the NodeGroup object to add
    @type ec_id: string
    @param ec_id: unique id for the job to use when creating a missing UUID
    @type check_uuid: bool
    @param check_uuid: add an UUID to the group if it doesn't have one or, if
                       it does, ensure that it does not exist in the
                       configuration already

    """
    # delegate to the unlocked helper, then persist the new configuration
    self._UnlockedAddNodeGroup(group, ec_id, check_uuid)
    self._WriteConfig()

  def _UnlockedAddNodeGroup(self, group, ec_id, check_uuid):
    """Add a node group to the configuration.

    """
    logging.info("Adding node group %s to configuration", group.name)

    # Some code might need to add a node group with a pre-populated UUID
    # generated with ConfigWriter.GenerateUniqueID(). We allow them to bypass
    # the "does this UUID" exist already check.
    if check_uuid:
      self._EnsureUUID(group, ec_id)

    # refuse to add a group whose name is already taken (EAFP: the lookup
    # raising OpPrereqError is the "name is free" success path)
    try:
      clashing_uuid = self._UnlockedLookupNodeGroup(group.name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group.name, clashing_uuid),
                                 errors.ECODE_EXISTS)

    group.serial_no = 1
    group.ctime = group.mtime = time.time()
    # fill in any missing attributes with their defaults
    group.UpgradeConfig()

    self._config_data.nodegroups[group.uuid] = group
    self._config_data.cluster.serial_no += 1

  @locking.ssynchronized(_config_lock)
  def RemoveNodeGroup(self, group_uuid):
    """Remove a node group from the configuration.

    @type group_uuid: string
    @param group_uuid: the UUID of the node group to remove

    """
    logging.info("Removing node group %s from configuration", group_uuid)

    if group_uuid not in self._config_data.nodegroups:
      raise errors.ConfigurationError("Unknown node group '%s'" % group_uuid)

Stephen Shirley's avatar