#
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Configuration management for Ganeti

24
This module provides the interface to the Ganeti cluster configuration.
Iustin Pop's avatar
Iustin Pop committed
25

26 27
The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.
Iustin Pop's avatar
Iustin Pop committed
28

29 30
Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.
Iustin Pop's avatar
Iustin Pop committed
31 32 33 34 35

"""

import os
import random
36
import logging
37
import time
Iustin Pop's avatar
Iustin Pop committed
38 39

from ganeti import errors
40
from ganeti import locking
Iustin Pop's avatar
Iustin Pop committed
41 42 43 44
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
45
from ganeti import serializer
46 47


48 49
# module-level lock; ConfigWriter stores it as its _lock attribute and the
# ssynchronized decorators on its public methods are built on it
_config_lock = locking.SharedLock()

# job id used for resource management at config upgrade time
_UPGRADE_CONFIG_JID = "jid-cfg-upgrade"
52

53

Michael Hanselmann's avatar
Michael Hanselmann committed
54
def _ValidateConfig(data):
  """Check that a configuration object carries the expected version.

  Only the version field is inspected; nothing else is validated.

  @param data: the configuration object; its C{version} attribute is read
  @raise errors.ConfigurationError: if the version differs from
      L{constants.CONFIG_VERSION}

  """
  found = data.version
  expected = constants.CONFIG_VERSION
  if found == expected:
    return
  raise errors.ConfigurationError("Cluster configuration version"
                                  " mismatch, got %s instead of %s" %
                                  (found, expected))
Iustin Pop's avatar
Iustin Pop committed
68

69

Guido Trotter's avatar
Guido Trotter committed
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  Reservations are kept in a dict mapping each holder (execution context
  id) to the set of resources it currently holds.

  """
  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Check whether a resource is reserved by any holder.

    @param resource: the resource to look for
    @rtype: boolean
    @return: True if some holder has reserved the resource

    """
    # we must look at the reserved sets (the values), not at the
    # (holder, set) pairs
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource on behalf of a holder.

    @param ec_id: the holder (execution context id) of the reservation
    @param resource: the resource to reserve
    @raise errors.ReservationError: if the resource is already reserved,
        by this or by any other holder

    """
    if self.Reserved(resource):
      raise errors.ReservationError("Duplicate reservation for resource: %s." %
                                    (resource, ))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget all reservations of a holder; unknown holders are ignored.

    @param ec_id: the holder whose reservations are dropped

    """
    if ec_id in self._ec_reserved:
      del self._ec_reserved[ec_id]

  def GetReserved(self):
    """Return the union of the resources reserved by all holders.

    @rtype: set
    @return: a new set, safe to be modified by the caller

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    The generator function is called up to 64 times, until it returns a
    value which is not None and is neither reserved nor in C{existing};
    that value is then reserved for C{ec_id}.

    @param existing: iterable of resources already in use
    @param generate_one_fn: callable without arguments returning a
        candidate resource
    @param ec_id: the holder for which the new resource is reserved
    @return: the newly generated (and reserved) resource
    @raise errors.ConfigurationError: if no free resource was found
        within the allowed number of attempts

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    new_resource = None
    # bounded number of attempts, so that an exhausted resource space
    # results in an error instead of an endless loop
    for _ in range(64):
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource


Iustin Pop's avatar
Iustin Pop committed
125
class ConfigWriter:
126
  """The interface to the cluster configuration.
Iustin Pop's avatar
Iustin Pop committed
127

128
  """
Iustin Pop's avatar
Iustin Pop committed
129
  def __init__(self, cfg_file=None, offline=False):
    """Initialize the in-memory state and open the configuration.

    @param cfg_file: path of the configuration file; if None,
        L{constants.CLUSTER_CONF_FILE} is used
    @param offline: stored as C{self._offline}

    """
    self.write_count = 0
    self._lock = _config_lock
    self._config_data = None
    self._offline = offline
    if cfg_file is not None:
      self._cfg_file = cfg_file
    else:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    # temporary reservations, kept until they are dropped or committed
    self._temporary_ids = TemporaryReservationManager()
    self._temporary_drbds = {}
    self._temporary_macs = TemporaryReservationManager()
    self._temporary_secrets = TemporaryReservationManager()
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name
    self._last_cluster_serial = -1
    self._OpenConfig()
Iustin Pop's avatar
Iustin Pop committed
149 150 151 152 153 154 155 156 157

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Tell whether the cluster configuration file exists.

    @rtype: boolean
    @return: True if L{constants.CLUSTER_CONF_FILE} exists

    """
    conf_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(conf_path)

158 159 160 161 162 163 164 165 166 167 168
  def _GenerateOneMAC(self):
    """Build one random MAC address under the cluster MAC prefix.

    @rtype: string
    @return: the cluster C{mac_prefix} followed by three random bytes,
        each formatted as two hex digits and separated by colons

    """
    mac_prefix = self._config_data.cluster.mac_prefix
    octets = ["%02x" % random.randrange(0, 256) for _idx in range(3)]
    return "%s:%s" % (mac_prefix, ":".join(octets))

169
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self, ec_id):
    """Generate a MAC for an instance.

    The new MAC is checked against the MACs of all instances in the
    configuration and against the MACs held in the temporary MAC
    reservation pool (the one also used by L{ReserveMAC}); it is then
    reserved in that pool for C{ec_id}.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the MAC to
    @rtype: string
    @return: the generated MAC
    @raise errors.ConfigurationError: if no free MAC could be generated

    """
    existing = self._AllMACs()
    # use the MAC pool, not the ID pool, so that generated and explicitly
    # reserved MACs are checked against each other
    return self._temporary_macs.Generate(existing, self._GenerateOneMAC, ec_id)
Iustin Pop's avatar
Iustin Pop committed
178

179
  @locking.ssynchronized(_config_lock, shared=1)
  def ReserveMAC(self, mac, ec_id):
    """Reserve a MAC for an instance.

    This only checks instances managed by this cluster, it does not
    check for potential collisions elsewhere.

    @param mac: the MAC address to reserve
    @param ec_id: unique id for the job to reserve the MAC to
    @raise errors.ReservationError: if the MAC is used by an instance in
        the configuration or is already temporarily reserved

    """
    if mac in self._AllMACs():
      raise errors.ReservationError("mac already in use")
    # the reservation manager takes the holder first, then the resource
    self._temporary_macs.Reserve(ec_id, mac)
192

193
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self, ec_id):
    """Generate a new DRBD secret and reserve it for a job.

    Candidates come from L{utils.GenerateSecret} and are checked against
    the secrets of the disks currently in the configuration.

    @param ec_id: unique id for the job to reserve the secret to
    @return: the generated secret

    """
    reservations = self._temporary_secrets
    in_use = self._AllDRBDSecrets()
    return reservations.Generate(in_use, utils.GenerateSecret, ec_id)
Michael Hanselmann's avatar
Michael Hanselmann committed
203

204
  def _AllLVs(self):
    """Collect the LV names of all instances.

    @rtype: set
    @return: the union of all values returned by C{MapLVsByNode} for
        every instance in the configuration

    """
    all_names = set()
    for inst in self._config_data.instances.values():
      for node_lvs in inst.MapLVsByNode().values():
        all_names.update(node_lvs)
    return all_names

215 216 217 218 219 220 221 222 223 224 225
  def _AllIDs(self, include_temporary):
    """Gather every UUID and name known to the configuration.

    The result contains the LV names, the instance and node names and
    the non-empty UUIDs of the objects returned by C{_AllUUIDObjects}.

    @type include_temporary: boolean
    @param include_temporary: whether to also include the IDs held in
        the C{_temporary_ids} reservation pool
    @rtype: set
    @return: a set of IDs

    """
    ids = set()
    if include_temporary:
      ids.update(self._temporary_ids.GetReserved())
    for source in (self._AllLVs(),
                   self._config_data.instances.keys(),
                   self._config_data.nodes.keys()):
      ids.update(source)
    for obj in self._AllUUIDObjects():
      if obj.uuid:
        ids.add(obj.uuid)
    return ids

233
  def _GenerateUniqueID(self, ec_id):
    """Generate a new unique UUID and reserve it for a job.

    The UUID is checked against the current node, instance and disk
    names and the UUIDs already in use.

    @param ec_id: unique id for the job to reserve the id to
    @rtype: string
    @return: the unique id

    """
    # the temporary ids are left out here, as the reservation manager
    # adds its own reserved set when generating
    known_ids = self._AllIDs(include_temporary=False)
    new_id = self._temporary_ids.Generate(known_ids, utils.NewUUID, ec_id)
    return new_id
245

246
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self, ec_id):
    """Generate an unique ID.

    Locked wrapper over L{_GenerateUniqueID}.

    @type ec_id: string
    @param ec_id: unique id for the job to reserve the id to
    @return: the result of L{_GenerateUniqueID}

    """
    unique_id = self._GenerateUniqueID(ec_id)
    return unique_id
257

Iustin Pop's avatar
Iustin Pop committed
258 259 260
  def _AllMACs(self):
    """Collect the MAC of every NIC of every instance in the config.

    @rtype: list
    @return: the list of all MACs (one entry per NIC, duplicates kept)

    """
    return [nic.mac
            for inst in self._config_data.instances.values()
            for nic in inst.nics]

272 273 274
  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    The disks of all instances are walked recursively (children
    included); for each DRBD8 device the sixth element of its
    C{logical_id} (the secret) is collected.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def helper(disk, result):
      """Recursively gather secrets from this disk."""
      # dev_type holds a logical device type, so it is compared with the
      # LD_* constant, as done by the other DRBD checks in this class
      if disk.dev_type == constants.LD_DRBD8:
        result.append(disk.logical_id[5])
      if disk.children:
        for child in disk.children:
          helper(child, result)

    result = []
    for instance in self._config_data.instances.values():
      for disk in instance.disks:
        helper(disk, result)

    return result

294 295 296 297 298 299 300 301 302 303 304 305 306 307
  def _CheckDiskIDs(self, disk, l_ids, p_ids):
    """Look for duplicate disk IDs in a disk tree.

    IDs not seen before are appended to the passed lists, so that the
    same lists can be reused across calls.

    @type disk: L{objects.Disk}
    @param disk: the disk at which to start searching
    @type l_ids: list
    @param l_ids: list of current logical ids (modified in place)
    @type p_ids: list
    @param p_ids: list of current physical ids (modified in place)
    @rtype: list
    @return: a list of error messages

    """
    errs = []
    checks = (
      (disk.logical_id, l_ids, "duplicate logical id %s"),
      (disk.physical_id, p_ids, "duplicate physical id %s"),
      )
    for value, seen, template in checks:
      if value is None:
        continue
      if value in seen:
        errs.append(template % str(value))
      else:
        seen.append(value)

    # descend into the children, sharing the same "seen" lists
    for child in (disk.children or []):
      errs.extend(self._CheckDiskIDs(child, l_ids, p_ids))
    return errs

324
  def _UnlockedVerifyConfig(self):
    """Verify the consistency of the in-memory configuration.

    The checks cover the enabled hypervisors, the master node, the nodes,
    NIC MACs and disks of each instance, TCP/UDP port usage, master
    candidates, node state flags, DRBD minors and IP addresses.

    This function is for internal use, when the config lock is already held.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors
    @raise errors.ProgrammerError: if a NIC with an IP address has a mode
        which is neither bridged nor routed

    """
    result = []
    seen_macs = set()
    ports = {}
    data = self._config_data
    cluster = data.cluster
    seen_lids = []
    seen_pids = []

    # global cluster checks
    if not cluster.enabled_hypervisors:
      result.append("enabled hypervisors list doesn't have any entries")
    invalid_hvs = set(cluster.enabled_hypervisors) - constants.HYPER_TYPES
    if invalid_hvs:
      result.append("enabled hypervisors contains invalid entries: %s" %
                    invalid_hvs)

    master_known = cluster.master_node in data.nodes
    if not master_known:
      result.append("cluster has invalid primary node '%s'" %
                    cluster.master_node)

    # per-instance checks
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)

      # gather the drbd ports for duplicate checks
      for dsk in instance.disks:
        if dsk.dev_type in constants.LDS_DRBD:
          tcp_port = dsk.logical_id[2]
          ports.setdefault(tcp_port, []).append(
            (instance.name, "drbd disk %s" % dsk.iv_name))
      # gather network port reservation
      net_port = getattr(instance, "network_port", None)
      if net_port is not None:
        ports.setdefault(net_port, []).append((instance.name, "network port"))

      # instance disk verify
      for idx, disk in enumerate(instance.disks):
        result.extend(["instance '%s' disk %d error: %s" %
                       (instance.name, idx, msg) for msg in disk.Verify()])
        result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

    # cluster-wide pool of free ports
    for free_port in cluster.tcpudp_port_pool:
      ports.setdefault(free_port, []).append(("cluster",
                                              "port marked as free"))

    # compute tcp/udp duplicate ports
    keys = sorted(ports)
    for pnum in keys:
      pdata = ports[pnum]
      if len(pdata) > 1:
        txt = utils.CommaJoin(["%s/%s" % val for val in pdata])
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

    # highest used tcp port check
    if keys and keys[-1] > cluster.highest_used_port:
      result.append("Highest used port mismatch, saved %s, computed %s" %
                    (cluster.highest_used_port, keys[-1]))

    # an unknown master node was already reported above; looking it up
    # again would abort the verification with a KeyError
    if (master_known and
        not data.nodes[cluster.master_node].master_candidate):
      result.append("Master node is not a master candidate")

    # master candidate checks
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
    if mc_now < mc_max:
      result.append("Not enough master candidates: actual %d, target %d" %
                    (mc_now, mc_max))

    # node checks: at most one of the three state flags may be set
    for node in data.nodes.values():
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
        result.append("Node %s state is invalid: master_candidate=%s,"
                      " drain=%s, offline=%s" %
                      (node.name, node.master_candidate, node.drained,
                       node.offline))

    # drbd minors check
    _, duplicates = self._UnlockedComputeDRBDMap()
    for node, minor, instance_a, instance_b in duplicates:
      result.append("DRBD minor %d on node %s is assigned twice to instances"
                    " %s and %s" % (minor, node, instance_a, instance_b))

    # IP checks
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
    ips = {}

    def _AddIpAddress(ip, name):
      ips.setdefault(ip, []).append(name)

    _AddIpAddress(cluster.master_ip, "cluster_ip")

    for node in data.nodes.values():
      _AddIpAddress(node.primary_ip, "node:%s/primary" % node.name)
      if node.secondary_ip != node.primary_ip:
        _AddIpAddress(node.secondary_ip, "node:%s/secondary" % node.name)

    for instance in data.instances.values():
      for idx, nic in enumerate(instance.nics):
        if nic.ip is None:
          continue

        nicparams = objects.FillDict(default_nicparams, nic.nicparams)
        nic_mode = nicparams[constants.NIC_MODE]
        nic_link = nicparams[constants.NIC_LINK]

        # instance IPs are keyed together with their link, so the same
        # address on different links is not reported as a duplicate
        if nic_mode == constants.NIC_MODE_BRIDGED:
          link = "bridge:%s" % nic_link
        elif nic_mode == constants.NIC_MODE_ROUTED:
          link = "route:%s" % nic_link
        else:
          raise errors.ProgrammerError("NIC mode '%s' not handled" % nic_mode)

        _AddIpAddress("%s/%s" % (link, nic.ip),
                      "instance:%s/nic:%d" % (instance.name, idx))

    for ip, owners in ips.items():
      if len(owners) > 1:
        result.append("IP address %s is used by multiple owners: %s" %
                      (ip, utils.CommaJoin(owners)))

    return result

472 473 474 475 476 477 478 479 480 481 482 483 484
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify the configuration.

    Locked wrapper over L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    problems = self._UnlockedVerifyConfig()
    return problems

485
  def _UnlockedSetDiskID(self, disk, node_name):
    """Compute the physical ID of a disk as needed on a given node.

    For DRBD8 devices the physical ID is built from the secondary IPs
    of the two nodes, the port, the minor of the target node and the
    secret; for all other devices it is a copy of the logical ID.

    The children of the disk are updated first, which helps when only
    the top device is passed to the remote node.

    This function is for internal use, when the config lock is already held.

    @param disk: the disk to update (modified in place)
    @param node_name: the node for which the physical ID is computed
    @raise errors.ConfigurationError: if a DRBD device does not involve
        the given node, or if one of its nodes cannot be found

    """
    for child in (disk.children or []):
      self._UnlockedSetDiskID(child, node_name)

    if disk.logical_id is None and disk.physical_id is not None:
      return

    if disk.dev_type != constants.LD_DRBD8:
      disk.physical_id = disk.logical_id
      return

    pnode, snode, port, pminor, sminor, secret = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self._UnlockedGetNodeInfo(pnode)
    snode_info = self._UnlockedGetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))
    p_data = (pnode_info.secondary_ip, port)
    s_data = (snode_info.secondary_ip, port)
    # the target node's endpoint comes first, the peer's second
    if pnode == node_name:
      local_end, remote_end, minor = p_data, s_data, pminor
    else: # it must be secondary, we tested above
      local_end, remote_end, minor = s_data, p_data, sminor
    disk.physical_id = local_end + remote_end + (minor, secret)

523 524 525 526 527 528 529 530 531 532 533 534 535 536
  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_name):
    """Compute the physical ID of a disk as needed on a given node.

    Locked wrapper over L{_UnlockedSetDiskID}; the disk and its children
    are updated in place.

    @param disk: the disk to update
    @param node_name: the node for which the physical ID is computed

    """
    outcome = self._UnlockedSetDiskID(disk, node_name)
    return outcome

  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Add a port to the pool of available ports and save the config.

    @type port: int
    @param port: the port to add
    @raise errors.ProgrammerError: if the port is not an integer

    """
    if isinstance(port, int):
      pool = self._config_data.cluster.tcpudp_port_pool
      pool.add(port)
      self._WriteConfig()
    else:
      raise errors.ProgrammerError("Invalid type passed for port")

547
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Return a copy of the pool of available ports.

    @return: the result of C{copy()} on the cluster's C{tcpudp_port_pool}

    """
    pool = self._config_data.cluster.tcpudp_port_pool
    return pool.copy()
553

554
  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port and save the configuration.

    The port is taken from the available port pool if that is not
    empty; otherwise the port following C{highest_used_port} is used
    and C{highest_used_port} is increased accordingly.

    @return: the allocated port
    @raise errors.ConfigurationError: if the next port would reach
        L{constants.LAST_DRBD_PORT}

    """
    cluster = self._config_data.cluster
    # If there are TCP/IP ports configured, we use them first.
    if not cluster.tcpudp_port_pool:
      port = cluster.highest_used_port + 1
      if port >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = port
    else:
      port = cluster.tcpudp_port_pool.pop()

    self._WriteConfig()
    return port

577
  def _UnlockedComputeDRBDMap(self):
    """Compute the map of used DRBD minors per node.

    Both the minors of the instance disks in the configuration and the
    temporarily reserved minors are taken into account.

    @rtype: (dict, list)
    @return: a dictionary of node_name: dict of minor: instance_name,
        having an entry for every node (possibly an empty dict), and a
        list of duplicates as (node, minor, instance, other instance)
        tuples; if the duplicates list is not empty, the configuration
        is corrupted and the caller should raise an exception

    """
    def _CollectMinors(owner, disk, minor_map):
      """Record the minors of a disk tree, returning the clashes."""
      clashes = []
      if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
        node_a, node_b, _, minor_a, minor_b = disk.logical_id[:5]
        for node, minor in ((node_a, minor_a), (node_b, minor_b)):
          assert node in minor_map, ("Node '%s' of instance '%s' not found"
                                     " in node list" % (node, owner))
          node_minors = minor_map[node]
          if minor in node_minors:
            clashes.append((node, minor, owner, node_minors[minor]))
          else:
            node_minors[minor] = owner
      for child in (disk.children or []):
        clashes.extend(_CollectMinors(owner, child, minor_map))
      return clashes

    duplicates = []
    my_dict = {}
    for node in self._config_data.nodes:
      my_dict[node] = {}
    for instance in self._config_data.instances.itervalues():
      for disk in instance.disks:
        duplicates.extend(_CollectMinors(instance.name, disk, my_dict))
    # a temporary reservation only clashes if the minor is recorded for
    # a different instance
    for (node, minor), instance in self._temporary_drbds.iteritems():
      node_minors = my_dict[node]
      if minor in node_minors and node_minors[minor] != instance:
        duplicates.append((node, minor, instance, node_minors[minor]))
      else:
        node_minors[minor] = instance
    return my_dict, duplicates
615

616 617 618 619 620 621 622 623 624 625 626
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the map of used DRBD minors per node.

    Locked wrapper over L{_UnlockedComputeDRBDMap} which refuses to
    return a map containing duplicates.

    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty dict)
    @raise errors.ConfigurationError: if duplicate minors are found

    """
    d_map, duplicates = self._UnlockedComputeDRBDMap()
    if not duplicates:
      return d_map
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(duplicates))
632

633 634 635 636 637 638 639 640 641
  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, nodes, instance):
    """Allocate DRBD minors on the given nodes.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes. Each allocated minor is recorded as a
    temporary reservation for the instance.

    @param nodes: the node names on which to allocate minors
    @type instance: string
    @param instance: the instance for which we allocate minors
    @rtype: list
    @return: the allocated minors, one per entry in C{nodes}
    @raise errors.ConfigurationError: if the current minor map contains
        duplicates

    """
    assert isinstance(instance, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

    minor_map, clashes = self._UnlockedComputeDRBDMap()
    if clashes:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(clashes))
    allocated = []
    for node_name in nodes:
      used = minor_map[node_name]
      if used:
        taken = sorted(used.keys())
        minor = utils.FirstFree(taken)
        if minor is None:
          # return the next minor
          # TODO: implement high-limit check
          minor = taken[-1] + 1
        # double-check minor against current instances
        assert minor not in minor_map[node_name], \
               ("Attempt to reuse allocated DRBD minor %d on node %s,"
                " already allocated to instance %s" %
                (minor, node_name, minor_map[node_name][minor]))
        used[minor] = instance
        # double-check minor against reservation
        reservation = (node_name, minor)
        assert reservation not in self._temporary_drbds, \
               ("Attempt to reuse reserved DRBD minor %d on node %s,"
                " reserved for instance %s" %
                (minor, node_name, self._temporary_drbds[reservation]))
        self._temporary_drbds[reservation] = instance
        allocated.append(minor)
      else:
        # no minors used, we can start at 0
        allocated.append(0)
        used[0] = instance
        self._temporary_drbds[(node_name, 0)] = instance
    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  nodes, allocated)
    return allocated

689
  def _UnlockedReleaseDRBDMinors(self, instance):
    """Drop the temporary DRBD minor reservations of an instance.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    assert isinstance(instance, basestring), \
           "Invalid argument passed to ReleaseDRBDMinors"
    # collect the matching keys first, then remove them
    stale_keys = [key for (key, owner) in self._temporary_drbds.items()
                  if owner == instance]
    for key in stale_keys:
      del self._temporary_drbds[key]

703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, instance):
    """Drop the temporary DRBD minor reservations of an instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    Locked wrapper over L{_UnlockedReleaseDRBDMinors}.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    release = self._UnlockedReleaseDRBDMinors
    release(instance)

720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Return the version of the loaded configuration.

    @return: the C{version} attribute of the configuration data

    """
    config = self._config_data
    return config.version

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Return the name of the cluster.

    @return: the C{cluster_name} attribute of the cluster object

    """
    cluster = self._config_data.cluster
    return cluster.cluster_name

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Return the hostname of the master node of this cluster.

    @return: the C{master_node} attribute of the cluster object

    """
    cluster = self._config_data.cluster
    return cluster.master_node

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Return the master IP of this cluster.

    @return: the C{master_ip} attribute of the cluster object

    """
    cluster = self._config_data.cluster
    return cluster.master_ip

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Return the master network device of this cluster.

    @return: the C{master_netdev} attribute of the cluster object

    """
    cluster = self._config_data.cluster
    return cluster.master_netdev

  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Return the file storage directory of this cluster.

    @return: the C{file_storage_dir} attribute of the cluster object

    """
    cluster = self._config_data.cluster
    return cluster.file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Return the hypervisor type of this cluster.

    @return: the first entry of the cluster's C{enabled_hypervisors}

    """
    enabled = self._config_data.cluster.enabled_hypervisors
    return enabled[0]
776

777
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHostKey(self):
    """Return the rsa hostkey stored in the config.

    @rtype: string
    @return: the C{rsahostkeypub} attribute of the cluster object

    """
    cluster = self._config_data.cluster
    return cluster.rsahostkeypub

787
  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config and save it.

    This should be used after creating a new instance. The temporary
    DRBD minor reservations of the instance are released.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @param ec_id: the execution context id, used if a UUID has to be
        generated for the instance
    @raise errors.ProgrammerError: if the argument is not an instance
        object
    @raise errors.ConfigurationError: if one of the instance's MACs is
        already in use in the configuration

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      # the LV map is computed only for logging purposes
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name,
                   instance.MapLVsByNode())

    used_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac not in used_macs:
        continue
      raise errors.ConfigurationError("Cannot add instance %s:"
                                      " MAC address '%s' already in use." %
                                      (instance.name, nic.mac))

    self._EnsureUUID(instance, ec_id)

    now = time.time()
    instance.serial_no = 1
    instance.ctime = now
    instance.mtime = now
    self._config_data.instances[instance.name] = instance
    self._config_data.cluster.serial_no += 1
    self._UnlockedReleaseDRBDMinors(instance.name)
    self._WriteConfig()

  def _EnsureUUID(self, item, ec_id):
821 822 823
    """Ensures a given object has a valid UUID.

    @param item: the instance or node to be checked
824
    @param ec_id: the execution context id for the uuid reservation
825 826 827

    """
    if not item.uuid:
828
      item.uuid = self._GenerateUniqueID(ec_id)
829 830 831
    elif item.uuid in self._AllIDs(include_temporary=True):
      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                      " in use" % (item.name, item.uuid))
  def _SetInstanceStatus(self, instance_name, status):
    """Set the instance's status to a given value.
Iustin Pop's avatar
Iustin Pop committed
835 836

    """
837 838
    assert isinstance(status, bool), \
           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
Iustin Pop's avatar
Iustin Pop committed
839 840

    if instance_name not in self._config_data.instances:
841 842
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
Iustin Pop's avatar
Iustin Pop committed
843
    instance = self._config_data.instances[instance_name]
844 845
    if instance.admin_up != status:
      instance.admin_up = status
846
      instance.serial_no += 1
847
      instance.mtime = time.time()
848
      self._WriteConfig()
  @locking.ssynchronized(_config_lock)
  def MarkInstanceUp(self, instance_name):
    """Mark the instance status to up in the config.

    @param instance_name: the name of the instance to mark as up

    """
    self._SetInstanceStatus(instance_name, True)
  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, instance_name):
    """Remove the instance from the configuration.

    @param instance_name: the name of the instance to remove
    @raise errors.ConfigurationError: if the instance is not known

    """
    if instance_name not in self._config_data.instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)
    del self._config_data.instances[instance_name]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    For file-based disks, the logical and physical IDs are rewritten so
    that the path component holding the instance name uses the new name.

    @param old_name: the current name of the instance
    @param new_name: the name the instance should have afterwards
    @raise errors.ConfigurationError: if C{old_name} is not a known instance

    """
    instances = self._config_data.instances
    if old_name not in instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = instances[old_name]

    # Compute the new IDs of file-based disks before touching any state, so
    # that a failure here cannot leave the instance dropped from the config
    renamed_disks = []
    for disk in inst.disks:
      if disk.dev_type == constants.LD_FILE:
        # rename the file paths in logical and physical id
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        new_id = (disk.logical_id[0],
                  utils.PathJoin(file_storage_dir, new_name, disk.iv_name))
        renamed_disks.append((disk, new_id))

    # Nothing below is expected to fail; apply the rename
    del instances[old_name]
    inst.name = new_name
    for (disk, new_id) in renamed_disks:
      disk.physical_id = disk.logical_id = new_id
    instances[inst.name] = inst

    # The set of instance names changed, so bump the cluster serial just
    # like AddInstance and RemoveInstance do
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def MarkInstanceDown(self, instance_name):
    """Mark the status of an instance to down in the configuration.

    @param instance_name: the name of the instance to mark as down

    """
    self._SetInstanceStatus(instance_name, False)
  def _UnlockedGetInstanceList(self):
    """Get the list of instances.

    This function is for internal use, when the config lock is already held.

    """
    return self._config_data.instances.keys()

  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceList(self):
    """Get the list of instances.

    @return: array of instances, ex. ['instance2.example.com',
        'instance1.example.com']

    """
    return self._UnlockedGetInstanceList()
  @locking.ssynchronized(_config_lock, shared=1)
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    @param short_name: the (possibly incomplete) instance name
    @return: the result of L{utils.MatchNameComponent} applied to the
        known instance names, with case-insensitive matching requested

    """
    return utils.MatchNameComponent(short_name,
                                    self._config_data.instances.keys(),
                                    case_sensitive=False)
  def _UnlockedGetInstanceInfo(self, instance_name):
Michael Hanselmann's avatar
Michael Hanselmann committed
930
    """Returns information about an instance.
931 932 933 934 935 936 937 938 939

    This function is for internal use, when the config lock is already held.

    """
    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfo(self, instance_name):
    """Returns information about an instance.

    It takes the information from the configuration file. Other information of
    an instance are taken from the live systems.

    @param instance_name: name of the instance, e.g.
        I{instance1.example.com}

    @rtype: L{objects.Instance}
    @return: the instance object, or None if the instance is not known

    """
    return self._UnlockedGetInstanceInfo(instance_name)
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllInstancesInfo(self):
    """Get the configuration of all instances.

    @rtype: dict
    @return: dict of (instance, instance_info), where instance_info is what
              would GetInstanceInfo return for the instance

    """
    return dict((name, self._UnlockedGetInstanceInfo(name))
                for name in self._UnlockedGetInstanceList())

  @locking.ssynchronized(_config_lock)
  def AddNode(self, node, ec_id):
    """Add a node to the configuration.

    @type node: L{objects.Node}
    @param node: a Node instance
    @param ec_id: the execution context id, handed to L{_EnsureUUID} in
        case a UUID has to be generated for the node

    """
    logging.info("Adding node %s to configuration", node.name)

    self._EnsureUUID(node, ec_id)

    # a newly added object starts with serial 1 and fresh timestamps
    node.serial_no = 1
    node.ctime = node.mtime = time.time()
    self._config_data.nodes[node.name] = node
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock)
  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    @param node_name: the name of the node to remove
    @raise errors.ConfigurationError: if the node is not known

    """
    logging.info("Removing node %s from configuration", node_name)

    if node_name not in self._config_data.nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del self._config_data.nodes[node_name]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

  @locking.ssynchronized(_config_lock, shared=1)
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    @param short_name: the (possibly incomplete) node name
    @return: the result of L{utils.MatchNameComponent} applied to the
        known node names, with case-insensitive matching requested

    """
    return utils.MatchNameComponent(short_name,
                                    self._config_data.nodes.keys(),
                                    case_sensitive=False)
  def _UnlockedGetNodeInfo(self, node_name):
Iustin Pop's avatar
Iustin Pop committed
1011 1012
    """Get the configuration of a node, as stored in the config.

Iustin Pop's avatar
Iustin Pop committed
1013 1014
    This function is for internal use, when the config lock is already
    held.
1015

Iustin Pop's avatar
Iustin Pop committed
1016
    @param node_name: the node name, e.g. I{node1.example.com}