config.py 46 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
#
Iustin Pop's avatar
Iustin Pop committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Configuration management for Ganeti

This module provides the interface to the Ganeti cluster configuration.

The configuration data is stored on every node but is updated on the master
only. After each update, the master distributes the data to the other nodes.

Currently, the data storage format is JSON. YAML was slow and consuming too
much memory.

"""

import os
import random
36
import logging
37
import time
Iustin Pop's avatar
Iustin Pop committed
38
39

from ganeti import errors
40
from ganeti import locking
Iustin Pop's avatar
Iustin Pop committed
41
42
43
44
from ganeti import utils
from ganeti import constants
from ganeti import rpc
from ganeti import objects
45
from ganeti import serializer
46
47


48
49
50
# Module-wide shared lock guarding the cluster configuration: ConfigWriter
# methods are wrapped with locking.ssynchronized on it, and every
# ConfigWriter instance also keeps a reference to it as self._lock.
_config_lock = locking.SharedLock()


Michael Hanselmann's avatar
Michael Hanselmann committed
51
def _ValidateConfig(data):
  """Check that a configuration object carries the expected version.

  Only the version field is inspected; nothing else in the object is
  validated here.

  @param data: the configuration object; its C{version} attribute is
      compared against C{constants.CONFIG_VERSION}
  @raise errors.ConfigurationError: if the version differs from what
      we expect

  """
  found = data.version
  expected = constants.CONFIG_VERSION
  if found == expected:
    return
  raise errors.ConfigurationError("Cluster configuration version"
                                  " mismatch, got %s instead of %s" %
                                  (found, expected))
Iustin Pop's avatar
Iustin Pop committed
65

66

Guido Trotter's avatar
Guido Trotter committed
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
class TemporaryReservationManager:
  """A temporary resource reservation manager.

  This is used to reserve resources in a job, before using them, making sure
  other jobs cannot get them in the meantime.

  Reservations are tracked per execution context ID, in a dictionary
  mapping each ID to the set of resources it currently holds.

  """
  def __init__(self):
    self._ec_reserved = {}

  def Reserved(self, resource):
    """Check whether a resource is reserved by any execution context.

    @param resource: the resource to look up
    @rtype: boolean
    @return: True if some execution context holds the resource

    """
    # Only the reserved sets are of interest here; iterating over items()
    # would test membership in the (ec_id, set) pairs and never match
    for holder_reserved in self._ec_reserved.values():
      if resource in holder_reserved:
        return True
    return False

  def Reserve(self, ec_id, resource):
    """Reserve a resource for an execution context.

    @param ec_id: the execution context that will hold the resource
    @param resource: the resource to reserve
    @raise errors.ReservationError: if the resource is already reserved

    """
    if self.Reserved(resource):
      # one-element tuple, so that tuple-valued resources format correctly
      raise errors.ReservationError("Duplicate reservation for resource: %s." %
                                    (resource,))
    self._ec_reserved.setdefault(ec_id, set()).add(resource)

  def DropECReservations(self, ec_id):
    """Forget all reservations held by an execution context.

    Unknown execution context IDs are silently ignored.

    @param ec_id: the execution context whose reservations are dropped

    """
    self._ec_reserved.pop(ec_id, None)

  def GetReserved(self):
    """Return all currently reserved resources.

    @rtype: set
    @return: a new set with the resources of all execution contexts

    """
    all_reserved = set()
    for holder_reserved in self._ec_reserved.values():
      all_reserved.update(holder_reserved)
    return all_reserved

  def Generate(self, existing, generate_one_fn, ec_id):
    """Generate a new resource of this type

    @param existing: iterable of resources that are already in use
    @param generate_one_fn: callable without arguments returning a
        candidate resource (or None if it could not produce one)
    @param ec_id: the execution context that will hold the new resource
    @return: the newly generated (and now reserved) resource
    @raise errors.ConfigurationError: if no unused resource was produced
        in 64 attempts

    """
    assert callable(generate_one_fn)

    all_elems = self.GetReserved()
    all_elems.update(existing)
    retries = 64
    while retries > 0:
      new_resource = generate_one_fn()
      if new_resource is not None and new_resource not in all_elems:
        break
      # count the failed attempt; without this the loop never terminates
      # when the generator keeps returning used values
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate new resource"
                                      " (last tried: %s)" % new_resource)
    self.Reserve(ec_id, new_resource)
    return new_resource


Iustin Pop's avatar
Iustin Pop committed
122
class ConfigWriter:
123
  """The interface to the cluster configuration.
Iustin Pop's avatar
Iustin Pop committed
124

125
  """
Iustin Pop's avatar
Iustin Pop committed
126
  def __init__(self, cfg_file=None, offline=False):
    """Set up the writer state and load the configuration.

    @param cfg_file: path of the configuration file; when None,
        C{constants.CLUSTER_CONF_FILE} is used
    @param offline: stored unchanged as C{self._offline}

    """
    self._lock = _config_lock
    self.write_count = 0
    self._config_data = None
    self._offline = offline
    self._cfg_file = cfg_file
    if self._cfg_file is None:
      self._cfg_file = constants.CLUSTER_CONF_FILE
    # bookkeeping for resources handed out but not yet in the config
    self._temporary_ids = set()
    self._temporary_macs = set()
    self._temporary_drbds = {}
    # Note: in order to prevent errors when resolving our name in
    # _DistributeConfig, we compute it here once and reuse it; it's
    # better to raise an error before starting to modify the config
    # file than after it was modified
    self._my_hostname = utils.HostInfo().name
    self._last_cluster_serial = -1
    self._OpenConfig()
Iustin Pop's avatar
Iustin Pop committed
145
146
147
148
149
150
151
152
153

  # this method needs to be static, so that we can call it on the class
  @staticmethod
  def IsCluster():
    """Tell whether a cluster configuration file is present.

    @rtype: boolean
    @return: True if C{constants.CLUSTER_CONF_FILE} exists

    """
    conf_path = constants.CLUSTER_CONF_FILE
    return os.path.exists(conf_path)

154
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateMAC(self):
    """Generate a MAC for an instance.

    Candidates are built from the cluster MAC prefix followed by three
    random bytes. A candidate is accepted when it is used neither by a
    NIC of a configured instance nor by an earlier temporary
    reservation; the accepted MAC is then added to the temporary
    reservations.

    @rtype: string
    @return: the new MAC address
    @raise errors.ConfigurationError: if no unused MAC was found in 64
        attempts

    """
    # NOTE(review): the lock is taken in shared mode although
    # _temporary_macs is modified below - confirm this is intended
    mac_prefix = self._config_data.cluster.mac_prefix
    configured_macs = self._AllMACs()
    for _attempt in range(64):
      octets = (random.randrange(0, 256),
                random.randrange(0, 256),
                random.randrange(0, 256))
      candidate = "%s:%02x:%02x:%02x" % ((mac_prefix,) + octets)
      if (candidate not in configured_macs and
          candidate not in self._temporary_macs):
        self._temporary_macs.add(candidate)
        return candidate
    raise errors.ConfigurationError("Can't generate unique MAC")

177
  @locking.ssynchronized(_config_lock, shared=1)
  def IsMacInUse(self, mac):
    """Predicate: check if the specified MAC is in use in the Ganeti cluster.

    Both the MACs of configured instances and the temporarily reserved
    ones are considered. Only this cluster is checked, not potential
    collisions elsewhere.

    @param mac: the MAC address to look up
    @rtype: boolean
    @return: True if the MAC is configured or temporarily reserved

    """
    configured_macs = self._AllMACs()
    if mac in configured_macs:
      return True
    return mac in self._temporary_macs
187

188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateDRBDSecret(self):
    """Generate a DRBD secret.

    The candidate is compared against the secrets collected by
    L{_AllDRBDSecrets}; up to 64 candidates are tried.

    @return: a secret not present in the collected secrets
    @raise errors.ConfigurationError: if all 64 candidates were already
        in use

    """
    known_secrets = self._AllDRBDSecrets()
    for _attempt in range(64):
      candidate = utils.GenerateSecret()
      if candidate not in known_secrets:
        return candidate
    raise errors.ConfigurationError("Can't generate unique DRBD secret")

206
  def _AllLVs(self):
207
208
209
210
211
212
213
214
215
216
    """Compute the list of all LVs.

    """
    lvnames = set()
    for instance in self._config_data.instances.values():
      node_data = instance.MapLVsByNode()
      for lv_list in node_data.values():
        lvnames.update(lv_list)
    return lvnames

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
  def _AllIDs(self, include_temporary):
    """Compute the list of all UUIDs and names we have.

    @type include_temporary: boolean
    @param include_temporary: whether to include the _temporary_ids set
    @rtype: set
    @return: a set of IDs

    """
    existing = set()
    if include_temporary:
      existing.update(self._temporary_ids)
    existing.update(self._AllLVs())
    existing.update(self._config_data.instances.keys())
    existing.update(self._config_data.nodes.keys())
232
    existing.update([i.uuid for i in self._AllUUIDObjects() if i.uuid])
233
234
    return existing

235
  def _GenerateUniqueID(self):
    """Generate an unique UUID.

    This checks the current node, instances and disk names for
    duplicates. The generated ID is also recorded in the temporary IDs,
    so that later calls do not hand it out again.

    @rtype: string
    @return: the unique id
    @raise errors.ConfigurationError: if no unused ID was produced in 64
        attempts

    """
    existing = self._AllIDs(include_temporary=True)
    retries = 64
    while retries > 0:
      unique_id = utils.NewUUID()
      if unique_id is not None and unique_id not in existing:
        break
      # count the failed attempt; without this the loop would spin
      # forever if only used (or empty) IDs are returned
      retries -= 1
    else:
      raise errors.ConfigurationError("Not able generate an unique ID"
                                      " (last tried ID: %s)" % unique_id)
    self._temporary_ids.add(unique_id)
    return unique_id

257
  @locking.ssynchronized(_config_lock, shared=1)
  def GenerateUniqueID(self):
    """Generate an unique ID.

    Locked entry point; the actual work is done by
    L{_GenerateUniqueID}.

    @return: the value returned by L{_GenerateUniqueID}

    """
    new_id = self._GenerateUniqueID()
    return new_id
265

266
267
268
269
270
271
272
  def _CleanupTemporaryIDs(self):
    """Cleanups the _temporary_ids structure.

    """
    existing = self._AllIDs(include_temporary=False)
    self._temporary_ids = self._temporary_ids - existing

Iustin Pop's avatar
Iustin Pop committed
273
274
275
  def _AllMACs(self):
    """Return all MACs present in the config.

Iustin Pop's avatar
Iustin Pop committed
276
277
278
    @rtype: list
    @return: the list of all MACs

Iustin Pop's avatar
Iustin Pop committed
279
280
281
282
283
284
285
286
    """
    result = []
    for instance in self._config_data.instances.values():
      for nic in instance.nics:
        result.append(nic.mac)

    return result

287
288
289
  def _AllDRBDSecrets(self):
    """Return all DRBD secrets present in the config.

    The disks of all instances are walked depth-first, parents before
    their children; for each disk whose type matches, the sixth element
    of its logical_id is collected.

    @rtype: list
    @return: the list of all DRBD secrets

    """
    def _WalkDisks(disk):
      """Yield a disk followed by all its descendants."""
      yield disk
      for child in (disk.children or []):
        for descendant in _WalkDisks(child):
          yield descendant

    # NOTE(review): sibling methods compare dev_type against
    # constants.LD_DRBD8; confirm DT_DRBD8 is the intended constant here
    return [disk.logical_id[5]
            for inst in self._config_data.instances.values()
            for top_disk in inst.disks
            for disk in _WalkDisks(top_disk)
            if disk.dev_type == constants.DT_DRBD8]

309
310
311
312
313
314
315
316
317
318
319
320
321
322
  def _CheckDiskIDs(self, disk, l_ids, p_ids):
    """Compute duplicate disk IDs

    @type disk: L{objects.Disk}
    @param disk: the disk at which to start searching
    @type l_ids: list
    @param l_ids: list of current logical ids
    @type p_ids: list
    @param p_ids: list of current physical ids
    @rtype: list
    @return: a list of error messages

    """
    result = []
323
324
325
326
327
328
329
330
331
332
    if disk.logical_id is not None:
      if disk.logical_id in l_ids:
        result.append("duplicate logical id %s" % str(disk.logical_id))
      else:
        l_ids.append(disk.logical_id)
    if disk.physical_id is not None:
      if disk.physical_id in p_ids:
        result.append("duplicate physical id %s" % str(disk.physical_id))
      else:
        p_ids.append(disk.physical_id)
333
334
335
336
337
338

    if disk.children:
      for child in disk.children:
        result.extend(self._CheckDiskIDs(child, l_ids, p_ids))
    return result

339
  def _UnlockedVerifyConfig(self):
    """Verify function.

    The following is checked, each problem producing one message:
      - the enabled hypervisors list is non-empty and valid
      - the master node is a known node and a master candidate
      - per instance: primary/secondary nodes are known, NIC MACs are
        unique, disks verify correctly and have unique logical/physical
        IDs
      - TCP/UDP ports (DRBD ports, instance network ports and the pool
        of free ports) are not used twice and do not exceed the saved
        highest used port
      - there are enough master candidates
      - no node has more than one of the master_candidate, drained and
        offline flags set
      - DRBD minors are not assigned twice
      - IP addresses are not shared between the cluster and the nodes

    This is the variant which does not take the config lock itself; see
    L{VerifyConfig} for the locked wrapper.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    result = []
    # only used for membership tests, hence a set
    seen_macs = set()
    ports = {}
    data = self._config_data
    # these two are extended in place by _CheckDiskIDs, keep them lists
    seen_lids = []
    seen_pids = []

    # global cluster checks
    if not data.cluster.enabled_hypervisors:
      result.append("enabled hypervisors list doesn't have any entries")
    invalid_hvs = set(data.cluster.enabled_hypervisors) - constants.HYPER_TYPES
    if invalid_hvs:
      result.append("enabled hypervisors contains invalid entries: %s" %
                    invalid_hvs)

    if data.cluster.master_node not in data.nodes:
      result.append("cluster has invalid primary node '%s'" %
                    data.cluster.master_node)

    # per-instance checks
    for instance_name in data.instances:
      instance = data.instances[instance_name]
      if instance.primary_node not in data.nodes:
        result.append("instance '%s' has invalid primary node '%s'" %
                      (instance_name, instance.primary_node))
      for snode in instance.secondary_nodes:
        if snode not in data.nodes:
          result.append("instance '%s' has invalid secondary node '%s'" %
                        (instance_name, snode))
      for idx, nic in enumerate(instance.nics):
        if nic.mac in seen_macs:
          result.append("instance '%s' has NIC %d mac %s duplicate" %
                        (instance_name, idx, nic.mac))
        else:
          seen_macs.add(nic.mac)

      # gather the drbd ports for duplicate checks
      for dsk in instance.disks:
        if dsk.dev_type in constants.LDS_DRBD:
          tcp_port = dsk.logical_id[2]
          ports.setdefault(tcp_port, []).append(
            (instance.name, "drbd disk %s" % dsk.iv_name))
      # gather network port reservation
      net_port = getattr(instance, "network_port", None)
      if net_port is not None:
        ports.setdefault(net_port, []).append((instance.name, "network port"))

      # instance disk verify
      for idx, disk in enumerate(instance.disks):
        result.extend(["instance '%s' disk %d error: %s" %
                       (instance.name, idx, msg) for msg in disk.Verify()])
        result.extend(self._CheckDiskIDs(disk, seen_lids, seen_pids))

    # cluster-wide pool of free ports
    for free_port in data.cluster.tcpudp_port_pool:
      ports.setdefault(free_port, []).append(("cluster",
                                              "port marked as free"))

    # compute tcp/udp duplicate ports
    keys = sorted(ports.keys())
    for pnum in keys:
      pdata = ports[pnum]
      if len(pdata) > 1:
        txt = ", ".join(["%s/%s" % val for val in pdata])
        result.append("tcp/udp port %s has duplicates: %s" % (pnum, txt))

    # highest used tcp port check
    if keys:
      if keys[-1] > data.cluster.highest_used_port:
        result.append("Highest used port mismatch, saved %s, computed %s" %
                      (data.cluster.highest_used_port, keys[-1]))

    # an unknown master node was already reported above; looking it up
    # again unconditionally would abort the verification with a KeyError
    if (data.cluster.master_node in data.nodes and
        not data.nodes[data.cluster.master_node].master_candidate):
      result.append("Master node is not a master candidate")

    # master candidate checks
    mc_now, mc_max, _ = self._UnlockedGetMasterCandidateStats()
    if mc_now < mc_max:
      result.append("Not enough master candidates: actual %d, target %d" %
                    (mc_now, mc_max))

    # node checks
    for node in data.nodes.values():
      if [node.master_candidate, node.drained, node.offline].count(True) > 1:
        # report the same attribute that was tested (node.drained)
        result.append("Node %s state is invalid: master_candidate=%s,"
                      " drain=%s, offline=%s" %
                      (node.name, node.master_candidate, node.drained,
                       node.offline))

    # drbd minors check; only the duplicates are of interest here
    _, duplicates = self._UnlockedComputeDRBDMap()
    for node, minor, instance_a, instance_b in duplicates:
      result.append("DRBD minor %d on node %s is assigned twice to instances"
                    " %s and %s" % (minor, node, instance_a, instance_b))

    # IP checks
    ips = { data.cluster.master_ip: ["cluster_ip"] }

    for node in data.nodes.values():
      ips.setdefault(node.primary_ip, []).append("node:%s/primary" %
                                                 node.name)
      if node.secondary_ip != node.primary_ip:
        ips.setdefault(node.secondary_ip, []).append("node:%s/secondary" %
                                                     node.name)

    for ip, owners in ips.items():
      if len(owners) > 1:
        result.append("IP address %s is used by multiple owners: %s" %
                      (ip, ", ".join(owners)))
    return result

466
467
468
469
470
471
472
473
474
475
476
477
478
  @locking.ssynchronized(_config_lock, shared=1)
  def VerifyConfig(self):
    """Verify function.

    Locked entry point; the checks themselves are implemented in
    L{_UnlockedVerifyConfig}.

    @rtype: list
    @return: a list of error messages; a non-empty list signifies
        configuration errors

    """
    problems = self._UnlockedVerifyConfig()
    return problems

479
  def _UnlockedSetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    This is used only for drbd, which needs ip/port configuration; for
    all other disk types the physical ID is simply the logical ID.

    The children are updated first, because this helps when only the
    top device is passed to the remote node.

    This function is for internal use, when the config lock is already held.

    @param disk: the disk whose physical_id is (re)computed in place
    @param node_name: the node for which the physical ID is computed
    @raise errors.ConfigurationError: if a DRBD disk does not list the
        node among its two nodes, or if one of these nodes is unknown

    """
    for child in (disk.children or []):
      self._UnlockedSetDiskID(child, node_name)

    # nothing to derive the physical ID from, keep the existing one
    if disk.logical_id is None and disk.physical_id is not None:
      return

    if disk.dev_type != constants.LD_DRBD8:
      disk.physical_id = disk.logical_id
      return

    (pnode, snode, port, pminor, sminor, secret) = disk.logical_id
    if node_name not in (pnode, snode):
      raise errors.ConfigurationError("DRBD device not knowing node %s" %
                                      node_name)
    pnode_info = self._UnlockedGetNodeInfo(pnode)
    snode_info = self._UnlockedGetNodeInfo(snode)
    if pnode_info is None or snode_info is None:
      raise errors.ConfigurationError("Can't find primary or secondary node"
                                      " for %s" % str(disk))

    primary_end = (pnode_info.secondary_ip, port)
    secondary_end = (snode_info.secondary_ip, port)
    # the endpoint of the requested node comes first, followed by its peer
    if pnode == node_name:
      local_end, remote_end, minor = primary_end, secondary_end, pminor
    else: # it must be secondary, we tested above
      local_end, remote_end, minor = secondary_end, primary_end, sminor
    disk.physical_id = local_end + remote_end + (minor, secret)
    return

517
518
519
520
521
522
523
524
525
526
527
528
529
530
  @locking.ssynchronized(_config_lock)
  def SetDiskID(self, disk, node_name):
    """Convert the unique ID to the ID needed on the target nodes.

    Locked entry point for L{_UnlockedSetDiskID}, which updates the
    disk and its children in place.

    @param disk: the disk to update
    @param node_name: the node for which the ID is computed
    @return: the value returned by L{_UnlockedSetDiskID}

    """
    outcome = self._UnlockedSetDiskID(disk, node_name)
    return outcome

  @locking.ssynchronized(_config_lock)
  def AddTcpUdpPort(self, port):
    """Adds a new port to the available port pool.

    The configuration is written out afterwards.

    @param port: the port to add; must be an integer
    @raise errors.ProgrammerError: if the port is not an integer

    """
    if isinstance(port, int):
      port_pool = self._config_data.cluster.tcpudp_port_pool
      port_pool.add(port)
      self._WriteConfig()
    else:
      raise errors.ProgrammerError("Invalid type passed for port")

541
  @locking.ssynchronized(_config_lock, shared=1)
  def GetPortList(self):
    """Returns a copy of the current port list.

    @return: a copy of the cluster's tcpudp_port_pool

    """
    port_pool = self._config_data.cluster.tcpudp_port_pool
    return port_pool.copy()
547

548
  @locking.ssynchronized(_config_lock)
  def AllocatePort(self):
    """Allocate a port.

    The port will be taken from the available port pool or from the
    default port range (and in this case we increase
    highest_used_port). The configuration is written out afterwards.

    @return: the allocated port
    @raise errors.ConfigurationError: if the pool is empty and the next
        port of the default range reaches constants.LAST_DRBD_PORT

    """
    cluster = self._config_data.cluster
    # If there are TCP/IP ports configured, we use them first.
    if cluster.tcpudp_port_pool:
      allocated = cluster.tcpudp_port_pool.pop()
    else:
      allocated = cluster.highest_used_port + 1
      if allocated >= constants.LAST_DRBD_PORT:
        raise errors.ConfigurationError("The highest used port is greater"
                                        " than %s. Aborting." %
                                        constants.LAST_DRBD_PORT)
      cluster.highest_used_port = allocated

    self._WriteConfig()
    return allocated

571
  def _UnlockedComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    Both the minors of the configured instance disks and the
    temporarily reserved ones are taken into account.

    @rtype: (dict, list)
    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty list), and a list of duplicates; if the duplicates
        list is not empty, the configuration is corrupted and its caller
        should raise an exception

    """
    minor_map = {}
    for node_name in self._config_data.nodes:
      minor_map[node_name] = {}
    clashes = []

    def _RecordMinors(owner, disk):
      """Register the minors of a disk and, recursively, of its children."""
      if disk.dev_type == constants.LD_DRBD8 and len(disk.logical_id) >= 5:
        (node_a, node_b, _, minor_a, minor_b) = disk.logical_id[:5]
        for (node, minor) in [(node_a, minor_a), (node_b, minor_b)]:
          assert node in minor_map, ("Node '%s' of instance '%s' not found"
                                     " in node list" % (node, owner))
          node_minors = minor_map[node]
          if minor in node_minors:
            clashes.append((node, minor, owner, node_minors[minor]))
          else:
            node_minors[minor] = owner
      for child in (disk.children or []):
        _RecordMinors(owner, child)

    for inst in self._config_data.instances.values():
      for top_disk in inst.disks:
        _RecordMinors(inst.name, top_disk)

    # a reservation only clashes if the minor belongs to another instance
    for ((node, minor), owner) in self._temporary_drbds.items():
      node_minors = minor_map[node]
      if minor in node_minors and node_minors[minor] != owner:
        clashes.append((node, minor, owner, node_minors[minor]))
      else:
        node_minors[minor] = owner
    return minor_map, clashes
609

610
611
612
613
614
615
616
617
618
619
620
  @locking.ssynchronized(_config_lock)
  def ComputeDRBDMap(self):
    """Compute the used DRBD minor/nodes.

    Locked entry point for L{_UnlockedComputeDRBDMap}, which in
    addition refuses to return a map containing duplicates.

    @return: dictionary of node_name: dict of minor: instance_name;
        the returned dict will have all the nodes in it (even if with
        an empty list).
    @raise errors.ConfigurationError: if duplicate minors were found

    """
    (minor_map, clashes) = self._UnlockedComputeDRBDMap()
    if not clashes:
      return minor_map
    raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                    str(clashes))
626

627
628
629
630
631
632
633
634
635
  @locking.ssynchronized(_config_lock)
  def AllocateDRBDMinor(self, nodes, instance):
    """Allocate a drbd minor.

    The free minor will be automatically computed from the existing
    devices. A node can be given multiple times in order to allocate
    multiple minors. The result is the list of minors, in the same
    order as the passed nodes. Each allocated minor is also recorded as
    a temporary reservation for the instance.

    @param nodes: the node names on which to allocate, one entry per
        requested minor
    @type instance: string
    @param instance: the instance for which we allocate minors
    @rtype: list
    @return: the allocated minors
    @raise errors.ConfigurationError: if the current minor map contains
        duplicates

    """
    assert isinstance(instance, basestring), \
           "Invalid argument '%s' passed to AllocateDRBDMinor" % instance

    (minor_map, clashes) = self._UnlockedComputeDRBDMap()
    if clashes:
      raise errors.ConfigurationError("Duplicate DRBD ports detected: %s" %
                                      str(clashes))

    allocated = []
    for nname in nodes:
      node_minors = minor_map[nname]
      if not node_minors:
        # no minors used, we can start at 0
        node_minors[0] = instance
        self._temporary_drbds[(nname, 0)] = instance
        allocated.append(0)
        continue

      used_minors = sorted(node_minors.keys())
      minor = utils.FirstFree(used_minors)
      if minor is None:
        # return the next minor
        # TODO: implement high-limit check
        minor = used_minors[-1] + 1

      # double-check minor against current instances
      assert minor not in minor_map[nname], \
             ("Attempt to reuse allocated DRBD minor %d on node %s,"
              " already allocated to instance %s" %
              (minor, nname, minor_map[nname][minor]))
      node_minors[minor] = instance

      # double-check minor against reservation
      reservation = (nname, minor)
      assert reservation not in self._temporary_drbds, \
             ("Attempt to reuse reserved DRBD minor %d on node %s,"
              " reserved for instance %s" %
              (minor, nname, self._temporary_drbds[reservation]))
      self._temporary_drbds[reservation] = instance
      allocated.append(minor)

    logging.debug("Request to allocate drbd minors, input: %s, returning %s",
                  nodes, allocated)
    return allocated

683
  def _UnlockedReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    All the entries of _temporary_drbds recorded for the instance are
    removed; the entries of other instances are left alone.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    assert isinstance(instance, basestring), \
           "Invalid argument passed to ReleaseDRBDMinors"
    # collect first, then delete, so the dict is not changed while scanned
    stale_keys = [key for (key, owner) in self._temporary_drbds.items()
                  if owner == instance]
    for key in stale_keys:
      del self._temporary_drbds[key]

697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
  @locking.ssynchronized(_config_lock)
  def ReleaseDRBDMinors(self, instance):
    """Release temporary drbd minors allocated for a given instance.

    This should be called on the error paths, on the success paths
    it's automatically called by the ConfigWriter add and update
    functions.

    Locked entry point; the work is done by
    L{_UnlockedReleaseDRBDMinors}.

    @type instance: string
    @param instance: the instance for which temporary minors should be
                     released

    """
    self._UnlockedReleaseDRBDMinors(instance=instance)

714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
  @locking.ssynchronized(_config_lock, shared=1)
  def GetConfigVersion(self):
    """Return the version stored in the configuration data.

    @return: Config version

    """
    config = self._config_data
    return config.version

  @locking.ssynchronized(_config_lock, shared=1)
  def GetClusterName(self):
    """Return the name of the cluster.

    @return: Cluster name

    """
    cluster = self._config_data.cluster
    return cluster.cluster_name

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNode(self):
    """Return the hostname of the master node for this cluster.

    @return: Master hostname

    """
    cluster = self._config_data.cluster
    return cluster.master_node

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterIP(self):
    """Return the IP of the master node for this cluster.

    @return: Master IP

    """
    cluster = self._config_data.cluster
    return cluster.master_ip

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterNetdev(self):
    """Return the master network device for this cluster.

    @return: the cluster's master_netdev value

    """
    cluster = self._config_data.cluster
    return cluster.master_netdev

  @locking.ssynchronized(_config_lock, shared=1)
  def GetFileStorageDir(self):
    """Return the file storage dir for this cluster.

    @return: the cluster's file_storage_dir value

    """
    cluster = self._config_data.cluster
    return cluster.file_storage_dir

  @locking.ssynchronized(_config_lock, shared=1)
  def GetHypervisorType(self):
    """Return the hypervisor type for this cluster.

    @return: the first entry of the cluster's enabled_hypervisors list

    """
    enabled = self._config_data.cluster.enabled_hypervisors
    return enabled[0]
770

771
  @locking.ssynchronized(_config_lock, shared=1)
  def GetHostKey(self):
    """Return the rsa hostkey from the config.

    The value is read from the cluster's rsahostkeypub field.

    @rtype: string
    @return: the rsa hostkey

    """
    cluster = self._config_data.cluster
    return cluster.rsahostkeypub

781
  @locking.ssynchronized(_config_lock)
  def AddInstance(self, instance, ec_id):
    """Add an instance to the config.

    This should be used after creating a new instance. On success the
    temporary DRBD minor and MAC reservations made for the instance are
    released and the configuration is written out.

    @type instance: L{objects.Instance}
    @param instance: the instance object
    @param ec_id: passed on to L{_EnsureUUID}
    @raise errors.ProgrammerError: if the argument is not an instance
    @raise errors.ConfigurationError: if one of the instance's MACs is
        already used by a configured instance

    """
    if not isinstance(instance, objects.Instance):
      raise errors.ProgrammerError("Invalid type passed to AddInstance")

    if instance.disk_template != constants.DT_DISKLESS:
      lv_layout = instance.MapLVsByNode()
      logging.info("Instance '%s' DISK_LAYOUT: %s", instance.name, lv_layout)

    configured_macs = self._AllMACs()
    for nic in instance.nics:
      if nic.mac in configured_macs:
        raise errors.ConfigurationError("Cannot add instance %s:"
                                        " MAC address '%s' already in use." %
                                        (instance.name, nic.mac))

    self._EnsureUUID(instance, ec_id)

    # a new instance starts with serial 1 and identical timestamps
    now = time.time()
    instance.serial_no = 1
    instance.ctime = now
    instance.mtime = now
    self._config_data.instances[instance.name] = instance
    self._config_data.cluster.serial_no += 1

    # the reservations are permanent now, drop the temporary ones
    self._UnlockedReleaseDRBDMinors(instance.name)
    for nic in instance.nics:
      self._temporary_macs.discard(nic.mac)
    self._WriteConfig()

816
  def _EnsureUUID(self, item, ec_id):
    """Ensures a given object has a valid UUID.

    Items without a UUID get a newly generated one. Items that already
    carry a UUID are rejected if that UUID is among the IDs returned by
    C{_AllIDs(temporary=True)}.

    @param item: the instance or node to be checked
    @param ec_id: the execution context id for the uuid reservation
        (not referenced by the current implementation)
    @raise errors.ConfigurationError: if the item's UUID is already in use

    """
    if not item.uuid:
      item.uuid = self._GenerateUniqueID()
    elif item.uuid in self._AllIDs(temporary=True):
      # the message needs one placeholder per tuple element, otherwise the
      # formatting itself fails with a TypeError and hides the real error
      raise errors.ConfigurationError("Cannot add '%s': UUID %s already"
                                      " in use" % (item.name, item.uuid))

829
830
  def _SetInstanceStatus(self, instance_name, status):
    """Set the instance's status to a given value.
Iustin Pop's avatar
Iustin Pop committed
831
832

    """
833
834
    assert isinstance(status, bool), \
           "Invalid status '%s' passed to SetInstanceStatus" % (status,)
Iustin Pop's avatar
Iustin Pop committed
835
836

    if instance_name not in self._config_data.instances:
837
838
      raise errors.ConfigurationError("Unknown instance '%s'" %
                                      instance_name)
Iustin Pop's avatar
Iustin Pop committed
839
    instance = self._config_data.instances[instance_name]
840
841
    if instance.admin_up != status:
      instance.admin_up = status
842
      instance.serial_no += 1
843
      instance.mtime = time.time()
844
      self._WriteConfig()
Iustin Pop's avatar
Iustin Pop committed
845

846
  @locking.ssynchronized(_config_lock)
  def MarkInstanceUp(self, instance_name):
    """Set the instance's administrative status to up in the config.

    @param instance_name: name of the instance to modify

    """
    self._SetInstanceStatus(instance_name, status=True)
852

853
  @locking.ssynchronized(_config_lock)
  def RemoveInstance(self, instance_name):
    """Drop an instance from the configuration and save the result.

    @param instance_name: name of the instance to remove
    @raise errors.ConfigurationError: if the instance is not known

    """
    instances = self._config_data.instances
    if instance_name not in instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % instance_name)

    del instances[instance_name]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

864
  @locking.ssynchronized(_config_lock)
  def RenameInstance(self, old_name, new_name):
    """Rename an instance.

    This needs to be done in ConfigWriter and not by RemoveInstance
    combined with AddInstance as only we can guarantee an atomic
    rename.

    @param old_name: the current name of the instance
    @param new_name: the name the instance should be stored under
    @raise errors.ConfigurationError: if C{old_name} is not a known instance

    """
    instances = self._config_data.instances
    if old_name not in instances:
      raise errors.ConfigurationError("Unknown instance '%s'" % old_name)
    inst = instances[old_name]
    del instances[old_name]
    inst.name = new_name

    for disk in inst.disks:
      if disk.dev_type == constants.LD_FILE:
        # rename the file paths in logical and physical id
        file_storage_dir = os.path.dirname(os.path.dirname(disk.logical_id[1]))
        disk.physical_id = disk.logical_id = (disk.logical_id[0],
                                              os.path.join(file_storage_dir,
                                                           inst.name,
                                                           disk.iv_name))

    # The set of instance names changed, so bump the cluster serial just
    # like AddInstance and RemoveInstance do.
    # NOTE(review): presumably this is what triggers the ssconf refresh on
    # the next write (see the comment in _OpenConfig) -- confirm.
    self._config_data.cluster.serial_no += 1

    instances[inst.name] = inst
    self._WriteConfig()

891
  @locking.ssynchronized(_config_lock)
  def MarkInstanceDown(self, instance_name):
    """Set the instance's administrative status to down in the config.

    @param instance_name: name of the instance to modify

    """
    self._SetInstanceStatus(instance_name, status=False)
Iustin Pop's avatar
Iustin Pop committed
897

898
899
900
901
902
903
904
905
  def _UnlockedGetInstanceList(self):
    """Get the list of instances.

    This function is for internal use, when the config lock is already held.

    """
    return self._config_data.instances.keys()

906
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceList(self):
    """Return the names of all instances in the configuration.

    This is a locked wrapper over L{_UnlockedGetInstanceList}.

    @return: array of instances, ex. ['instance2.example.com',
        'instance1.example.com']

    """
    names = self._UnlockedGetInstanceList()
    return names
Iustin Pop's avatar
Iustin Pop committed
915

916
  @locking.ssynchronized(_config_lock, shared=1)
  def ExpandInstanceName(self, short_name):
    """Attempt to expand an incomplete instance name.

    The candidates are the instance names present in the configuration;
    matching is case-insensitive.

    @param short_name: the (possibly partial) instance name
    @return: whatever L{utils.MatchNameComponent} returns for the match

    """
    known_names = self._config_data.instances.keys()
    return utils.MatchNameComponent(short_name, known_names,
                                    case_sensitive=False)
Iustin Pop's avatar
Iustin Pop committed
924

925
  def _UnlockedGetInstanceInfo(self, instance_name):
Michael Hanselmann's avatar
Michael Hanselmann committed
926
    """Returns information about an instance.
927
928
929
930
931
932
933
934
935

    This function is for internal use, when the config lock is already held.

    """
    if instance_name not in self._config_data.instances:
      return None

    return self._config_data.instances[instance_name]

936
  @locking.ssynchronized(_config_lock, shared=1)
  def GetInstanceInfo(self, instance_name):
    """Returns information about an instance.

    Only the data stored in the configuration is returned; any other
    information about an instance has to be taken from the live systems.
    This is a locked wrapper over L{_UnlockedGetInstanceInfo}.

    @param instance_name: name of the instance, e.g.
        I{instance1.example.com}

    @rtype: L{objects.Instance}
    @return: the instance object, or None if the instance is not known

    """
    info = self._UnlockedGetInstanceInfo(instance_name)
    return info
Iustin Pop's avatar
Iustin Pop committed
951

952
953
954
955
956
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllInstancesInfo(self):
    """Get the configuration of all instances.

    @rtype: dict
    @return: dict of (instance, instance_info), where instance_info is what
              GetInstanceInfo would return for the instance

    """
    all_info = {}
    for name in self._UnlockedGetInstanceList():
      all_info[name] = self._UnlockedGetInstanceInfo(name)
    return all_info

965
  @locking.ssynchronized(_config_lock)
  def AddNode(self, node, ec_id):
    """Add a node to the configuration.

    @type node: L{objects.Node}
    @param node: a Node instance
    @param ec_id: execution context id, handed over to L{_EnsureUUID}

    """
    logging.info("Adding node %s to configuration", node.name)

    self._EnsureUUID(node, ec_id)

    now = time.time()
    node.serial_no = 1
    node.ctime = now
    node.mtime = now

    self._config_data.nodes[node.name] = node
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

983
  @locking.ssynchronized(_config_lock)
  def RemoveNode(self, node_name):
    """Remove a node from the configuration.

    @param node_name: name of the node to remove
    @raise errors.ConfigurationError: if the node is not known

    """
    logging.info("Removing node %s from configuration", node_name)

    nodes = self._config_data.nodes
    if node_name not in nodes:
      raise errors.ConfigurationError("Unknown node '%s'" % node_name)

    del nodes[node_name]
    self._config_data.cluster.serial_no += 1
    self._WriteConfig()

997
  @locking.ssynchronized(_config_lock, shared=1)
  def ExpandNodeName(self, short_name):
    """Attempt to expand an incomplete node name.

    The candidates are the node names present in the configuration;
    matching is case-insensitive.

    @param short_name: the (possibly partial) node name
    @return: whatever L{utils.MatchNameComponent} returns for the match

    """
    known_names = self._config_data.nodes.keys()
    return utils.MatchNameComponent(short_name, known_names,
                                    case_sensitive=False)
Iustin Pop's avatar
Iustin Pop committed
1005

1006
  def _UnlockedGetNodeInfo(self, node_name):
Iustin Pop's avatar
Iustin Pop committed
1007
1008
    """Get the configuration of a node, as stored in the config.

Iustin Pop's avatar
Iustin Pop committed
1009
1010
    This function is for internal use, when the config lock is already
    held.
1011

Iustin Pop's avatar
Iustin Pop committed
1012
    @param node_name: the node name, e.g. I{node1.example.com}
Iustin Pop's avatar
Iustin Pop committed
1013

Iustin Pop's avatar
Iustin Pop committed
1014
1015
    @rtype: L{objects.Node}
    @return: the node object
Iustin Pop's avatar
Iustin Pop committed
1016
1017
1018
1019
1020
1021
1022

    """
    if node_name not in self._config_data.nodes:
      return None

    return self._config_data.nodes[node_name]

1023
1024
1025
1026
1027

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeInfo(self, node_name):
    """Get the configuration of a node, as stored in the config.

    This is just a locked wrapper over L{_UnlockedGetNodeInfo}.

    @param node_name: the node name, e.g. I{node1.example.com}

    @rtype: L{objects.Node}
    @return: the node object, or None if the node is not known

    """
    info = self._UnlockedGetNodeInfo(node_name)
    return info

  def _UnlockedGetNodeList(self):
Iustin Pop's avatar
Iustin Pop committed
1039
1040
    """Return the list of nodes which are in the configuration.

Iustin Pop's avatar
Iustin Pop committed
1041
1042
1043
1044
    This function is for internal use, when the config lock is already
    held.

    @rtype: list
1045

Iustin Pop's avatar
Iustin Pop committed
1046
1047
1048
    """
    return self._config_data.nodes.keys()

1049
1050
1051
1052
1053
1054
1055
1056

  @locking.ssynchronized(_config_lock, shared=1)
  def GetNodeList(self):
    """Return the names of the nodes which are in the configuration.

    This is just a locked wrapper over L{_UnlockedGetNodeList}.

    """
    names = self._UnlockedGetNodeList()
    return names

Iustin Pop's avatar
Iustin Pop committed
1057
1058
1059
1060
1061
1062
1063
1064
1065
  @locking.ssynchronized(_config_lock, shared=1)
  def GetOnlineNodeList(self):
    """Return the names of the nodes which are not marked offline.

    @rtype: list

    """
    online = []
    for name in self._UnlockedGetNodeList():
      node = self._UnlockedGetNodeInfo(name)
      if not node.offline:
        online.append(node.name)
    return online

1066
1067
1068
1069
1070
  @locking.ssynchronized(_config_lock, shared=1)
  def GetAllNodesInfo(self):
    """Get the configuration of all nodes.

    @rtype: dict
    @return: dict of (node, node_info), where node_info is what
              GetNodeInfo would return for the node

    """
    all_info = {}
    for name in self._UnlockedGetNodeList():
      all_info[name] = self._UnlockedGetNodeInfo(name)
    return all_info

1079
  def _UnlockedGetMasterCandidateStats(self, exceptions=None):
1080
1081
    """Get the number of current and maximum desired and possible candidates.

1082
1083
    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
1084
    @rtype: tuple
1085
    @return: tuple of (current, desired and possible, possible)
1086
1087

    """
1088
    mc_now = mc_should = mc_max = 0
1089
1090
1091
    for node in self._config_data.nodes.values():
      if exceptions and node.name in exceptions:
        continue
1092
      if not (node.offline or node.drained):
1093
1094
1095
        mc_max += 1
      if node.master_candidate:
        mc_now += 1
1096
1097
    mc_should = min(mc_max, self._config_data.cluster.candidate_pool_size)
    return (mc_now, mc_should, mc_max)
1098
1099

  @locking.ssynchronized(_config_lock, shared=1)
  def GetMasterCandidateStats(self, exceptions=None):
    """Get the number of current, desired and possible candidates.

    This is just a locked wrapper over L{_UnlockedGetMasterCandidateStats}.

    @type exceptions: list
    @param exceptions: if passed, list of nodes that should be ignored
    @rtype: tuple
    @return: tuple of (current, desired and possible, possible)

    """
    stats = self._UnlockedGetMasterCandidateStats(exceptions)
    return stats
1112
1113

  @locking.ssynchronized(_config_lock)
  def MaintainCandidatePool(self, exceptions):
    """Try to grow the candidate pool to the desired size.

    Nodes are visited in random order; each node that is not yet a master
    candidate, not offline, not drained and not listed in C{exceptions} is
    promoted until the desired pool size is reached. The configuration is
    written only if at least one node was changed.

    @type exceptions: list
    @param exceptions: list of nodes that should be ignored; None is
        treated like an empty list
    @rtype: list
    @return: list with the adjusted nodes (L{objects.Node} instances)

    """
    mc_now, mc_should, _ = self._UnlockedGetMasterCandidateStats(exceptions)
    mod_list = []
    if mc_now < mc_should:
      # the stats helper accepts None, so the membership test below must
      # not fail on it either
      ignored = exceptions or []
      node_list = list(self._config_data.nodes.keys())
      random.shuffle(node_list)
      for name in node_list:
        if mc_now >= mc_should:
          break
        node = self._config_data.nodes[name]
        if (node.master_candidate or node.offline or node.drained or
            node.name in ignored):
          continue
        mod_list.append(node)
        node.master_candidate = True
        node.serial_no += 1
        mc_now += 1
      if mc_now != mc_should:
        # this should not happen
        logging.warning("Warning: MaintainCandidatePool didn't manage to"
                        " fill the candidate pool (%d/%d)", mc_now, mc_should)
      if mod_list:
        self._config_data.cluster.serial_no += 1
        self._WriteConfig()

    return mod_list

Iustin Pop's avatar
Iustin Pop committed
1149
1150
1151
1152
  def _BumpSerialNo(self):
    """Bump up the serial number of the config.

    """
1153
    self._config_data.serial_no += 1
1154
    self._config_data.mtime = time.time()
Iustin Pop's avatar
Iustin Pop committed
1155

1156
1157
1158
1159
1160
1161
1162
1163
  def _AllUUIDObjects(self):
    """Returns all objects with uuid attributes.

    """
    return (self._config_data.instances.values() +
            self._config_data.nodes.values() +
            [self._config_data.cluster])

Iustin Pop's avatar
Iustin Pop committed
1164
1165
1166
1167
  def _OpenConfig(self):
    """Read the config data from disk.

    """
1168
1169
    raw_data = utils.ReadFile(self._cfg_file)

Iustin Pop's avatar
Iustin Pop committed
1170
    try:
1171
1172
1173
      data = objects.ConfigData.FromDict(serializer.Load(raw_data))
    except Exception, err:
      raise errors.ConfigurationError(err)
Michael Hanselmann's avatar
Michael Hanselmann committed
1174
1175
1176
1177

    # Make sure the configuration has the right version
    _ValidateConfig(data)

Iustin Pop's avatar
Iustin Pop committed
1178
    if (not hasattr(data, 'cluster') or
1179
        not hasattr(data.cluster, 'rsahostkeypub')):
1180
      raise errors.ConfigurationError("Incomplete configuration"
1181
                                      " (missing cluster.rsahostkeypub)")
1182
1183
1184
1185

    # Upgrade configuration if needed
    data.UpgradeConfig()

Iustin Pop's avatar
Iustin Pop committed
1186
    self._config_data = data
1187
    # reset the last serial as -1 so that the next write will cause
1188
1189
    # ssconf update
    self._last_cluster_serial = -1
Iustin Pop's avatar
Iustin Pop committed
1190

1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
    # And finally run our (custom) config upgrade sequence
    self._UpgradeConfig()

  def _UpgradeConfig(self):
    """Run upgrade steps that cannot be done purely in the objects.

    This is because some data elements need uniqueness across the
    whole configuration, etc.

    @warning: this function will call L{_WriteConfig()}, so it needs
        to either be called with the lock held or from a safe place
        (the constructor)

    """
    modified = False
    for item in self._AllUUIDObjects():
      if item.uuid is None:
1208
        item.uuid = self._GenerateUniqueID()
1209
1210
1211
1212