#
#

# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Cluster related commands"""

# pylint: disable=W0401,W0613,W0614,C0103
# W0401: Wildcard import ganeti.cli
# W0613: Unused argument, since all functions follow the same API
# W0614: Unused import %s from wildcard import (since we need cli)
# C0103: Invalid name gnt-cluster

import os.path
import time
import OpenSSL
import itertools

from ganeti.cli import *
from ganeti import opcodes
from ganeti import constants
from ganeti import errors
from ganeti import utils
from ganeti import bootstrap
from ganeti import ssh
from ganeti import objects
from ganeti import uidpool
from ganeti import compat
from ganeti import netutils


# Command line switch: recover from an emergency power-off (EPO)
ON_OPT = cli_option("--on", default=False,
                    action="store_true", dest="on",
                    help="Recover from an EPO")

# Command line switch: interpret the arguments as node groups, not nodes
GROUPS_OPT = cli_option("--groups", default=False,
                        action="store_true", dest="groups",
                        help="Arguments are node groups instead of nodes")

_EPO_PING_INTERVAL = 30 # 30 seconds between pings
_EPO_PING_TIMEOUT = 1 # 1 second
_EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes


@UsesRPC
Iustin Pop's avatar
Iustin Pop committed
61
62
63
def InitCluster(opts, args):
  """Initialize the cluster.

64
65
66
67
68
69
  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code
Iustin Pop's avatar
Iustin Pop committed
70
71

  """
72
  if not opts.lvm_storage and opts.vg_name:
73
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
74
75
76
77
78
79
    return 1

  vg_name = opts.vg_name
  if opts.lvm_storage and not opts.vg_name:
    vg_name = constants.DEFAULT_VG

80
81
82
83
84
85
86
87
  if not opts.drbd_storage and opts.drbd_helper:
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
    return 1

  drbd_helper = opts.drbd_helper
  if opts.drbd_storage and not opts.drbd_helper:
    drbd_helper = constants.DEFAULT_DRBD_HELPER

88
89
90
91
  master_netdev = opts.master_netdev
  if master_netdev is None:
    master_netdev = constants.DEFAULT_BRIDGE

92
  hvlist = opts.enabled_hypervisors
93
94
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
95
  hvlist = hvlist.split(",")
96

97
  hvparams = dict(opts.hvparams)
98
  beparams = opts.beparams
99
  nicparams = opts.nicparams
100

101
102
103
104
105
106
107
108
109
110
  diskparams = dict(opts.diskparams)

  # check the disk template types here, as we cannot rely on the type check done
  # by the opcode parameter types
  diskparams_keys = set(diskparams.keys())
  if not (diskparams_keys <= constants.DISK_TEMPLATES):
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
    return 1

111
  # prepare beparams dict
112
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
113
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)
114

115
116
117
118
  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

119
120
121
122
123
124
125
  # prepare ndparams dict
  if opts.ndparams is None:
    ndparams = dict(constants.NDC_DEFAULTS)
  else:
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

126
127
128
129
  # prepare hvparams dict
  for hv in constants.HYPER_TYPES:
    if hv not in hvparams:
      hvparams[hv] = {}
130
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
131
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)
132

133
134
135
136
137
138
139
140
  # prepare diskparams dict
  for templ in constants.DISK_TEMPLATES:
    if templ not in diskparams:
      diskparams[templ] = {}
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
                                         diskparams[templ])
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)

141
142
143
  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

144
145
146
  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

147
148
149
150
  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

151
152
153
  if opts.prealloc_wipe_disks is None:
    opts.prealloc_wipe_disks = False

154
155
156
157
  external_ip_setup_script = opts.use_external_mip_script
  if external_ip_setup_script is None:
    external_ip_setup_script = False

158
159
160
161
162
163
  try:
    primary_ip_version = int(opts.primary_ip_version)
  except (ValueError, TypeError), err:
    ToStderr("Invalid primary ip version value: %s" % str(err))
    return 1

164
165
166
167
168
169
170
171
  master_netmask = opts.master_netmask
  try:
    if master_netmask is not None:
      master_netmask = int(master_netmask)
  except (ValueError, TypeError), err:
    ToStderr("Invalid master netmask value: %s" % str(err))
    return 1

172
173
174
175
  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
176
                        master_netmask=master_netmask,
177
                        master_netdev=master_netdev,
178
                        file_storage_dir=opts.file_storage_dir,
179
                        shared_file_storage_dir=opts.shared_file_storage_dir,
180
181
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
182
                        beparams=beparams,
183
                        nicparams=nicparams,
184
                        ndparams=ndparams,
185
                        diskparams=diskparams,
186
                        candidate_pool_size=opts.candidate_pool_size,
187
                        modify_etc_hosts=opts.modify_etc_hosts,
188
                        modify_ssh_setup=opts.modify_ssh_setup,
189
                        maintain_node_health=opts.maintain_node_health,
190
                        drbd_helper=drbd_helper,
191
                        uid_pool=uid_pool,
192
                        default_iallocator=opts.default_iallocator,
193
                        primary_ip_version=primary_ip_version,
194
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
195
                        use_external_mip_script=external_ip_setup_script,
196
                        )
197
  op = opcodes.OpClusterPostInit()
198
  SubmitOpCode(op, opts=opts)
Iustin Pop's avatar
Iustin Pop committed
199
200
201
  return 0


202
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Nothing is done unless the user passed the explicit confirmation
  option (C{opts.yes_do_it}).

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.yes_do_it:
    ToStderr("Destroying a cluster is irreversible. If you really want"
             " to destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpClusterDestroy()
  master = SubmitOpCode(op, opts=opts)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master)
  return 0


def RenameCluster(opts, args):
  """Rename the cluster.

  Unless C{opts.force} is set, the user is asked for confirmation
  first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  (cluster_name, ) = cl.QueryConfigValues(["cluster_name"])

  new_name = args[0]
  if not opts.force:
    usertext = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (cluster_name, new_name)
    if not AskUser(usertext):
      return 1

  op = opcodes.OpClusterRename(name=new_name)
  result = SubmitOpCode(op, opts=opts, cl=cl)

  # the opcode result is reported as the new cluster name
  if result:
    ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result)

  return 0


def ActivateMasterIp(opts, args):
  """Activates the master IP.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterActivateMasterIp()
  # pass the options along, like all other commands in this module do
  SubmitOpCode(op, opts=opts)
  return 0


def DeactivateMasterIp(opts, args):
  """Deactivates the master IP.

  Unless C{opts.confirm} is set, the user is asked for confirmation
  first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.confirm:
    usertext = ("This will disable the master IP. All the open connections to"
                " the master IP will be closed. To reach the master you will"
                " need to use its node IP."
                " Continue?")
    if not AskUser(usertext):
      return 1

  op = opcodes.OpClusterDeactivateMasterIp()
  # pass the options along, like all other commands in this module do
  SubmitOpCode(op, opts=opts)
  return 0


def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRedistConf()
  SubmitOrSend(op, opts)
  return 0


def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  The values are taken from the cluster information query.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()
  ToStdout("Software version: %s", result["software_version"])
  ToStdout("Internode protocol: %s", result["protocol_version"])
  ToStdout("Configuration format: %s", result["config_version"])
  ToStdout("OS api version: %s", result["os_api_version"])
  ToStdout("Export interface: %s", result["export_version"])
  return 0


def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  master = bootstrap.GetMaster()
  ToStdout(master)
  return 0


def _PrintGroupedParams(paramsdict, level=1, roman=False):
  """Print Grouped parameters (be, nic, disk) by group.

  Nested dictionaries are printed recursively, one indentation level
  deeper each time.

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
  @type level: int
  @param level: Level of indention
  @type roman: bool
  @param roman: whether integer values should be passed through
      L{compat.TryToRoman} before being printed

  """
  indent = "  " * level
  for item, val in sorted(paramsdict.items()):
    if isinstance(val, dict):
      ToStdout("%s- %s:", indent, item)
      _PrintGroupedParams(val, level=level + 1, roman=roman)
    elif roman and isinstance(val, int) and not isinstance(val, bool):
      # bool is a subclass of int; boolean parameters must be printed
      # as they are and not be treated as numbers
      ToStdout("%s  %s: %s", indent, item, compat.TryToRoman(val))
    else:
      ToStdout("%s  %s: %s", indent, item, val)


def ShowClusterConfig(opts, args):
  """Shows cluster information.

  Queries the cluster information and prints it to the standard
  output; C{opts.roman_integers} is passed on to the helpers formatting
  integer values.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()

  ToStdout("Cluster name: %s", result["name"])
  ToStdout("Cluster UUID: %s", result["uuid"])

  ToStdout("Creation time: %s", utils.FormatTime(result["ctime"]))
  ToStdout("Modification time: %s", utils.FormatTime(result["mtime"]))

  ToStdout("Master node: %s", result["master"])

  ToStdout("Architecture (this node): %s (%s)",
           result["architecture"][0], result["architecture"][1])

  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    tags = "(none)"

  ToStdout("Tags: %s", tags)

  ToStdout("Default hypervisor: %s", result["default_hypervisor"])
  ToStdout("Enabled hypervisors: %s",
           utils.CommaJoin(result["enabled_hypervisors"]))

  ToStdout("Hypervisor parameters:")
  _PrintGroupedParams(result["hvparams"])

  ToStdout("OS-specific hypervisor parameters:")
  _PrintGroupedParams(result["os_hvp"])

  ToStdout("OS parameters:")
  _PrintGroupedParams(result["osparams"])

  ToStdout("Hidden OSes: %s", utils.CommaJoin(result["hidden_os"]))
  ToStdout("Blacklisted OSes: %s", utils.CommaJoin(result["blacklisted_os"]))

  ToStdout("Cluster parameters:")
  ToStdout("  - candidate pool size: %s",
           compat.TryToRoman(result["candidate_pool_size"],
                             convert=opts.roman_integers))
  ToStdout("  - master netdev: %s", result["master_netdev"])
  ToStdout("  - master netmask: %s", result["master_netmask"])
  ToStdout("  - use external master IP address setup script: %s",
           result["use_external_mip_script"])
  ToStdout("  - lvm volume group: %s", result["volume_group_name"])
  if result["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
  else:
    reserved_lvs = "(none)"
  ToStdout("  - lvm reserved volumes: %s", reserved_lvs)
  ToStdout("  - drbd usermode helper: %s", result["drbd_usermode_helper"])
  ToStdout("  - file storage path: %s", result["file_storage_dir"])
  ToStdout("  - shared file storage path: %s",
           result["shared_file_storage_dir"])
  ToStdout("  - maintenance of node health: %s",
           result["maintain_node_health"])
  ToStdout("  - uid pool: %s",
           uidpool.FormatUidPool(result["uid_pool"],
                                 roman=opts.roman_integers))
  ToStdout("  - default instance allocator: %s", result["default_iallocator"])
  ToStdout("  - primary ip version: %d", result["primary_ip_version"])
  ToStdout("  - preallocation wipe disks: %s", result["prealloc_wipe_disks"])
  ToStdout("  - OS search path: %s", utils.CommaJoin(constants.OS_SEARCH_PATH))

  ToStdout("Default node parameters:")
  _PrintGroupedParams(result["ndparams"], roman=opts.roman_integers)

  ToStdout("Default instance parameters:")
  _PrintGroupedParams(result["beparams"], roman=opts.roman_integers)

  ToStdout("Default nic parameters:")
  _PrintGroupedParams(result["nicparams"], roman=opts.roman_integers)

  return 0


def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  A failed copy to a node is reported on the standard error, but does
  not change the exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the given file does not exist

  """
  filename = args[0]
  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  cl = GetClient()

  cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

  # the master is filtered out of the list of target nodes
  results = GetOnlineNodes(nodes=opts.nodes, cl=cl, filter_master=True,
                           secondary_ips=opts.use_replication_network,
                           nodegroup=opts.nodegroup)

  srun = ssh.SshRunner(cluster_name=cluster_name)
  for node in results:
    if not srun.CopyFileToNode(node, filename):
      ToStderr("Copy of file %s to node %s failed", filename, node)

  return 0


def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  The arguments are joined with spaces into a single command line,
  which is run over SSH as root on each selected node; the output and
  the exit code of each run are printed.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=cl, nodegroup=opts.nodegroup)

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  srun = ssh.SshRunner(cluster_name=cluster_name)

  # Make sure master node is at list end
  if master_node in nodes:
    nodes.remove(master_node)
    nodes.append(master_node)

  for name in nodes:
    result = srun.Run(name, "root", command)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", name)
    ToStdout("%s", result.output)
    ToStdout("return code = %s", result.exit_code)

  return 0


def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  The verification opcode returns a list of jobs; these are waited for
  and the exit code is successful only if all of them succeeded and
  each returned exactly one, true, opcode result.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  results = jex.GetResults()

  # Count the jobs which failed and, separately, the jobs which did not
  # return exactly one, true, opcode result
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in results:
    if not job_success:
      bad_jobs += 1
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode


def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

566
567
568
569
570
  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code
571
572

  """
573
574
  cl = GetClient()

575
  op = opcodes.OpClusterVerifyDisks()
576

577
578
579
580
581
582
583
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)
584

585
  retcode = constants.EXIT_SUCCESS
586

587
588
589
590
591
592
593
  for (status, result) in jex.GetResults():
    if not status:
      ToStdout("Job failed: %s", result)
      continue

    ((bad_nodes, instances, missing), ) = result

594
595
    for node, text in bad_nodes.items():
      ToStdout("Error gathering data on node %s: %s",
596
               node, utils.SafeEncode(text[-400:]))
597
      retcode = constants.EXIT_FAILURE
598
      ToStdout("You need to fix these nodes first before fixing instances")
599

600
    for iname in instances:
601
602
      if iname in missing:
        continue
603
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
604
      try:
605
        ToStdout("Activating disks for instance '%s'", iname)
606
        SubmitOpCode(op, opts=opts, cl=cl)
607
608
609
      except errors.GenericError, err:
        nret, msg = FormatError(err)
        retcode |= nret
610
        ToStderr("Error activating disks for instance %s: %s", iname, msg)
611

612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
    if missing:
      for iname, ival in missing.iteritems():
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
        if all_missing:
          ToStdout("Instance %s cannot be verified as it lives on"
                   " broken nodes", iname)
        else:
          ToStdout("Instance %s has missing logical volumes:", iname)
          ival.sort()
          for node, vol in ival:
            if node in bad_nodes:
              ToStdout("\tbroken node %s /dev/%s", node, vol)
            else:
              ToStdout("\t%s /dev/%s", node, vol)

      ToStdout("You need to replace or recreate disks for all the above"
               " instances if this message persists after fixing broken nodes.")
      retcode = constants.EXIT_FAILURE
630
631
632
633

  return retcode


634
635
636
637
638
639
640
641
642
643
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # return an explicit exit code, as documented and as done by the
  # other commands
  return 0


@UsesRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  If C{opts.no_voting} is set, the user is asked for confirmation
  first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.no_voting:
    usertext = ("This will perform the failover even if most other nodes"
                " are down, or if this node is outdated. This is dangerous"
                " as it can lead to a non-consistent cluster. Check the"
                " gnt-cluster(8) man page before proceeding. Continue?")
    if not AskUser(usertext):
      return 1

  return bootstrap.MasterFailover(no_voting=opts.no_voting)


def MasterPing(opts, args):
  """Checks if the master is alive.

  The master counts as alive if a client can be created and the
  cluster information can be queried; any exception raised while doing
  so is turned into a failure exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  try:
    cl = GetClient()
    cl.QueryClusterInfo()
    return 0
  except Exception: # pylint: disable=W0703
    return 1


def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  The matches are printed sorted, one "path tag" pair per line.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code; 1 if nothing matched the pattern

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  result = SubmitOpCode(op, opts=opts)
  if not result:
    return 1
  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)
  # return an explicit exit code, as documented and as done by the
  # other commands
  return 0


def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
  """Reads and verifies an X509 certificate.

  @type cert_filename: string
  @param cert_filename: the path of the file containing the certificate to
                        verify encoded in PEM format
  @type verify_private_key: bool
  @param verify_private_key: whether to verify the private key in addition to
                             the public certificate
  @rtype: string
  @return: a string containing the PEM-encoded certificate.

  """
  try:
    pem = utils.ReadFile(cert_filename)
  except IOError, err:
    raise errors.X509CertError(cert_filename,
                               "Unable to read certificate: %s" % str(err))

  try:
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
  except Exception, err:
    raise errors.X509CertError(cert_filename,
                               "Unable to load certificate: %s" % str(err))

  if verify_private_key:
    try:
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
    except Exception, err:
      raise errors.X509CertError(cert_filename,
                                 "Unable to load private key: %s" % str(err))

  return pem


def _RenewCrypto(new_cluster_cert, new_rapi_cert, #pylint: disable=R0911
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
                 cds_filename, force):
751
752
753
754
755
756
757
758
  """Renews cluster certificates, keys and secrets.

  @type new_cluster_cert: bool
  @param new_cluster_cert: Whether to generate a new cluster certificate
  @type new_rapi_cert: bool
  @param new_rapi_cert: Whether to generate a new RAPI certificate
  @type rapi_cert_filename: string
  @param rapi_cert_filename: Path to file containing new RAPI certificate
759
760
761
762
763
764
765
  @type new_spice_cert: bool
  @param new_spice_cert: Whether to generate a new SPICE certificate
  @type spice_cert_filename: string
  @param spice_cert_filename: Path to file containing new SPICE certificate
  @type spice_cacert_filename: string
  @param spice_cacert_filename: Path to file containing the certificate of the
                                CA that signed the SPICE certificate
766
767
  @type new_confd_hmac_key: bool
  @param new_confd_hmac_key: Whether to generate a new HMAC key
Michael Hanselmann's avatar
Michael Hanselmann committed
768
769
770
771
  @type new_cds: bool
  @param new_cds: Whether to generate a new cluster domain secret
  @type cds_filename: string
  @param cds_filename: Path to file containing new cluster domain secret
772
773
774
775
776
  @type force: bool
  @param force: Whether to ask user for confirmation

  """
  if new_rapi_cert and rapi_cert_filename:
777
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
778
779
780
             " options can be specified at the same time.")
    return 1

Michael Hanselmann's avatar
Michael Hanselmann committed
781
782
783
784
785
786
  if new_cds and cds_filename:
    ToStderr("Only one of the --new-cluster-domain-secret and"
             " --cluster-domain-secret options can be specified at"
             " the same time.")
    return 1

787
788
789
790
  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
             " and --spice-ca-certificate must not be used.")
    return 1
791

792
793
794
795
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
             " specified.")
    return 1
796

797
798
799
800
801
802
803
804
805
806
  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
  try:
    if rapi_cert_filename:
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
    if spice_cert_filename:
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
  except errors.X509CertError, err:
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
    return 1
807

Michael Hanselmann's avatar
Michael Hanselmann committed
808
809
810
  if cds_filename:
    try:
      cds = utils.ReadFile(cds_filename)
811
    except Exception, err: # pylint: disable=W0703
Michael Hanselmann's avatar
Michael Hanselmann committed
812
813
814
815
816
817
      ToStderr("Can't load new cluster domain secret from %s: %s" %
               (cds_filename, str(err)))
      return 1
  else:
    cds = None

818
819
820
821
822
823
824
825
  if not force:
    usertext = ("This requires all daemons on all nodes to be restarted and"
                " may take some time. Continue?")
    if not AskUser(usertext):
      return 1

  def _RenewCryptoInner(ctx):
    ctx.feedback_fn("Updating certificates and keys")
826
827
828
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
                                    new_rapi_cert,
                                    new_spice_cert,
829
                                    new_confd_hmac_key,
Michael Hanselmann's avatar
Michael Hanselmann committed
830
831
                                    new_cds,
                                    rapi_cert_pem=rapi_cert_pem,
832
833
                                    spice_cert_pem=spice_cert_pem,
                                    spice_cacert_pem=spice_cacert_pem,
Michael Hanselmann's avatar
Michael Hanselmann committed
834
                                    cds=cds)
835
836
837
838

    files_to_copy = []

    if new_cluster_cert:
839
      files_to_copy.append(constants.NODED_CERT_FILE)
840
841
842
843

    if new_rapi_cert or rapi_cert_pem:
      files_to_copy.append(constants.RAPI_CERT_FILE)

844
845
846
847
    if new_spice_cert or spice_cert_pem:
      files_to_copy.append(constants.SPICE_CERT_FILE)
      files_to_copy.append(constants.SPICE_CACERT_FILE)

848
849
    if new_confd_hmac_key:
      files_to_copy.append(constants.CONFD_HMAC_KEY)
850

Michael Hanselmann's avatar
Michael Hanselmann committed
851
852
853
    if new_cds or cds:
      files_to_copy.append(constants.CLUSTER_DOMAIN_SECRET_FILE)

854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
    if files_to_copy:
      for node_name in ctx.nonmaster_nodes:
        ctx.feedback_fn("Copying %s to %s" %
                        (", ".join(files_to_copy), node_name))
        for file_name in files_to_copy:
          ctx.ssh.CopyFileToNode(node_name, file_name)

  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)

  ToStdout("All requested certificates and keys have been replaced."
           " Running \"gnt-cluster verify\" now is recommended.")

  return 0


def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  Thin command line entry point: the relevant option values are collected
  from C{opts} and handed over positionally to L{_RenewCrypto}.

  @param opts: the command line options selected by the user
  @param args: unused by this function
  @return: whatever L{_RenewCrypto} returns

  """
  # The order of this tuple is the positional argument order of the call
  renew_args = (
    opts.new_cluster_cert,
    opts.new_rapi_cert,
    opts.rapi_cert,
    opts.new_spice_cert,
    opts.spice_cert,
    opts.spice_cacert,
    opts.new_confd_hmac_key,
    opts.new_cluster_domain_secret,
    opts.cluster_domain_secret,
    opts.force,
    )

  return _RenewCrypto(*renew_args)


885
886
887
def SetClusterParams(opts, args):
  """Modify the cluster.

  Checks that at least one modification was requested, validates and
  normalizes the individual options and finally submits a single
  L{opcodes.OpClusterSetParams} opcode built from them.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code; 1 if no parameter was given, if
      conflicting options were passed or if the master netmask is not an
      integer, 0 once the opcode has been submitted

  """
  # Bail out early if none of the supported options was actually given
  if not (not opts.lvm_storage or opts.vg_name or
          not opts.drbd_storage or opts.drbd_helper or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams or opts.nicparams or
          opts.ndparams or opts.diskparams or
          opts.candidate_pool_size is not None or
          opts.uid_pool is not None or
          opts.maintain_node_health is not None or
          opts.add_uids is not None or
          opts.remove_uids is not None or
          opts.default_iallocator is not None or
          opts.reserved_lvs is not None or
          opts.master_netdev is not None or
          opts.master_netmask is not None or
          opts.use_external_mip_script is not None or
          opts.prealloc_wipe_disks is not None or
          opts.hv_state or
          opts.disk_state):
    ToStderr("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage and opts.vg_name:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # With LVM storage disabled an empty volume group name is submitted
  if not opts.lvm_storage:
    vg_name = ""

  drbd_helper = opts.drbd_helper
  if not opts.drbd_storage and opts.drbd_helper:
    ToStderr("Options --no-drbd-storage and --drbd-usermode-helper conflict.")
    return 1

  # With DRBD storage disabled an empty helper is submitted
  if not opts.drbd_storage:
    drbd_helper = ""

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for hv_params in hvparams.values():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

  diskparams = dict(opts.diskparams)

  # The disk parameter types must be enforced on the disk parameters
  # themselves; iterating over "hvparams" here would check the hypervisor
  # parameters against the wrong type table and leave "diskparams" unchecked
  for dt_params in diskparams.values():
    utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  nicparams = opts.nicparams
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  ndparams = opts.ndparams
  if ndparams is not None:
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  mnh = opts.maintain_node_health

  # The three UID options are parsed only when they were given
  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  add_uids = opts.add_uids
  if add_uids is not None:
    add_uids = uidpool.ParseUidPool(add_uids)

  remove_uids = opts.remove_uids
  if remove_uids is not None:
    remove_uids = uidpool.ParseUidPool(remove_uids)

  # An explicitly empty string becomes an empty list; anything else is
  # split on commas
  if opts.reserved_lvs is not None:
    if opts.reserved_lvs == "":
      opts.reserved_lvs = []
    else:
      opts.reserved_lvs = utils.UnescapeAndSplit(opts.reserved_lvs, sep=",")

  if opts.master_netmask is not None:
    try:
      opts.master_netmask = int(opts.master_netmask)
    except ValueError:
      ToStderr("The --master-netmask option expects an int parameter.")
      return 1

  ext_ip_script = opts.use_external_mip_script

  if opts.disk_state:
    disk_state = utils.FlatToDict(opts.disk_state)
  else:
    disk_state = {}

  hv_state = dict(opts.hv_state)

  op = opcodes.OpClusterSetParams(vg_name=vg_name,
                                  drbd_helper=drbd_helper,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  os_hvp=None,
                                  beparams=beparams,
                                  nicparams=nicparams,
                                  ndparams=ndparams,
                                  diskparams=diskparams,
                                  candidate_pool_size=opts.candidate_pool_size,
                                  maintain_node_health=mnh,
                                  uid_pool=uid_pool,
                                  add_uids=add_uids,
                                  remove_uids=remove_uids,
                                  default_iallocator=opts.default_iallocator,
                                  prealloc_wipe_disks=opts.prealloc_wipe_disks,
                                  master_netdev=opts.master_netdev,
                                  master_netmask=opts.master_netmask,
                                  reserved_lvs=opts.reserved_lvs,
                                  use_external_mip_script=ext_ip_script,
                                  hv_state=hv_state,
                                  disk_state=disk_state,
                                  )
  SubmitOpCode(op, opts=opts)
  return 0


1019
1020
1021
def QueueOps(opts, args):
  """Queue operations.

  Supported subcommands are C{drain} and C{undrain}, which set or clear
  the queue drain flag, and C{info}, which prints its current state.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known

  """
  subcommand = args[0]
  # The client is obtained before the subcommand is validated
  cl = GetClient()

  if subcommand == "info":
    values = cl.QueryConfigValues(["drain_flag"])
    state = "unset"
    if values[0]:
      state = "set"
    ToStdout("The drain flag is %s" % state)
    return 0

  if subcommand not in ("drain", "undrain"):
    raise errors.OpPrereqError("Command '%s' is not valid." % subcommand,
                               errors.ECODE_INVAL)

  # Only "drain" and "undrain" are left; the flag is set just for "drain"
  cl.SetQueueDrainFlag(subcommand == "drain")
  return 0

1047

1048
1049
1050
1051
1052
1053
1054
def _ShowWatcherPause(until):
  """Prints whether the watcher is currently paused.

  @param until: timestamp until which the watcher is paused, or None; it
      is compared against C{time.time()}

  """
  still_paused = not (until is None or until < time.time())

  if still_paused:
    ToStdout("The watcher is paused until %s.", time.ctime(until))
    return

  ToStdout("The watcher is not paused.")


1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
def WatcherOps(opts, args):
  """Watcher operations.

  Supported subcommands are C{continue} (clear the pause), C{pause} (pause
  for the duration given as second argument) and C{info} (show the current
  pause state).

  @param opts: the command line options selected by the user
  @type args: list
  @param args: the subcommand, followed by the pause duration in case of
      the C{pause} subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the subcommand is not known or the pause
      duration is missing

  """
  subcommand = args[0]
  cl = GetClient()

  if subcommand == "continue":
    cl.SetWatcherPause(None)
    ToStdout("The watcher is no longer paused.")
    return 0

  if subcommand == "pause":
    if len(args) < 2:
      raise errors.OpPrereqError("Missing pause duration", errors.ECODE_INVAL)

    # The duration is relative, the client is given an absolute timestamp
    pause_until = time.time() + ParseTimespec(args[1])
    _ShowWatcherPause(cl.SetWatcherPause(pause_until))
    return 0

  if subcommand == "info":
    values = cl.QueryConfigValues(["watcher_pause"])
    _ShowWatcherPause(values[0])
    return 0

  raise errors.OpPrereqError("Command '%s' is not valid." % subcommand,
                             errors.ECODE_INVAL)


René Nussbaumer's avatar
René Nussbaumer committed
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
def _OobPower(opts, node_list, power):
  """Puts the node in the list to desired power state.

  @param opts: The command line options selected by the user
  @param node_list: The list of nodes to operate on
  @param power: True if they should be powered on, False otherwise
  @return: The success of the operation (none failed)

  """
  if power:
    command = constants.OOB_POWER_ON
  else:
    command = constants.OOB_POWER_OFF

  op = opcodes.OpOobCommand(node_names=node_list,
                            command=command,
                            ignore_status=True,
1107
1108
                            timeout=opts.oob_timeout,
                            power_delay=opts.power_delay)