#
#

# Copyright (C) 2006, 2007, 2010, 2011, 2012, 2013, 2014 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Cluster related commands"""

# pylint: disable=W0401,W0613,W0614,C0103
# W0401: Wildcard import ganeti.cli
# W0613: Unused argument, since all functions follow the same API
# W0614: Unused import %s from wildcard import (since we need cli)
# C0103: Invalid name gnt-cluster

29
from cStringIO import StringIO
30
import os
31
import time
32
import OpenSSL
33
import tempfile
René Nussbaumer's avatar
René Nussbaumer committed
34
import itertools
Iustin Pop's avatar
Iustin Pop committed
35
36

from ganeti.cli import *
37
from ganeti import bootstrap
38
from ganeti import compat
39
40
from ganeti import constants
from ganeti import errors
René Nussbaumer's avatar
René Nussbaumer committed
41
from ganeti import netutils
42
43
from ganeti import objects
from ganeti import opcodes
44
from ganeti import pathutils
45
from ganeti import qlang
46
47
48
49
50
51
from ganeti import serializer
from ganeti import ssconf
from ganeti import ssh
from ganeti import uidpool
from ganeti import utils
from ganeti.client import base
René Nussbaumer's avatar
René Nussbaumer committed
52
53
54
55
56
57
58


# Command line options used only by the commands in this module
ON_OPT = cli_option("--on", default=False,
                    action="store_true", dest="on",
                    help="Recover from an EPO")

GROUPS_OPT = cli_option("--groups", default=False,
                        action="store_true", dest="groups",
                        help="Arguments are node groups instead of nodes")

# The two options below share the same flag and destination ("yes_do_it");
# they only differ in the help text shown to the user
FORCE_FAILOVER = cli_option("--yes-do-it", dest="yes_do_it",
                            help="Override interactive check for --no-voting",
                            default=False, action="store_true")

FORCE_DISTRIBUTION = cli_option("--yes-do-it", dest="yes_do_it",
                                help="Unconditionally distribute the"
                                " configuration, even if the queue"
                                " is drained",
                                default=False, action="store_true")

TO_OPT = cli_option("--to", default=None, type="string",
                    help="The Ganeti version to upgrade to")

RESUME_OPT = cli_option("--resume", default=False, action="store_true",
                        help="Resume any pending Ganeti upgrades")

_EPO_PING_INTERVAL = 30 # 30 seconds between pings
_EPO_PING_TIMEOUT = 1 # 1 second
_EPO_REACHABLE_TIMEOUT = 15 * 60 # 15 minutes
Iustin Pop's avatar
Iustin Pop committed
81
82


83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def _InitEnabledDiskTemplates(opts):
  """Initialize the list of enabled disk templates.

  """
  if opts.enabled_disk_templates:
    return opts.enabled_disk_templates.split(",")
  else:
    return constants.DEFAULT_ENABLED_DISK_TEMPLATES


def _InitVgName(opts, enabled_disk_templates):
  """Work out the volume group name for the new cluster.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @return: the value of C{opts.vg_name} if it was given; otherwise
      C{constants.DEFAULT_VG} when LVM is enabled and C{None} when it is not
  @raise errors.OpPrereqError: if an empty volume group name was given
      while LVM-based disk templates are enabled

  """
  lvm_enabled = utils.IsLvmEnabled(enabled_disk_templates)
  requested = opts.vg_name

  # Nothing specified on the command line: fall back to the default, but
  # only if LVM is going to be used at all
  if requested is None:
    if lvm_enabled:
      return constants.DEFAULT_VG
    return None

  if requested:
    if not lvm_enabled:
      ToStdout("You specified a volume group with --vg-name, but you did not"
               " enable any disk template that uses lvm.")
    return requested

  # An explicitly empty name is only acceptable without LVM
  if lvm_enabled:
    raise errors.OpPrereqError(
        "LVM disk templates are enabled, but vg name not set.")
  return requested


115
def _InitDrbdHelper(opts, enabled_disk_templates):
  """Initialize the DRBD usermode helper.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk templates
  @return: C{constants.DEFAULT_DRBD_HELPER} if DRBD is enabled and no helper
      was given, otherwise the value of C{opts.drbd_helper}
  @raise errors.OpPrereqError: if DRBD is enabled and the helper was
      explicitly set to the empty string

  """
  drbd_enabled = constants.DT_DRBD8 in enabled_disk_templates

  # A helper without DRBD is harmless, so only inform the user
  if not drbd_enabled and opts.drbd_helper is not None:
    ToStdout("Note: You specified a DRBD usermode helper, while DRBD storage"
             " is not enabled.")

  if drbd_enabled:
    if opts.drbd_helper is None:
      return constants.DEFAULT_DRBD_HELPER
    if opts.drbd_helper == '':
      raise errors.OpPrereqError(
          "Unsetting the drbd usermode helper while enabling DRBD is not"
          " allowed.")

  return opts.drbd_helper


136
@UsesRPC
def InitCluster(opts, args):
  """Initialize the cluster.

  Validates and completes the command line options, then calls
  L{bootstrap.InitCluster} and submits the post-initialization opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code (1 if an option failed validation,
      0 otherwise)

  """
  enabled_disk_templates = _InitEnabledDiskTemplates(opts)

  try:
    vg_name = _InitVgName(opts, enabled_disk_templates)
    drbd_helper = _InitDrbdHelper(opts, enabled_disk_templates)
  except errors.OpPrereqError as e:
    ToStderr(str(e))
    return 1

  master_netdev = opts.master_netdev
  if master_netdev is None:
    nic_mode = opts.nicparams.get(constants.NIC_MODE, None)
    if not nic_mode:
      # default case, use bridging
      master_netdev = constants.DEFAULT_BRIDGE
    elif nic_mode == constants.NIC_MODE_OVS:
      # default ovs is different from default bridge
      master_netdev = constants.DEFAULT_OVS
      opts.nicparams[constants.NIC_LINK] = constants.DEFAULT_OVS

  hvlist = opts.enabled_hypervisors
  if hvlist is None:
    hvlist = constants.DEFAULT_ENABLED_HYPERVISOR
  hvlist = hvlist.split(",")

  hvparams = dict(opts.hvparams)
  beparams = opts.beparams
  nicparams = opts.nicparams

  diskparams = dict(opts.diskparams)

  # check the disk template types here, as we cannot rely on the type check done
  # by the opcode parameter types
  diskparams_keys = set(diskparams.keys())
  if not (diskparams_keys <= constants.DISK_TEMPLATES):
    unknown = utils.NiceSort(diskparams_keys - constants.DISK_TEMPLATES)
    ToStderr("Disk templates unknown: %s" % utils.CommaJoin(unknown))
    return 1

  # prepare beparams dict
  beparams = objects.FillDict(constants.BEC_DEFAULTS, beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_COMPAT)

  # prepare nicparams dict
  nicparams = objects.FillDict(constants.NICC_DEFAULTS, nicparams)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)

  # prepare ndparams dict
  if opts.ndparams is None:
    ndparams = dict(constants.NDC_DEFAULTS)
  else:
    ndparams = objects.FillDict(constants.NDC_DEFAULTS, opts.ndparams)
    utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  # prepare hvparams dict
  for hv in constants.HYPER_TYPES:
    if hv not in hvparams:
      hvparams[hv] = {}
    hvparams[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], hvparams[hv])
    utils.ForceDictType(hvparams[hv], constants.HVS_PARAMETER_TYPES)

  # prepare diskparams dict
  for templ in constants.DISK_TEMPLATES:
    if templ not in diskparams:
      diskparams[templ] = {}
    diskparams[templ] = objects.FillDict(constants.DISK_DT_DEFAULTS[templ],
                                         diskparams[templ])
    utils.ForceDictType(diskparams[templ], constants.DISK_DT_TYPES)

  # prepare ipolicy dict
  ipolicy = CreateIPolicyFromOpts(
    ispecs_mem_size=opts.ispecs_mem_size,
    ispecs_cpu_count=opts.ispecs_cpu_count,
    ispecs_disk_count=opts.ispecs_disk_count,
    ispecs_disk_size=opts.ispecs_disk_size,
    ispecs_nic_count=opts.ispecs_nic_count,
    minmax_ispecs=opts.ipolicy_bounds_specs,
    std_ispecs=opts.ipolicy_std_specs,
    ipolicy_disk_templates=opts.ipolicy_disk_templates,
    ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
    ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
    fill_all=True)

  if opts.candidate_pool_size is None:
    opts.candidate_pool_size = constants.MASTER_POOL_SIZE_DEFAULT

  if opts.mac_prefix is None:
    opts.mac_prefix = constants.DEFAULT_MAC_PREFIX

  uid_pool = opts.uid_pool
  if uid_pool is not None:
    uid_pool = uidpool.ParseUidPool(uid_pool)

  if opts.prealloc_wipe_disks is None:
    opts.prealloc_wipe_disks = False

  external_ip_setup_script = opts.use_external_mip_script
  if external_ip_setup_script is None:
    external_ip_setup_script = False

  try:
    primary_ip_version = int(opts.primary_ip_version)
  except (ValueError, TypeError) as err:
    ToStderr("Invalid primary ip version value: %s" % str(err))
    return 1

  master_netmask = opts.master_netmask
  try:
    if master_netmask is not None:
      master_netmask = int(master_netmask)
  except (ValueError, TypeError) as err:
    ToStderr("Invalid master netmask value: %s" % str(err))
    return 1

  if opts.disk_state:
    disk_state = utils.FlatToDict(opts.disk_state)
  else:
    disk_state = {}

  hv_state = dict(opts.hv_state)

  # unset or empty image options are passed on as the empty string
  install_image = opts.install_image or ""
  zeroing_image = opts.zeroing_image or ""

  compression_tools = _GetCompressionTools(opts)

  default_ialloc_params = opts.default_iallocator_params

  # normalize to a strict boolean
  enabled_user_shutdown = bool(opts.enabled_user_shutdown)

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        master_netmask=master_netmask,
                        master_netdev=master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        shared_file_storage_dir=opts.shared_file_storage_dir,
                        gluster_storage_dir=opts.gluster_storage_dir,
                        enabled_hypervisors=hvlist,
                        hvparams=hvparams,
                        beparams=beparams,
                        nicparams=nicparams,
                        ndparams=ndparams,
                        diskparams=diskparams,
                        ipolicy=ipolicy,
                        candidate_pool_size=opts.candidate_pool_size,
                        modify_etc_hosts=opts.modify_etc_hosts,
                        modify_ssh_setup=opts.modify_ssh_setup,
                        maintain_node_health=opts.maintain_node_health,
                        drbd_helper=drbd_helper,
                        uid_pool=uid_pool,
                        default_iallocator=opts.default_iallocator,
                        default_iallocator_params=default_ialloc_params,
                        primary_ip_version=primary_ip_version,
                        prealloc_wipe_disks=opts.prealloc_wipe_disks,
                        use_external_mip_script=external_ip_setup_script,
                        hv_state=hv_state,
                        disk_state=disk_state,
                        enabled_disk_templates=enabled_disk_templates,
                        install_image=install_image,
                        zeroing_image=zeroing_image,
                        compression_tools=compression_tools,
                        enabled_user_shutdown=enabled_user_shutdown,
                        )
  op = opcodes.OpClusterPostInit()
  SubmitOpCode(op, opts=opts)
  return 0


328
@UsesRPC
def DestroyCluster(opts, args):
  """Destroy the cluster.

  Refuses to do anything unless the user confirmed the operation via the
  C{yes_do_it} option.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if not opts.yes_do_it:
    ToStderr("Destroying a cluster is irreversible. If you really want"
             " destroy this cluster, supply the --yes-do-it option.")
    return 1

  op = opcodes.OpClusterDestroy()
  master_uuid = SubmitOpCode(op, opts=opts)
  # if we reached this, the opcode didn't fail; we can proceed to
  # shutdown all the daemons
  bootstrap.FinalizeClusterDestroy(master_uuid)
  return 0


352
353
354
def RenameCluster(opts, args):
  """Rename the cluster.

  Unless C{opts.force} is set, the user is asked for confirmation first.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code (1 if the user declined the rename)

  """
  cl = GetClient()

  (cluster_name, ) = cl.QueryConfigValues(["cluster_name"])

  new_name = args[0]
  if not opts.force:
    usertext = ("This will rename the cluster from '%s' to '%s'. If you are"
                " connected over the network to the cluster name, the"
                " operation is very dangerous as the IP address will be"
                " removed from the node and the change may not go through."
                " Continue?") % (cluster_name, new_name)
    if not AskUser(usertext):
      return 1

  op = opcodes.OpClusterRename(name=new_name)
  result = SubmitOpCode(op, opts=opts, cl=cl)

  # the opcode result is reported as the new name of the cluster
  if result:
    ToStdout("Cluster renamed from '%s' to '%s'", cluster_name, result)

  return 0


385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def ActivateMasterIp(opts, args):
  """Submit the opcode which activates the master IP.

  @param opts: the command line options selected by the user (unused)
  @type args: list
  @param args: unused
  @rtype: int
  @return: the desired exit code, always 0

  """
  SubmitOpCode(opcodes.OpClusterActivateMasterIp())
  return 0


def DeactivateMasterIp(opts, args):
  """Submit the opcode which deactivates the master IP.

  The user is asked for confirmation unless C{opts.confirm} is set.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: unused
  @rtype: int
  @return: the desired exit code (1 if the user declined, 0 otherwise)

  """
  # AskUser is only consulted when the confirmation was not given up front
  go_ahead = opts.confirm or AskUser(
    "This will disable the master IP. All the open connections to"
    " the master IP will be closed. To reach the master you will"
    " need to use its node IP."
    " Continue?")
  if not go_ahead:
    return 1

  SubmitOpCode(opcodes.OpClusterDeactivateMasterIp())
  return 0


411
412
413
414
415
416
417
418
419
420
def RedistributeConfig(opts, args):
  """Forces push of the cluster configuration.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRedistConf()
  # with the "yes_do_it" option the opcode is submitted through the
  # drained-queue variant of the submit helper
  if opts.yes_do_it:
    SubmitOpCodeToDrainedQueue(op)
  else:
    SubmitOrSend(op, opts)
  return 0


Iustin Pop's avatar
Iustin Pop committed
429
430
431
def ShowClusterVersion(opts, args):
  """Write version of ganeti software to the standard output.

  The values are taken from the cluster information query.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()
  ToStdout("Software version: %s", result["software_version"])
  ToStdout("Internode protocol: %s", result["protocol_version"])
  ToStdout("Configuration format: %s", result["config_version"])
  ToStdout("OS api version: %s", result["os_api_version"])
  ToStdout("Export interface: %s", result["export_version"])
  ToStdout("VCS version: %s", result["vcs_version"])
  return 0


def ShowClusterMaster(opts, args):
  """Write name of master node to the standard output.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  master = bootstrap.GetMaster()
  ToStdout(master)
  return 0

464

465
466
def _FormatGroupedParams(paramsdict, roman=False):
  """Format Grouped parameters (be, nic, disk) by group.
467
468
469

  @type paramsdict: dict of dicts
  @param paramsdict: {group: {param: value, ...}, ...}
470
471
  @rtype: dict of dicts
  @return: copy of the input dictionaries with strings as values
472
473

  """
474
475
  ret = {}
  for (item, val) in paramsdict.items():
476
    if isinstance(val, dict):
477
      ret[item] = _FormatGroupedParams(val, roman=roman)
Guido Trotter's avatar
Guido Trotter committed
478
    elif roman and isinstance(val, int):
479
      ret[item] = compat.TryToRoman(val)
480
    else:
481
482
      ret[item] = str(val)
  return ret
Iustin Pop's avatar
Iustin Pop committed
483

484

Iustin Pop's avatar
Iustin Pop committed
485
486
487
def ShowClusterConfig(opts, args):
  """Shows cluster information.

  Queries the cluster information and prints it via L{PrintGenericInfo}.
  Integers are shown as roman numerals if C{opts.roman_integers} is set.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  result = cl.QueryClusterInfo()

  if result["tags"]:
    tags = utils.CommaJoin(utils.NiceSort(result["tags"]))
  else:
    tags = "(none)"
  if result["reserved_lvs"]:
    reserved_lvs = utils.CommaJoin(result["reserved_lvs"])
  else:
    reserved_lvs = "(none)"

  # only the parameters of enabled hypervisors are shown
  enabled_hv = result["enabled_hypervisors"]
  hvparams = dict((k, v) for k, v in result["hvparams"].items()
                  if k in enabled_hv)

  info = [
    ("Cluster name", result["name"]),
    ("Cluster UUID", result["uuid"]),

    ("Creation time", utils.FormatTime(result["ctime"])),
    ("Modification time", utils.FormatTime(result["mtime"])),

    ("Master node", result["master"]),

    ("Architecture (this node)",
     "%s (%s)" % (result["architecture"][0], result["architecture"][1])),

    ("Tags", tags),

    ("Default hypervisor", result["default_hypervisor"]),
    ("Enabled hypervisors", utils.CommaJoin(enabled_hv)),

    ("Hypervisor parameters", _FormatGroupedParams(hvparams,
                                                   opts.roman_integers)),

    ("OS-specific hypervisor parameters",
     _FormatGroupedParams(result["os_hvp"], opts.roman_integers)),

    ("OS parameters", _FormatGroupedParams(result["osparams"],
                                           opts.roman_integers)),

    ("Hidden OSes", utils.CommaJoin(result["hidden_os"])),
    ("Blacklisted OSes", utils.CommaJoin(result["blacklisted_os"])),

    ("Cluster parameters", [
      ("candidate pool size",
       compat.TryToRoman(result["candidate_pool_size"],
                         convert=opts.roman_integers)),
      ("maximal number of jobs running simultaneously",
       compat.TryToRoman(result["max_running_jobs"],
                         convert=opts.roman_integers)),
      ("maximal number of jobs simultaneously tracked by the scheduler",
       compat.TryToRoman(result["max_tracked_jobs"],
                         convert=opts.roman_integers)),
      ("mac prefix", result["mac_prefix"]),
      ("master netdev", result["master_netdev"]),
      ("master netmask", compat.TryToRoman(result["master_netmask"],
                                           opts.roman_integers)),
      ("use external master IP address setup script",
       result["use_external_mip_script"]),
      ("lvm volume group", result["volume_group_name"]),
      ("lvm reserved volumes", reserved_lvs),
      ("drbd usermode helper", result["drbd_usermode_helper"]),
      ("file storage path", result["file_storage_dir"]),
      ("shared file storage path", result["shared_file_storage_dir"]),
      ("gluster storage path", result["gluster_storage_dir"]),
      ("maintenance of node health", result["maintain_node_health"]),
      ("uid pool", uidpool.FormatUidPool(result["uid_pool"])),
      ("default instance allocator", result["default_iallocator"]),
      ("default instance allocator parameters",
       result["default_iallocator_params"]),
      ("primary ip version", compat.TryToRoman(result["primary_ip_version"],
                                               opts.roman_integers)),
      ("preallocation wipe disks", result["prealloc_wipe_disks"]),
      ("OS search path", utils.CommaJoin(pathutils.OS_SEARCH_PATH)),
      ("ExtStorage Providers search path",
       utils.CommaJoin(pathutils.ES_SEARCH_PATH)),
      ("enabled disk templates",
       utils.CommaJoin(result["enabled_disk_templates"])),
      ("install image", result["install_image"]),
      ("instance communication network",
       result["instance_communication_network"]),
      ("zeroing image", result["zeroing_image"]),
      ("compression tools", result["compression_tools"]),
      ("enabled user shutdown", result["enabled_user_shutdown"]),
      ]),

    ("Default node parameters",
     _FormatGroupedParams(result["ndparams"], roman=opts.roman_integers)),

    ("Default instance parameters",
     _FormatGroupedParams(result["beparams"], roman=opts.roman_integers)),

    ("Default nic parameters",
     _FormatGroupedParams(result["nicparams"], roman=opts.roman_integers)),

    ("Default disk parameters",
     _FormatGroupedParams(result["diskparams"], roman=opts.roman_integers)),

    ("Instance policy - limits for instances",
     FormatPolicyInfo(result["ipolicy"], None, True, opts.roman_integers)),
    ]

  PrintGenericInfo(info)
  return 0


def ClusterCopyFile(opts, args):
  """Copy a file from master to some nodes.

  Copy failures are reported on standard error but do not change the
  exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: if the file does not exist

  """
  filename = args[0]
  filename = os.path.abspath(filename)

  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename,
                               errors.ECODE_INVAL)

  cl = GetClient()
  qcl = GetClient()
  try:
    cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

    results = GetOnlineNodes(nodes=opts.nodes, cl=qcl, filter_master=True,
                             secondary_ips=opts.use_replication_network,
                             nodegroup=opts.nodegroup)
    # NOTE(review): the ports are queried for opts.nodes, while "results" is
    # filtered (master excluded, node group applied); presumably both lists
    # line up for the zip() below -- confirm against GetNodesSshPorts
    ports = GetNodesSshPorts(opts.nodes, qcl)
  finally:
    # the clients are only needed for the queries above
    cl.Close()
    qcl.Close()

  srun = ssh.SshRunner(cluster_name)
  for (node, port) in zip(results, ports):
    if not srun.CopyFileToNode(node, port, filename):
      ToStderr("Copy of file %s to node %s:%d failed", filename, node, port)

  return 0


def RunClusterCommand(opts, args):
  """Run a command on some nodes.

  The command is run on the master node last.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  qcl = GetClient()

  command = " ".join(args)

  nodes = GetOnlineNodes(nodes=opts.nodes, cl=qcl, nodegroup=opts.nodegroup)
  ports = GetNodesSshPorts(nodes, qcl)

  cluster_name, master_node = cl.QueryConfigValues(["cluster_name",
                                                    "master_node"])

  srun = ssh.SshRunner(cluster_name=cluster_name)

  # Make sure master node is at list end. The ports were queried in the
  # original node order, so each node must be paired with its port *before*
  # reordering; moving the master within "nodes" alone would shift the
  # node/port pairing. sorted() is stable, hence all other nodes keep their
  # relative order.
  targets = sorted(zip(nodes, ports),
                   key=lambda target: target[0] == master_node)

  for (name, port) in targets:
    result = srun.Run(name, constants.SSH_LOGIN_USER, command, port=port)

    if opts.failure_only and result.exit_code == constants.EXIT_SUCCESS:
      # Do not output anything for successful commands
      continue

    ToStdout("------------------------------------------------")
    if opts.show_machine_names:
      for line in result.output.splitlines():
        ToStdout("%s: %s", name, line)
    else:
      ToStdout("node: %s", name)
      ToStdout("%s", result.output)
    ToStdout("return code = %s", result.exit_code)

  return 0
Iustin Pop's avatar
Iustin Pop committed
687
688
689
690
691


def VerifyCluster(opts, args):
  """Verify integrity of cluster, performing various test on nodes.

  Submits the verification opcode, waits for the jobs it spawned and
  derives the exit code from their results.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  skip_checks = []

  if opts.skip_nplusone_mem:
    skip_checks.append(constants.VERIFY_NPLUSONE_MEM)

  cl = GetClient()

  op = opcodes.OpClusterVerify(verbose=opts.verbose,
                               error_codes=opts.error_codes,
                               debug_simulate_errors=opts.simulate_errors,
                               skip_checks=skip_checks,
                               ignore_errors=opts.ignore_errors,
                               group_name=opts.nodegroup)
  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  results = jex.GetResults()

  # Count the failed jobs and the jobs whose (single) opcode result is not
  # true. An explicit loop also copes with an empty result list, where
  # unpacking the transposed results would fail.
  bad_jobs = 0
  bad_results = 0
  for (job_success, op_results) in results:
    if not job_success:
      bad_jobs += 1
    if not (len(op_results) == 1 and op_results[0]):
      bad_results += 1

  if bad_jobs == 0 and bad_results == 0:
    rcode = constants.EXIT_SUCCESS
  else:
    rcode = constants.EXIT_FAILURE
    if bad_jobs > 0:
      ToStdout("%s job(s) failed while verifying the cluster.", bad_jobs)

  return rcode
Iustin Pop's avatar
Iustin Pop committed
740
741


742
743
744
def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  Submits the disk verification opcode, waits for the jobs it spawned and,
  for each job result, reports broken nodes, re-activates the disks of the
  listed instances and reports missing logical volumes.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()

  op = opcodes.OpClusterVerifyDisks()

  result = SubmitOpCode(op, cl=cl, opts=opts)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  retcode = constants.EXIT_SUCCESS

  for (status, job_result) in jex.GetResults():
    if not status:
      ToStdout("Job failed: %s", job_result)
      continue

    # each successful job carries exactly one opcode result
    ((bad_nodes, instances, missing), ) = job_result

    for node, text in bad_nodes.items():
      # only the tail of the error text is shown
      ToStdout("Error gathering data on node %s: %s",
               node, utils.SafeEncode(text[-400:]))
      retcode = constants.EXIT_FAILURE
      ToStdout("You need to fix these nodes first before fixing instances")

    for iname in instances:
      # instances with missing volumes are reported below instead
      if iname in missing:
        continue
      op = opcodes.OpInstanceActivateDisks(instance_name=iname)
      try:
        ToStdout("Activating disks for instance '%s'", iname)
        SubmitOpCode(op, opts=opts, cl=cl)
      except errors.GenericError as err:
        nret, msg = FormatError(err)
        retcode |= nret
        ToStderr("Error activating disks for instance %s: %s", iname, msg)

    if missing:
      for iname, ival in missing.items():
        all_missing = compat.all(x[0] in bad_nodes for x in ival)
        if all_missing:
          ToStdout("Instance %s cannot be verified as it lives on"
                   " broken nodes", iname)
        else:
          ToStdout("Instance %s has missing logical volumes:", iname)
          ival.sort()
          for node, vol in ival:
            if node in bad_nodes:
              ToStdout("\tbroken node %s /dev/%s", node, vol)
            else:
              ToStdout("\t%s /dev/%s", node, vol)

      ToStdout("You need to replace or recreate disks for all the above"
               " instances if this message persists after fixing broken nodes.")
      retcode = constants.EXIT_FAILURE
    elif not instances:
      ToStdout("No disks need to be activated.")

  return retcode


815
816
817
818
819
820
821
822
823
824
def RepairDiskSizes(opts, args):
  """Verify sizes of cluster disks.

  Builds a single L{opcodes.OpClusterRepairDiskSizes} opcode restricted to
  the given instances and hands it to L{SubmitOpCode}. Any exception raised
  by the submission propagates to the caller.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: optional list of instances to restrict check to
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpClusterRepairDiskSizes(instances=args)
  SubmitOpCode(op, opts=opts)
  # Return the documented exit code explicitly instead of falling off the
  # end of the function with None
  return constants.EXIT_SUCCESS
827
828


829
@UsesRPC
def MasterFailover(opts, args):
  """Failover the master node.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master.

  If voting is disabled (C{opts.no_voting}) and the user has not already
  confirmed via C{opts.yes_do_it}, an interactive confirmation is requested
  before anything is done.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code; 1 if the user declines the confirmation,
      otherwise whatever L{bootstrap.MasterFailover} returns

  """
  skip_voting = opts.no_voting

  # Only the non-voting mode needs an explicit confirmation
  if skip_voting and not opts.yes_do_it:
    confirmed = AskUser("This will perform the failover even if most other"
                        " nodes are down, or if this node is outdated. This"
                        " is dangerous as it can lead to a non-consistent"
                        " cluster. Check the gnt-cluster(8) man page before"
                        " proceeding. Continue?")
    if not confirmed:
      return 1

  return bootstrap.MasterFailover(no_voting=skip_voting)
Iustin Pop's avatar
Iustin Pop committed
853
854


Iustin Pop's avatar
Iustin Pop committed
855
856
857
858
859
860
861
862
863
864
865
866
867
868
def MasterPing(opts, args):
  """Checks if the master is alive.

  The check consists of obtaining a client and issuing a cluster info
  query; any exception at all is interpreted as "master not reachable".

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code (0 if the query succeeded, 1 otherwise)

  """
  try:
    GetClient().QueryClusterInfo()
  except Exception: # pylint: disable=W0703
    # Deliberately broad: every failure mode means the ping failed
    return 1

  return 0


Iustin Pop's avatar
Iustin Pop committed
873
874
875
def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  Submits an L{opcodes.OpTagsSearch} opcode with the given pattern and
  prints every returned (path, tag) pair on its own line, in sorted order.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code (1 if nothing matched, 0 otherwise)

  """
  op = opcodes.OpTagsSearch(pattern=args[0])
  matches = SubmitOpCode(op, opts=opts)

  if not matches:
    return 1

  for (path, tag) in sorted(matches):
    ToStdout("%s %s", path, tag)

  # Return the documented exit code explicitly instead of an implicit None
  return 0
Iustin Pop's avatar
Iustin Pop committed
891
892


893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
def _ReadAndVerifyCert(cert_filename, verify_private_key=False):
  """Reads and verifies an X509 certificate.

  @type cert_filename: string
  @param cert_filename: the path of the file containing the certificate to
                        verify encoded in PEM format
  @type verify_private_key: bool
  @param verify_private_key: whether to verify the private key in addition to
                             the public certificate
  @rtype: string
  @return: a string containing the PEM-encoded certificate.

  """
  try:
    pem = utils.ReadFile(cert_filename)
  except IOError, err:
    raise errors.X509CertError(cert_filename,
                               "Unable to read certificate: %s" % str(err))

  try:
    OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
  except Exception, err:
    raise errors.X509CertError(cert_filename,
                               "Unable to load certificate: %s" % str(err))

  if verify_private_key:
    try:
      OpenSSL.crypto.load_privatekey(OpenSSL.crypto.FILETYPE_PEM, pem)
    except Exception, err:
      raise errors.X509CertError(cert_filename,
                                 "Unable to load private key: %s" % str(err))

  return pem


Iustin Pop's avatar
Iustin Pop committed
928
def _RenewCrypto(new_cluster_cert, new_rapi_cert, # pylint: disable=R0911
929
930
                 rapi_cert_filename, new_spice_cert, spice_cert_filename,
                 spice_cacert_filename, new_confd_hmac_key, new_cds,
931
                 cds_filename, force, new_node_cert):
932
933
934
935
936
937
938
939
  """Renews cluster certificates, keys and secrets.

  @type new_cluster_cert: bool
  @param new_cluster_cert: Whether to generate a new cluster certificate
  @type new_rapi_cert: bool
  @param new_rapi_cert: Whether to generate a new RAPI certificate
  @type rapi_cert_filename: string
  @param rapi_cert_filename: Path to file containing new RAPI certificate
940
941
942
943
944
945
946
  @type new_spice_cert: bool
  @param new_spice_cert: Whether to generate a new SPICE certificate
  @type spice_cert_filename: string
  @param spice_cert_filename: Path to file containing new SPICE certificate
  @type spice_cacert_filename: string
  @param spice_cacert_filename: Path to file containing the certificate of the
                                CA that signed the SPICE certificate
947
948
  @type new_confd_hmac_key: bool
  @param new_confd_hmac_key: Whether to generate a new HMAC key
Michael Hanselmann's avatar
Michael Hanselmann committed
949
950
951
952
  @type new_cds: bool
  @param new_cds: Whether to generate a new cluster domain secret
  @type cds_filename: string
  @param cds_filename: Path to file containing new cluster domain secret
953
954
  @type force: bool
  @param force: Whether to ask user for confirmation
955
956
  @type new_node_cert: string
  @param new_node_cert: Whether to generate new node certificates
957
958
959

  """
  if new_rapi_cert and rapi_cert_filename:
960
    ToStderr("Only one of the --new-rapi-certificate and --rapi-certificate"
961
962
963
             " options can be specified at the same time.")
    return 1

Michael Hanselmann's avatar
Michael Hanselmann committed
964
965
966
967
968
969
  if new_cds and cds_filename:
    ToStderr("Only one of the --new-cluster-domain-secret and"
             " --cluster-domain-secret options can be specified at"
             " the same time.")
    return 1

970
971
972
973
  if new_spice_cert and (spice_cert_filename or spice_cacert_filename):
    ToStderr("When using --new-spice-certificate, the --spice-certificate"
             " and --spice-ca-certificate must not be used.")
    return 1
974

975
976
977
978
  if bool(spice_cacert_filename) ^ bool(spice_cert_filename):
    ToStderr("Both --spice-certificate and --spice-ca-certificate must be"
             " specified.")
    return 1
979

980
981
982
983
984
985
986
987
988
989
  rapi_cert_pem, spice_cert_pem, spice_cacert_pem = (None, None, None)
  try:
    if rapi_cert_filename:
      rapi_cert_pem = _ReadAndVerifyCert(rapi_cert_filename, True)
    if spice_cert_filename:
      spice_cert_pem = _ReadAndVerifyCert(spice_cert_filename, True)
      spice_cacert_pem = _ReadAndVerifyCert(spice_cacert_filename)
  except errors.X509CertError, err:
    ToStderr("Unable to load X509 certificate from %s: %s", err[0], err[1])
    return 1
990

Michael Hanselmann's avatar
Michael Hanselmann committed
991
992
993
  if cds_filename:
    try:
      cds = utils.ReadFile(cds_filename)
994
    except Exception, err: # pylint: disable=W0703
Michael Hanselmann's avatar
Michael Hanselmann committed
995
996
997
998
999
1000
      ToStderr("Can't load new cluster domain secret from %s: %s" %
               (cds_filename, str(err)))
      return 1
  else:
    cds = None

1001
1002
1003
1004
1005
1006
1007
1008
  if not force:
    usertext = ("This requires all daemons on all nodes to be restarted and"
                " may take some time. Continue?")
    if not AskUser(usertext):
      return 1

  def _RenewCryptoInner(ctx):
    ctx.feedback_fn("Updating certificates and keys")
1009
    # Note: the node certificate will be generated in the LU
1010
1011
1012
    bootstrap.GenerateClusterCrypto(new_cluster_cert,
                                    new_rapi_cert,
                                    new_spice_cert,
1013
                                    new_confd_hmac_key,
Michael Hanselmann's avatar
Michael Hanselmann committed
1014
1015
                                    new_cds,
                                    rapi_cert_pem=rapi_cert_pem,
1016
1017
                                    spice_cert_pem=spice_cert_pem,
                                    spice_cacert_pem=spice_cacert_pem,
Michael Hanselmann's avatar
Michael Hanselmann committed
1018
                                    cds=cds)
1019
1020
1021
1022

    files_to_copy = []

    if new_cluster_cert:
1023
      files_to_copy.append(pathutils.NODED_CERT_FILE)
1024
1025

    if new_rapi_cert or rapi_cert_pem:
1026
      files_to_copy.append(pathutils.RAPI_CERT_FILE)
1027

1028
    if new_spice_cert or spice_cert_pem:
1029
1030
      files_to_copy.append(pathutils.SPICE_CERT_FILE)
      files_to_copy.append(pathutils.SPICE_CACERT_FILE)
1031

1032
    if new_confd_hmac_key:
1033
      files_to_copy.append(pathutils.CONFD_HMAC_KEY)
1034

Michael Hanselmann's avatar
Michael Hanselmann committed
1035
    if new_cds or cds:
1036
      files_to_copy.append(pathutils.CLUSTER_DOMAIN_SECRET_FILE)
Michael Hanselmann's avatar
Michael Hanselmann committed
1037

1038
1039
    if files_to_copy:
      for node_name in ctx.nonmaster_nodes:
1040
1041
1042
        port = ctx.ssh_ports[node_name]
        ctx.feedback_fn("Copying %s to %s:%d" %
                        (", ".join(files_to_copy), node_name, port))
1043
        for file_name in files_to_copy:
1044
          ctx.ssh.CopyFileToNode(node_name, port, file_name)
1045
1046
1047
1048
1049
1050

  RunWhileClusterStopped(ToStdout, _RenewCryptoInner)

  ToStdout("All requested certificates and keys have been replaced."
           " Running \"gnt-cluster verify\" now is recommended.")

1051
1052
1053
1054
1055
  if new_node_cert:
    cl = GetClient()
    renew_op = opcodes.OpClusterRenewCrypto()
    SubmitOpCode(renew_op, cl=cl)

1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
  return 0


def RenewCrypto(opts, args):
  """Renews cluster certificates, keys and secrets.

  Thin command-line entry point: unpacks the relevant options and passes
  them on to L{_RenewCrypto}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: not used by this command
  @rtype: int
  @return: the exit code computed by L{_RenewCrypto}

  """
  return _RenewCrypto(new_cluster_cert=opts.new_cluster_cert,
                      new_rapi_cert=opts.new_rapi_cert,
                      rapi_cert_filename=opts.rapi_cert,
                      new_spice_cert=opts.new_spice_cert,
                      spice_cert_filename=opts.spice_cert,
                      spice_cacert_filename=opts.spice_cacert,
                      new_confd_hmac_key=opts.new_confd_hmac_key,
                      new_cds=opts.new_cluster_domain_secret,
                      cds_filename=opts.cluster_domain_secret,
                      force=opts.force,
                      new_node_cert=opts.new_node_cert)
1074
1075


1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
def _GetEnabledDiskTemplates(opts):
  """Determine the list of enabled disk templates.

  @param opts: the command line options selected by the user
  @rtype: list of strings or None
  @return: the comma-separated C{opts.enabled_disk_templates} value split
      into its items, or None if the option is unset or empty

  """
  templates = opts.enabled_disk_templates
  if not templates:
    return None
  return templates.split(",")


def _GetVgName(opts, enabled_disk_templates):
  """Determine the volume group name.

  A notice is printed if a volume group was given although none of the
  enabled disk templates is LVM-based; the name is returned regardless.

  @param opts: the command line options selected by the user
  @type enabled_disk_templates: list of strings
  @param enabled_disk_templates: cluster-wide enabled disk-templates
  @return: the value of C{opts.vg_name} (None if it was not given)

  """
  vg_name = opts.vg_name

  # consistency between vg name and enabled disk templates
  vg_without_lvm = (enabled_disk_templates and vg_name and
                    not utils.IsLvmEnabled(enabled_disk_templates))
  if vg_without_lvm:
    ToStdout("You specified a volume group with --vg-name, but you did not"
             " enable any of the following lvm-based disk templates: %s" %
             utils.CommaJoin(constants.DTS_LVM))

  return vg_name


1105
def _GetDrbdHelper(opts, enabled_disk_templates):
  """Determine the DRBD usermode helper.

  A notice is printed if a helper was given while DRBD is not among the
  enabled disk templates; the helper is returned regardless.

  @param opts: the command line options selected by the user
  @param enabled_disk_templates: the enabled disk templates, if any were
      given
  @return: the value of C{opts.drbd_helper}

  """
  helper = opts.drbd_helper

  # Without an explicit template list there is nothing to cross-check
  if not enabled_disk_templates:
    return helper

  if constants.DT_DRBD8 not in enabled_disk_templates and helper:
    ToStdout("You specified a DRBD usermode helper with "
             " --drbd-usermode-helper while DRBD is not enabled.")

  return helper


1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
def _GetCompressionTools(opts):
  """Determine the list of custom compression tools.

  @param opts: the command line options selected by the user
  @return: None if the option was not provided at all,
      L{constants.IEC_DEFAULT_TOOLS} if it was provided but empty (reset to
      default), otherwise the comma-separated value split into a list

  """
  tools = opts.compression_tools

  if tools is None:
    # To note the parameter was not provided
    return None

  if not tools:
    # Resetting to default
    return constants.IEC_DEFAULT_TOOLS

  return tools.split(",")


1130
1131
1132
def SetClusterParams(opts, args):
  """Modify the cluster.

1133
1134
1135
1136
1137
  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code
1138
1139

  """
1140
1141
  if not (opts.vg_name is not None or
          opts.drbd_helper is not None or
1142
          opts.enabled_hypervisors or opts.hvparams or
1143
1144
          opts.beparams or opts.nicparams or
          opts.ndparams or opts.diskparams or
1145
          opts.candidate_pool_size is not None or
1146
          opts.max_running_jobs is not None or
1147
          opts.max_tracked_jobs is not None or
1148
          opts.uid_pool is not None or
1149
1150
          opts.maintain_node_health is not None or
          opts.add_uids is not None or