Skip to content
Snippets Groups Projects
gnt-cluster 22.69 KiB
#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


# pylint: disable-msg=W0401,W0614
# W0401: Wildcard import ganeti.cli
# W0614: Unused import %s from wildcard import (since we need cli)

import sys
from optparse import make_option
import os.path

from ganeti.cli import *
from ganeti import opcodes
from ganeti import constants
from ganeti import errors
from ganeti import utils
from ganeti import bootstrap
from ganeti import ssh


@UsesRPC
def InitCluster(opts, args):
  """Create a brand new cluster from the command line options.

  The options are checked for conflicts, the backend and hypervisor
  parameter dictionaries are completed with the defaults found in
  L{constants}, and the result is handed to C{bootstrap.InitCluster}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the desired
      cluster name
  @rtype: int
  @return: the desired exit code (1 on any validation failure)

  """
  lvm_enabled = opts.lvm_storage
  requested_vg = opts.vg_name

  # a volume group name makes no sense once LVM storage is switched off
  if requested_vg and not lvm_enabled:
    ToStderr("Options --no-lvm-storage and --vg-name conflict.")
    return 1

  # LVM requested without an explicit name: use the stock volume group
  if lvm_enabled and not requested_vg:
    vg_name = constants.DEFAULT_VG
  else:
    vg_name = requested_vg

  default_hv = opts.default_hypervisor
  if opts.enabled_hypervisors is None:
    # nothing enabled explicitly: only the default hypervisor is enabled
    enabled_hvs = [default_hv]
  else:
    enabled_hvs = opts.enabled_hypervisors.split(",")

  # the default hypervisor must itself be one of the enabled ones
  if default_hv not in enabled_hvs:
    ToStderr("The default hypervisor requested (%s) is not"
             " within the enabled hypervisor list (%s)" %
             (default_hv, enabled_hvs))
    return 1

  hvparams = dict(opts.hvparams)

  # note: this is the very same dict object stored on opts, and it is
  # completed in place below
  beparams = opts.beparams
  for be_key in beparams:
    if be_key not in constants.BES_PARAMETERS:
      ToStderr("Invalid backend parameter: %s", be_key)
      return 1

  # every backend parameter not given by the user gets its default value
  for be_key in constants.BES_PARAMETERS:
    if be_key not in beparams:
      beparams[be_key] = constants.BEC_DEFAULTS[be_key]
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  # same for the hypervisor parameters, for all known hypervisors (not
  # only the enabled ones)
  for hv_name in constants.HYPER_TYPES:
    hv_settings = hvparams.setdefault(hv_name, {})
    hv_defaults = constants.HVC_DEFAULTS[hv_name]
    for hv_key in hv_defaults:
      if hv_key not in hv_settings:
        hv_settings[hv_key] = hv_defaults[hv_key]
    utils.ForceDictType(hv_settings, constants.HVS_PARAMETER_TYPES)

  # reject hypervisor names we do not know about
  for hv_name in enabled_hvs:
    if hv_name not in constants.HYPER_TYPES:
      ToStderr("invalid hypervisor: %s", hv_name)
      return 1

  bootstrap.InitCluster(cluster_name=args[0],
                        secondary_ip=opts.secondary_ip,
                        vg_name=vg_name,
                        mac_prefix=opts.mac_prefix,
                        def_bridge=opts.def_bridge,
                        master_netdev=opts.master_netdev,
                        file_storage_dir=opts.file_storage_dir,
                        enabled_hypervisors=enabled_hvs,
                        default_hypervisor=default_hv,
                        hvparams=hvparams,
                        beparams=beparams,
                        candidate_pool_size=opts.candidate_pool_size,
                        )
  return 0


@UsesRPC
def DestroyCluster(opts, args):
  """Tear down the whole cluster.

  Nothing is done unless the user confirmed the operation via the
  C{--yes-do-it} option.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.yes_do_it:
    # SubmitOpCode only returns if the opcode succeeded; its result is
    # what the final daemon shutdown step needs
    master = SubmitOpCode(opcodes.OpDestroyCluster())
    bootstrap.FinalizeClusterDestroy(master)
    return 0

  ToStderr("Destroying a cluster is irreversible. If you really want"
           " destroy this cluster, supply the --yes-do-it option.")
  return 1


def RenameCluster(opts, args):
  """Give the cluster a new name.

  Unless C{--force} was passed, the user is asked for confirmation
  first, and declining aborts the operation.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the new cluster name
  @rtype: int
  @return: the desired exit code (1 if the user declined)

  """
  new_name = args[0]

  if not opts.force:
    confirmed = AskUser(
      ("This will rename the cluster to '%s'. If you are connected"
       " over the network to the cluster name, the operation is very"
       " dangerous as the IP address will be removed from the node"
       " and the change may not go through. Continue?") % new_name)
    if not confirmed:
      return 1

  SubmitOpCode(opcodes.OpRenameCluster(name=new_name))
  return 0


def RedistributeConfig(opts, args):
  """Request a push of the cluster configuration.

  The opcode goes through C{SubmitOrSend}, which also receives the
  parsed options.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: empty list
  @rtype: int
  @return: the desired exit code

  """
  SubmitOrSend(opcodes.OpRedistributeConfig(), opts)
  return 0


def ShowClusterVersion(opts, args):
  """Print the version numbers reported by the cluster.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  info = GetClient().QueryClusterInfo()

  # (output format, key in the cluster info dict), in display order
  version_lines = (
    ("Software version: %s", "software_version"),
    ("Internode protocol: %s", "protocol_version"),
    ("Configuration format: %s", "config_version"),
    ("OS api version: %s", "os_api_version"),
    ("Export interface: %s", "export_version"),
    )
  for line_format, info_key in version_lines:
    ToStdout(line_format, info[info_key])

  return 0


def ShowClusterMaster(opts, args):
  """Print the value returned by C{bootstrap.GetMaster}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  ToStdout(bootstrap.GetMaster())
  return 0


def ShowClusterConfig(opts, args):
  """Print an overview of the cluster configuration.

  The data comes from a single C{QueryClusterInfo} call on the client.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  def _ShowNestedParams(groups):
    """Print a dict of dicts as an indented, two level list."""
    for group_name, group_values in groups.items():
      ToStdout("  - %s:", group_name)
      for key, value in group_values.iteritems():
        ToStdout("      %s: %s", key, value)

  info = GetClient().QueryClusterInfo()

  ToStdout("Cluster name: %s", info["name"])
  ToStdout("Master node: %s", info["master"])

  arch = info["architecture"]
  ToStdout("Architecture (this node): %s (%s)", arch[0], arch[1])

  ToStdout("Default hypervisor: %s", info["default_hypervisor"])
  enabled = ", ".join(info["enabled_hypervisors"])
  ToStdout("Enabled hypervisors: %s", enabled)

  ToStdout("Hypervisor parameters:")
  _ShowNestedParams(info["hvparams"])

  ToStdout("Cluster parameters:")
  ToStdout("  - candidate pool size: %s", info["candidate_pool_size"])

  ToStdout("Default instance parameters:")
  _ShowNestedParams(info["beparams"])

  return 0


def ClusterCopyFile(opts, args):
  """Copy a file from this node to some nodes.

  The file is copied to the online nodes selected via C{opts.nodes}
  (as resolved by C{GetOnlineNodes}), skipping the node the command is
  run on. A failed copy is reported on stderr and does not stop the
  remaining copies, but it is reflected in the exit code.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the path of
      the file to be copied
  @rtype: int
  @return: the desired exit code: 0 if all copies succeeded, 1 if at
      least one of them failed
  @raise errors.OpPrereqError: if the file does not exist locally

  """
  filename = args[0]
  if not os.path.exists(filename):
    raise errors.OpPrereqError("No such filename '%s'" % filename)

  cl = GetClient()

  myname = utils.HostInfo().name

  cluster_name = cl.QueryConfigValues(["cluster_name"])[0]

  # the local node already has the file, so it is never a target
  targets = [name for name in GetOnlineNodes(nodes=opts.nodes, cl=cl)
             if name != myname]

  srun = ssh.SshRunner(cluster_name=cluster_name)
  retcode = 0
  for node in targets:
    if not srun.CopyFileToNode(node, filename):
      ToStderr("Copy of file %s to node %s failed", filename, node)
      # keep going with the other nodes, but do not pretend all is well
      retcode = 1

  return retcode


def RunClusterCommand(opts, args):
  """Execute a command over ssh on some nodes and show its output.

  The arguments are joined with single spaces to form the command
  line, which is run as root on every selected online node; if the
  master node is among them, it is handled last.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain the command to be run and its arguments
  @rtype: int
  @return: the desired exit code

  """
  cl = GetClient()
  command = " ".join(args)
  targets = GetOnlineNodes(nodes=opts.nodes, cl=cl)

  config_values = cl.QueryConfigValues(["cluster_name", "master_node"])
  cluster_name, master_node = config_values

  runner = ssh.SshRunner(cluster_name=cluster_name)

  # move the master node (if selected) to the end of the list
  if master_node in targets:
    targets.remove(master_node)
    targets.append(master_node)

  for node_name in targets:
    outcome = runner.Run(node_name, "root", command)
    ToStdout("------------------------------------------------")
    ToStdout("node: %s", node_name)
    ToStdout("%s", outcome.output)
    ToStdout("return code = %s", outcome.exit_code)

  return 0


def VerifyCluster(opts, args):
  """Run the cluster verification opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code: 0 if the opcode result is true,
      1 otherwise

  """
  checks_to_skip = []
  if opts.skip_nplusone_mem:
    checks_to_skip.append(constants.VERIFY_NPLUSONE_MEM)

  verified = SubmitOpCode(opcodes.OpVerifyCluster(skip_checks=checks_to_skip))
  if not verified:
    return 1
  return 0


def VerifyDisks(opts, args):
  """Verify integrity of cluster disks.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  op = opcodes.OpVerifyDisks()
  result = SubmitOpCode(op)
  if not isinstance(result, (list, tuple)) or len(result) != 4:
    raise errors.ProgrammerError("Unknown result type for OpVerifyDisks")

  nodes, nlvm, instances, missing = result

  if nodes:
    ToStdout("Nodes unreachable or with bad data:")
    for name in nodes:
      ToStdout("\t%s", name)
  retcode = constants.EXIT_SUCCESS

  if nlvm:
    for node, text in nlvm.iteritems():
      ToStdout("Error on node %s: LVM error: %s",
               node, utils.SafeEncode(text[-400:]))
      retcode |= 1
      ToStdout("You need to fix these nodes first before fixing instances")

  if instances:
    for iname in instances:
      if iname in missing:
        continue
      op = opcodes.OpActivateInstanceDisks(instance_name=iname)
      try:
        ToStdout("Activating disks for instance '%s'", iname)
        SubmitOpCode(op)
      except errors.GenericError, err:
        nret, msg = FormatError(err)
        retcode |= nret
        ToStderr("Error activating disks for instance %s: %s", iname, msg)

  if missing:
    for iname, ival in missing.iteritems():
      all_missing = utils.all(ival, lambda x: x[0] in nlvm)
      if all_missing:
        ToStdout("Instance %s cannot be verified as it lives on"
                 " broken nodes", iname)
      else:
        ToStdout("Instance %s has missing logical volumes:", iname)
        ival.sort()
        for node, vol in ival:
          if node in nlvm:
            ToStdout("\tbroken node %s /dev/xenvg/%s", node, vol)
          else:
            ToStdout("\t%s /dev/xenvg/%s", node, vol)
    ToStdout("You need to run replace_disks for all the above"
           " instances, if this message persist after fixing nodes.")
    retcode |= 1

  return retcode


@UsesRPC
def MasterFailover(opts, args):
  """Make the current node the new master.

  This command, when run on a non-master node, will cause the current
  master to cease being master, and the non-master to become new
  master. All the work is delegated to C{bootstrap.MasterFailover}.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code, as returned by the bootstrap module

  """
  exit_code = bootstrap.MasterFailover()
  return exit_code


def SearchTags(opts, args):
  """Searches the tags on all the cluster.

  The (path, tag) pairs returned by the opcode are printed sorted, one
  per line.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the tag pattern
  @rtype: int
  @return: the desired exit code: 0 if at least one match was found
      and printed, 1 if the search returned nothing

  """
  op = opcodes.OpSearchTags(pattern=args[0])
  result = SubmitOpCode(op)
  if not result:
    return 1

  for path, tag in sorted(result):
    ToStdout("%s %s", path, tag)

  # explicit success code, instead of implicitly returning None
  return 0


def SetClusterParams(opts, args):
  """Modify the cluster.

  At least one parameter must be given; the values are then type
  checked via C{utils.ForceDictType} and submitted in an
  C{OpSetClusterParams} opcode.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code (1 if no parameter was given or the
      options conflict)

  """
  if not (not opts.lvm_storage or opts.vg_name or
          opts.enabled_hypervisors or opts.hvparams or
          opts.beparams or opts.candidate_pool_size is not None):
    ToStderr("Please give at least one of the parameters.")
    return 1

  vg_name = opts.vg_name
  if not opts.lvm_storage:
    if vg_name:
      ToStderr("Options --no-lvm-storage and --vg-name conflict.")
      return 1
    # --no-lvm-storage used to be accepted but never reached the opcode
    # (vg_name stayed None); send an empty name to request the change.
    # NOTE(review): this assumes OpSetClusterParams treats an empty
    # vg_name as "disable LVM storage" and None as "leave unchanged" -
    # confirm against the opcode implementation
    vg_name = ""

  hvlist = opts.enabled_hypervisors
  if hvlist is not None:
    hvlist = hvlist.split(",")

  # a list of (name, dict) we can pass directly to dict() (or [])
  hvparams = dict(opts.hvparams)
  for hv_params in hvparams.itervalues():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)

  beparams = opts.beparams
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)

  op = opcodes.OpSetClusterParams(vg_name=vg_name,
                                  enabled_hypervisors=hvlist,
                                  hvparams=hvparams,
                                  beparams=beparams,
                                  candidate_pool_size=opts.candidate_pool_size)
  SubmitOpCode(op)
  return 0


def QueueOps(opts, args):
  """Change or show the drain flag of the job queue.

  Known subcommands are C{drain} and C{undrain}, which set the flag to
  true and false respectively, and C{info}, which prints whether it is
  currently set.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should contain only one element, the subcommand
  @rtype: int
  @return: the desired exit code
  @raise errors.OpPrereqError: for an unknown subcommand

  """
  subcommand = args[0]
  client = GetClient()

  if subcommand == "info":
    drained = client.QueryConfigValues(["drain_flag"])[0]
    if not drained:
      state = "unset"
    else:
      state = "set"
    ToStdout("The drain flag is %s" % state)
  elif subcommand in ("drain", "undrain"):
    client.SetQueueDrainFlag(subcommand == "drain")
  else:
    raise errors.OpPrereqError("Command '%s' is not valid." % subcommand)

  return 0

# The "-n/--node" option is shared by the 'copyfile' and 'command'
# entries of the commands table, hence it is built only once, here.
# It can be repeated; the values are collected in the "nodes" list.
node_option = make_option(
  "-n", "--node",
  dest="nodes",
  metavar="<node>",
  action="append",
  default=[],
  help=("Node to copy to (if not given, all nodes),"
        " can be given multiple times"),
  )

# Table of the sub-commands offered by this script, passed to GenericMain
# at the bottom of the file. Each key is the sub-command name as typed by
# the user, and each value is a five element tuple:
#   (handler function, argument count specification (ARGS_*),
#    list of options accepted, usage string, one-line description)
# The handlers are called as handler(opts, args) and return the exit code.
commands = {
  # cluster creation; most defaults come from the constants module
  'init': (InitCluster, ARGS_ONE,
           [DEBUG_OPT,
            make_option("-s", "--secondary-ip", dest="secondary_ip",
                        help="Specify the secondary ip for this node;"
                        " if given, the entire cluster must have secondary"
                        " addresses",
                        metavar="ADDRESS", default=None),
            # NOTE(review): the help text talks about "IP addresses" for
            # what is a MAC prefix - presumably "MAC addresses" was meant
            make_option("-m", "--mac-prefix", dest="mac_prefix",
                        help="Specify the mac prefix for the instance IP"
                        " addresses, in the format XX:XX:XX",
                        metavar="PREFIX",
                        default=constants.DEFAULT_MAC_PREFIX,),
            # the default is None so that InitCluster can detect a
            # conflict with --no-lvm-storage; it substitutes
            # constants.DEFAULT_VG itself when LVM storage is enabled
            make_option("-g", "--vg-name", dest="vg_name",
                        help="Specify the volume group name "
                        " (cluster-wide) for disk allocation [xenvg]",
                        metavar="VG",
                        default=None,),
            make_option("-b", "--bridge", dest="def_bridge",
                        help="Specify the default bridge name (cluster-wide)"
                          " to connect the instances to [%s]" %
                          constants.DEFAULT_BRIDGE,
                        metavar="BRIDGE",
                        default=constants.DEFAULT_BRIDGE,),
            make_option("--master-netdev", dest="master_netdev",
                        help="Specify the node interface (cluster-wide)"
                          " on which the master IP address will be added "
                          " [%s]" % constants.DEFAULT_BRIDGE,
                        metavar="NETDEV",
                        default=constants.DEFAULT_BRIDGE,),
            make_option("--file-storage-dir", dest="file_storage_dir",
                        help="Specify the default directory (cluster-wide)"
                             " for storing the file-based disks [%s]" %
                             constants.DEFAULT_FILE_STORAGE_DIR,
                        metavar="DIR",
                        default=constants.DEFAULT_FILE_STORAGE_DIR,),
            # negative flag: lvm_storage is True unless the option is given
            make_option("--no-lvm-storage", dest="lvm_storage",
                        help="No support for lvm based instances"
                             " (cluster-wide)",
                        action="store_false", default=True,),
            # kept as a plain string here; InitCluster splits it on commas
            make_option("--enabled-hypervisors", dest="enabled_hypervisors",
                        help="Comma-separated list of hypervisors",
                        type="string", default=None),
            make_option("-t", "--default-hypervisor",
                        dest="default_hypervisor",
                        help="Default hypervisor to use for instance creation",
                        choices=list(constants.HYPER_TYPES),
                        default=constants.DEFAULT_ENABLED_HYPERVISOR),
            # may be repeated; the handler turns the collected list into a
            # dict via dict()
            ikv_option("-H", "--hypervisor-parameters", dest="hvparams",
                       help="Hypervisor and hypervisor options, in the"
                         " format"
                       " hypervisor:option=value,option=value,...",
                       default=[],
                       action="append",
                       type="identkeyval"),
            keyval_option("-B", "--backend-parameters", dest="beparams",
                          type="keyval", default={},
                          help="Backend parameters"),
            make_option("-C", "--candidate-pool-size",
                        default=constants.MASTER_POOL_SIZE_DEFAULT,
                        help="Set the candidate pool size",
                        dest="candidate_pool_size", type="int"),
            ],
           "[opts...] <cluster_name>",
           "Initialises a new cluster configuration"),
  # DestroyCluster refuses to do anything unless --yes-do-it is given
  'destroy': (DestroyCluster, ARGS_NONE,
              [DEBUG_OPT,
               make_option("--yes-do-it", dest="yes_do_it",
                           help="Destroy cluster",
                           action="store_true"),
              ],
              "", "Destroy cluster"),
  'rename': (RenameCluster, ARGS_ONE, [DEBUG_OPT, FORCE_OPT],
               "<new_name>",
               "Renames the cluster"),
  'redist-conf': (RedistributeConfig, ARGS_NONE, [DEBUG_OPT, SUBMIT_OPT],
                  "",
                  "Forces a push of the configuration file and ssconf files"
                  " to the nodes in the cluster"),
  'verify': (VerifyCluster, ARGS_NONE, [DEBUG_OPT,
             make_option("--no-nplus1-mem", dest="skip_nplusone_mem",
                         help="Skip N+1 memory redundancy tests",
                         action="store_true",
                         default=False,),
             ],
             "", "Does a check on the cluster configuration"),
  'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT],
                   "", "Does a check on the cluster disk status"),
  'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT],
                     "", "Makes the current node the master"),
  'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT],
              "", "Shows the cluster version"),
  'getmaster': (ShowClusterMaster, ARGS_NONE, [DEBUG_OPT],
                "", "Shows the cluster master"),
  # the next two entries share the module-level node_option
  'copyfile': (ClusterCopyFile, ARGS_ONE, [DEBUG_OPT, node_option],
               "[-n node...] <filename>",
               "Copies a file to all (or only some) nodes"),
  'command': (RunClusterCommand, ARGS_ATLEAST(1), [DEBUG_OPT, node_option],
              "[-n node...] <command>",
              "Runs a command on all (or only some) nodes"),
  'info': (ShowClusterConfig, ARGS_NONE, [DEBUG_OPT],
                 "", "Show cluster configuration"),
  # ListTags, AddTags and RemoveTags are not defined in this file; they
  # come from the wildcard import of ganeti.cli
  'list-tags': (ListTags, ARGS_NONE,
                [DEBUG_OPT], "", "List the tags of the cluster"),
  'add-tags': (AddTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT],
               "tag...", "Add tags to the cluster"),
  'remove-tags': (RemoveTags, ARGS_ANY, [DEBUG_OPT, TAG_SRC_OPT],
                  "tag...", "Remove tags from the cluster"),
  # NOTE(review): the usage string is empty although exactly one argument
  # (the pattern) is required - presumably "<pattern>" was intended
  'search-tags': (SearchTags, ARGS_ONE,
                  [DEBUG_OPT], "", "Searches the tags on all objects on"
                  " the cluster for a given pattern (regex)"),
  'queue': (QueueOps, ARGS_ONE, [DEBUG_OPT],
            "drain|undrain|info", "Change queue properties"),
  # unlike for 'init', the options below default to "not given" values
  # (None, empty list/dict), which SetClusterParams uses to check that at
  # least one parameter was passed
  'modify': (SetClusterParams, ARGS_NONE,
             [DEBUG_OPT,
              make_option("-g", "--vg-name", dest="vg_name",
                          help="Specify the volume group name "
                          " (cluster-wide) for disk allocation "
                          "and enable lvm based storage",
                          metavar="VG",),
              make_option("--no-lvm-storage", dest="lvm_storage",
                          help="Disable support for lvm based instances"
                               " (cluster-wide)",
                          action="store_false", default=True,),
              make_option("--enabled-hypervisors", dest="enabled_hypervisors",
                          help="Comma-separated list of hypervisors",
                          type="string", default=None),
              ikv_option("-H", "--hypervisor-parameters", dest="hvparams",
                         help="Hypervisor and hypervisor options, in the"
                         " format"
                         " hypervisor:option=value,option=value,...",
                         default=[],
                         action="append",
                         type="identkeyval"),
              keyval_option("-B", "--backend-parameters", dest="beparams",
                            type="keyval", default={},
                            help="Backend parameters"),
              make_option("-C", "--candidate-pool-size", default=None,
                          help="Set the candidate pool size",
                          dest="candidate_pool_size", type="int"),
              ],
             "[opts...]",
             "Alters the parameters of the cluster"),
  }

if __name__ == '__main__':
  # the override sets the tag type to the cluster one; presumably this is
  # what makes the generic tag handlers of ganeti.cli act on the cluster
  exit_status = GenericMain(commands,
                            override={"tag_type": constants.TAG_CLUSTER})
  sys.exit(exit_status)