#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Xen hypervisors

"""

26
import logging
27
import errno
28
import string # pylint: disable=W0402
29
import shutil
30
31
32
33
34
from cStringIO import StringIO

from ganeti import constants
from ganeti import errors
from ganeti import utils
35
from ganeti.hypervisor import hv_base
36
from ganeti import netutils
37
from ganeti import objects
38
from ganeti import pathutils
39
40


41
42
43
44
# Toolstack configuration files and the vif helper script; all of them are
# located below the Xen configuration directory
XEND_CONFIG_FILE = utils.PathJoin(pathutils.XEN_CONFIG_DIR, "xend-config.sxp")
XL_CONFIG_FILE = utils.PathJoin(pathutils.XEN_CONFIG_DIR, "xen/xl.conf")
VIF_BRIDGE_SCRIPT = utils.PathJoin(pathutils.XEN_CONFIG_DIR,
                                   "scripts/vif-bridge")
# Name of the Dom0 entry in the instance list printed by xen
_DOM0_NAME = "Domain-0"
# Letters appended to the block device prefix to name the disks of an
# instance; their number limits the number of disks
_DISK_LETTERS = string.ascii_lowercase

# Maps the file driver of a file-based disk to the driver prefix used in the
# disk directive of the Xen config file
_FILE_DRIVER_MAP = {
  constants.FD_LOOP: "file",
  constants.FD_BLKTAP: "tap:aio",
  constants.FD_BLKTAP2: "tap2:tapdisk:aio",
  }
53
54


55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def _CreateConfigCpus(cpu_mask):
  """Build the CPU pinning directive for Xen's config file.

  @type cpu_mask: string
  @param cpu_mask: CPU mask, parsed with L{utils.ParseMultiCpuMask}
  @rtype: string or None
  @return: C{None} if the mask has a single entry that disables pinning,
      a C{cpu = "..."} line if it has a single other entry, otherwise a
      C{cpus = [ ... ]} line with one quoted element per mask entry

  """
  # The parsed mask is a list of lists of integers
  parsed_masks = utils.ParseMultiCpuMask(cpu_mask)

  if len(parsed_masks) != 1:
    # Result format is 'cpus = [ "c", "c", "c" ]', where each c is a
    # physical CPU number, a range, a list, or any combination
    quoted = []
    for vcpu_mask in parsed_masks:
      if vcpu_mask[0] == constants.CPU_PINNING_ALL_VAL:
        pcpus = constants.CPU_PINNING_ALL_XEN
      else:
        pcpus = ",".join([str(cpu) for cpu in vcpu_mask])
      quoted.append("\"%s\"" % pcpus)
    return "cpus = [ %s ]" % ", ".join(quoted)

  single_mask = parsed_masks[0]
  if single_mask == constants.CPU_PINNING_OFF:
    # A lone "all" entry means no pinning: the parameter is left out of
    # the config file
    return None

  # A lone non-all entry maps all vCPUs (the entire VM) to the same
  # physical CPU(s), using format 'cpu = "C"'
  return "cpu = \"%s\"" % ",".join([str(cpu) for cpu in single_mask])


87
def _RunInstanceList(fn, instance_list_errors):
  """Helper function for L{_GetAllInstanceList}: runs one listing attempt.

  @type fn: callable
  @param fn: Function to query xen for the list of instances
  @type instance_list_errors: list
  @param instance_list_errors: Error list; the result of a failed attempt
      is appended to it
  @rtype: list
  @return: the output lines of the command, heading line included
  @raise utils.RetryAgain: if the listing command failed

  """
  listing = fn()

  if not listing.failed:
    # The heading is not removed here; L{_ParseInstanceList} skips it
    return listing.stdout.splitlines()

  logging.error("Retrieving the instance list from xen failed (%s): %s",
                listing.fail_reason, listing.output)
  instance_list_errors.append(listing)
  raise utils.RetryAgain()


109
110
def _ParseInstanceList(lines, include_node):
  """Parses the output of listing instances by xen.

  @type lines: list
  @param lines: Result of retrieving the instance list from xen; the first
      line is the heading and is ignored
  @type include_node: boolean
  @param include_node: If True, return information for Dom0
  @rtype: list
  @return: one entry per instance, holding (name, id, memory, vcpus, state,
      time spent)
  @raise errors.HypervisorError: if a line can't be parsed

  """
  parsed = []

  for raw_line in lines[1:]:
    # The format of lines is:
    # Name      ID Mem(MiB) VCPUs State  Time(s)
    # Domain-0   0  3418     4 r-----    266.2
    fields = raw_line.split()
    if len(fields) != 6:
      raise errors.HypervisorError("Can't parse instance list,"
                                   " line: %s" % raw_line)
    try:
      # id, memory and vcpus are integers
      for idx in (1, 2, 3):
        fields[idx] = int(fields[idx])
      fields[4] = _XenToHypervisorInstanceState(fields[4])
      fields[5] = float(fields[5])
    except (TypeError, ValueError) as err:
      raise errors.HypervisorError("Can't parse instance list,"
                                   " line: %s, error: %s" % (raw_line, err))

    # Dom0 is only reported on request
    if not include_node and fields[0] == _DOM0_NAME:
      continue

    parsed.append(fields)

  return parsed


Jose A. Lopes's avatar
Jose A. Lopes committed
148
149
def _GetAllInstanceList(fn, include_node, _timeout=5):
  """Return the list of instances including running and shutdown.

  The listing is retried through L{utils.Retry} until it succeeds or
  C{_timeout} is exceeded.

  See L{_RunInstanceList} and L{_ParseInstanceList} for parameter details.

  @raise errors.HypervisorError: if no listing attempt succeeded in time

  """
  failures = []

  try:
    lines = utils.Retry(_RunInstanceList, (0.3, 1.5, 1.0), _timeout,
                        args=(fn, failures))
  except utils.RetryTimeout:
    if not failures:
      raise errors.HypervisorError("listing instances failed")

    # Report the details of the most recent failed attempt
    last_failure = failures.pop()
    raise errors.HypervisorError(
      "listing instances failed, timeout exceeded (%s): %s" %
      (last_failure.fail_reason, last_failure.output))

  return _ParseInstanceList(lines, include_node)
170
171


172
173
174
175
def _IsInstanceRunning(instance_info):
  """Determine whether an instance is running.

  An instance is running if it is in one of the following Xen states:
  running (C{r-----}), blocked (C{-b----}) or paused (C{--p---}).  A state
  without any flag (C{------}) is accepted too: it is understood to mean
  that the domain is runnable but was not scheduled at the moment the list
  was taken.  Without it, such a domain would make
  L{_XenToHypervisorInstanceState} fail for an otherwise healthy instance.

  @type instance_info: string
  @param instance_info: the state column of the xen instance list
  @rtype: boolean
  @return: whether the state denotes a running instance

  """
  return instance_info in ("r-----", "-b----", "--p---", "------")
Jose A. Lopes's avatar
Jose A. Lopes committed
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216


def _IsInstanceShutdown(instance_info):
  """Determine whether a Xen state denotes a shutdown instance.

  @type instance_info: string
  @param instance_info: the state column of the xen instance list
  @rtype: boolean

  """
  shutdown_state = "---s--"
  return instance_info == shutdown_state


def _XenToHypervisorInstanceState(instance_info):
  """Map a Xen state string to a hypervisor instance state.

  @type instance_info: string
  @param instance_info: the state column of the xen instance list
  @return: L{hv_base.HvInstanceState.RUNNING} or
      L{hv_base.HvInstanceState.SHUTDOWN}
  @raise errors.HypervisorError: if the state is neither a running nor a
      shutdown state

  """
  if _IsInstanceRunning(instance_info):
    return hv_base.HvInstanceState.RUNNING

  if _IsInstanceShutdown(instance_info):
    return hv_base.HvInstanceState.SHUTDOWN

  raise errors.HypervisorError("hv_xen._XenToHypervisorInstanceState:"
                               " unhandled Xen instance state '%s'" %
                               instance_info)


def _GetRunningInstanceList(fn, include_node, _timeout=5):
  """Return the list of running instances.

  The result of L{_GetAllInstanceList} is restricted to the entries whose
  state is a running state.

  See L{_GetAllInstanceList} for parameter details.

  """
  running = []
  for info in _GetAllInstanceList(fn, include_node, _timeout):
    if hv_base.HvInstanceState.IsRunning(info[4]):
      running.append(info)
  return running


def _GetShutdownInstanceList(fn, include_node, _timeout=5):
  """Return the list of shutdown instances.

  The result of L{_GetAllInstanceList} is restricted to the entries whose
  state is a shutdown state.

  See L{_GetAllInstanceList} for parameter details.

  """
  shutdown = []
  for info in _GetAllInstanceList(fn, include_node, _timeout):
    if hv_base.HvInstanceState.IsShutdown(info[4]):
      shutdown.append(info)
  return shutdown


217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def _ParseNodeInfo(info):
  """Return information about the node.

  @type info: string
  @param info: output of the xen "info" command, made of "key : value" lines
  @rtype: dict
  @return: a dict with the following keys (memory values in MiB); a key is
      only present if the input provided the data for it:
        - memory_total: the total memory size on the node
        - memory_free: the available memory on the node for instances
        - cpu_total: total number of CPUs
        - cpu_nodes: in a NUMA system, the number of domains
        - cpu_sockets: the number of physical CPU sockets in the node
        - L{constants.HV_NODEINFO_KEY_VERSION}: the hypervisor version in
          the form (major, minor)
  @raise ValueError: if the value of a recognized key is not an integer

  """
  result = {}
  cores_per_socket = threads_per_core = nr_cpus = None
  xen_major, xen_minor = None, None
  memory_total = None
  memory_free = None

  for line in info.splitlines():
    fields = line.split(":", 1)

    # Lines without a colon carry no key/value pair
    if len(fields) < 2:
      continue

    (key, val) = [field.strip() for field in fields]

    # Note: in Xen 3, memory has changed to total_memory
    if key in ("memory", "total_memory"):
      memory_total = int(val)
    elif key == "free_memory":
      memory_free = int(val)
    elif key == "nr_cpus":
      nr_cpus = result["cpu_total"] = int(val)
    elif key == "nr_nodes":
      result["cpu_nodes"] = int(val)
    elif key == "cores_per_socket":
      cores_per_socket = int(val)
    elif key == "threads_per_core":
      threads_per_core = int(val)
    elif key == "xen_major":
      xen_major = int(val)
    elif key == "xen_minor":
      xen_minor = int(val)

  if None not in [cores_per_socket, threads_per_core, nr_cpus]:
    # Explicit floor division: the socket count stays an integer even when
    # true division is in effect
    result["cpu_sockets"] = nr_cpus // (cores_per_socket * threads_per_core)

  if memory_free is not None:
    result["memory_free"] = memory_free

  if memory_total is not None:
    result["memory_total"] = memory_total

  # The version is only reported if both of its components were found
  if not (xen_major is None or xen_minor is None):
    result[constants.HV_NODEINFO_KEY_VERSION] = (xen_major, xen_minor)

  return result


276
def _MergeInstanceInfo(info, instance_list):
  """Updates node information from L{_ParseNodeInfo} with instance info.

  The memory and the vCPUs of Dom0 are stored as C{memory_dom0} and
  C{cpu_dom0}.  If both the total and the free memory are known, the memory
  used by the hypervisor is stored as C{memory_hv}.

  @type info: dict
  @param info: Result from L{_ParseNodeInfo}; modified in place
  @type instance_list: list of tuples
  @param instance_list: list of instance information; one tuple per instance
  @rtype: dict
  @return: the updated C{info} dict

  """
  instances_memory = 0

  for instance in instance_list:
    (name, _, mem, vcpus, _, _) = instance

    if name == _DOM0_NAME:
      info["memory_dom0"] = mem
      info["cpu_dom0"] = vcpus

    # Dom0 counts towards the total memory usage as well
    instances_memory += mem

  mem_free = info.get("memory_free")
  mem_total = info.get("memory_total")

  # Whatever is neither free nor assigned to a domain is used by the
  # hypervisor itself
  if mem_total is not None and mem_free is not None:
    info["memory_hv"] = mem_total - mem_free - instances_memory

  return info


306
def _GetNodeInfo(info, instance_list):
  """Combines L{_MergeInstanceInfo} and L{_ParseNodeInfo}.

  @param info: raw node information, see L{_ParseNodeInfo}
  @type instance_list: list of tuples
  @param instance_list: list of instance information; one tuple per instance
  @rtype: dict
  @return: the parsed node information, updated with the instance data

  """
  node_info = _ParseNodeInfo(info)
  return _MergeInstanceInfo(node_info, instance_list)
314
315


316
317
318
319
320
321
322
323
324
def _GetConfigFileDiskData(block_devices, blockdev_prefix,
                           _letters=_DISK_LETTERS):
  """Get disk directives for Xen config file.

  This method builds one xen config disk directive per entry of
  C{block_devices}.

  @param block_devices: list of tuples (cfdev, dev_path, _):
      - cfdev: disk object; its mode, dev_type and logical_id are used
      - dev_path: path of the device as seen on the node
      - the third member is ignored
  @param blockdev_prefix: a string containing blockdevice prefix,
                          e.g. "sd" for /dev/sda
  @param _letters: suffixes appended to the prefix, one per disk
  @rtype: list of strings
  @return: the quoted disk directives for the xen instance config file
  @raise errors.HypervisorError: if there are more disks than letters

  """
  if len(block_devices) > len(_letters):
    raise errors.HypervisorError("Too many disks")

  directives = []

  for letter, (cfdev, dev_path, _) in zip(_letters, block_devices):
    target = blockdev_prefix + letter

    # Anything which is not explicitly read-write is exported read-only
    access = "r"
    if cfdev.mode == constants.DISK_RDWR:
      access = "w"

    # File-based disks need the driver matching their file driver, all
    # other disks are passed as physical devices
    if cfdev.dev_type not in constants.DTS_FILEBASED:
      driver = "phy"
    else:
      driver = _FILE_DRIVER_MAP[cfdev.logical_id[0]]

    directives.append("'%s:%s,%s,%s'" % (driver, dev_path, target, access))

  return directives


355
356
357
358
359
360
361
362
363
364
365
366
367
def _QuoteCpuidField(data):
  """Add quotes around the CPUID field only if necessary.

  Xen CPUID fields come in two shapes: LIBXL strings, which need quotes around
  them, and lists of XEND strings, which don't.  A value is treated as a
  LIBXL string if it starts with "host".

  @type data: string
  @param data: Either type of parameter.
  @rtype: string
  @return: The quoted version thereof.

  """
  if data.startswith("host"):
    return "'%s'" % data

  return data


368
class XenHypervisor(hv_base.BaseHypervisor):
  """Xen generic hypervisor interface

  This is the Xen base class used for both Xen PVM and HVM. It contains
  all the functionality that is identical for both.

  """
  CAN_MIGRATE = True
  # Used by RebootInstance when waiting for the reboot: the interval is
  # passed to utils.Retry as delay and interval * count as timeout
  # (presumably seconds -- TODO confirm)
  REBOOT_RETRY_COUNT = 60
  REBOOT_RETRY_INTERVAL = 10
  # Runtime directories; they are created when a NIC info file is written
  _ROOT_DIR = pathutils.RUN_DIR + "/xen-hypervisor"
  _NICS_DIR = _ROOT_DIR + "/nic" # contains NICs' info
  _DIRS = [_ROOT_DIR, _NICS_DIR]

  ANCILLARY_FILES = [
    XEND_CONFIG_FILE,
    XL_CONFIG_FILE,
    VIF_BRIDGE_SCRIPT,
    ]
  # NOTE(review): presumably the subset of ANCILLARY_FILES which may be
  # missing -- confirm against hv_base
  ANCILLARY_FILES_OPT = [
    XL_CONFIG_FILE,
    ]

391
  def __init__(self, _cfgdir=None, _run_cmd_fn=None, _cmd=None):
    """Initializes the hypervisor object.

    @param _cfgdir: directory holding the instance config files; defaults
        to L{pathutils.XEN_CONFIG_DIR}
    @param _run_cmd_fn: callable used by L{_RunXen} to run commands;
        defaults to L{utils.RunCmd}
    @param _cmd: Xen command; if not C{None}, L{_GetCommand} uses it
        instead of the command found in the hypervisor parameters

    """
    hv_base.BaseHypervisor.__init__(self)

    self._cfgdir = pathutils.XEN_CONFIG_DIR if _cfgdir is None else _cfgdir
    self._run_cmd_fn = utils.RunCmd if _run_cmd_fn is None else _run_cmd_fn
    self._cmd = _cmd

406
407
408
409
410
411
412
413
414
415
416
417
418
  @staticmethod
  def _GetCommandFromHvparams(hvparams):
    """Returns the Xen command extracted from the given hvparams.

    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters
    @return: the value stored under L{constants.HV_XEN_CMD}
    @raise errors.HypervisorError: if C{hvparams} is C{None} or does not
        contain the Xen command

    """
    if hvparams is not None and constants.HV_XEN_CMD in hvparams:
      return hvparams[constants.HV_XEN_CMD]

    raise errors.HypervisorError("Cannot determine xen command.")

419
  def _GetCommand(self, hvparams):
    """Returns Xen command to use.

    The command given to the constructor takes precedence over the one
    found in the hypervisor parameters.

    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters
    @raise errors.ProgrammerError: if the command is not one of
        L{constants.KNOWN_XEN_COMMANDS}

    """
    xen_cmd = self._cmd
    if xen_cmd is None:
      xen_cmd = XenHypervisor._GetCommandFromHvparams(hvparams)

    if xen_cmd in constants.KNOWN_XEN_COMMANDS:
      return xen_cmd

    raise errors.ProgrammerError("Unknown Xen command '%s'" % xen_cmd)

436
  def _RunXen(self, args, hvparams, timeout=None):
    """Wrapper around L{utils.process.RunCmd} to run Xen command.

    @param args: arguments passed to the Xen command
    @type hvparams: dict of strings
    @param hvparams: dictionary of hypervisor params
    @type timeout: int or None
    @param timeout: if a timeout (in seconds) is specified, the command will be
                    terminated after that number of seconds.
    @return: whatever the command runner returns
    @see: L{utils.process.RunCmd}

    """
    # With a timeout the Xen command is run through the "timeout" utility
    if timeout is None:
      prefix = []
    else:
      prefix = ["timeout", str(timeout)]

    full_cmd = prefix + [self._GetCommand(hvparams)]
    full_cmd.extend(args)

    return self._run_cmd_fn(full_cmd)

457
  def _ConfigFileName(self, instance_name):
    """Get the config file name for an instance.

    The file is named after the instance and located in the configuration
    directory of this hypervisor object.

    @param instance_name: instance name
    @type instance_name: str
    @return: fully qualified path to instance config file
    @rtype: str

    """
    cfg_path = utils.PathJoin(self._cfgdir, instance_name)
    return cfg_path
467

468
  @classmethod
  def _WriteNICInfoFile(cls, instance, idx, nic):
    """Write the info file for one NIC of an instance.

    The file contains one C{KEY=value} line per piece of information
    (instance tags, network information if any, MAC, IP, index, name, UUID,
    mode, link and VLAN) and is stored under the path given by
    L{_InstanceNICFile}.  Missing directories are created first.

    @param instance: the instance owning the NIC
    @param idx: index of the NIC
    @param nic: the NIC object
    @raise errors.HypervisorError: if the file can't be written

    """
    instance_name = instance.name

    needed_dirs = cls._DIRS + [cls._InstanceNICDir(instance_name)]
    utils.EnsureDirs([(dname, constants.RUN_DIRS_MODE)
                      for dname in needed_dirs])

    cfg_file = cls._InstanceNICFile(instance_name, idx)

    # Collect the entries in the order in which they are written
    entries = [("TAGS", r"\ ".join(instance.GetTags()))]

    if nic.netinfo:
      netinfo = objects.Network.FromDict(nic.netinfo)
      entries.extend(netinfo.HooksDict().iteritems())

    entries.append(("MAC", nic.mac))
    if nic.ip:
      entries.append(("IP", nic.ip))
    entries.append(("INTERFACE_INDEX", str(idx)))
    if nic.name:
      entries.append(("INTERFACE_NAME", nic.name))
    entries.append(("INTERFACE_UUID", nic.uuid))
    entries.append(("MODE", nic.nicparams[constants.NIC_MODE]))
    entries.append(("LINK", nic.nicparams[constants.NIC_LINK]))
    entries.append(("VLAN", nic.nicparams[constants.NIC_VLAN]))

    data = StringIO()
    for (key, value) in entries:
      data.write("%s=%s\n" % (key, value))

    try:
      utils.WriteFile(cfg_file, data=data.getvalue())
    except EnvironmentError as err:
      raise errors.HypervisorError("Cannot write Xen instance configuration"
                                   " file %s: %s" % (cfg_file, err))

  @classmethod
  def _InstanceNICDir(cls, instance_name):
    """Returns the directory holding the NIC info files of a given instance.

    @param instance_name: instance name
    @return: path of the instance's directory below the NICs directory

    """
    nic_dir = utils.PathJoin(cls._NICS_DIR, instance_name)
    return nic_dir

  @classmethod
  def _InstanceNICFile(cls, instance_name, seq):
    """Returns the name of the file containing the info of a given NIC.

    @param instance_name: instance name
    @param seq: index of the NIC, used as file name
    @return: path of the file inside the instance's NIC directory

    """
    nic_dir = cls._InstanceNICDir(instance_name)
    return utils.PathJoin(nic_dir, str(seq))

Iustin Pop's avatar
Iustin Pop committed
520
  @classmethod
  def _GetConfig(cls, instance, startup_memory, block_devices):
    """Build Xen configuration for an instance.

    This base implementation only raises C{NotImplementedError}; the
    result is written into the config file by L{_MakeConfigFile}.

    @param instance: the instance to build the configuration for
    @param startup_memory: memory to start the instance with
    @param block_devices: the disks of the instance
    @raise NotImplementedError: always

    """
    raise NotImplementedError()

527
  def _WriteConfigFile(self, instance_name, data):
    """Write the Xen config file for the instance.

    This version of the function just writes the config file from static data.

    @param instance_name: instance name
    @param data: contents of the config file
    @raise errors.HypervisorError: if the file can't be written

    """
    # An entry of the same name in the "auto" subdirectory is removed,
    # just in case it exists
    auto_entry = utils.PathJoin(self._cfgdir, "auto", instance_name)
    utils.RemoveFile(auto_entry)

    cfg_file = self._ConfigFileName(instance_name)
    try:
      utils.WriteFile(cfg_file, data=data)
    except EnvironmentError as err:
      raise errors.HypervisorError("Cannot write Xen instance configuration"
                                   " file %s: %s" % (cfg_file, err))
542

543
  def _ReadConfigFile(self, instance_name):
    """Returns the contents of the instance config file.

    @param instance_name: instance name
    @raise errors.HypervisorError: if the file can't be read

    """
    cfg_file = self._ConfigFileName(instance_name)

    try:
      return utils.ReadFile(cfg_file)
    except EnvironmentError as err:
      raise errors.HypervisorError("Failed to load Xen config file: %s" % err)

556
  def _RemoveConfigFile(self, instance_name):
    """Remove the xen configuration file.

    The directory holding the NIC info files of the instance is removed as
    well; it is not an error if that directory does not exist.

    @param instance_name: instance name

    """
    cfg_file = self._ConfigFileName(instance_name)
    utils.RemoveFile(cfg_file)

    try:
      shutil.rmtree(self._InstanceNICDir(instance_name))
    except OSError as err:
      # A missing directory is fine, anything else is a real problem
      if err.errno == errno.ENOENT:
        return
      raise
566

567
568
569
570
571
572
573
574
575
576
577
  def _StashConfigFile(self, instance_name):
    """Move the Xen config file to the log directory and return its new path.

    The new file name is made of the instance name and a timestamp.

    @param instance_name: instance name
    @return: path of the stashed config file

    """
    stash_name = "%s-%s" % (instance_name, utils.TimestampForFilename())
    source = self._ConfigFileName(instance_name)
    destination = utils.PathJoin(pathutils.LOG_XEN_DIR, stash_name)

    utils.RenameFile(source, destination)

    return destination

578
  def _GetInstanceList(self, include_node, hvparams):
    """Wrapper around module level L{_GetAllInstanceList}.

    @type include_node: boolean
    @param include_node: whether Dom0 is part of the result
    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters to be used on this node

    """
    def _ListFn():
      # One listing attempt; retries are handled by _GetAllInstanceList
      return self._RunXen(["list"], hvparams)

    return _GetAllInstanceList(_ListFn, include_node)
587

588
  def ListInstances(self, hvparams=None):
    """Get the names of the instances listed by Xen.

    Dom0 is not part of the result.

    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters
    @rtype: list of strings
    @return: the instance names

    """
    return [info[0] for info in self._GetInstanceList(False, hvparams)]

596
  def GetInstanceInfo(self, instance_name, hvparams=None):
    """Get instance properties.

    @type instance_name: string
    @param instance_name: the instance name
    @type hvparams: dict of strings
    @param hvparams: the instance's hypervisor params

    @return: (name, id, memory, vcpus, stat, times) of the instance, or
        C{None} if the instance is not listed

    """
    # Dom0 is only part of the listing if it is the instance asked for
    include_dom0 = (instance_name == _DOM0_NAME)

    for data in self._GetInstanceList(include_dom0, hvparams):
      if data[0] == instance_name:
        return data

    return None

615
  def GetAllInstancesInfo(self, hvparams=None):
    """Get properties of all instances.

    Dom0 is not part of the result.

    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters

    @rtype: list
    @return: one entry (name, id, memory, vcpus, state, times) per instance

    """
    all_instances = self._GetInstanceList(False, hvparams)
    return all_instances
626

627
628
629
630
631
632
633
634
635
636
637
638
639
640
  def _MakeConfigFile(self, instance, startup_memory, block_devices):
    """Gather configuration details and write to disk.

    The configuration built by L{_GetConfig} is preceded by a header
    comment and written with L{_WriteConfigFile}.

    See L{_GetConfig} for arguments.

    """
    config = self._GetConfig(instance, startup_memory, block_devices)

    buf = StringIO()
    for chunk in ("# Automatically generated by Ganeti. Do not edit!\n",
                  "\n", config, "\n"):
      buf.write(chunk)

    self._WriteConfigFile(instance.name, buf.getvalue())

641
  def StartInstance(self, instance, block_devices, startup_paused):
    """Start an instance.

    The config file of the instance is written and passed to the Xen
    "create" command.

    @param instance: the instance to start
    @param block_devices: the disks of the instance
    @type startup_paused: boolean
    @param startup_paused: if true, "-p" is passed to the create command
    @raise errors.HypervisorError: if the create command fails

    """
    startup_memory = self._InstanceStartupMemory(instance)

    self._MakeConfigFile(instance, startup_memory, block_devices)

    if startup_paused:
      options = ["-p"]
    else:
      options = []
    cmd = ["create"] + options + [self._ConfigFileName(instance.name)]

    result = self._RunXen(cmd, instance.hvparams)
    if not result.failed:
      return

    # Move the Xen configuration file to the log directory to avoid
    # leaving a stale config file behind.
    stashed_config = self._StashConfigFile(instance.name)
    raise errors.HypervisorError("Failed to start instance %s: %s (%s). Moved"
                                 " config file to %s" %
                                 (instance.name, result.fail_reason,
                                  result.output, stashed_config))
663

664
665
  def StopInstance(self, instance, force=False, retry=False, name=None,
                   timeout=None):
    """Stop an instance.

    A soft shutdown can be interrupted. A hard shutdown tries forever.

    @param instance: the instance to stop; provides the hypervisor
        parameters and the default name
    @param force: whether to do a "hard" stop, see L{_StopInstance}
    @param retry: not used by this implementation
    @param name: name of the domain to stop, if it differs from the
        instance name
    @param timeout: timeout for the shutdown command, or None

    """
    assert(timeout is None or force is not None)

    target = instance.name if name is None else name

    return self._StopInstance(target, force, instance.hvparams, timeout)
677

678
  def _ShutdownInstance(self, name, hvparams, timeout):
    """Shutdown an instance if the instance is running.

    The '-w' flag waits for shutdown to complete which avoids the need
    to poll in the case where we want to destroy the domain
    immediately after shutdown.

    @type name: string
    @param name: name of the instance to stop
    @type hvparams: dict of string
    @param hvparams: hypervisor parameters of the instance
    @type timeout: int or None
    @param timeout: a timeout after which the shutdown command should be killed,
                    or None for no timeout
    @return: the result of the shutdown command, or C{None} if the
        instance is not listed or is already shut down

    """
    instance_info = self.GetInstanceInfo(name, hvparams=hvparams)

    # The state field was already converted to a hypervisor instance state
    # when the listing was parsed, so it has to be checked through
    # HvInstanceState; comparing it with a raw Xen state string would never
    # match
    if (instance_info is None or
        hv_base.HvInstanceState.IsShutdown(instance_info[4])):
      logging.info("Failed to shutdown instance %s, not running", name)
      return None

    return self._RunXen(["shutdown", "-w", name], hvparams, timeout)
Jose A. Lopes's avatar
Jose A. Lopes committed
701
702

  def _DestroyInstance(self, name, hvparams):
    """Destroy an instance if the instance exists.

    @type name: string
    @param name: name of the instance to destroy
    @type hvparams: dict of string
    @param hvparams: hypervisor parameters of the instance
    @return: the result of the destroy command, or C{None} if the instance
        is not listed

    """
    if self.GetInstanceInfo(name, hvparams=hvparams) is not None:
      return self._RunXen(["destroy", name], hvparams)

    logging.info("Failed to destroy instance %s, does not exist", name)
    return None
Jose A. Lopes's avatar
Jose A. Lopes committed
718

719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
  def _DestroyInstanceIfAlive(self, name, hvparams):
    """Destroy a domain after checking that it still exists.

    This method checks if the domain has already been destroyed before
    issuing the 'destroy' command.  This step is necessary to handle
    domains created by other versions of Ganeti.  For example, an
    instance created with 2.10 will be destroyed by the
    '_ShutdownInstance', thus not requiring an additional destroy,
    which would cause an error if issued.  See issue 619.

    NOTE(review): an already destroyed domain is reported with an
    exception, which looks at odds with the rationale above -- confirm the
    intended behavior before changing it.

    @type name: string
    @param name: name of the instance to destroy
    @type hvparams: dict of string
    @param hvparams: hypervisor parameters of the instance
    @return: the result of L{_DestroyInstance}
    @raise errors.HypervisorError: if the instance is not listed anymore

    """
    instance_info = self.GetInstanceInfo(name, hvparams=hvparams)

    if instance_info is None:
      raise errors.HypervisorError("Failed to destroy instance %s, already"
                                   " destroyed" % name)

    # The command result has to be handed back: _StopInstance inspects it
    # in order to detect a failed destroy
    return self._DestroyInstance(name, hvparams)

736
  def _StopInstance(self, name, force, hvparams, timeout):
    """Stop an instance.

    Unless a hard stop is requested, the instance is shut down first; it is
    destroyed in both cases.  The configuration file is removed afterwards.

    @type name: string
    @param name: name of the instance to stop

    @type force: boolean
    @param force: whether to do a "hard" stop (destroy)

    @type hvparams: dict of string
    @param hvparams: hypervisor parameters of the instance

    @type timeout: int or None
    @param timeout: a timeout after which the shutdown command should be killed,
                    or None for no timeout

    @raise errors.HypervisorError: if the instance is not listed, or if the
        destroy step reports a failure while the instance is still listed

    """
    if self.GetInstanceInfo(name, hvparams=hvparams) is None:
      raise errors.HypervisorError("Failed to shutdown instance %s,"
                                   " not running" % name)

    # A soft stop only adds a shutdown before the destroy
    if not force:
      self._ShutdownInstance(name, hvparams, timeout)

    result = self._DestroyInstanceIfAlive(name, hvparams)

    # A failed command only matters if the instance is still around
    stop_failed = (result is not None and result.failed and
                   self.GetInstanceInfo(name, hvparams=hvparams) is not None)
    if stop_failed:
      raise errors.HypervisorError("Failed to stop instance %s: %s, %s" %
                                   (name, result.fail_reason, result.output))

    # Remove configuration file if stopping/starting instance was successful
    self._RemoveConfigFile(name)

773
  def RebootInstance(self, instance):
    """Reboot an instance.

    After issuing the reboot command, the instance is polled until its
    domain ID has changed or its run time has decreased.

    @param instance: the instance to reboot
    @raise errors.HypervisorError: if the instance is not listed, if the
        reboot command fails or if the reboot is not observed in time

    """
    ini_info = self.GetInstanceInfo(instance.name, hvparams=instance.hvparams)
    if ini_info is None:
      raise errors.HypervisorError("Failed to reboot instance %s,"
                                   " not running" % instance.name)

    result = self._RunXen(["reboot", instance.name], instance.hvparams)
    if result.failed:
      raise errors.HypervisorError("Failed to reboot instance %s: %s, %s" %
                                   (instance.name, result.fail_reason,
                                    result.output))

    def _CheckInstance():
      new_info = self.GetInstanceInfo(instance.name, hvparams=instance.hvparams)

      # Keep waiting while the instance is not listed
      if new_info is None:
        raise utils.RetryAgain()

      # Keep waiting as long as neither the domain ID has changed nor the
      # run time has decreased
      if new_info[1] == ini_info[1] and not (new_info[5] < ini_info[5]):
        raise utils.RetryAgain()

    max_wait = self.REBOOT_RETRY_INTERVAL * self.REBOOT_RETRY_COUNT
    try:
      utils.Retry(_CheckInstance, self.REBOOT_RETRY_INTERVAL, max_wait)
    except utils.RetryTimeout:
      raise errors.HypervisorError("Failed to reboot instance %s: instance"
                                   " did not reboot in the expected interval" %
                                   (instance.name, ))
806

807
808
809
810
811
812
813
814
815
  def BalloonInstanceMemory(self, instance, mem):
    """Balloon an instance memory to a certain value.

    The memory is changed with the Xen "mem-set" command; afterwards the
    memory line of the instance config file is updated to the new value.

    @type instance: L{objects.Instance}
    @param instance: instance whose memory is changed
    @type mem: int
    @param mem: actual memory size to use for instance runtime
    @raise errors.HypervisorError: if the memory can't be changed or the
        config file can't be updated

    """
    result = self._RunXen(["mem-set", instance.name, mem], instance.hvparams)
    if result.failed:
      raise errors.HypervisorError("Failed to balloon instance %s: %s (%s)" %
                                   (instance.name, result.fail_reason,
                                    result.output))

    # Update configuration file. "-i" and "-e" have to be separate
    # arguments: sed reads "-ie" as "-i" with the backup suffix "e" and
    # would leave a copy of the config file, named with a trailing "e", in
    # the configuration directory
    cmd = ["sed", "-i", "-e", "s/^memory.*$/memory = %s/" % mem]
    cmd.append(self._ConfigFileName(instance.name))

    result = utils.RunCmd(cmd)
    if result.failed:
      raise errors.HypervisorError("Failed to update memory for %s: %s (%s)" %
                                   (instance.name, result.fail_reason,
                                    result.output))

832
  def GetNodeInfo(self, hvparams=None):
    """Return information about the node.

    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters
    @return: the node information, or C{None} if the Xen "info" command
        failed
    @see: L{_GetNodeInfo} and L{_ParseNodeInfo}

    """
    info_result = self._RunXen(["info"], hvparams)

    if not info_result.failed:
      # Dom0 is part of the listing, its resources are reported as well
      instance_list = self._GetInstanceList(True, hvparams)
      return _GetNodeInfo(info_result.stdout, instance_list)

    logging.error("Can't retrieve xen hypervisor information (%s): %s",
                  info_result.fail_reason, info_result.output)
    return None
846

847
  @classmethod
  def GetInstanceConsole(cls, instance, primary_node, node_group,
                         hvparams, beparams):
    """Return a command for connecting to the console of an instance.

    The console is reached over SSH on the primary node, where the Xen
    console wrapper is run with the Xen command and the instance name.

    @param instance: the instance
    @param primary_node: the primary node of the instance
    @param node_group: node group used to fill the node parameters
    @param hvparams: hypervisor parameters, providing the Xen command
    @param beparams: not used by this implementation
    @return: an L{objects.InstanceConsole} object

    """
    xen_cmd = XenHypervisor._GetCommandFromHvparams(hvparams)
    ndparams = node_group.FillND(primary_node)

    console_cmd = [pathutils.XEN_CONSOLE_WRAPPER, xen_cmd, instance.name]
    ssh_port = ndparams.get(constants.ND_SSH_PORT)

    return objects.InstanceConsole(instance=instance.name,
                                   kind=constants.CONS_SSH,
                                   host=primary_node.name,
                                   port=ssh_port,
                                   user=constants.SSH_CONSOLE_USER,
                                   command=console_cmd)
862

863
  def Verify(self, hvparams=None):
    """Verify the hypervisor.

    For Xen, this checks that the toolstack named in the hypervisor
    parameters (if any) is available on this node, and that the Xen C{info}
    command can be run successfully.

    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters to be verified against

    @return: Problem description if something is wrong, C{None} otherwise

    """
    if hvparams is None:
      return ("Could not verify the hypervisor, because no hvparams were"
              " provided.")

    if constants.HV_XEN_CMD in hvparams:
      toolstack = hvparams[constants.HV_XEN_CMD]
      try:
        self._CheckToolstack(toolstack)
      except errors.HypervisorError:
        return ("The configured xen toolstack '%s' is not available on this"
                " node." % toolstack)

    info = self._RunXen(["info"], hvparams)
    if not info.failed:
      return None

    return ("Retrieving information from xen failed: %s, %s" %
            (info.fail_reason, info.output))

893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
  def MigrationInfo(self, instance):
    """Get instance information to perform a migration.

    For Xen this is whatever L{_ReadConfigFile} returns for the instance's
    name.

    @type instance: L{objects.Instance}
    @param instance: instance to be migrated
    @rtype: string
    @return: content of the xen config file

    """
    config_data = self._ReadConfigFile(instance.name)
    return config_data

  def AcceptInstance(self, instance, info, target):
    """Prepare to accept an instance.

    Nothing is done here for Xen; all arguments are ignored.

    @type instance: L{objects.Instance}
    @param instance: instance to be accepted
    @type info: string
    @param info: content of the xen config file on the source node
    @type target: string
    @param target: target host (usually ip), on this node

    """

917
  def FinalizeMigrationDst(self, instance, info, success):
    """Finalize an instance migration.

    After a successful migration we write the xen config file.
    We do nothing on a failure, as we did not change anything at accept time.

    @type instance: L{objects.Instance}
    @param instance: instance whose migration is being finalized
    @type info: string
    @param info: content of the xen config file on the source node
    @type success: boolean
    @param success: whether the migration was a success or a failure

    """
    if not success:
      # nothing was written at accept time, so there is nothing to undo
      return

    self._WriteConfigFile(instance.name, info)
933

934
  def MigrateInstance(self, cluster_name, instance, target, live):
    """Migrate an instance to a target node.

    The migration will not be attempted if the instance is not
    currently running.

    The migration port is read from the instance's hypervisor parameters and
    the actual work is delegated to L{_MigrateInstance}.

    @type cluster_name: string
    @param cluster_name: name of the cluster, passed on to
      L{_MigrateInstance}
    @type instance: L{objects.Instance}
    @param instance: the instance to be migrated
    @type target: string
    @param target: ip address of the target node
    @type live: boolean
    @param live: perform a live migration

    """
    hvparams = instance.hvparams
    migration_port = hvparams[constants.HV_MIGRATION_PORT]

    return self._MigrateInstance(cluster_name, instance.name, target,
                                 migration_port, live, hvparams)
952
953

  def _MigrateInstance(self, cluster_name, instance_name, target, port, live,
                       hvparams, _ping_fn=netutils.TcpPing):
    """Migrate an instance to a target node.

    With the C{xm} command the target is first probed on the migration port,
    and C{port} and C{live} are turned into command line options. With the
    C{xl} command the migration is given the cluster SSH command and the
    instance's config file; C{port} and C{live} are not used in that case.

    @see: L{MigrateInstance} for details

    @type cluster_name: string
    @param cluster_name: name of the cluster, used to build the SSH command
      for C{xl}
    @type instance_name: string
    @param instance_name: name of the instance to be migrated
    @type target: string
    @param target: ip address of the target node
    @type port: int
    @param port: migration port on the target (C{xm} only)
    @type live: boolean
    @param live: perform a live migration (C{xm} only)
    @type hvparams: dict of strings
    @param hvparams: hypervisor parameters of the instance
    @param _ping_fn: function used to probe the target port
    @raise errors.HypervisorError: if no hvparams are given, the instance is
      not running, the target does not listen on the port (C{xm}), the Xen
      command is not supported or the migration command fails

    """
    if hvparams is None:
      raise errors.HypervisorError("No hvparams provided.")

    if self.GetInstanceInfo(instance_name, hvparams=hvparams) is None:
      raise errors.HypervisorError("Instance not running, cannot migrate")

    cmd = self._GetCommand(hvparams)

    if (cmd == constants.XEN_CMD_XM and
        not _ping_fn(target, port, live_port_needed=True)):
      raise errors.HypervisorError("Remote host %s not listening on port"
                                   " %s, cannot migrate" % (target, port))

    args = ["migrate"]

    if cmd == constants.XEN_CMD_XM:
      args.extend(["-p", "%d" % port])
      if live:
        args.append("-l")

    elif cmd == constants.XEN_CMD_XL:
      args.extend([
        "-s", constants.XL_SSH_CMD % cluster_name,
        "-C", self._ConfigFileName(instance_name),
        ])

    else:
      # Report the command that was actually resolved and tested above, not
      # the instance attribute, which may hold a different value
      raise errors.HypervisorError("Unsupported Xen command: %s" % cmd)

    args.extend([instance_name, target])

    result = self._RunXen(args, hvparams)
    if result.failed:
      raise errors.HypervisorError("Failed to migrate instance %s: %s" %
                                   (instance_name, result.output))
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030

  def FinalizeMigrationSource(self, instance, success, live):
    """Finalize the instance migration on the source node.

    After a successful migration the instance's config file is removed; a
    failure to remove it is only logged. Nothing is done if the migration
    failed.

    @type instance: L{objects.Instance}
    @param instance: the instance that was migrated
    @type success: bool
    @param success: whether the migration succeeded or not
    @type live: bool
    @param live: whether the user requested a live migration or not

    """
    # pylint: disable=W0613
    if not success:
      return

    # the migration succeeded, so the old xen file is no longer needed here
    try:
      self._RemoveConfigFile(instance.name)
    except EnvironmentError:
      logging.exception("Failure while removing instance config file")

  def GetMigrationStatus(self, instance):
    """Get the migration status

    As MigrateInstance for Xen is still blocking, if this method is called it
    means that MigrateInstance has completed successfully. So we can safely
    assume that the migration was successful and notify this fact to the client.

    @type instance: L{objects.Instance}
    @param instance: the instance that is being migrated (not inspected)
    @rtype: L{objects.MigrationStatus}
    @return: the status of the current migration (one of
             L{constants.HV_MIGRATION_VALID_STATUSES}), plus any additional
             progress info that can be retrieved from the hypervisor

    """
    completed = objects.MigrationStatus(
      status=constants.HV_MIGRATION_COMPLETED)
    return completed
1031

1032
  def PowercycleNode(self, hvparams=None):
    """Xen-specific powercycle.

    This first does a Linux reboot (which triggers automatically a Xen
    reboot), and if that fails it tries to do a Xen reboot. The reason
    we don't try a Xen reboot first is that the xen reboot launches an
    external command which connects to the Xen hypervisor, and that
    won't work in case the root filesystem is broken and/or the xend
    daemon is not working.

    @type hvparams: dict of strings
    @param hvparams: hypervisor params to be used on this node

    """
    try:
      self.LinuxPowercycle()
    finally:
      # reached whenever LinuxPowercycle returns or raises
      xen_reboot = [self._GetCommand(hvparams), "debug", "R"]
      utils.RunCmd(xen_reboot)
Iustin Pop's avatar
Iustin Pop committed
1051

1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
  def _CheckToolstack(self, xen_cmd):
    """Check whether the given toolstack is available on the node.

    The binary of the command must be found on the node and, for C{xl}, the
    toolstack must additionally be enabled.

    @type xen_cmd: string
    @param xen_cmd: xen command (e.g. 'xm' or 'xl')
    @raise errors.HypervisorError: if the binary is not found or the C{xl}
      toolstack is not enabled; errors raised by the two helper checks are
      passed through

    """
    if not self._CheckToolstackBinary(xen_cmd):
      raise errors.HypervisorError("No '%s' binary found on node." % xen_cmd)

    if (xen_cmd == constants.XEN_CMD_XL and
        not self._CheckToolstackXlConfigured()):
      # the leading space keeps "this" and "node." apart in the joined text
      raise errors.HypervisorError("Toolstack '%s' is not enabled on this"
                                   " node." % xen_cmd)

  def _CheckToolstackBinary(self, xen_cmd):
    """Checks whether the xen command's binary is found on the machine.

    @type xen_cmd: string
    @param xen_cmd: xen command to look up with C{which}
    @rtype: bool
    @return: C{True} if the C{which} call did not fail, C{False} otherwise
    @raise errors.HypervisorError: if C{xen_cmd} is not one of the known
      Xen commands

    """
    if xen_cmd in constants.KNOWN_XEN_COMMANDS:
      lookup = self._run_cmd_fn(["which", xen_cmd])
      return not lookup.failed

    raise errors.HypervisorError("Unknown xen command '%s'." % xen_cmd)

  def _CheckToolstackXlConfigured(self):
    """Checks whether xl is enabled on an xl-capable node.

    C{xl help} is run; a failure whose error output mentions "toolstack" is
    taken to mean that xl is not enabled.

    @rtype: bool
    @returns: C{True} if 'xl' is enabled, C{False} otherwise
    @raise errors.HypervisorError: if C{xl help} fails for any other reason

    """
    probe = self._run_cmd_fn([constants.XEN_CMD_XL, "help"])
    if not probe.failed:
      return True

    if "toolstack" in probe.stderr:
      return False

    # xl fails for some other reason than the toolstack
    raise errors.HypervisorError("Cannot run xen ('%s'). Error: %s."
                                 % (constants.XEN_CMD_XL, probe.stderr))

1094

1095
1096
1097
1098
1099
1100
1101
1102
1103
def WriteXenConfigEvents(config, hvp):
  """Write the event handling settings of a Xen config file.

  C{on_poweroff} is always 'preserve' and C{on_crash} always 'restart';
  C{on_reboot} is 'restart' if the reboot behavior in the hypervisor
  parameters allows reboots and 'destroy' otherwise.

  @param config: object with a C{write} method receiving the config lines
  @param hvp: hypervisor parameters, indexed by parameter name

  """
  config.write("on_poweroff = 'preserve'\n")

  reboot_allowed = (hvp[constants.HV_REBOOT_BEHAVIOR] ==
                    constants.INSTANCE_REBOOT_ALLOWED)
  if reboot_allowed:
    on_reboot_line = "on_reboot = 'restart'\n"
  else:
    on_reboot_line = "on_reboot = 'destroy'\n"
  config.write(on_reboot_line)

  config.write("on_crash = 'restart'\n")


1104
1105
1106
class XenPvmHypervisor(XenHypervisor):
  """Xen PVM hypervisor interface"""

1107
  PARAMETERS = {
1108
1109
1110
    constants.HV_USE_BOOTLOADER: hv_base.NO_CHECK,
    constants.HV_BOOTLOADER_PATH: hv_base.OPT_FILE_CHECK,
    constants.HV_BOOTLOADER_ARGS: hv_base.NO_CHECK,
1111
1112
    constants.HV_KERNEL_PATH: hv_base.REQ_FILE_CHECK,
    constants.HV_INITRD_PATH: hv_base.OPT_FILE_CHECK,
1113
    constants.HV_ROOT_PATH: hv_base.NO_CHECK,
1114
    constants.HV_KERNEL_ARGS: hv_base.NO_CHECK,
1115
    constants.HV_MIGRATION_PORT: hv_base.REQ_NET_PORT_CHECK,
1116
    constants.HV_MIGRATION_MODE: hv_base.MIGRATION_MODE_CHECK,
1117
1118
    # TODO: Add a check for the blockdev prefix (matching [a-z:] or similar).
    constants.HV_BLOCKDEV_PREFIX: hv_base.NO_CHECK,
1119
    constants.HV_REBOOT_BEHAVIOR:
1120
1121
      hv_base.ParamInSet(True, constants.REBOOT_BEHAVIORS),
    constants.HV_CPU_MASK: hv_base.OPT_MULTI_CPU_MASK_CHECK,
1122
    constants.HV_CPU_CAP: hv_base.OPT_NONNEGATIVE_INT_CHECK,
1123
1124
    constants.HV_CPU_WEIGHT:
      (False, lambda x: 0 < x < 65536, "invalid weight", None, None),
1125
    constants.HV_VIF_SCRIPT: hv_base.OPT_FILE_CHECK,
1126
1127
    constants.HV_XEN_CMD:
      hv_base.ParamInSet(True, constants.KNOWN_XEN_COMMANDS),
1128
    constants.HV_XEN_CPUID: hv_base.NO_CHECK,
1129
    constants.HV_SOUNDHW: hv_base.NO_CHECK,
1130
    }
1131

1132
  def _GetConfig(self, instance, startup_memory, block_devices):
1133
1134
1135
    """Write the Xen config file for the instance.

    """
1136
    hvp = instance.hvparams
1137
1138
1139
    config = StringIO()
    config.write("# this is autogenerated by Ganeti, please do not edit\n#\n")

1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
    # if bootloader is True, use bootloader instead of kernel and ramdisk
    # parameters.
    if hvp[constants.HV_USE_BOOTLOADER]:
      # bootloader handling
      bootloader_path = hvp[constants.HV_BOOTLOADER_PATH]
      if bootloader_path:
        config.write("bootloader = '%s'\n" % bootloader_path)
      else:
        raise errors.HypervisorError("Bootloader enabled, but missing"
                                     " bootloader path")
1150

1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
      bootloader_args = hvp[constants.HV_BOOTLOADER_ARGS]
      if bootloader_args:
        config.write("bootargs = '%s'\n" % bootloader_args)
    else:
      # kernel handling
      kpath = hvp[constants.HV_KERNEL_PATH]
      config.write("kernel = '%s'\n" % kpath)

      # initrd handling
      initrd_path = hvp[constants.HV_INITRD_PATH]
      if initrd_path:
        config.write("ramdisk = '%s'\n" % initrd_path)
1163
1164

    # rest of the settings
1165
    config.write("memory = %d\n" % startup_memory)
1166
    config.write("maxmem = %d\n" % instance.beparams[constants.BE_MAXMEM])
1167
    config.write("vcpus = %d\n" % instance.beparams[constants.BE_VCPUS])
1168
    cpu_pinning = _CreateConfigCpus(hvp[constants.HV_CPU_MASK])
1169
1170
    if cpu_pinning:
      config.write("%s\n" % cpu_pinning)
1171
1172
1173
1174
1175
1176
    cpu_cap = hvp[constants.HV_CPU_CAP]
    if cpu_cap:
      config.write("cpu_cap=%d\n" % cpu_cap)
    cpu_weight = hvp[constants.HV_CPU_WEIGHT]
    if cpu_weight:
      config.write("cpu_weight=%d\n" % cpu_weight)
1177

1178
1179
1180
    config.write("name = '%s'\n" % instance.name)

    vif_data = []
1181
    for idx, nic in enumerate(instance.nics):
Guido Trotter's avatar
Guido Trotter committed
1182
      nic_str = "mac=%s" % (nic.mac)
1183
1184
1185
      ip = getattr(nic, "ip", None)
      if ip is not None:
        nic_str += ", ip=%s" % ip
Guido Trotter's avatar
Guido Trotter committed
1186
1187
      if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
        nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
1188
1189
1190
1191
      if nic.nicparams[constants.NIC_MODE] == constants.NIC_MODE_OVS:
        nic_str += ", bridge=%s" % nic.nicparams[constants.NIC_LINK]
        if nic.nicparams[constants.NIC_VLAN]:
          nic_str += "%s" % nic.nicparams[constants.NIC_VLAN]
1192
1193
      if hvp[constants.HV_VIF_SCRIPT]:
        nic_str += ", script=%s" % hvp[constants.HV_VIF_SCRIPT]
1194
      vif_data.append("'%s'" % nic_str)
1195
      self._WriteNICInfoFile(instance, idx, nic)