burnin 33.2 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
2
3
#!/usr/bin/python
#

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
25

Iustin Pop's avatar
Iustin Pop committed
26
27
import sys
import optparse
Iustin Pop's avatar
Iustin Pop committed
28
import time
29
import socket
30
import urllib
31
from itertools import izip, islice, cycle
Iustin Pop's avatar
Iustin Pop committed
32
from cStringIO import StringIO
Iustin Pop's avatar
Iustin Pop committed
33
34
35
36

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
37
38
from ganeti import errors
from ganeti import utils
Iustin Pop's avatar
Iustin Pop committed
39

40

41
# One-line synopsis shown by Usage() and by the option parser
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
Iustin Pop's avatar
Iustin Pop committed
42

43
# Initial value of the retry counter handed to Burner.MaybeRetry for
# retryable operations (non-retryable ones start at 0)
MAX_RETRIES = 3
Iustin Pop's avatar
Iustin Pop committed
44
45
46
47
48
# Prefix written before a log message, keyed by the indent level passed
# to Log(); levels not listed here get two spaces
LOG_HEADERS = {0: "- ", 1: "* ", 2: ""}
49

50
51
52
53
class InstanceDown(Exception):
  """Signals that an instance which was checked is not up."""


54
55
56
57
class BurninFailure(Exception):
  """Signals that a failure was detected during the burnin run."""


Iustin Pop's avatar
Iustin Pop committed
58
59
60
61
62
63
64
def Usage():
  """Print the usage text on stderr and terminate with exit code 2."""
  for line in ("Usage:", USAGE):
    sys.stderr.write(line + "\n")
  sys.exit(2)

65

Iustin Pop's avatar
Iustin Pop committed
66
def Log(msg, *args, **kwargs):
  """Write a (possibly indented) message to stdout.

  @param msg: the message, or a format string when C{args} are given
  @param args: values interpolated into C{msg} with the % operator
  @keyword indent: indentation level (default 0); it selects both the
      number of leading spaces and the prefix taken from LOG_HEADERS

  """
  text = msg
  if args:
    text = text % args
  level = kwargs.get('indent', 0)
  prefix = LOG_HEADERS.get(level, "  ")
  sys.stdout.write("%*s%s%s\n" % (2*level, "", prefix, text))
  sys.stdout.flush()
Iustin Pop's avatar
Iustin Pop committed
76

Iustin Pop's avatar
Iustin Pop committed
77

78
79
80
81
82
83
84
def Err(msg, exit_code=1):
  """Write an error message to stderr and terminate the program.

  @param msg: the text to print (a newline is appended)
  @param exit_code: the process exit code, 1 by default

  """
  line = msg + "\n"
  stream = sys.stderr
  stream.write(line)
  stream.flush()
  raise SystemExit(exit_code)
85

86
87
88

class SimpleOpener(urllib.FancyURLopener):
  """URL opener that never prompts and turns HTTP errors into InstanceDown."""
  # pylint: disable-msg=W0221

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """Return empty credentials instead of asking the user."""
    # the signature mirrors the parent class, the arguments are unused
    # pylint: disable-msg=W0613
    return (None, None)

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Drain and close the response, then raise InstanceDown."""
    # reading the body and closing the file keeps the socket from being
    # left in CLOSE_WAIT
    fp.read()
    fp.close()
    details = (errcode, errmsg)
    raise InstanceDown("HTTP error returned: code %s, msg %s" % details)


107
108
109
# Command line options accepted by burnin; handed to optparse through
# the option_list argument in Burner.ParseOptions
OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.DEBUG_OPT,
  cli.VERBOSE_OPT,
  cli.NOIPCHECK_OPT,
  cli.NONAMECHECK_OPT,
  cli.EARLY_RELEASE_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  # the help text below used to end in " used", leaving its opening
  # parenthesis unbalanced in the --help output
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used)"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]


212
213
214
215
216
217
218
def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
Iustin Pop's avatar
Iustin Pop committed
219
      self._CheckInstanceAlive(instance) # pylint: disable-msg=W0212
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
    return val

  return wrapper


def _DoBatch(retry):
  """Decorator for possible batch operations.

  Must come after the _DoCheckInstances decorator (if any).

  @param retry: whether this is a retryable batch, will be
      passed to StartBatch

  """
  def wrap(fn):
    def batched(self, *args, **kwargs):
      self.StartBatch(retry)
      val = fn(self, *args, **kwargs)
      self.CommitQueue()
      return val
    return batched

  return wrap


245
246
247
248
249
class Burner(object):
  """Burner class."""

  def __init__(self):
    """Set up logging and default state, then parse options and load state."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()

    # cluster and instance bookkeeping
    self.nodes = []
    self.instances = []
    self.to_rem = []

    # job queue used by the batched operations
    self.queued_ops = []
    self.queue_retry = False

    # filled in by ParseOptions
    self.opts = None
    self.disk_count = None
    self.disk_growth = None
    self.disk_size = None
    self.hvp = None
    self.bep = None

    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

Iustin Pop's avatar
Iustin Pop committed
265
266
267
268
269
270
271
272
273
274
  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer.

    @param msg: feedback entry; element 0 is passed to utils.MergeTime
        to obtain the timestamp and element 2 is used as the text

    """
    timestamp = time.ctime(utils.MergeTime(msg[0]))
    formatted_msg = "%s %s" % (timestamp, msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    # echo to the console only in verbose mode
    if self.opts.verbose:
      Log(formatted_msg, indent=3)
Iustin Pop's avatar
Iustin Pop committed
279

280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
Iustin Pop's avatar
Iustin Pop committed
297
298
        Log("Idempotent %s succeeded after %d retries",
            msg, MAX_RETRIES - retry_count)
299
      return val
Iustin Pop's avatar
Iustin Pop committed
300
    except Exception, err: # pylint: disable-msg=W0703
301
      if retry_count == 0:
Iustin Pop's avatar
Iustin Pop committed
302
        Log("Non-idempotent %s failed, aborting", msg)
303
304
        raise
      elif retry_count == 1:
Iustin Pop's avatar
Iustin Pop committed
305
        Log("Idempotent %s repeated failure, aborting", msg)
306
307
        raise
      else:
Iustin Pop's avatar
Iustin Pop committed
308
309
        Log("Idempotent %s failed, retry #%d/%d: %s",
            msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err)
310
311
        self.MaybeRetry(retry_count - 1, msg, fn, *args)

Iustin Pop's avatar
Iustin Pop committed
312
313
314
315
316
  def _SetDebug(self, ops):
    """Set the debug value on the given opcodes"""
    for op in ops:
      op.debug_level = self.opts.debug

317
  def _ExecOp(self, *ops):
    """Submit the opcodes as one job and wait for it to finish.

    @result: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) != 1:
      return results
    return results[0]

331
332
333
334
335
336
337
338
339
340
341
  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes, retrying them if allowed.

    @param retry: whether the opcodes may be retried on failure
    @result: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    rval = 0
    if retry:
      rval = MAX_RETRIES
    self._SetDebug(ops)
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

345
  def ExecOrQueue(self, name, *ops):
Iustin Pop's avatar
Iustin Pop committed
346
    """Execute an opcode and manage the exec buffer."""
347
    if self.opts.parallel:
Iustin Pop's avatar
Iustin Pop committed
348
      self._SetDebug(ops)
349
350
      self.queued_ops.append((ops, name))
    else:
351
352
353
354
355
356
357
358
359
360
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry
361
362
363
364
365
366

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel:
      return

367
368
369
370
371
    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

372
    try:
373
374
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
375
376
377
    finally:
      self.queued_ops = []
    return results
378
379
380
381
382

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
383
    successful. Otherwise, OpExecError will be raised from within
384
385
386
387
    cli.py.

    """
    self.ClearFeedbackBuf()
Iustin Pop's avatar
Iustin Pop committed
388
389
390
391
392
393
394
    jex = cli.JobExecutor(cl=self.cl, feedback_fn=self.Feedback)
    for ops, name in jobs:
      jex.QueueJob(name, *ops) # pylint: disable-msg=W0142
    try:
      results = jex.GetResults()
    except Exception, err: # pylint: disable-msg=W0703
      Log("Jobs failed: %s", err)
395
      raise BurninFailure()
Iustin Pop's avatar
Iustin Pop committed
396
397
398
399
400

    if utils.any(results, lambda x: not x[0]):
      raise BurninFailure()

    return [i[1] for i in results]
Iustin Pop's avatar
Iustin Pop committed
401

402
403
404
405
406
407
408
409
  def ParseOptions(self):
    """Parse and validate the command line.

    On command line errors the usage or an error message is printed and
    the program exits. On success the options, the instance names, the
    disk layout and the backend/hypervisor parameters are stored on the
    object and the default socket timeout is set.

    """
    version = "%%prog (ganeti) %s" % constants.RELEASE_VERSION
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=version,
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if not args or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    is_diskless = options.disk_template == constants.DT_DISKLESS
    if is_diskless:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(item)
                   for item in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(item)
                     for item in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    # disks must be present exactly when the template is not diskless
    if bool(disk_size) == is_diskless:
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

    socket.setdefaulttimeout(options.net_timeout)

458
459
460
461
462
463
464
  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
Iustin Pop's avatar
Iustin Pop committed
465
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
466
                                names=names, use_locking=True)
467
      result = self.ExecOp(True, op)
468
469
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
470
      Err(msg, exit_code=err_code)
Iustin Pop's avatar
Iustin Pop committed
471
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]
472

473
474
475
    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name", "valid",
                                                      "variants"], names=[])
    result = self.ExecOp(True, op_diagnose)
476
477

    if not result:
478
      Err("Can't get the OS list")
479

480
481
482
483
484
    found = False
    for (name, valid, variants) in result:
      if valid and self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break
485

486
    if not found:
487
      Err("OS '%s' not found" % self.opts.os)
488

489
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create all requested instances.

    Primary and secondary nodes are assigned round-robin from
    C{self.nodes}, unless an iallocator has been requested. Every
    submitted instance is recorded in C{self.to_rem}.

    """
    self.to_rem = []
    primaries = cycle(self.nodes)
    secondaries = islice(cycle(self.nodes), 1, None)
    placement = izip(primaries, secondaries, self.instances)

    Log("Creating instances")
    for pnode, snode, instance in placement:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        # no secondary node is passed for these templates
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      disks = [{"size": size} for size in self.disk_size]
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=disks,
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      self.to_rem.append(instance)

537
  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow every disk of every instance by its requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if not growth > 0:
          # nothing requested for this disk
          continue
        op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                amount=growth, wait_for_sync=True)
        Log("increase disk/%s by %s MB", idx, growth, indent=2)
        self.ExecOrQueue(instance, op)
Iustin Pop's avatar
Iustin Pop committed
549

550
  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace the disks on the secondary, then on the primary (drbd8)."""
    Log("Replacing disks on the same nodes")
    modes = (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI)
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      ops = []
      for mode in modes:
        disk_indices = list(range(self.disk_count))
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=disk_indices,
                                    early_release=self.opts.early_release)
        Log("run %s", mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142
565

566
  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Move the secondary of every instance to a different node.

    The new node is taken round-robin from C{self.nodes} (starting at
    the third entry), unless an iallocator has been requested.

    """
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    targets = islice(cycle(self.nodes), 2, None)
    for tnode, instance in izip(targets, self.instances):
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[],
                                  early_release=self.opts.early_release)
      Log("run %s %s", mode, msg, indent=2)
      self.ExecOrQueue(instance, op)
589

590
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Fail over every instance, without ignoring consistency."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      failover_op = opcodes.OpFailoverInstance(instance_name=instance,
                                               ignore_consistency=False)
      self.ExecOrQueue(instance, failover_op)
600

601
602
603
604
605
606
607
608
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move every instance to a node picked round-robin."""
    Log("Moving instances")
    targets = islice(cycle(self.nodes), 1, None)
    for tnode, instance in izip(targets, self.instances):
      Log("instance %s", instance, indent=1)
      move_op = opcodes.OpMoveInstance(instance_name=instance,
                                       target_node=tnode)
      self.ExecOrQueue(instance, move_op)
613

614
  @_DoBatch(False)
  def BurnMigrate(self):
    """Live-migrate every instance and then run a migration cleanup."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      migrate_op = opcodes.OpMigrateInstance(instance_name=instance,
                                             live=True, cleanup=False)
      cleanup_op = opcodes.OpMigrateInstance(instance_name=instance,
                                             live=True, cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, migrate_op, cleanup_op)
627

628
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export each instance, remove it, import it back and drop the export.

    The primary, secondary and export nodes are picked round-robin from
    C{self.nodes}; with an iallocator only the export node is fixed.

    """
    Log("Exporting and re-importing instances")
    placement = izip(cycle(self.nodes),
                     islice(cycle(self.nodes), 1, None),
                     islice(cycle(self.nodes), 2, None),
                     self.instances)

    for pnode, snode, enode, instance in placement:
      Log("instance %s", instance, indent=1)
      # the export directory is named after the full instance name, so
      # query it first (executed right away, never queued)
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = utils.PathJoin(constants.EXPORT_DIR, full_name)
      disks = [{"size": size} for size in self.disk_size]
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=disks,
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s", enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)
694

695
696
  @staticmethod
  def StopInstanceOp(instance):
    """Build the opcode which shuts down the given instance."""
    shutdown_op = opcodes.OpShutdownInstance(instance_name=instance)
    return shutdown_op
699

700
701
  @staticmethod
  def StartInstanceOp(instance):
    """Build the opcode which starts the given instance (not forced)."""
    startup_op = opcodes.OpStartupInstance(instance_name=instance,
                                           force=False)
    return startup_op
704

705
706
  @staticmethod
  def RenameInstanceOp(instance, instance_new):
    """Build the opcode renaming C{instance} to C{instance_new}."""
    rename_op = opcodes.OpRenameInstance(instance_name=instance,
                                         new_name=instance_new)
    return rename_op
710

711
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop every instance and start it again."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      stop_op = self.StopInstanceOp(instance)
      start_op = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, stop_op, start_op)

722
  @_DoBatch(False)
  def BurnRemove(self):
    """Remove all instances recorded in C{self.to_rem}, ignoring failures."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s", instance, indent=1)
      remove_op = opcodes.OpRemoveInstance(instance_name=instance,
                                           ignore_failures=True)
      self.ExecOrQueue(instance, remove_op)

  def BurnRename(self):
    """Rename each instance to the spare name and back again.

    The instance is checked for liveness after each of the two renames.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)

      # there: original name -> spare name
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)

      # and back: spare name -> original name
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

754
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall every instance twice.

    The sequence handed to ExecOrQueue is: stop, reinstall without an
    os_type, reinstall with os_type set to opts.os, start.

    """
    Log("Reinstalling instances")
    for name in self.instances:
      Log("instance %s", name, indent=1)
      stop_op = self.StopInstanceOp(name)
      reinstall_no_os = opcodes.OpReinstallInstance(instance_name=name)
      Log("reinstall without passing the OS", indent=2)
      reinstall_with_os = opcodes.OpReinstallInstance(os_type=self.opts.os,
                                                      instance_name=name)
      Log("reinstall specifying the OS", indent=2)
      start_op = self.StartInstanceOp(name)
      self.ExecOrQueue(name, stop_op, reinstall_no_os, reinstall_with_os,
                       start_op)

770
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot every instance once per known reboot type.

    One reboot opcode is built for each entry of constants.REBOOT_TYPES
    (with ignore_secondaries=False) and all of them are handed to
    ExecOrQueue together, in that order.

    """
    Log("Rebooting instances")
    for name in self.instances:
      Log("instance %s", name, indent=1)
      reboot_ops = []
      for kind in constants.REBOOT_TYPES:
        reboot_ops.append(opcodes.OpRebootInstance(ignore_secondaries=False,
                                                   reboot_type=kind,
                                                   instance_name=name))
        Log("reboot with type '%s'", kind, indent=2)
      self.ExecOrQueue(name, *reboot_ops) # pylint: disable-msg=W0142
785

786
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate the disks of every instance.

    The sequence handed to ExecOrQueue is: activate disks, stop the
    instance, activate disks again, deactivate disks, start the
    instance. The same activate opcode object is used for both
    activations.

    """
    Log("Activating/deactivating disks")
    for name in self.instances:
      Log("instance %s", name, indent=1)
      start_op = self.StartInstanceOp(name)
      activate = opcodes.OpActivateInstanceDisks(instance_name=name)
      deactivate = opcodes.OpDeactivateInstanceDisks(instance_name=name)
      stop_op = self.StopInstanceOp(name)
      for message in ("activate disks when online",
                      "activate disks when offline",
                      "deactivate disks (when offline)"):
        Log(message, indent=2)
      self.ExecOrQueue(name, activate, stop_op, activate, deactivate,
                       start_op)
801

802
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add an extra disk to every instance and remove a disk again.

    The sequence handed to ExecOrQueue is: add a disk whose size is
    self.disk_size[0], stop the instance, apply a DDM_REMOVE disk
    modification, start the instance.

    """
    Log("Adding and removing disks")
    for name in self.instances:
      Log("instance %s", name, indent=1)
      new_disk = (constants.DDM_ADD, {"size": self.disk_size[0]})
      add_disk = opcodes.OpSetInstanceParams(instance_name=name,
                                             disks=[new_disk])
      drop_disk = opcodes.OpSetInstanceParams(
        instance_name=name, disks=[(constants.DDM_REMOVE, {})])
      stop_op = self.StopInstanceOp(name)
      start_op = self.StartInstanceOp(name)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(name, add_disk, stop_op, drop_disk, start_op)
819

820
  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add an extra NIC to every instance and remove a NIC again.

    A DDM_ADD NIC modification followed by a DDM_REMOVE one is handed
    to ExecOrQueue for each instance; both use empty parameters.

    """
    Log("Adding and removing NICs")
    for name in self.instances:
      Log("instance %s", name, indent=1)
      add_nic = opcodes.OpSetInstanceParams(
        instance_name=name, nics=[(constants.DDM_ADD, {})])
      drop_nic = opcodes.OpSetInstanceParams(
        instance_name=name, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(name, add_nic, drop_nic)
833

834
835
836
837
838
839
840
841
842
843
844
  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the url /hostname.txt on the instance
    and check that its contents, stripped of surrounding whitespace,
    equal the name of the instance. Any IOError raised while opening
    the url (connection refused, no route to host, etc.) is retried
    once per second until opts.net_timeout seconds have passed.

    Nothing is checked when opts.http_check is not set.

    @param instance: name of the instance to check
    @raise InstanceDown: if the url could not be opened before the
        timeout expired, or if the returned hostname does not match
        the instance name

    """
    if not self.opts.http_check:
      return
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    try:
      hostname = url.read().strip()
    finally:
      # always release the connection, even if reading the reply fails
      url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

861
862
863
864
865
866
867
868
869
870
  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

871
    Log("Testing global parameters")
872

873
    if (len(self.nodes) == 1 and
874
875
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
876
      Err("When one node is available/selected the disk template must"
877
          " be 'diskless', 'file' or 'plain'")
878

Iustin Pop's avatar
Iustin Pop committed
879
    has_err = True
880
    try:
881
      self.BurnCreateInstances()
882
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
883
        self.BurnReplaceDisks1D8()
884
885
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR) :
886
        self.BurnReplaceDisks2()
887

888
889
      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
890
        self.BurnGrowDisks()
Iustin Pop's avatar
Iustin Pop committed
891

892
      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
893
        self.BurnFailover()
894

895
      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
896
        self.BurnMigrate()
897

898
899
      if (opts.do_move and len(self.nodes) > 1 and
          opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
900
901
        self.BurnMove()

902
903
904
      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
905
        self.BurnImportExport()
906

907
      if opts.do_reinstall:
908
        self.BurnReinstall()
909
910

      if opts.do_reboot:
911
        self.BurnReboot()
912

913
      if opts.do_addremove_disks:
914
        self.BurnAddRemoveDisks()
915
916

      if opts.do_addremove_nics:
917
        self.BurnAddRemoveNICs()
918

919
      if opts.do_activate_disks:
920
        self.BurnActivateDisks()
921

922
      if opts.rename:
923
        self.BurnRename()
924

925
      if opts.do_startstop:
926
        self.BurnStopStart()
927

Iustin Pop's avatar
Iustin Pop committed
928
      has_err = False