#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""

import os
import sys
import optparse
import time
import socket
import urllib
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
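# An illustrative invocation (the OS, node and instance names below are made
# up); -o and at least one instance name are the only required arguments:
#   burnin -o debian-etch -t drbd -n node1,node2 instance1 instance2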

MAX_RETRIES = 3

class InstanceDown(Exception):
  """The checked instance was not up"""


class BurninFailure(Exception):
  """Failure detected during burning"""


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg, indent=0):
  """Simple function that prints out its argument.

  """
  headers = {
    0: "- ",
    1: "* ",
    2: ""
    }
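  # Resulting prefixes (illustrative): indent=0 -> "- msg", indent=1 ->
  # "  * msg", indent=2 -> "    msg"; deeper levels fall back to "  ".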
  sys.stdout.write("%*s%s%s\n" % (2*indent, "",
                                   headers.get(indent, "  "), msg))
  sys.stdout.flush()
def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  """
  sys.stderr.write(msg + "\n")
  sys.stderr.flush()
  sys.exit(exit_code)

class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""
  # pylint: disable-msg=W0221

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    # we follow parent class' API
    # pylint: disable-msg=W0613
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT; this is similar to
    # the BasicURLOpener class but raises a different exception
    _ = fp.read() # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))


OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.VERBOSE_OPT,
  cli.NOIPCHECK_OPT,
  cli.NONAMECHECK_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used)"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]


def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
      self._CheckInstanceAlive(instance) # pylint: disable-msg=W0212
    return val

  return wrapper


def _DoBatch(retry):
  """Decorator for possible batch operations.

  Must come after the _DoCheckInstances decorator (if any).

  @param retry: whether this is a retryable batch, will be
      passed to StartBatch

  """
  def wrap(fn):
    def batched(self, *args, **kwargs):
      self.StartBatch(retry)
      val = fn(self, *args, **kwargs)
      self.CommitQueue()
      return val
    return batched

  return wrap
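
# When both decorators are used on a burnin method, _DoCheckInstances goes on
# top, mirroring the methods defined below (sketch only, hypothetical name):
#   @_DoCheckInstances
#   @_DoBatch(False)
#   def BurnSomething(self): ...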


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.queued_ops = []
    self.opts = None
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer."""
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      Log(formatted_msg, indent=3)
  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries" %
            (msg, MAX_RETRIES - retry_count))
      return val
    except Exception, err: # pylint: disable-msg=W0703
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting" % (msg, ))
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting" % (msg, ))
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s" %
            (msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
        return self.MaybeRetry(retry_count - 1, msg, fn, *args)

  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) == 1:
      return results[0]
    else:
      return results

  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      rval = MAX_RETRIES
    else:
      rval = 0
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

  def ExecOrQueue(self, name, *ops):
    """Execute an opcode and manage the exec buffer."""
    if self.opts.parallel:
      self.queued_ops.append((ops, name))
    else:
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel:
      return

    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

    try:
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
    finally:
      self.queued_ops = []
    return results

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    job_ids = [cli.SendJob(row[0], cl=self.cl) for row in jobs]
    Log("Submitted job ID(s) %s" % utils.CommaJoin(job_ids), indent=1)
    results = []
    for jid, (_, iname) in zip(job_ids, jobs):
      Log("waiting for job %s for %s" % (jid, iname), indent=2)
      try:
        results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
      except Exception, err: # pylint: disable-msg=W0703
        Log("Job for %s failed: %s" % (iname, err))
    if len(results) != len(jobs):
      raise BurninFailure()
    return results
  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}
    socket.setdefaulttimeout(options.net_timeout)

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]
    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name", "valid",
                                                      "variants"], names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    found = False
    for (name, valid, variants) in result:
      if valid and self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      self.to_rem.append(instance)

  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow the instance disks by the requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
          self.ExecOrQueue(instance, op)

  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("run %s" % mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142

  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[])
      Log("run %s %s" % (mode, msg), indent=2)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move the instances."""
    Log("Moving instances")
    mytor = izip(islice(cycle(self.nodes), 1, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpMoveInstance(instance_name=instance,
                                  target_node=tnode)
      self.ExecOrQueue(instance, op)

  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=False)

      op2 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, op1, op2)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                           target_node=enode,
                                           shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s" % enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)

  @staticmethod
  def StopInstanceOp(instance):
    """Stop given instance."""
    return opcodes.OpShutdownInstance(instance_name=instance)

  @staticmethod
  def StartInstanceOp(instance):
    """Start given instance."""
    return opcodes.OpStartupInstance(instance_name=instance, force=False)

  @staticmethod
  def RenameInstanceOp(instance, instance_new):
    """Rename instance."""
    return opcodes.OpRenameInstance(instance_name=instance,
                                    new_name=instance_new)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2)

  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      self.ExecOrQueue(instance, op)

  def BurnRename(self):
    """Rename the instances.
    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = opcodes.OpReinstallInstance(instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      op3 = opcodes.OpReinstallInstance(instance_name=instance,
                                        os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      op4 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2, op3, op4)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'" % reboot_type, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_start = self.StartInstanceOp(instance)
      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      op_stop = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      self.ExecOrQueue(instance, op_act, op_stop, op_act, op_deact, op_start)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(\
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpSetInstanceParams(\
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      op_stop = self.StopInstanceOp(instance)
      op_start = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(instance, op_add, op_stop, op_rem, op_start)

  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(\
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpSetInstanceParams(\
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, op_add, op_rem)

  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the URL /hostname.txt on the instance
    and check that it contains the hostname of the instance. Transient
    errors (e.g. connection refused) are retried until the network
    timeout expires; if the instance cannot be contacted by then, or
    the returned hostname does not match, we raise InstanceDown.

    """
    if not self.opts.http_check:
      return
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = url.read().strip()
    url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    has_err = True
    try:
      self.BurnCreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.BurnReplaceDisks2()

      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnFailover()

      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
        self.BurnMigrate()

      if (opts.do_move and len(self.nodes) > 1 and
          opts.disk_template in [constants.DT_PLAIN, constants.DT_FILE]):
        self.BurnMove()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      if opts.do_addremove_nics:
        self.BurnAddRemoveNICs()

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:  # pylint: disable-msg=W0703
          if has_err: # already detected errors, so errors in removal
                      # are quite expected
            Log("Note: error detected during instance remove: %s" % str(err))
          else: # non-expected error
            raise

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  main()