#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
25

import os
import sys
import optparse
import time
import socket
import urllib
from itertools import izip, islice, cycle
from cStringIO import StringIO

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
from ganeti import errors
from ganeti import utils


USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")

MAX_RETRIES = 3

class InstanceDown(Exception):
  """The checked instance was not up"""


class BurninFailure(Exception):
  """Failure detected during burning"""


def Usage():
  """Shows program usage information and exits the program."""

  print >> sys.stderr, "Usage:"
  print >> sys.stderr, USAGE
  sys.exit(2)


def Log(msg, indent=0):
  """Simple function that prints out its argument.

  """
  headers = {
    0: "- ",
    1: "* ",
    2: ""
    }
  sys.stdout.write("%*s%s%s\n" % (2*indent, "",
                                   headers.get(indent, "  "), msg))
  sys.stdout.flush()

def Err(msg, exit_code=1):
  """Simple error logging that prints to stderr.

  """
  sys.stderr.write(msg + "\n")
  sys.stderr.flush()
  sys.exit(exit_code)


class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read() # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))


OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.VERBOSE_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]
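# Example invocation (hypothetical OS, node and instance names):
#   burnin -o debootstrap -t drbd -n node1.example.com,node2.example.com \
#     instance1.example.com instance2.example.com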


class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.queued_ops = []
    self.opts = None
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Acumulate feedback in our buffer."""
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      Log(formatted_msg, indent=3)

  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries" %
            (msg, MAX_RETRIES - retry_count))
      return val
    except Exception, err:
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting" % (msg, ))
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting" % (msg, ))
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s" %
            (msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
        return self.MaybeRetry(retry_count - 1, msg, fn, *args)

  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @return: if only one opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    if len(ops) == 1:
      return results[0]
    else:
      return results

  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @result: if only opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      rval = MAX_RETRIES
    else:
      rval = 0
    return self.MaybeRetry(rval, "opcode", self._ExecOp, *ops)

  def ExecOrQueue(self, name, *ops):
    """Execute an opcode and manage the exec buffer."""
    if self.opts.parallel:
      self.queued_ops.append((ops, name))
    else:
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel:
      return

    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

    try:
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
    finally:
      self.queued_ops = []
    return results

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    job_ids = [cli.SendJob(row[0], cl=self.cl) for row in jobs]
    Log("Submitted job ID(s) %s" % ", ".join(job_ids), indent=1)
    results = []
    for jid, (_, iname) in zip(job_ids, jobs):
      Log("waiting for job %s for %s" % (jid, iname), indent=2)
      try:
        results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
      except Exception, err:
        Log("Job for %s failed: %s" % (iname, err))
    if len(results) != len(jobs):
      raise BurninFailure()
    return results

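  # The two helpers below are used as decorators on the Burn* methods, e.g.:
  #
  #   @_DoCheckInstances
  #   @_DoBatch(False)
  #   def BurnCreateInstances(self):
  #     ...
  #
  # _DoBatch wraps the method in StartBatch()/CommitQueue(), while
  # _DoCheckInstances (applied outermost) verifies that every instance is
  # still alive once the batch has been committed.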
  def _DoCheckInstances(fn):
    """Decorator for checking instances.

    """
    def wrapper(self, *args, **kwargs):
      val = fn(self, *args, **kwargs)
      for instance in self.instances:
        self._CheckInstanceAlive(instance)
      return val

    return wrapper

  def _DoBatch(retry):
    """Decorator for possible batch operations.

    Must come after the _DoCheckInstances decorator (if any).

    @param retry: whether this is a retryable batch, will be
        passed to StartBatch

    """
    def wrap(fn):
      def batched(self, *args, **kwargs):
        self.StartBatch(retry)
        val = fn(self, *args, **kwargs)
        self.CommitQueue()
        return val
      return batched

    return wrap

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

    socket.setdefaulttimeout(options.net_timeout)

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    # keep only nodes that are neither offline nor drained
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name", "valid",
                                                      "variants"], names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    found = False
    for (name, valid, variants) in result:
      if valid and self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks=[{"size": size}
                                           for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      self.to_rem.append(instance)

  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow the instance disks by the requested amounts, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        if growth > 0:
          op = opcodes.OpGrowDisk(instance_name=instance, disk=idx,
                                  amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB" % (idx, growth), indent=2)
          self.ExecOrQueue(instance, op)

  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)])
        Log("run %s" % mode, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops)

  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[])
      Log("run %s %s" % (mode, msg), indent=2)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      self.ExecOrQueue(instance, op)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move the instances."""
    Log("Moving instances")
    mytor = izip(islice(cycle(self.nodes), 1, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpMoveInstance(instance_name=instance,
                                  target_node=tnode)
      self.ExecOrQueue(instance, op)

  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=False)

      op2 = opcodes.OpMigrateInstance(instance_name=instance, live=True,
                                      cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, op1, op2)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks=[{"size": size}
                                               for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s" % enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)

  def StopInstanceOp(self, instance):
    """Stop given instance."""
    return opcodes.OpShutdownInstance(instance_name=instance)

  def StartInstanceOp(self, instance):
    """Start given instance."""
    return opcodes.OpStartupInstance(instance_name=instance, force=False)

  def RenameInstanceOp(self, instance, instance_new):
    """Rename instance."""
    return opcodes.OpRenameInstance(instance_name=instance,
                                    new_name=instance_new)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2)

  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s" % instance, indent=1)
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      self.ExecOrQueue(instance, op)

  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op1 = self.StopInstanceOp(instance)
      op2 = opcodes.OpReinstallInstance(instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      op3 = opcodes.OpReinstallInstance(instance_name=instance,
                                        os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      op4 = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, op1, op2, op3, op4)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      ops = []
      for reboot_type in constants.REBOOT_TYPES:
        op = opcodes.OpRebootInstance(instance_name=instance,
                                      reboot_type=reboot_type,
                                      ignore_secondaries=False)
        Log("reboot with type '%s'" % reboot_type, indent=2)
        ops.append(op)
      self.ExecOrQueue(instance, *ops)

  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_start = self.StartInstanceOp(instance)
      op_act = opcodes.OpActivateInstanceDisks(instance_name=instance)
      op_deact = opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      op_stop = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      self.ExecOrQueue(instance, op_act, op_stop, op_act, op_deact, op_start)

  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      op_rem = opcodes.OpSetInstanceParams(
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      op_stop = self.StopInstanceOp(instance)
      op_start = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      self.ExecOrQueue(instance, op_add, op_stop, op_rem, op_start)

  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add and remove an extra NIC for the instances."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_add = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      op_rem = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, op_add, op_rem)

  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the URL http://<instance>/hostname.txt and
    check that it contains the hostname of the instance. In case we get
    ECONNREFUSED, we retry for up to net_timeout seconds; for any other
    error we abort.

    """
    if not self.opts.http_check:
      return
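    # This assumes the instance's OS image runs a web server on port 80 that
    # exports /hostname.txt containing the instance's own name; provisioning
    # that file is left to the OS definition used for the burnin.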
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # here we can have connection refused, no route to host, etc.
        time.sleep(1)
    if url is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = url.read().strip()
    url.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    """

    opts = self.opts

    Log("Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    has_err = True
    try:
      self.BurnCreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.BurnReplaceDisks2()

      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnFailover()

      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
        self.BurnMigrate()

      if opts.do_move and opts.disk_template in [constants.DT_PLAIN,
                                                 constants.DT_FILE]:
        self.BurnMove()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      if opts.do_addremove_nics:
        self.BurnAddRemoveNICs()

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:
          if has_err: # already detected errors, so errors in removal
                      # are quite expected
            Log("Note: error detected during instance remove: %s" % str(err))
          else: # non-expected error
            raise

    return 0


def main():
  """Main function"""

  burner = Burner()
  return burner.BurninCluster()


if __name__ == "__main__":
  sys.exit(main())