burnin 31.4 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
2
3
#!/usr/bin/python
#

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
25

26
import os
Iustin Pop's avatar
Iustin Pop committed
27
28
import sys
import optparse
Iustin Pop's avatar
Iustin Pop committed
29
import time
30
import socket
31
import urllib
32
from itertools import izip, islice, cycle
Iustin Pop's avatar
Iustin Pop committed
33
from cStringIO import StringIO
Iustin Pop's avatar
Iustin Pop committed
34
35
36
37

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
38
39
from ganeti import errors
from ganeti import utils
Iustin Pop's avatar
Iustin Pop committed
40

41

42
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
Iustin Pop's avatar
Iustin Pop committed
43

44
MAX_RETRIES = 3
45

46
47
48
49
class InstanceDown(Exception):
  """The checked instance was not up.

  Raised by the HTTP liveness check (SimpleOpener/_CheckInstanceAlive)
  when an instance cannot be contacted or reports a wrong hostname.

  """


50
51
52
53
class BurninFailure(Exception):
  """Failure detected during burning.

  Raised when at least one job of a parallel job set failed
  (see ExecJobSet).

  """


Iustin Pop's avatar
Iustin Pop committed
54
55
56
57
58
59
60
def Usage():
  """Print the usage banner to stderr and exit with status 2."""
  for line in ("Usage:", USAGE):
    sys.stderr.write(line + "\n")
  sys.exit(2)

61

62
def Log(msg, indent=0):
  """Write a message to stdout, formatted according to its indent level.

  Level 0 gets a "- " bullet, level 1 a "* " bullet, level 2 none, and
  deeper levels a plain two-space continuation marker; each level also
  adds two columns of leading indentation.

  """
  bullet_by_level = {
    0: "- ",
    1: "* ",
    2: ""
    }
  bullet = bullet_by_level.get(indent, "  ")
  line = "%*s%s%s\n" % (2 * indent, "", bullet, msg)
  sys.stdout.write(line)
  # flush so that output interleaves correctly with job feedback
  sys.stdout.flush()
Iustin Pop's avatar
Iustin Pop committed
74

75
76
77
78
79
80
81
def Err(msg, exit_code=1):
  """Report an error on stderr and terminate the program.

  @param msg: the error text to print
  @param exit_code: process exit status (defaults to 1)

  """
  sys.stderr.write(msg + "\n")
  # flush before exiting so the message is never lost
  sys.stderr.flush()
  sys.exit(exit_code)
82

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""
  # Used by Burner._CheckInstanceAlive to fetch /hostname.txt from the
  # instances; converts HTTP errors into InstanceDown exceptions.

  def prompt_user_passwd(self, host, realm, clear_cache = 0):
    """No-interaction version of prompt_user_passwd."""
    # never prompt for credentials; anonymous access only
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read() # throw away data
    fp.close()
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))


101
102
103
104
105
class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor.

    Sets up logging, initializes the internal state, parses the
    command line (which may exit the program on errors) and finally
    queries the cluster for nodes and OS definitions.

    """
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    # buffer accumulating job feedback messages (see Feedback)
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    # instances queued for removal at the end of the burnin
    self.to_rem = []
    # opcodes collected between StartBatch and CommitQueue
    self.queued_ops = []
    self.opts = None
    # whether the current batch may be retried on failure
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

Iustin Pop's avatar
Iustin Pop committed
121
122
123
124
125
126
127
128
129
130
  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate a feedback message in our buffer.

    The message is a (timestamp, level, text) tuple; it is stored
    timestamped in the buffer and, in verbose mode, echoed via Log.

    """
    stamped = "%s %s\n" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(stamped)
    if self.opts.verbose:
      Log(msg, indent=3)
Iustin Pop's avatar
Iustin Pop committed
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
        Log("Idempotent %s succeeded after %d retries" %
            (msg, MAX_RETRIES - retry_count))
      return val
    except Exception, err:
      if retry_count == 0:
        Log("Non-idempotent %s failed, aborting" % (msg, ))
        raise
      elif retry_count == 1:
        Log("Idempotent %s repeated failure, aborting" % (msg, ))
        raise
      else:
        Log("Idempotent %s failed, retry #%d/%d: %s" %
            (msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err))
        self.MaybeRetry(retry_count - 1, msg, fn, *args)

  def _ExecOp(self, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @result: if only opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    # a single opcode gets its bare result, several get the full list
    if len(ops) == 1:
      return results[0]
    return results

182
183
184
185
186
187
188
189
190
191
192
193
194
  def ExecOp(self, retry, *ops):
    """Execute one or more opcodes and manage the exec buffer.

    @param retry: whether the opcode execution may be retried up to
        MAX_RETRIES times
    @result: if only opcode has been passed, we return its result;
        otherwise we return the list of results

    """
    if retry:
      initial_count = MAX_RETRIES
    else:
      initial_count = 0
    return self.MaybeRetry(initial_count, "opcode", self._ExecOp, *ops)

195
  def ExecOrQueue(self, name, *ops):
Iustin Pop's avatar
Iustin Pop committed
196
    """Execute an opcode and manage the exec buffer."""
197
198
199
    if self.opts.parallel:
      self.queued_ops.append((ops, name))
    else:
200
201
202
203
204
205
206
207
208
209
      return self.ExecOp(self.queue_retry, *ops)

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    self.queued_ops = []
    self.queue_retry = retry
210
211
212
213
214
215

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin"""
    if not self.opts.parallel:
      return

216
217
218
219
220
    if self.queue_retry:
      rval = MAX_RETRIES
    else:
      rval = 0

221
    try:
222
223
      results = self.MaybeRetry(rval, "jobset", self.ExecJobSet,
                                self.queued_ops)
224
225
226
    finally:
      self.queued_ops = []
    return results
227
228
229
230
231

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
232
    successful. Otherwise, OpExecError will be raised from within
233
234
235
236
    cli.py.

    """
    self.ClearFeedbackBuf()
237
238
    job_ids = [cli.SendJob(row[0], cl=self.cl) for row in jobs]
    Log("Submitted job ID(s) %s" % ", ".join(job_ids), indent=1)
239
    results = []
240
241
    for jid, (_, iname) in zip(job_ids, jobs):
      Log("waiting for job %s for %s" % (jid, iname), indent=2)
242
243
244
245
246
247
      try:
        results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))
      except Exception, err:
        Log("Job for %s failed: %s" % (iname, err))
    if len(results) != len(jobs):
      raise BurninFailure()
248
    return results
Iustin Pop's avatar
Iustin Pop committed
249

250
251
252
253
254
255
256
257
258
259
260
261
  def _DoCheckInstances(fn):
    """Decorator for checking instances.

    Note: applied at class-definition time, hence no 'self' parameter
    here; the wrapped method runs normally and afterwards every
    instance in self.instances is verified via _CheckInstanceAlive.

    """
    def wrapper(self, *args, **kwargs):
      val = fn(self, *args, **kwargs)
      for instance in self.instances:
        self._CheckInstanceAlive(instance)
      return val

    return wrapper

262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
  def _DoBatch(retry):
    """Decorator for possible batch operations.

    Must come after the _DoCheckInstances decorator (if any).

    Note: applied at class-definition time, hence no 'self'; this is a
    decorator factory taking the retry flag and returning the actual
    decorator.

    @param retry: whether this is a retryable batch, will be
        passed to StartBatch

    """
    def wrap(fn):
      def batched(self, *args, **kwargs):
        # open a batch, run the method (which queues ops in parallel
        # mode), then submit everything via CommitQueue
        self.StartBatch(retry)
        val = fn(self, *args, **kwargs)
        self.CommitQueue()
        return val
      return batched

    return wrap

281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--disk-size", dest="disk_size",
                      help="Disk size (determines disk count)",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--disk-growth", dest="disk_growth", help="Disk growth",
                      default="128m", type="string", metavar="<size,size,...>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    # the --no-* options below all default to True and use store_false,
    # i.e. each one disables a single burnin phase
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-migrate", dest="do_migrate",
                      help="Skip instance live migration",
                      action="store_false", default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--no-reinstall", dest="do_reinstall",
                      help="Skip instance reinstall", action="store_false",
                      default=True)
    parser.add_option("--no-reboot", dest="do_reboot",
                      help="Skip instance reboot", action="store_false",
                      default=True)
    parser.add_option("--no-activate-disks", dest="do_activate_disks",
                      help="Skip disk activation/deactivation",
                      action="store_false", default=True)
    parser.add_option("--no-add-disks", dest="do_addremove_disks",
                      help="Skip disk addition/removal",
                      action="store_false", default=True)
    parser.add_option("--no-add-nics", dest="do_addremove_nics",
                      help="Skip NIC addition/removal",
                      action="store_false", default=True)
    parser.add_option("--no-nics", dest="nics",
                      help="No network interfaces", action="store_const",
                      const=[], default=[{}])
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                            " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("-I", "--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")
    parser.add_option("--net-timeout", default=15, type="int",
                      dest="net_timeout",
                      help="The instance check network timeout in seconds"
                      " (defaults to 15 seconds)")
    parser.add_option("-C", "--http-check", default=False, action="store_true",
                      dest="http_check",
                      help="Enable checking of instance status via http,"
                      " looking for /hostname.txt that should contain the"
                      " name of the instance")
    parser.add_option("-K", "--keep-instances", default=False,
                      action="store_true",
                      dest="keep_instances",
                      help="Leave instances on the cluster after burnin,"
                      " for investigation in case of errors or simply"
                      " to use them")

    # positional arguments are the instance names; at least one is
    # required, as is the OS
    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      # the number of comma-separated sizes determines the disk count
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    # -n and -I are mutually exclusive
    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    self.opts = options
    self.instances = args

    # backend parameters used for every instance creation
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

    # bound the HTTP liveness checks (see _CheckInstanceAlive)
    socket.setdefaulttimeout(options.net_timeout)

422
423
424
425
426
427
428
  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    # keep only usable nodes: skip offline (data[1]) and drained (data[2])
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagos = opcodes.OpDiagnoseOS(output_fields=["name", "valid"], names=[])
    result = self.ExecOp(True, op_diagos)

    if not result:
      Err("Can't get the OS list")

    # filter non-valid OS-es
    os_set = [val[0] for val in result if val[1]]

    # the requested OS must exist and be valid on the cluster
    if self.opts.os not in os_set:
      Err("OS '%s' not found" % self.opts.os)
448

449
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    # round-robin over the nodes: each instance gets a primary node and
    # the following node in the cycle as its secondary
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        # let the iallocator choose the nodes
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        # non-mirrored templates need no secondary node
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disks = [ {"size": size}
                                              for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      self.ExecOrQueue(instance, op)
      # remember the instance for later removal in BurnRemove
      self.to_rem.append(instance)

496
  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow both the os and the swap disks by the requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      for disk_idx, growth in enumerate(self.disk_growth):
        if growth <= 0:
          # no growth requested for this disk
          continue
        grow_op = opcodes.OpGrowDisk(instance_name=instance, disk=disk_idx,
                                     amount=growth, wait_for_sync=True)
        Log("increase disk/%s by %s MB" % (disk_idx, growth), indent=2)
        self.ExecOrQueue(instance, grow_op)
Iustin Pop's avatar
Iustin Pop committed
508

509
  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      pending = []
      # first the secondary, then the primary side
      for mode in (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI):
        replace_op = opcodes.OpReplaceDisks(instance_name=instance,
                                            mode=mode,
                                            disks=list(range(self.disk_count)))
        Log("run %s" % mode, indent=2)
        pending.append(replace_op)
      self.ExecOrQueue(instance, *pending)
523

524
  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    # pick for each instance a node two slots further in the cycle as
    # the new secondary
    work_iter = izip(islice(cycle(self.nodes), 2, None),
                     self.instances)
    for new_node, instance in work_iter:
      Log("instance %s" % instance, indent=1)
      if self.opts.iallocator:
        new_node = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = new_node
      replace_op = opcodes.OpReplaceDisks(instance_name=instance,
                                          mode=mode,
                                          remote_node=new_node,
                                          iallocator=self.opts.iallocator,
                                          disks=[])
      Log("run %s %s" % (mode, msg), indent=2)
      self.ExecOrQueue(instance, replace_op)
546

547
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      failover_op = opcodes.OpFailoverInstance(instance_name=instance,
                                               ignore_consistency=False)
      self.ExecOrQueue(instance, failover_op)
558

559
  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      # a live migration followed by its cleanup pass
      migrate_op = opcodes.OpMigrateInstance(instance_name=instance,
                                             live=True, cleanup=False)
      cleanup_op = opcodes.OpMigrateInstance(instance_name=instance,
                                             live=True, cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(instance, migrate_op, cleanup_op)
572

573
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    # per instance: primary node, secondary node and export target node,
    # taken from consecutive positions in the node cycle
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s" % instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        # the iallocator will pick the target nodes
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        # non-mirrored templates have no secondary
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                           target_node=enode,
                                           shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      # the export lives under EXPORT_DIR, keyed by the full name
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disks = [ {"size": size}
                                                  for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("export to node %s" % enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      # export, remove, re-import, then clean up the export
      self.ExecOrQueue(instance, exp_op, rem_op, imp_op, erem_op)
638

639
  def StopInstanceOp(self, instance):
    """Build and return a shutdown opcode for the given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    return op
642

643
  def StartInstanceOp(self, instance):
    """Build and return a (non-forced) startup opcode for the instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    return op
646

647
  def RenameInstanceOp(self, instance, instance_new):
    """Build and return an opcode renaming instance to instance_new."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    return op
651

652
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Stop/start the instances."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      stop_op = self.StopInstanceOp(instance)
      start_op = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, stop_op, start_op)

663
  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances."""
    Log("Removing instances")
    # only instances we created ourselves (see BurnCreateInstances)
    for instance in self.to_rem:
      Log("instance %s" % instance, indent=1)
      remove_op = opcodes.OpRemoveInstance(instance_name=instance,
                                           ignore_failures=True)
      self.ExecOrQueue(instance, remove_op)

  def BurnRename(self):
    """Rename the instances.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    rename = self.opts.rename
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      op_stop1 = self.StopInstanceOp(instance)
      op_stop2 = self.StopInstanceOp(rename)
      op_rename1 = self.RenameInstanceOp(instance, rename)
      op_rename2 = self.RenameInstanceOp(rename, instance)
      op_start1 = self.StartInstanceOp(rename)
      op_start2 = self.StartInstanceOp(instance)
      # phase 1: rename to the spare name and verify it comes up
      self.ExecOp(False, op_stop1, op_rename1, op_start1)
      self._CheckInstanceAlive(rename)
      # phase 2: rename back to the original name and verify again
      self.ExecOp(False, op_stop2, op_rename2, op_start2)
      self._CheckInstanceAlive(instance)

695
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall the instances."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      stop_op = self.StopInstanceOp(instance)
      reinstall_default_op = opcodes.OpReinstallInstance(
        instance_name=instance)
      Log("reinstall without passing the OS", indent=2)
      reinstall_os_op = opcodes.OpReinstallInstance(instance_name=instance,
                                                    os_type=self.opts.os)
      Log("reinstall specifying the OS", indent=2)
      start_op = self.StartInstanceOp(instance)
      self.ExecOrQueue(instance, stop_op, reinstall_default_op,
                       reinstall_os_op, start_op)

711
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot the instances."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      reboot_ops = []
      # exercise every supported reboot type in one job
      for reboot_type in constants.REBOOT_TYPES:
        reboot_op = opcodes.OpRebootInstance(instance_name=instance,
                                             reboot_type=reboot_type,
                                             ignore_secondaries=False)
        Log("reboot with type '%s'" % reboot_type, indent=2)
        reboot_ops.append(reboot_op)
      self.ExecOrQueue(instance, *reboot_ops)

727
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Activate and deactivate disks of the instances."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      start_op = self.StartInstanceOp(instance)
      activate_op = opcodes.OpActivateInstanceDisks(instance_name=instance)
      deactivate_op = \
        opcodes.OpDeactivateInstanceDisks(instance_name=instance)
      stop_op = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      # activate while running, stop, activate and deactivate while
      # stopped, then start again
      self.ExecOrQueue(instance, activate_op, stop_op, activate_op,
                       deactivate_op, start_op)
742

743
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Add and remove an extra disk for the instances."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      add_op = opcodes.OpSetInstanceParams(
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      rem_op = opcodes.OpSetInstanceParams(
        instance_name=instance, disks=[(constants.DDM_REMOVE, {})])
      stop_op = self.StopInstanceOp(instance)
      start_op = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      # the removal is queued after a stop and followed by a start
      self.ExecOrQueue(instance, add_op, stop_op, rem_op, start_op)
760

761
  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Hot-add an extra NIC to each instance, then remove it again."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s" % instance, indent=1)
      # an empty parameter dict means "add a NIC with default settings";
      # removal always drops the last NIC, i.e. the one just added
      add_op = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_ADD, {})])
      rem_op = opcodes.OpSetInstanceParams(
        instance_name=instance, nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, add_op, rem_op)
774

775
776
777
778
779
780
781
782
783
784
785
  def _CheckInstanceAlive(self, instance):
    """Verify that an instance answers HTTP and reports its own hostname.

    Fetches /hostname.txt from the instance, retrying once per second on
    IOError (connection refused, no route to host, ...) until the
    configured network timeout expires; any non-IOError propagates to
    the caller. Raises InstanceDown when the instance cannot be
    contacted in time or reports a hostname different from its own name.

    """
    # the whole check is optional and controlled by a command-line flag
    if not self.opts.http_check:
      return
    deadline = time.time() + self.opts.net_timeout
    resp = None
    while resp is None and time.time() < deadline:
      try:
        resp = self.url_opener.open("http://%s/hostname.txt" % instance)
      except IOError:
        # transient network errors: back off briefly and retry
        time.sleep(1)
    if resp is None:
      raise InstanceDown(instance, "Cannot contact instance")
    hostname = resp.read().strip()
    resp.close()
    if hostname != instance:
      raise InstanceDown(instance, ("Hostname mismatch, expected %s, got %s" %
                                    (instance, hostname)))

802
803
804
805
806
807
808
809
810
811
  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    Runs each burnin phase in sequence, gated by the corresponding
    command-line option and by whether the selected disk template
    supports the operation.  Returns 0 on success.

    """

    opts = self.opts

    Log("Testing global parameters")

    # a single node cannot host network-mirrored (e.g. DRBD) templates
    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Err("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")

    # has_err stays True unless we reach the end of the try block, so the
    # finally clause can tell a clean run from an aborted one
    has_err = True
    try:
      self.BurnCreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnReplaceDisks1D8()
      # secondary-node replacement needs a third node to move to
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR) :
        self.BurnReplaceDisks2()

      # only grow disks if at least one growth amount is positive
      if (opts.disk_template != constants.DT_DISKLESS and
          utils.any(self.disk_growth, lambda n: n > 0)):
        self.BurnGrowDisks()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.BurnFailover()

      # live migration is only supported on DRBD8
      if opts.do_migrate and opts.disk_template == constants.DT_DRBD8:
        self.BurnMigrate()

      if (opts.do_importexport and
          opts.disk_template not in (constants.DT_DISKLESS,
                                     constants.DT_FILE)):
        self.BurnImportExport()

      if opts.do_reinstall:
        self.BurnReinstall()

      if opts.do_reboot:
        self.BurnReboot()

      if opts.do_addremove_disks:
        self.BurnAddRemoveDisks()

      if opts.do_addremove_nics:
        self.BurnAddRemoveNICs()

      if opts.do_activate_disks:
        self.BurnActivateDisks()

      if opts.rename:
        self.BurnRename()

      if opts.do_startstop:
        self.BurnStopStart()

      has_err = False
    finally:
      # on failure, dump the accumulated opcode feedback for diagnosis
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      # clean up the burnin instances unless the user asked to keep them
      if not self.opts.keep_instances:
        try:
          self.BurnRemove()
        except Exception, err:
          if has_err: # already detected errors, so errors in removal
                      # are quite expected
            Log("Note: error detected during instance remove: %s" % str(err))
          else: # non-expected error
            raise

    return 0
Iustin Pop's avatar
Iustin Pop committed
882

883

Iustin Pop's avatar
Iustin Pop committed
884
def main():
  """Entry point: run the full burnin sequence and return its status."""
  return Burner().BurninCluster()
Iustin Pop's avatar
Iustin Pop committed
889

890

Iustin Pop's avatar
Iustin Pop committed
891
if __name__ == "__main__":
  # propagate the burnin result as the process exit code; the original
  # discarded main()'s return value, so a non-zero status was lost
  sys.exit(main())