burnin 38.3 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
2
3
#!/usr/bin/python
#

4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
25

Iustin Pop's avatar
Iustin Pop committed
26
27
import sys
import optparse
Iustin Pop's avatar
Iustin Pop committed
28
import time
29
import socket
30
import urllib
31
from itertools import izip, islice, cycle
Iustin Pop's avatar
Iustin Pop committed
32
from cStringIO import StringIO
Iustin Pop's avatar
Iustin Pop committed
33
34
35
36

from ganeti import opcodes
from ganeti import constants
from ganeti import cli
37
38
from ganeti import errors
from ganeti import utils
39
from ganeti import hypervisor
40
from ganeti import compat
41
42

from ganeti.confd import client as confd_client
Iustin Pop's avatar
Iustin Pop committed
43

44

45
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")
Iustin Pop's avatar
Iustin Pop committed
46

47
MAX_RETRIES = 3
Iustin Pop's avatar
Iustin Pop committed
48
49
50
51
52
LOG_HEADERS = {
  0: "- ",
  1: "* ",
  2: ""
  }
53

54
55
56
57
# Raised when a liveness check finds a burnin instance not running.
class InstanceDown(Exception):
  """The checked instance was not up"""


58
59
60
61
# Raised when a job set or a post-processing hook fails during burnin.
class BurninFailure(Exception):
  """Failure detected during burning"""


Iustin Pop's avatar
Iustin Pop committed
62
63
64
65
66
67
68
def Usage():
  """Shows program usage information and exits the program.

  Exits with status 2 (command-line usage error).

  """
  # Write directly to stderr, matching the style of Err() below, instead
  # of the Python-2-only "print >>" statement; output is identical.
  sys.stderr.write("Usage:\n")
  sys.stderr.write(USAGE + "\n")
  sys.exit(2)

69

Iustin Pop's avatar
Iustin Pop committed
70
def Log(msg, *args, **kwargs):
  """Print a possibly-indented message to stdout and flush it.

  @param msg: message (used as a format string when ``args`` are given)
  @param kwargs: only ``indent`` is honoured: indentation level (0, 1
      or 2), selecting the matching header from LOG_HEADERS and two
      spaces of indentation per level

  """
  if args:
    msg = msg % args
  level = kwargs.get('indent', 0)
  header = LOG_HEADERS.get(level, "  ")
  sys.stdout.write("%*s%s%s\n" % (2 * level, "", header, msg))
  sys.stdout.flush()
Iustin Pop's avatar
Iustin Pop committed
80

Iustin Pop's avatar
Iustin Pop committed
81

82
83
84
85
86
87
88
def Err(msg, exit_code=1):
  """Write an error message to stderr and terminate the process.

  @param msg: the message to print (a newline is appended)
  @param exit_code: process exit status, 1 by default

  """
  stream = sys.stderr
  stream.write(msg + "\n")
  stream.flush()
  sys.exit(exit_code)
89

90
91
92

class SimpleOpener(urllib.FancyURLopener):
  """A simple url opener"""
  # pylint: disable-msg=W0221

  def prompt_user_passwd(self, host, realm, clear_cache=0):
    """No-interaction version of prompt_user_passwd."""
    # we follow parent class' API
    # pylint: disable-msg=W0613
    # Returning (None, None) means "no credentials", so authentication
    # challenges fail instead of blocking on interactive input.
    return None, None

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Custom error handling"""
    # make sure sockets are not left in CLOSE_WAIT, this is similar
    # but with a different exception to the BasicURLOpener class
    _ = fp.read() # throw away data
    fp.close()
    # any non-success HTTP status is treated as "instance is down"
    raise InstanceDown("HTTP error returned: code %s, msg %s" %
                       (errcode, errmsg))


111
112
113
# Command-line options understood by burnin; each "--no-*" flag disables
# one burnin step (all steps are enabled by default).
OPTIONS = [
  cli.cli_option("-o", "--os", dest="os", default=None,
                 help="OS to use during burnin",
                 metavar="<OS>",
                 completion_suggest=cli.OPT_COMPL_ONE_OS),
  cli.HYPERVISOR_OPT,
  cli.OSPARAMS_OPT,
  # comma-separated sizes: the number of entries determines the number
  # of disks each instance gets
  cli.cli_option("--disk-size", dest="disk_size",
                 help="Disk size (determines disk count)",
                 default="128m", type="string", metavar="<size,size,...>",
                 completion_suggest=("128M 512M 1G 4G 1G,256M"
                                     " 4G,1G,1G 10G").split()),
  cli.cli_option("--disk-growth", dest="disk_growth", help="Disk growth",
                 default="128m", type="string", metavar="<size,size,...>"),
  cli.cli_option("--mem-size", dest="mem_size", help="Memory size",
                 default=128, type="unit", metavar="<size>",
                 completion_suggest=("128M 256M 512M 1G 4G 8G"
                                     " 12G 16G").split()),
  cli.DEBUG_OPT,
  cli.VERBOSE_OPT,
  cli.NOIPCHECK_OPT,
  cli.NONAMECHECK_OPT,
  cli.EARLY_RELEASE_OPT,
  cli.cli_option("--no-replace1", dest="do_replace1",
                 help="Skip disk replacement with the same secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-replace2", dest="do_replace2",
                 help="Skip disk replacement with a different secondary",
                 action="store_false", default=True),
  cli.cli_option("--no-failover", dest="do_failover",
                 help="Skip instance failovers", action="store_false",
                 default=True),
  cli.cli_option("--no-migrate", dest="do_migrate",
                 help="Skip instance live migration",
                 action="store_false", default=True),
  cli.cli_option("--no-move", dest="do_move",
                 help="Skip instance moves", action="store_false",
                 default=True),
  cli.cli_option("--no-importexport", dest="do_importexport",
                 help="Skip instance export/import", action="store_false",
                 default=True),
  cli.cli_option("--no-startstop", dest="do_startstop",
                 help="Skip instance stop/start", action="store_false",
                 default=True),
  cli.cli_option("--no-reinstall", dest="do_reinstall",
                 help="Skip instance reinstall", action="store_false",
                 default=True),
  cli.cli_option("--no-reboot", dest="do_reboot",
                 help="Skip instance reboot", action="store_false",
                 default=True),
  cli.cli_option("--reboot-types", dest="reboot_types",
                 help="Specify the reboot types", default=None),
  cli.cli_option("--no-activate-disks", dest="do_activate_disks",
                 help="Skip disk activation/deactivation",
                 action="store_false", default=True),
  cli.cli_option("--no-add-disks", dest="do_addremove_disks",
                 help="Skip disk addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-add-nics", dest="do_addremove_nics",
                 help="Skip NIC addition/removal",
                 action="store_false", default=True),
  cli.cli_option("--no-nics", dest="nics",
                 help="No network interfaces", action="store_const",
                 const=[], default=[{}]),
  cli.cli_option("--no-confd", dest="do_confd_tests",
                 help="Skip confd queries",
                 action="store_false", default=True),
  cli.cli_option("--rename", dest="rename", default=None,
                 help=("Give one unused instance name which is taken"
                       " to start the renaming sequence"),
                 metavar="<instance_name>"),
  cli.cli_option("-t", "--disk-template", dest="disk_template",
                 choices=list(constants.DISK_TEMPLATES),
                 default=constants.DT_DRBD8,
                 help="Disk template (diskless, file, plain or drbd) [drbd]"),
  cli.cli_option("-n", "--nodes", dest="nodes", default="",
                 help=("Comma separated list of nodes to perform"
                       " the burnin on (defaults to all nodes)"),
                 completion_suggest=cli.OPT_COMPL_MANY_NODES),
  cli.cli_option("-I", "--iallocator", dest="iallocator",
                 default=None, type="string",
                 help=("Perform the allocation using an iallocator"
                       " instead of fixed node spread (node restrictions no"
                       " longer apply, therefore -n/--nodes must not be"
                       " used"),
                 completion_suggest=cli.OPT_COMPL_ONE_IALLOCATOR),
  cli.cli_option("-p", "--parallel", default=False, action="store_true",
                 dest="parallel",
                 help=("Enable parallelization of some operations in"
                       " order to speed burnin or to test granular locking")),
  cli.cli_option("--net-timeout", default=15, type="int",
                 dest="net_timeout",
                 help=("The instance check network timeout in seconds"
                       " (defaults to 15 seconds)"),
                 completion_suggest="15 60 300 900".split()),
  cli.cli_option("-C", "--http-check", default=False, action="store_true",
                 dest="http_check",
                 help=("Enable checking of instance status via http,"
                       " looking for /hostname.txt that should contain the"
                       " name of the instance")),
  cli.cli_option("-K", "--keep-instances", default=False,
                 action="store_true",
                 dest="keep_instances",
                 help=("Leave instances on the cluster after burnin,"
                       " for investigation in case of errors or simply"
                       " to use them")),
  ]

# Mainly used for bash completion
ARGUMENTS = [cli.ArgInstance(min=1)]


223
224
225
226
227
228
229
def _DoCheckInstances(fn):
  """Decorator for checking instances.

  """
  def wrapper(self, *args, **kwargs):
    val = fn(self, *args, **kwargs)
    for instance in self.instances:
Iustin Pop's avatar
Iustin Pop committed
230
      self._CheckInstanceAlive(instance) # pylint: disable-msg=W0212
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
    return val

  return wrapper


def _DoBatch(retry):
  """Decorator for possible batch operations.

  Must come after the _DoCheckInstances decorator (if any).

  @param retry: whether this is a retryable batch, will be
      passed to StartBatch

  """
  def wrap(fn):
    def batched(self, *args, **kwargs):
      self.StartBatch(retry)
      val = fn(self, *args, **kwargs)
      self.CommitQueue()
      return val
    return batched

  return wrap


256
257
258
259
260
class Burner(object):
  """Burner class."""

  def __init__(self):
    """Constructor."""
    utils.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self.url_opener = SimpleOpener()
    self._feed_buf = StringIO()  # accumulated job feedback text
    self.nodes = []       # usable node names (filled in by GetState)
    self.instances = []   # instance names from the command line
    self.to_rem = []      # instances created and pending later removal
    self.queued_ops = []  # pending (ops, name, post_process) tuples
    self.opts = None      # parsed options (set by ParseOptions)
    self.queue_retry = False
    self.disk_count = self.disk_growth = self.disk_size = None
    self.hvp = self.bep = None
    # ParseOptions must run first: GetState reads self.opts
    self.ParseOptions()
    self.cl = cli.GetClient()
    self.GetState()

Iustin Pop's avatar
Iustin Pop committed
276
277
278
279
280
281
282
283
284
285
  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    # NOTE(review): relies on truncate(0) also resetting the write
    # position (cStringIO behavior) -- confirm if the buffer type changes.
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    # Everything recorded by Feedback() since the last ClearFeedbackBuf().
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer.

    @param msg: feedback tuple from the job machinery; element 0 is a
        timestamp (converted via utils.MergeTime) and element 2 is the
        message text; element 1 is not used here

    """
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    # in verbose mode also mirror the message to stdout immediately
    if self.opts.verbose:
      Log(formatted_msg, indent=3)
Iustin Pop's avatar
Iustin Pop committed
290

291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
  def MaybeRetry(self, retry_count, msg, fn, *args):
    """Possibly retry a given function execution.

    @type retry_count: int
    @param retry_count: retry counter:
        - 0: non-retryable action
        - 1: last retry for a retryable action
        - MAX_RETRIES: original try for a retryable action
    @type msg: str
    @param msg: the kind of the operation
    @type fn: callable
    @param fn: the function to be called

    """
    try:
      val = fn(*args)
      if retry_count > 0 and retry_count < MAX_RETRIES:
Iustin Pop's avatar
Iustin Pop committed
308
309
        Log("Idempotent %s succeeded after %d retries",
            msg, MAX_RETRIES - retry_count)
310
      return val
Iustin Pop's avatar
Iustin Pop committed
311
    except Exception, err: # pylint: disable-msg=W0703
312
      if retry_count == 0:
Iustin Pop's avatar
Iustin Pop committed
313
        Log("Non-idempotent %s failed, aborting", msg)
314
315
        raise
      elif retry_count == 1:
Iustin Pop's avatar
Iustin Pop committed
316
        Log("Idempotent %s repeated failure, aborting", msg)
317
318
        raise
      else:
Iustin Pop's avatar
Iustin Pop committed
319
320
        Log("Idempotent %s failed, retry #%d/%d: %s",
            msg, MAX_RETRIES - retry_count + 1, MAX_RETRIES, err)
321
322
        self.MaybeRetry(retry_count - 1, msg, fn, *args)

Iustin Pop's avatar
Iustin Pop committed
323
324
325
326
327
  def _SetDebug(self, ops):
    """Set the debug value on the given opcodes"""
    # apply the command-line debug level uniformly to every opcode
    debug_level = self.opts.debug
    for op_obj in ops:
      op_obj.debug_level = debug_level

328
  def _ExecOp(self, *ops):
    """Submit one or more opcodes as a single job and wait for it.

    @return: the single opcode's result when exactly one opcode was
        given, otherwise the list of all results

    """
    job_id = cli.SendJob(ops, cl=self.cl)
    results = cli.PollJob(job_id, cl=self.cl, feedback_fn=self.Feedback)
    return results[0] if len(ops) == 1 else results

342
343
344
  def ExecOp(self, retry, *ops):
    """Execute opcodes, optionally allowing retries.

    @param retry: when true, allow up to MAX_RETRIES attempts
    @return: the single opcode's result when exactly one opcode was
        given, otherwise the list of all results

    """
    attempts = MAX_RETRIES if retry else 0
    self._SetDebug(ops)
    return self.MaybeRetry(attempts, "opcode", self._ExecOp, *ops)

356
  def ExecOrQueue(self, name, ops, post_process=None):
    """Execute an opcode now, or queue it for a parallel batch.

    In non-parallel mode the opcodes run immediately and the hook is
    invoked; in parallel mode they are only queued (returning None).

    """
    if not self.opts.parallel:
      val = self.ExecOp(self.queue_retry, *ops) # pylint: disable-msg=W0142
      if post_process is not None:
        post_process()
      return val
    self._SetDebug(ops)
    self.queued_ops.append((ops, name, post_process))
366
367
368
369
370
371
372
373
374

  def StartBatch(self, retry):
    """Start a new batch of jobs.

    @param retry: whether this is a retryable batch

    """
    # any previously queued but uncommitted operations are discarded
    self.queued_ops = []
    self.queue_retry = retry
375
376
377

  def CommitQueue(self):
    """Execute all submitted opcodes in case of parallel burnin.

    No-op outside parallel mode or with an empty queue; the queue is
    always emptied afterwards, even on failure.

    """
    if not self.opts.parallel or not self.queued_ops:
      return

    retry_budget = MAX_RETRIES if self.queue_retry else 0

    pending = self.queued_ops
    try:
      return self.MaybeRetry(retry_budget, "jobset", self.ExecJobSet,
                             pending)
    finally:
      self.queued_ops = []
392
393
394
395
396

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    @param jobs: list of (ops, name, post_process) tuples

    """
    self.ClearFeedbackBuf()
    jex = cli.JobExecutor(cl=self.cl, feedback_fn=self.Feedback)
    for ops, name, _ in jobs:
      jex.QueueJob(name, *ops) # pylint: disable-msg=W0142
    try:
      results = jex.GetResults()
    except Exception, err: # pylint: disable-msg=W0703
      Log("Jobs failed: %s", err)
      raise BurninFailure()

    # walk jobs and results in lockstep; a failed job or a failed
    # post-process hook marks the whole set as failed, but remaining
    # hooks still run
    fail = False
    val = []
    for (_, name, post_process), (success, result) in zip(jobs, results):
      if success:
        if post_process:
          try:
            post_process()
          except Exception, err: # pylint: disable-msg=W0703
            Log("Post process call for job %s failed: %s", name, err)
            fail = True
        val.append(result)
      else:
        fail = True

    if fail:
      raise BurninFailure()

    return val
Iustin Pop's avatar
Iustin Pop committed
429

430
431
432
433
434
435
436
437
  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version=("%%prog (ganeti) %s" %
                                            constants.RELEASE_VERSION),
                                   option_list=OPTIONS)

    options, args = parser.parse_args()
    # at least one instance name and an OS are mandatory
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)

    if options.disk_template == constants.DT_DISKLESS:
      # diskless instances get no disks and cannot do disk add/remove
      disk_size = disk_growth = []
      options.do_addremove_disks = False
    else:
      # the number of comma-separated sizes defines the disk count;
      # growth values must match one-to-one
      disk_size = [utils.ParseUnit(v) for v in options.disk_size.split(",")]
      disk_growth = [utils.ParseUnit(v)
                     for v in options.disk_growth.split(",")]
      if len(disk_growth) != len(disk_size):
        Err("Wrong disk sizes/growth combination")
    if ((disk_size and options.disk_template == constants.DT_DISKLESS) or
        (not disk_size and options.disk_template != constants.DT_DISKLESS)):
      Err("Wrong disk count/disk template combination")

    self.disk_size = disk_size
    self.disk_growth = disk_growth
    self.disk_count = len(disk_size)

    if options.nodes and options.iallocator:
      Err("Give either the nodes option or the iallocator option, not both")

    if options.http_check and not options.name_check:
      Err("Can't enable HTTP checks without name checks")

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }

    self.hypervisor = None
    self.hvp = {}
    if options.hypervisor:
      self.hypervisor, self.hvp = options.hypervisor

    # default to all known reboot types; otherwise validate the
    # user-supplied comma-separated list
    if options.reboot_types is None:
      options.reboot_types = constants.REBOOT_TYPES
    else:
      options.reboot_types = options.reboot_types.split(",")
      rt_diff = set(options.reboot_types).difference(constants.REBOOT_TYPES)
      if rt_diff:
        Err("Invalid reboot types specified: %s" % utils.CommaJoin(rt_diff))

    # global socket timeout used by the HTTP instance checks
    socket.setdefaulttimeout(options.net_timeout)

498
  def GetState(self):
    """Read the cluster state from the master daemon.

    Fills in self.nodes, self.cluster_info,
    self.cluster_default_nicparams, self.hypervisor and self.hv_class,
    and validates that the requested OS exists.

    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name", "offline", "drained"],
                                names=names, use_locking=True)
      result = self.ExecOp(True, op)
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
      Err(msg, exit_code=err_code)
    # keep only nodes that are neither offline nor drained
    self.nodes = [data[0] for data in result if not (data[1] or data[2])]

    op_diagnose = opcodes.OpDiagnoseOS(output_fields=["name",
                                                      "variants",
                                                      "hidden"],
                                       names=[])
    result = self.ExecOp(True, op_diagnose)

    if not result:
      Err("Can't get the OS list")

    # the requested OS must match one of the (name, variant) combinations
    found = False
    for (name, variants, _) in result:
      if self.opts.os in cli.CalculateOSNames(name, variants):
        found = True
        break

    if not found:
      Err("OS '%s' not found" % self.opts.os)

    cluster_info = self.cl.QueryClusterInfo()
    self.cluster_info = cluster_info
    if not self.cluster_info:
      Err("Can't get cluster info")

    default_nic_params = self.cluster_info["nicparams"][constants.PP_DEFAULT]
    self.cluster_default_nicparams = default_nic_params
    # fall back to the cluster default when -H was not given
    if self.hypervisor is None:
      self.hypervisor = self.cluster_info["default_hypervisor"]
    self.hv_class = hypervisor.GetHypervisorClass(self.hypervisor)
541

542
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnCreateInstances(self):
    """Create the given instances.

    """
    self.to_rem = []
    # pair every instance with a (pnode, snode) tuple, cycling through
    # the node list so creations are spread over the whole cluster
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)

    Log("Creating instances")
    for pnode, snode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        pnode = snode = None
        msg = "with iallocator %s" % self.opts.iallocator
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        # non-mirrored templates need no secondary node
        snode = None
        msg = "on %s" % pnode
      else:
        msg = "on %s, %s" % (pnode, snode)

      Log(msg, indent=2)

      op = opcodes.OpInstanceCreate(instance_name=instance,
                                    disks = [ {"size": size}
                                              for size in self.disk_size],
                                    disk_template=self.opts.disk_template,
                                    nics=self.opts.nics,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=self.opts.ip_check,
                                    name_check=self.opts.name_check,
                                    wait_for_sync=True,
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    hypervisor=self.hypervisor,
                                    osparams=self.opts.osparams,
                                    )
      # double lambda binds the current instance name now, so the hook
      # records the right name when it runs after job completion
      remove_instance = lambda name: lambda: self.to_rem.append(name)
      self.ExecOrQueue(instance, [op], post_process=remove_instance(instance))
590

591
  @_DoBatch(False)
  def BurnGrowDisks(self):
    """Grow both the os and the swap disks by the requested amount, if any."""
    Log("Growing disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      for idx, growth in enumerate(self.disk_growth):
        # a growth of 0 (or less) means "leave this disk alone"
        if growth > 0:
          op = opcodes.OpInstanceGrowDisk(instance_name=instance, disk=idx,
                                          amount=growth, wait_for_sync=True)
          Log("increase disk/%s by %s MB", idx, growth, indent=2)
          self.ExecOrQueue(instance, [op])
Iustin Pop's avatar
Iustin Pop committed
603

604
  @_DoBatch(True)
  def BurnReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    Log("Replacing disks on the same nodes")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      ops = []
      # replace on the secondary first, then on the primary
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=[i for i in range(self.disk_count)],
                                    early_release=self.opts.early_release)
        Log("run %s", mode, indent=2)
        ops.append(op)
      # both replacements are submitted as a single job per instance
      self.ExecOrQueue(instance, ops)
619

620
  @_DoBatch(True)
  def BurnReplaceDisks2(self):
    """Replace secondary node."""
    Log("Changing the secondary node")
    mode = constants.REPLACE_DISK_CHG

    # candidate new secondaries come from the node cycle offset by two,
    # pairing each instance with a different node
    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      Log("instance %s", instance, indent=1)
      if self.opts.iallocator:
        # let the iallocator pick the target node instead
        tnode = None
        msg = "with iallocator %s" % self.opts.iallocator
      else:
        msg = tnode
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=[],
                                  early_release=self.opts.early_release)
      Log("run %s %s", mode, msg, indent=2)
      self.ExecOrQueue(instance, [op])
643

644
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnFailover(self):
    """Failover the instances."""
    Log("Failing over instances")
    for inst_name in self.instances:
      Log("instance %s", inst_name, indent=1)
      fail_op = opcodes.OpInstanceFailover(instance_name=inst_name,
                                           ignore_consistency=False)
      self.ExecOrQueue(inst_name, [fail_op])
654

655
656
657
658
659
660
661
662
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnMove(self):
    """Move the instances."""
    Log("Moving instances")
    # each instance targets the next node in the cycle
    target_iter = izip(islice(cycle(self.nodes), 1, None),
                       self.instances)
    for dest_node, inst_name in target_iter:
      Log("instance %s", inst_name, indent=1)
      move_op = opcodes.OpInstanceMove(instance_name=inst_name,
                                       target_node=dest_node)
      self.ExecOrQueue(inst_name, [move_op])
667

668
  @_DoBatch(False)
  def BurnMigrate(self):
    """Migrate the instances."""
    Log("Migrating instances")
    for inst_name in self.instances:
      Log("instance %s", inst_name, indent=1)
      # a live migration followed by a cleanup pass, in one job
      migrate_op = opcodes.OpInstanceMigrate(instance_name=inst_name,
                                             mode=None, cleanup=False)
      cleanup_op = opcodes.OpInstanceMigrate(instance_name=inst_name,
                                             mode=None, cleanup=True)
      Log("migration and migration cleanup", indent=2)
      self.ExecOrQueue(inst_name, [migrate_op, cleanup_op])
681

682
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnImportExport(self):
    """Export the instance, delete it, and import it back.

    """
    Log("Exporting and re-importing instances")
    # pnode/snode are the (re-)creation nodes, enode holds the export
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      Log("instance %s", instance, indent=1)
      # read the full name of the instance
      nam_op = opcodes.OpInstanceQuery(output_fields=["name"],
                                       names=[instance], use_locking=True)
      full_name = self.ExecOp(False, nam_op)[0][0]

      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("import from %s"
                          " with iallocator %s" %
                          (enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("import from %s to %s" %
                          (enode, pnode))
      else:
        import_log_msg = ("import from %s to %s, %s" %
                          (enode, pnode, snode))

      exp_op = opcodes.OpBackupExport(instance_name=instance,
                                      target_node=enode,
                                      mode=constants.EXPORT_MODE_LOCAL,
                                      shutdown=True)
      rem_op = opcodes.OpInstanceRemove(instance_name=instance,
                                        ignore_failures=True)
      # the export lives under EXPORT_DIR keyed by the full instance name
      imp_dir = utils.PathJoin(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpInstanceCreate(instance_name=instance,
                                        disks = [ {"size": size}
                                                  for size in self.disk_size],
                                        disk_template=self.opts.disk_template,
                                        nics=self.opts.nics,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=self.opts.ip_check,
                                        name_check=self.opts.name_check,
                                        wait_for_sync=True,
                                        file_storage_dir=None,
                                        file_driver="loop",
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        osparams=self.opts.osparams,
                                        )

      erem_op = opcodes.OpBackupRemove(instance_name=instance)

      Log("export to node %s", enode, indent=2)
      Log("remove instance", indent=2)
      Log(import_log_msg, indent=2)
      Log("remove export", indent=2)
      # all four steps run as one job, in this exact order
      self.ExecOrQueue(instance, [exp_op, rem_op, imp_op, erem_op])
750

751
752
  @staticmethod
  def StopInstanceOp(instance):
    """Build the opcode that shuts down the given instance."""
    shutdown_op = opcodes.OpShutdownInstance(instance_name=instance)
    return shutdown_op
755

756
757
  @staticmethod
  def StartInstanceOp(instance):
    """Build the opcode that starts the given instance (without force)."""
    startup_op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    return startup_op
760

761
762
  @staticmethod
  def RenameInstanceOp(instance, instance_new):
    """Build the opcode that renames an instance to a new name."""
    rename_op = opcodes.OpRenameInstance(instance_name=instance,
                                         new_name=instance_new)
    return rename_op
766

767
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnStopStart(self):
    """Shut down and then restart every instance."""
    Log("Stopping and starting instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # One shutdown followed by one startup, queued as a single job
      ops = [self.StopInstanceOp(instance),
             self.StartInstanceOp(instance)]
      self.ExecOrQueue(instance, ops)
777

778
  @_DoBatch(False)
  def BurnRemove(self):
    """Remove the instances scheduled for removal."""
    Log("Removing instances")
    for instance in self.to_rem:
      Log("instance %s", instance, indent=1)
      # ignore_failures so burnin cleanup proceeds even on broken instances
      remove_op = opcodes.OpInstanceRemove(instance_name=instance,
                                           ignore_failures=True)
      self.ExecOrQueue(instance, [remove_op])
787
788
789

  def BurnRename(self):
    """Rename each instance to the temporary name and back.

    Note that this function will not execute in parallel, since we
    only have one target for rename.

    """
    Log("Renaming instances")
    tmp_name = self.opts.rename
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # Two symmetric passes: instance -> tmp_name, then tmp_name -> instance;
      # each pass stops the source, renames it, starts and verifies the target
      for src, dst in ((instance, tmp_name), (tmp_name, instance)):
        self.ExecOp(False,
                    self.StopInstanceOp(src),
                    self.RenameInstanceOp(src, dst),
                    self.StartInstanceOp(dst))
        self._CheckInstanceAlive(dst)

810
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReinstall(self):
    """Reinstall each instance, both with and without an explicit OS."""
    Log("Reinstalling instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      ops = [self.StopInstanceOp(instance)]
      ops.append(opcodes.OpInstanceReinstall(instance_name=instance))
      Log("reinstall without passing the OS", indent=2)
      ops.append(opcodes.OpInstanceReinstall(instance_name=instance,
                                             os_type=self.opts.os))
      Log("reinstall specifying the OS", indent=2)
      ops.append(self.StartInstanceOp(instance))
      self.ExecOrQueue(instance, ops)
825

826
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnReboot(self):
    """Reboot each instance once per configured reboot type."""
    Log("Rebooting instances")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      ops = []
      for rtype in self.opts.reboot_types:
        Log("reboot with type '%s'", rtype, indent=2)
        ops.append(opcodes.OpInstanceReboot(instance_name=instance,
                                            reboot_type=rtype,
                                            ignore_secondaries=False))
      self.ExecOrQueue(instance, ops)
841

842
  @_DoCheckInstances
  @_DoBatch(True)
  def BurnActivateDisks(self):
    """Exercise disk activation/deactivation in both instance states."""
    Log("Activating/deactivating disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      start_op = self.StartInstanceOp(instance)
      activate_op = opcodes.OpInstanceActivateDisks(instance_name=instance)
      deactivate_op = opcodes.OpInstanceDeactivateDisks(instance_name=instance)
      stop_op = self.StopInstanceOp(instance)
      Log("activate disks when online", indent=2)
      Log("activate disks when offline", indent=2)
      Log("deactivate disks (when offline)", indent=2)
      # activate while running, stop, activate+deactivate offline, restart
      self.ExecOrQueue(instance, [activate_op, stop_op, activate_op,
                                  deactivate_op, start_op])
857

858
  @_DoCheckInstances
  @_DoBatch(False)
  def BurnAddRemoveDisks(self):
    """Hot-add an extra disk to each instance and remove it again."""
    Log("Adding and removing disks")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      # new disk reuses the size of the first configured disk
      add_op = opcodes.OpSetInstanceParams(
        instance_name=instance,
        disks=[(constants.DDM_ADD, {"size": self.disk_size[0]})])
      rem_op = opcodes.OpSetInstanceParams(
        instance_name=instance,
        disks=[(constants.DDM_REMOVE, {})])
      stop_op = self.StopInstanceOp(instance)
      start_op = self.StartInstanceOp(instance)
      Log("adding a disk", indent=2)
      Log("removing last disk", indent=2)
      # removal happens while the instance is stopped
      self.ExecOrQueue(instance, [add_op, stop_op, rem_op, start_op])
875

876
  @_DoBatch(False)
  def BurnAddRemoveNICs(self):
    """Add an extra NIC to each instance and remove it again."""
    Log("Adding and removing NICs")
    for instance in self.instances:
      Log("instance %s", instance, indent=1)
      add_op = opcodes.OpSetInstanceParams(
        instance_name=instance,
        nics=[(constants.DDM_ADD, {})])
      rem_op = opcodes.OpSetInstanceParams(
        instance_name=instance,
        nics=[(constants.DDM_REMOVE, {})])
      Log("adding a NIC", indent=2)
      Log("removing last NIC", indent=2)
      self.ExecOrQueue(instance, [add_op, rem_op])
889

890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
  def ConfdCallback(self, reply):
    """Callback for confd queries: validate status and per-type answers."""
    if reply.type != confd_client.UPCALL_REPLY:
      return
    srv_reply = reply.server_reply
    if srv_reply.status != constants.CONFD_REPL_STATUS_OK:
      Err("Query %s gave non-ok status %s: %s" % (reply.orig_request,
                                                  srv_reply.status,
                                                  srv_reply))
    req_type = reply.orig_request.type
    if req_type == constants.CONFD_REQ_PING:
      Log("Ping: OK", indent=1)
    elif req_type == constants.CONFD_REQ_CLUSTER_MASTER:
      # confd's idea of the master must match the cluster info we queried
      if srv_reply.answer == self.cluster_info["master"]:
        Log("Master: OK", indent=1)
      else:
        Err("Master: wrong: %s" % srv_reply.answer)
    elif req_type == constants.CONFD_REQ_NODE_ROLE_BYNAME:
      if srv_reply.answer == constants.CONFD_NODE_ROLE_MASTER:
        Log("Node role for master: OK", indent=1)
      else:
        Err("Node role for master: wrong: %s" % srv_reply.answer)

  def DoConfdRequestReply(self, req):
    self.confd_counting_callback.RegisterQuery(req.rsalt)
    self.confd_client.SendRequest(req, async=False)
    while not self.confd_counting_callback.AllAnswered():
      if not self.confd_client.ReceiveReply():
        Err("Did not receive all expected confd replies")
        break

  def BurnConfd(self):
    """Run confd queries for our instances.

    The following confd queries are tested:

      - CONFD_REQ_PING: simple ping
      - CONFD_REQ_CLUSTER_MASTER: cluster master
      - CONFD_REQ_NODE_ROLE_BYNAME: node role, for the master

    """
    Log("Checking confd results")

    # chain: counting callback wraps the filter, which wraps our validator
    filter_callback = confd_client.ConfdFilterCallback(self.ConfdCallback)
    counting_callback = confd_client.ConfdCountingCallback(filter_callback)
    self.confd_counting_callback = counting_callback
    self.confd_client = confd_client.GetConfdClient(counting_callback)

    # issue the three request types in order, each fully answered before
    # the next one is sent
    request_kwargs = [
      {"type": constants.CONFD_REQ_PING},
      {"type": constants.CONFD_REQ_CLUSTER_MASTER},
      {"type": constants.CONFD_REQ_NODE_ROLE_BYNAME,
       "query": self.cluster_info["master"]},
      ]
    for kwargs in request_kwargs:
      self.DoConfdRequestReply(confd_client.ConfdClientRequest(**kwargs))

947
948
949
950
951
952
953
954
955
956
957
  def _CheckInstanceAlive(self, instance):
    """Check if an instance is alive by doing http checks.

    This will try to retrieve the url on the instance /hostname.txt
    and check that it contains the hostname of the instance. In case
    we get ECONNREFUSED, we retry up to the net timeout seconds, for
    any other error we abort.

    """
    if not self.opts.http_check:
      return
958
959
960
961
962
    end_time = time.time() + self.opts.net_timeout
    url = None
    while time.time() < end_time and url is None:
      try:
        url = self.url_opener.open("http://%s/hostname.txt" % instance)
963