burnin 17 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
2
3
#!/usr/bin/python
#

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
25

26
import os
Iustin Pop's avatar
Iustin Pop committed
27
28
import sys
import optparse
Iustin Pop's avatar
Iustin Pop committed
29
import time
30
from itertools import izip, islice, cycle
Iustin Pop's avatar
Iustin Pop committed
31
from cStringIO import StringIO
Iustin Pop's avatar
Iustin Pop committed
32
33
34
35
36
37

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import logger
38
39
from ganeti import errors
from ganeti import utils
Iustin Pop's avatar
Iustin Pop committed
40

41

42
# One-line synopsis shown by Usage() and embedded in the optparse usage text.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
Iustin Pop's avatar
Iustin Pop committed
43

44

Iustin Pop's avatar
Iustin Pop committed
45
46
47
48
49
50
51
def Usage():
  """Print the usage text on stderr and terminate with exit status 2.

  """
  for line in ("Usage:", USAGE):
    sys.stderr.write(line + "\n")
  sys.exit(2)

52

Iustin Pop's avatar
Iustin Pop committed
53
def Log(msg):
  """Write the string form of msg plus a newline to stdout and flush.

  The explicit flush makes each message reach stdout's consumer right
  away, even if stdout is buffered.

  """
  # the one-element tuple keeps a tuple-valued msg from being expanded as
  # format arguments
  sys.stdout.write("%s\n" % (msg,))
  sys.stdout.flush()
Iustin Pop's avatar
Iustin Pop committed
59

60

61
62
63
64
65
class Burner(object):
  """Burner class.

  Creates the instances named on the command line, runs the selected
  operations on them and removes them again at the end.

  Attributes:
    nodes: names of the nodes the burnin runs on (filled by GetState)
    instances: instance names taken from the command line arguments
    to_rem: instances that currently exist and must be removed at the end
    opts: the parsed command line options
    cl: the client object returned by cli.GetClient()
    bep, hvp: parameter dicts passed as beparams/hvparams when creating
      or importing instances (filled by ParseOptions)

  """

  def __init__(self):
    """Constructor.

    Sets up logging and the client, then parses the command line and
    reads the cluster state; both of these steps may call sys.exit().

    """
    logger.SetupLogging(constants.LOG_BURNIN, debug=False, stderr_logging=True)
    self._feed_buf = StringIO()
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.cl = cli.GetClient()
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    # Rewind explicitly: not every file-like buffer moves its position
    # back on truncate(), and writing past the end would pad the gap.
    self._feed_buf.seek(0)
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer.

    msg is indexed as a sequence: msg[0] is passed to utils.MergeTime to
    build the timestamp and msg[2] is the message text. The formatted
    line is appended to the buffer and, in verbose mode, also logged.

    """
    formatted_msg = "%s %s" % (time.ctime(utils.MergeTime(msg[0])), msg[2])
    self._feed_buf.write(formatted_msg + "\n")
    if self.opts.verbose:
      # log the readable line, not the raw message sequence
      Log(formatted_msg)

  def ExecOp(self, op):
    """Execute an opcode and manage the exec buffer.

    The feedback buffer is cleared first, so after a failure it only
    holds the output of the opcode that failed.

    """
    self.ClearFeedbackBuf()
    return cli.SubmitOpCode(op, feedback_fn=self.Feedback, cl=self.cl)

  def ExecJobSet(self, jobs):
    """Execute a set of jobs and return once all are done.

    The method will return the list of results, if all jobs are
    successful. Otherwise, OpExecError will be raised from within
    cli.py.

    """
    self.ClearFeedbackBuf()
    # submit everything first, then wait for the jobs in submission order
    job_ids = [cli.SendJob(job, cl=self.cl) for job in jobs]
    Log("- Submitted job IDs %s" % ", ".join(job_ids))
    results = []
    for jid in job_ids:
      Log("- Waiting for job %s" % jid)
      results.append(cli.PollJob(jid, cl=self.cl, feedback_fn=self.Feedback))

    return results

  def ParseOptions(self):
    """Parses the command line options.

    In case of command line errors, it will show the usage and exit the
    program. On success it sets self.opts, self.instances, self.bep and
    self.hvp.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--mem-size", dest="mem_size", help="Memory size",
                      default=128, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    parser.add_option("--no-startstop", dest="do_startstop",
                      help="Skip instance stop/start", action="store_false",
                      default=True)
    parser.add_option("--rename", dest="rename", default=None,
                      help="Give one unused instance name which is taken"
                           " to start the renaming sequence",
                      metavar="<instance_name>")
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("diskless", "file", "plain", "drbd"),
                      default="drbd",
                      help="Disk template (diskless, file, plain or drbd)"
                           " [drbd]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")
    parser.add_option("--iallocator", dest="iallocator",
                      default=None, type="string",
                      help="Perform the allocation using an iallocator"
                      " instead of fixed node spread (node restrictions no"
                      " longer apply, therefore -n/--nodes must not be used)")
    parser.add_option("-p", "--parallel", default=False, action="store_true",
                      dest="parallel",
                      help="Enable parallelization of some operations in"
                      " order to speed burnin or to test granular locking")

    options, args = parser.parse_args()
    # at least one instance name and the OS are mandatory
    if not args or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_DISKLESS,
                                constants.DT_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    if options.nodes and options.iallocator:
      Log("Give either the nodes option or the iallocator option, not both")
      sys.exit(1)

    self.opts = options
    self.instances = args
    self.bep = {
      constants.BE_MEMORY: options.mem_size,
      constants.BE_VCPUS: 1,
      }
    self.hvp = {}

  def GetState(self):
    """Read the cluster state from the config.

    Fills self.nodes with the queried node names and checks that the
    requested OS is in the list of valid OSes; exits the program if the
    node query fails, the OS list is empty or the OS is not found.

    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      # an empty name list is passed when -n/--nodes was not given
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError:
      # The exception object is taken from sys.exc_info() so that this
      # handler is valid syntax on old and new Python versions alike.
      err = sys.exc_info()[1]
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS(output_fields=["name", "valid"],
                                              names=[]))

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # filter non-valid OS-es
    os_set = [val[0] for val in result if val[1]]

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    Primary and secondary nodes are assigned round-robin over self.nodes
    (the secondary being the next node after the primary), unless an
    iallocator is used. In parallel mode all creations are submitted as
    one job set, otherwise they are executed one by one.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    jobset = []

    for pnode, snode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None
        Log("- Add instance %s (iallocator: %s)" %
              (instance, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        Log("- Add instance %s on node %s" % (instance, pnode))
      else:
        Log("- Add instance %s on nodes %s/%s" % (instance, pnode, snode))

      op = opcodes.OpCreateInstance(instance_name=instance,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    file_driver="loop",
                                    file_storage_dir=None,
                                    iallocator=self.opts.iallocator,
                                    beparams=self.bep,
                                    hvparams=self.hvp,
                                    )

      if self.opts.parallel:
        jobset.append([op])
      else:
        self.ExecOp(op)
      # FIXME: in parallel mode we should not append to to_rem
      # unconditionally, but only when the job is successful
      self.to_rem.append(instance)

    if self.opts.parallel:
      self.ExecJobSet(jobset)

  def ReplaceDisks1D8(self):
    """Replace disks on primary and secondary for drbd8."""
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace secondary node.

    The new secondary is taken round-robin from self.nodes starting at
    the third node, unless an iallocator is used.

    """
    mode = constants.REPLACE_DISK_SEC

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      if self.opts.iallocator:
        tnode = None
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  iallocator=self.opts.iallocator,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances."""
    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)

      Log("- Failover instance %s" % instance)
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    The export node is taken round-robin from self.nodes, two positions
    after the primary. The export is removed again after the import.

    """
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      if self.opts.iallocator:
        pnode = snode = None
        import_log_msg = ("- Import instance %s from node %s (iallocator: %s)" %
                          (instance, enode, self.opts.iallocator))
      elif self.opts.disk_template not in constants.DTS_NET_MIRROR:
        snode = None
        import_log_msg = ("- Import instance %s from node %s to node %s" %
                          (instance, enode, pnode))
      else:
        import_log_msg = ("- Import instance %s from node %s to nodes %s/%s" %
                          (instance, enode, pnode, snode))

      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance,
                                        ignore_failures=True)
      # the import directory is built from the name the query returns,
      # which may differ from the name given on the command line
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        disk_size=self.opts.os_size,
                                        swap_size=self.opts.swap_size,
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        mac="auto",
                                        file_storage_dir=None,
                                        file_driver=None,
                                        iallocator=self.opts.iallocator,
                                        beparams=self.bep,
                                        hvparams=self.hvp,
                                        )

      erem_op = opcodes.OpRemoveExport(instance_name=instance)

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % instance)
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log(import_log_msg)
      self.ExecOp(imp_op)
      # The instance exists again as soon as the import is done; register
      # it now so it is still cleaned up if removing the export fails.
      self.to_rem.append(instance)
      Log("- Remove export of instance %s" % instance)
      self.ExecOp(erem_op)

  def StopInstance(self, instance):
    """Stop given instance."""
    op = opcodes.OpShutdownInstance(instance_name=instance)
    Log("- Shutdown instance %s" % instance)
    self.ExecOp(op)

  def StartInstance(self, instance):
    """Start given instance."""
    op = opcodes.OpStartupInstance(instance_name=instance, force=False)
    Log("- Start instance %s" % instance)
    self.ExecOp(op)

  def RenameInstance(self, instance, instance_new):
    """Rename instance."""
    op = opcodes.OpRenameInstance(instance_name=instance,
                                  new_name=instance_new)
    Log("- Rename instance %s to %s" % (instance, instance_new))
    self.ExecOp(op)

  def StopStart(self):
    """Stop/start the instances."""
    for instance in self.instances:
      self.StopInstance(instance)
      self.StartInstance(instance)

  def Remove(self):
    """Remove the instances."""
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance,
                                    ignore_failures=True)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def Rename(self):
    """Rename the instances.

    Each instance is stopped, renamed to the name given via --rename,
    started and stopped under that name, renamed back and started again.

    """
    rename = self.opts.rename
    for instance in self.instances:
      self.StopInstance(instance)
      self.RenameInstance(instance, rename)
      self.StartInstance(rename)
      self.StopInstance(rename)
      self.RenameInstance(rename, instance)
      self.StartInstance(instance)

  def BurninCluster(self):
    """Test a cluster intensively.

    This will create instances and then start/stop/failover them.
    It is safe for existing instances but could impact performance.

    Returns 0 when all steps succeed. Errors propagate to the caller
    after the collected opcode feedback has been logged; the instances
    in self.to_rem are removed in either case.

    """
    opts = self.opts

    Log("- Testing global parameters")

    if (len(self.nodes) == 1 and
        opts.disk_template not in (constants.DT_DISKLESS, constants.DT_PLAIN,
                                   constants.DT_FILE)):
      Log("When one node is available/selected the disk template must"
          " be 'diskless', 'file' or 'plain'")
      sys.exit(1)

    # stays True unless every step below completes
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and opts.disk_template in constants.DTS_NET_MIRROR:
        self.ReplaceDisks1D8()
      if (opts.do_replace2 and len(self.nodes) > 2 and
          opts.disk_template in constants.DTS_NET_MIRROR):
        self.ReplaceDisks2()

      if opts.do_failover and opts.disk_template in constants.DTS_NET_MIRROR:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      if opts.do_startstop:
        self.StopStart()

      if opts.rename:
        self.Rename()

      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0
Iustin Pop's avatar
Iustin Pop committed
478

479

Iustin Pop's avatar
Iustin Pop committed
480
def main():
  """Main function.

  Builds the Burner (which parses the command line and reads the cluster
  state) and runs the burnin, returning its result.

  """
  burner = Burner()
  return burner.BurninCluster()
Iustin Pop's avatar
Iustin Pop committed
485

486

Iustin Pop's avatar
Iustin Pop committed
487
if __name__ == "__main__":
  # hand the value returned by main() to the shell as the exit status
  sys.exit(main())