#!/usr/bin/python
#

# Copyright (C) 2006, 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Burnin program

"""
import os
import sys
import optparse
from itertools import izip, islice, cycle
from cStringIO import StringIO
from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import logger
from ganeti import errors
from ganeti import utils


# Usage line printed by Usage() and passed to optparse as the usage text
USAGE = ("\tburnin -o OS_NAME [options...] instance_name ...")


def Usage():
  """Print the usage text on stderr and terminate with exit code 2.

  """
  # a single write emits the same two lines: the header, then USAGE
  sys.stderr.write("Usage:\n%s\n" % USAGE)
  sys.exit(2)

def Log(msg):
  """Write a message to stdout, followed by a newline, and flush.

  The flush makes each progress line visible immediately, even when
  stdout is not line-buffered.

  Args:
    msg: the message to show; it is formatted with %s

  """
  # the one-element tuple keeps a tuple argument from being unpacked by %
  sys.stdout.write("%s\n" % (msg,))
  sys.stdout.flush()


class Burner(object):
  """Burnin driver.

  The constructor parses the command line and reads the node and OS
  lists; BurninCluster() then creates the requested instances, runs the
  enabled tests on them and removes them again.

  Attributes:
    nodes: names of the nodes the burnin runs on
    instances: instance names given on the command line
    to_rem: instances that currently exist and must be removed at the end
    opts: the parsed command line options

  """

  def __init__(self):
    """Set up logging and the opcode processor, then load options/state.

    This parses sys.argv and queries the cluster, and can therefore
    terminate the program (see ParseOptions and GetState).

    """
    logger.SetupLogging(debug=False, program="ganeti/burnin")
    # opcode feedback is collected here and only shown when a step fails
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)
    self.nodes = []
    self.instances = []
    self.to_rem = []
    self.opts = None
    self.ParseOptions()
    self.GetState()

  def ClearFeedbackBuf(self):
    """Clear the feedback buffer.

    """
    # rewind explicitly instead of relying on truncate(0) to also move
    # the write position back to the start of the buffer
    self._feed_buf.seek(0)
    self._feed_buf.truncate()

  def GetFeedbackBuf(self):
    """Return the contents of the feedback buffer.

    """
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Accumulate feedback in our buffer, one message per line.

    This is the callback handed to the opcode processor.

    """
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")

  def ExecOp(self, op):
    """Execute an opcode and return its result.

    The feedback buffer is cleared first, so after a failure it holds
    only the messages of the opcode that failed.

    """
    self.ClearFeedbackBuf()
    return self.proc.ExecOpCode(op)

  def ParseOptions(self):
    """Parse the command line and store the result on the instance.

    On success self.opts holds the option values and self.instances the
    positional arguments (the instance names).

    In case of command line errors it shows the usage and exits the
    program; an unsupported disk template exits with code 1.

    """
    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    # NOTE(review): the "unit" type comes from cli.CliOption; the defaults
    # are presumably expressed in its base unit - confirm
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    # NOTE(review): the verbose flag is parsed but not read anywhere in
    # this class
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("--no-importexport", dest="do_importexport",
                      help="Skip instance export/import", action="store_false",
                      default=True)
    # the help text lists every accepted choice, including "plain"
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("plain", "remote_raid1", "drbd"),
                      default="remote_raid1",
                      help="Disk template to use (plain, remote_raid1"
                      " or drbd) [remote_raid1]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    if len(args) < 1 or options.os is None:
      Usage()

    supported_disk_templates = (constants.DT_PLAIN, constants.DT_REMOTE_RAID1,
                                constants.DT_DRBD8)
    if options.disk_template not in supported_disk_templates:
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the node list and check that the requested OS is usable.

    Fills self.nodes with the names returned by the node query (limited
    to the --nodes list when given). The program exits if the node query
    fails, if the OS list can't be retrieved, or if the requested OS is
    not valid on every node present in the OS diagnose result.

    """
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
      result = self.ExecOp(op)
    except errors.GenericError:
      # sys.exc_info() instead of "except X, err" so that the statement
      # parses on every interpreter version
      err = sys.exc_info()[1]
      err_code, msg = cli.FormatError(err)
      Log(msg)
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

    result = self.ExecOp(opcodes.OpDiagnoseOS())

    if not result:
      Log("Can't get the OS list")
      sys.exit(1)

    # intersect, over all nodes, the names of the valid OS-es (non-valid
    # entries evaluate to false and are filtered out)
    os_set = None
    for node_name in result:
      node_oses = set([os_inst.name for os_inst in result[node_name]
                       if os_inst])
      if os_set is None:
        os_set = node_oses
      else:
        os_set &= node_oses

    if self.opts.os not in os_set:
      Log("OS '%s' not found" % self.opts.os)
      sys.exit(1)

  def CreateInstances(self):
    """Create the given instances.

    Nodes are assigned round-robin: the primary of the i-th instance is
    the i-th node (cycling over self.nodes) and the secondary is the node
    following it. Each created instance is recorded in self.to_rem.

    """
    self.to_rem = []
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 self.instances)
    for pnode, snode, instance in mytor:
      op = opcodes.OpCreateInstance(instance_name=instance,
                                    mem_size=128,
                                    disk_size=self.opts.os_size,
                                    swap_size=self.opts.swap_size,
                                    disk_template=self.opts.disk_template,
                                    mode=constants.INSTANCE_CREATE,
                                    os_type=self.opts.os,
                                    pnode=pnode,
                                    snode=snode,
                                    vcpus=1,
                                    start=True,
                                    ip_check=True,
                                    wait_for_sync=True,
                                    mac="auto",
                                    kernel_path=None,
                                    initrd_path=None,
                                    hvm_boot_order=None)
      Log("- Add instance %s on node %s" % (instance, pnode))
      self.ExecOp(op)
      # only instances that were really created are scheduled for removal
      self.to_rem.append(instance)

  def ReplaceDisks1R1(self):
    """Replace disks with the same secondary for remote_raid1.

    """
    # replace all, both disks
    for instance in self.instances:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  remote_node=None,
                                  mode=constants.REPLACE_DISK_ALL,
                                  disks=["sda", "sdb"])
      Log("- Replace disks for instance %s" % (instance))
      self.ExecOp(op)

  def ReplaceDisks1D8(self):
    """Replace disks on the secondary and then the primary for drbd8.

    """
    for instance in self.instances:
      for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
        op = opcodes.OpReplaceDisks(instance_name=instance,
                                    mode=mode,
                                    disks=["sda", "sdb"])
        Log("- Replace disks (%s) for instance %s" % (mode, instance))
        self.ExecOp(op)

  def ReplaceDisks2(self):
    """Replace the secondary node.

    The new secondary of the i-th instance is the node two positions
    after its creation-time primary (cycling over self.nodes).

    """
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL
    else:
      mode = constants.REPLACE_DISK_SEC

    mytor = izip(islice(cycle(self.nodes), 2, None),
                 self.instances)
    for tnode, instance in mytor:
      op = opcodes.OpReplaceDisks(instance_name=instance,
                                  mode=mode,
                                  remote_node=tnode,
                                  disks=["sda", "sdb"])
      Log("- Replace secondary (%s) for instance %s" % (mode, instance))
      self.ExecOp(op)

  def Failover(self):
    """Failover the instances.

    """
    for instance in self.instances:
      op = opcodes.OpFailoverInstance(instance_name=instance,
                                      ignore_consistency=False)
      Log("- Failover instance %s" % (instance))
      self.ExecOp(op)

  def ImportExport(self):
    """Export the instance, delete it, and import it back.

    The export goes to the node two positions after the instance's
    creation-time primary; the import uses the same primary/secondary
    assignment as CreateInstances. self.to_rem is kept in sync, so an
    instance that was removed but not yet re-imported is not removed a
    second time during cleanup.

    """
    mytor = izip(cycle(self.nodes),
                 islice(cycle(self.nodes), 1, None),
                 islice(cycle(self.nodes), 2, None),
                 self.instances)

    for pnode, snode, enode, instance in mytor:
      exp_op = opcodes.OpExportInstance(instance_name=instance,
                                        target_node=enode,
                                        shutdown=True)
      rem_op = opcodes.OpRemoveInstance(instance_name=instance)
      # the export directory is derived from the name returned by the
      # instance query, which may differ from the name given by the user
      nam_op = opcodes.OpQueryInstances(output_fields=["name"],
                                        names=[instance])
      full_name = self.ExecOp(nam_op)[0][0]
      imp_dir = os.path.join(constants.EXPORT_DIR, full_name)
      imp_op = opcodes.OpCreateInstance(instance_name=instance,
                                        mem_size=128,
                                        disk_size=self.opts.os_size,
                                        swap_size=self.opts.swap_size,
                                        disk_template=self.opts.disk_template,
                                        mode=constants.INSTANCE_IMPORT,
                                        src_node=enode,
                                        src_path=imp_dir,
                                        pnode=pnode,
                                        snode=snode,
                                        vcpus=1,
                                        start=True,
                                        ip_check=True,
                                        wait_for_sync=True,
                                        mac="auto")

      Log("- Export instance %s to node %s" % (instance, enode))
      self.ExecOp(exp_op)
      Log("- Remove instance %s" % (instance))
      self.ExecOp(rem_op)
      self.to_rem.remove(instance)
      Log("- Import instance %s from node %s to node %s" %
          (instance, enode, pnode))
      self.ExecOp(imp_op)
      self.to_rem.append(instance)

  def StopStart(self):
    """Stop/start the instances.

    """
    for instance in self.instances:
      op = opcodes.OpShutdownInstance(instance_name=instance)
      Log("- Shutdown instance %s" % instance)
      self.ExecOp(op)
      op = opcodes.OpStartupInstance(instance_name=instance, force=False)
      Log("- Start instance %s" % instance)
      self.ExecOp(op)

  def Remove(self):
    """Remove the instances recorded in self.to_rem.

    """
    for instance in self.to_rem:
      op = opcodes.OpRemoveInstance(instance_name=instance)
      Log("- Remove instance %s" % instance)
      self.ExecOp(op)

  def BurninCluster(self):
    """Test a cluster intensively.

    This creates the instances and then, depending on the options and
    the disk template, replaces their disks, fails them over,
    exports/imports them and finally stops/starts them. Only the
    instances named on the command line are operated on.

    Whether or not a step fails, the instances listed in self.to_rem are
    removed at the end; on failure the feedback of the failing opcode is
    printed first and the exception is propagated to the caller.

    Returns:
      0 when all steps succeeded

    """
    opts = self.opts

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and opts.disk_template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
          " be 'plain'")
      sys.exit(1)

    net_mirror = opts.disk_template in constants.DTS_NET_MIRROR

    # stays True unless every step below completes
    has_err = True
    try:
      self.CreateInstances()
      if opts.do_replace1 and net_mirror:
        if opts.disk_template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif opts.disk_template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()
      # moving the secondary needs a third node to move it to
      if opts.do_replace2 and len(self.nodes) > 2 and net_mirror:
        self.ReplaceDisks2()

      if opts.do_failover and net_mirror:
        self.Failover()

      if opts.do_importexport:
        self.ImportExport()

      self.StopStart()
      has_err = False
    finally:
      if has_err:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0


def main():
  """Main function: run the burnin while holding the 'cmd' lock.

  Returns:
    1 if the lock can't be acquired, otherwise the value returned by
    Burner.BurninCluster()

  """
  # NOTE(review): the Burner constructor already executes query opcodes,
  # i.e. before the lock below is taken - confirm this is intended
  burner = Burner()
  try:
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError:
    # sys.exc_info() instead of "except X, err" so that the statement
    # parses on every interpreter version
    err = sys.exc_info()[1]
    logger.ToStderr(str(err))
    return 1
  try:
    retval = burner.BurninCluster()
  finally:
    # release the lock even when the burnin fails or calls sys.exit
    utils.Unlock('cmd')
    utils.LockCleanup()
  return retval


if __name__ == "__main__":
  # propagate main()'s return value (1 on lock failure) as the exit code
  sys.exit(main())