burnin 10 KB
Newer Older
Iustin Pop's avatar
Iustin Pop committed
1
2
3
#!/usr/bin/python
#

4
5
"""Burnin program"""

Iustin Pop's avatar
Iustin Pop committed
6
7
import sys
import optparse
8
from itertools import izip, islice, cycle
Iustin Pop's avatar
Iustin Pop committed
9
from cStringIO import StringIO
Iustin Pop's avatar
Iustin Pop committed
10
11
12
13
14
15

from ganeti import opcodes
from ganeti import mcpu
from ganeti import constants
from ganeti import cli
from ganeti import logger
16
17
from ganeti import errors
from ganeti import utils
Iustin Pop's avatar
Iustin Pop committed
18

19
# One-line synopsis: printed by Usage() and handed to optparse as usage text.
USAGE = "\tburnin -o OS_NAME [options...] instance_name ..."
Iustin Pop's avatar
Iustin Pop committed
20
21
22
23
24
25
26
27

def Usage():
  """Write the usage banner to stderr and terminate the program.

  The process exits with status 2.

  """
  for line in ("Usage:", USAGE):
    print >> sys.stderr, line
  sys.exit(2)

Iustin Pop's avatar
Iustin Pop committed
28
def Log(msg):
  """Print the given message on standard output.

  """
  print(msg)
Iustin Pop's avatar
Iustin Pop committed
33

34
35
36
37
38
class Burner(object):
  """Burner class."""

  def __init__(self):
    """Set up logging and the opcode processor, then load the run state.

    The command line is parsed and the cluster state is read as part of
    construction (through ParseOptions and GetState).

    """
    # plain state first; ParseOptions/GetState fill it in below
    self.opts = None
    self.nodes = []
    self.instances = []
    self.to_rem = []

    logger.SetupLogging(debug=False, program="ganeti/burnin")
    # opcode feedback is collected here instead of being printed directly
    self._feed_buf = StringIO()
    self.proc = mcpu.Processor(feedback=self.Feedback)

    self.ParseOptions()
    self.GetState()

Iustin Pop's avatar
Iustin Pop committed
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
  def ClearFeedbackBuf(self):
    """Clear the feedback buffer."""
    self._feed_buf.truncate(0)

  def GetFeedbackBuf(self):
    """Return the contents of the buffer."""
    return self._feed_buf.getvalue()

  def Feedback(self, msg):
    """Acumulate feedback in our buffer."""
    self._feed_buf.write(msg)
    self._feed_buf.write("\n")

  def ExecOp(self, op):
    """Run one opcode through the processor and return its result.

    The feedback buffer is emptied first, so afterwards it only holds the
    messages produced while executing this opcode.

    """
    self.ClearFeedbackBuf()
    result = self.proc.ExecOpCode(op)
    return result

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
  def ParseOptions(self):
    """Parse the command line and store the result on this object.

    On return, self.opts holds the parsed options, with disk_template
    replaced by the matching constants.DT_* value, and self.instances
    holds the positional arguments.

    If no positional argument is given or the OS option is missing, the
    usage is shown and the program exits (see Usage). An unrecognised
    disk template is logged and the program exits with status 1.

    """
    # "plain" has to be selectable: BurninCluster insists on it whenever
    # only a single node is available, so leaving it out of the choices
    # made single-node runs impossible.
    templates = {
      "plain": constants.DT_PLAIN,
      "remote_raid1": constants.DT_REMOTE_RAID1,
      "drbd8": constants.DT_DRBD8,
      }

    parser = optparse.OptionParser(usage="\n%s" % USAGE,
                                   version="%%prog (ganeti) %s" %
                                   constants.RELEASE_VERSION,
                                   option_class=cli.CliOption)

    parser.add_option("-o", "--os", dest="os", default=None,
                      help="OS to use during burnin",
                      metavar="<OS>")
    parser.add_option("--os-size", dest="os_size", help="Disk size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("--swap-size", dest="swap_size", help="Swap size",
                      default=4 * 1024, type="unit", metavar="<size>")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="print command execution messages to stdout")
    parser.add_option("--no-replace1", dest="do_replace1",
                      help="Skip disk replacement with the same secondary",
                      action="store_false", default=True)
    parser.add_option("--no-replace2", dest="do_replace2",
                      help="Skip disk replacement with a different secondary",
                      action="store_false", default=True)
    parser.add_option("--no-failover", dest="do_failover",
                      help="Skip instance failovers", action="store_false",
                      default=True)
    parser.add_option("-t", "--disk-template", dest="disk_template",
                      choices=("plain", "remote_raid1", "drbd8"),
                      default="remote_raid1",
                      help="Disk template to use (plain, remote_raid1"
                      " or drbd8) [remote_raid1]")
    parser.add_option("-n", "--nodes", dest="nodes", default="",
                      help="Comma separated list of nodes to perform"
                      " the burnin on (defaults to all nodes)")

    options, args = parser.parse_args()
    if not args or options.os is None:
      Usage()

    if options.disk_template not in templates:
      # Defensive only: optparse already restricts the value via "choices".
      Log("Unknown disk template '%s'" % options.disk_template)
      sys.exit(1)

    options.disk_template = templates[options.disk_template]
    self.opts = options
    self.instances = args

  def GetState(self):
    """Read the cluster state from the config."""
    if self.opts.nodes:
      names = self.opts.nodes.split(",")
    else:
      names = []
    try:
      op = opcodes.OpQueryNodes(output_fields=["name"], names=names)
Iustin Pop's avatar
Iustin Pop committed
134
      result = self.ExecOp(op)
135
136
    except errors.GenericError, err:
      err_code, msg = cli.FormatError(err)
Iustin Pop's avatar
Iustin Pop committed
137
      Log(msg)
138
139
140
      sys.exit(err_code)
    self.nodes = [data[0] for data in result]

Iustin Pop's avatar
Iustin Pop committed
141
    result = self.ExecOp(opcodes.OpDiagnoseOS())
142
143

    if not result:
Iustin Pop's avatar
Iustin Pop committed
144
      Log("Can't get the OS list")
145
146
147
148
149
150
151
152
153
154
155
156
157
158
      sys.exit(1)

    # filter non-valid OS-es
    oses = {}
    for node_name in result:
      oses[node_name] = [obj for obj in result[node_name] if obj]

    fnode = oses.keys()[0]
    os_set = set([os_inst.name for os_inst in oses[fnode]])
    del oses[fnode]
    for node in oses:
      os_set &= set([os_inst.name for os_inst in oses[node]])

    if self.opts.os not in os_set:
Iustin Pop's avatar
Iustin Pop committed
159
      Log("OS '%s' not found" % self.opts.os)
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
      sys.exit(1)

  def CreateInstances(self):
    """Create every requested instance.

    Primary nodes are taken round-robin from self.nodes; the secondary is
    the next node in the same cycle. Each instance is recorded in
    self.to_rem once its creation opcode has been executed.

    """
    self.to_rem = []
    primaries = cycle(self.nodes)
    secondaries = islice(cycle(self.nodes), 1, None)
    for pnode, snode, name in izip(primaries, secondaries, self.instances):
      create_op = opcodes.OpCreateInstance(
        instance_name=name,
        mode=constants.INSTANCE_CREATE,
        os_type=self.opts.os,
        pnode=pnode,
        snode=snode,
        disk_template=self.opts.disk_template,
        disk_size=self.opts.os_size,
        swap_size=self.opts.swap_size,
        mem_size=128,
        vcpus=1,
        start=True,
        ip_check=True,
        wait_for_sync=True)
      Log("- Add instance %s on node %s" % (name, pnode))
      self.ExecOp(create_op)
      self.to_rem.append(name)

  def ReplaceDisks1R1(self):
    """Replace both disks of every instance on the same secondary (rr1).

    """
    for name in self.instances:
      # no remote node is passed, so no new secondary is requested
      replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                          disks=["sda", "sdb"],
                                          mode=constants.REPLACE_DISK_ALL,
                                          remote_node=None)
      Log("- Replace disks for instance %s" % name)
      self.ExecOp(replace_op)
199
200
201
202
203
204
205
206

  def ReplaceDisks1D8(self):
    """Replace the disks on the secondary, then on the primary (drbd8).

    """
    modes = (constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI)
    for name in self.instances:
      for mode in modes:
        replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                            disks=["sda", "sdb"],
                                            mode=mode)
        Log("- Replace disks (%s) for instance %s" % (mode, name))
        self.ExecOp(replace_op)
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

  def ReplaceDisks2(self):
    """Replace the disks of every instance onto another secondary node.

    The target for the i-th instance is the node two positions further
    along the node cycle. The replace mode depends on the disk template.

    """
    mode = constants.REPLACE_DISK_SEC
    if self.opts.disk_template == constants.DT_REMOTE_RAID1:
      mode = constants.REPLACE_DISK_ALL

    targets = islice(cycle(self.nodes), 2, None)
    for tnode, name in izip(targets, self.instances):
      replace_op = opcodes.OpReplaceDisks(instance_name=name,
                                          disks=["sda", "sdb"],
                                          remote_node=tnode,
                                          mode=mode)
      Log("- Replace secondary (%s) for instance %s" % (mode, name))
      self.ExecOp(replace_op)
226
227
228
229
230
231
232
233

  def Failover(self):
    """Fail over every instance, one after the other.

    """
    for name in self.instances:
      failover_op = opcodes.OpFailoverInstance(ignore_consistency=False,
                                               instance_name=name)
      Log("- Failover instance %s" % name)
      self.ExecOp(failover_op)
236
237
238
239
240

  def StopStart(self):
    """Shut down and then start again every instance, one at a time.

    """
    for name in self.instances:
      stop_op = opcodes.OpShutdownInstance(instance_name=name)
      Log("- Shutdown instance %s" % name)
      self.ExecOp(stop_op)

      start_op = opcodes.OpStartupInstance(force=False, instance_name=name)
      Log("- Start instance %s" % name)
      self.ExecOp(start_op)
246
247
248
249
250

  def Remove(self):
    """Remove every instance recorded in self.to_rem.

    """
    for name in self.to_rem:
      remove_op = opcodes.OpRemoveInstance(instance_name=name)
      Log("- Remove instance %s" % name)
      self.ExecOp(remove_op)
253
254
255
256
257
258
259
260
261
262
263

  def BurninCluster(self):
    """Run the whole burnin sequence on the cluster.

    Instances are created, then (depending on the options, the disk
    template and the number of nodes) get their disks replaced and are
    failed over, and finally are stopped and started again. Whatever
    happens after that point, the instances recorded for removal are
    removed at the end; if a step failed, the buffered opcode feedback is
    printed first.

    Returns 0. The program exits with status 1 if a single node is
    combined with a disk template other than plain.

    """
    opts = self.opts
    template = opts.disk_template

    Log("- Testing global parameters")

    if len(self.nodes) == 1 and template != constants.DT_PLAIN:
      Log("When one node is available/selected the disk template must"
          " be 'plain'")
      sys.exit(1)

    completed = False
    try:
      self.CreateInstances()
      mirrored = template in constants.DTS_NET_MIRROR

      if opts.do_replace1 and mirrored:
        if template == constants.DT_REMOTE_RAID1:
          self.ReplaceDisks1R1()
        elif template == constants.DT_DRBD8:
          self.ReplaceDisks1D8()

      # moving to a different secondary needs a third node to move to
      if opts.do_replace2 and len(self.nodes) > 2 and mirrored:
        self.ReplaceDisks2()

      if opts.do_failover and mirrored:
        self.Failover()

      self.StopStart()
      completed = True
    finally:
      if not completed:
        Log("Error detected: opcode buffer follows:\n\n")
        Log(self.GetFeedbackBuf())
        Log("\n\n")
      self.Remove()

    return 0
Iustin Pop's avatar
Iustin Pop committed
296
297

def main():
298
299
  """Main function"""

300
  burner = Burner()
301
302
303
304
305
306
  try:
    utils.Lock('cmd', max_retries=15, debug=True)
  except errors.LockError, err:
    logger.ToStderr(str(err))
    return 1
  try:
307
    retval = burner.BurninCluster()
308
309
310
311
  finally:
    utils.Unlock('cmd')
    utils.LockCleanup()
  return retval
Iustin Pop's avatar
Iustin Pop committed
312
313

if __name__ == "__main__":
  # Hand main()'s return value to the shell; it used to be discarded, so a
  # failure to take the lock (return value 1) still exited with status 0.
  sys.exit(main())