Commit 7181fba0 authored by Constantinos Venetsanopoulos, committed by Michael Hanselmann

Implement rbd disk template



Introduce the rbd disk template, which handles provisioning and
management of instance disks as block devices mapped to rbd volumes
on a RADOS cluster.

The rbd template is of type DTS_EXT_MIRROR, meaning rbd-based
instances can fail over or migrate to any node, without
primary/secondary limitations.

Use of this template requires the rbd kernel driver and the rbd
command-line tool on all VM-capable nodes, as well as an operational
RADOS cluster accessible from those nodes.
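
For reference, the node-level flow the new template automates is just the
rbd tool driven through a fixed sequence (create, map, locate the device via
showmapped). The standalone sketch below is illustrative only: it is not part
of this commit, the helper name is invented, and it assumes the tab-separated,
five-column showmapped output that the parser added in bdev.py expects.

  import subprocess

  RBD_CMD = "rbd"

  def provision_and_map(rbd_pool, volume_name, size_mb):
    """Create an rbd volume and map it to a local /dev/rbdN device."""
    # Provision the volume (Image) inside the RADOS pool.
    subprocess.check_call([RBD_CMD, "create", "-p", rbd_pool,
                           volume_name, "--size", str(size_mb)])
    # Map it to a block device on this node.
    subprocess.check_call([RBD_CMD, "map", "-p", rbd_pool, volume_name])
    # Locate the device node in the showmapped table
    # (columns: id, pool, image, snap, device).
    output = subprocess.check_output([RBD_CMD, "showmapped", "-p", rbd_pool])
    for line in output.decode().splitlines():
      fields = line.split("\t")
      if len(fields) == 5 and fields[2] == volume_name:
        return fields[4]  # e.g. "/dev/rbd0"
    raise RuntimeError("volume %s not found in rbd showmapped output" %
                       volume_name)

Cleanup on a node is the reverse: "rbd unmap <device>" and, on removal,
"rbd rm -p <pool> <name>", which is what RADOSBlockDevice.Shutdown() and
Remove() below perform.
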
Signed-off-by: Constantinos Venetsanopoulos <cven@grnet.gr>
Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Michael Hanselmann <hansmi@google.com>
parent 447af814
@@ -2346,10 +2346,302 @@ class PersistentBlockDevice(BlockDev):
    _ThrowError("Grow is not supported for PersistentBlockDev storage")


class RADOSBlockDevice(BlockDev):
  """A RADOS Block Device (rbd).

  This class implements the RADOS Block Device for the backend. You need
  the rbd kernel driver, the RADOS Tools and a working RADOS cluster for
  this to be functional.

  """
  def __init__(self, unique_id, children, size, params):
    """Attaches to an rbd device.

    """
    super(RADOSBlockDevice, self).__init__(unique_id, children, size, params)
    if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
      raise ValueError("Invalid configuration data %s" % str(unique_id))

    self.driver, self.rbd_name = unique_id

    self.major = self.minor = None
    self.Attach()

  @classmethod
  def Create(cls, unique_id, children, size, params):
    """Create a new rbd device.

    Provision a new rbd volume inside a RADOS pool.

    """
    if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
      raise errors.ProgrammerError("Invalid configuration data %s" %
                                   str(unique_id))
    rbd_pool = params[constants.LDP_POOL]
    rbd_name = unique_id[1]

    # Provision a new rbd volume (Image) inside the RADOS cluster.
    cmd = [constants.RBD_CMD, "create", "-p", rbd_pool,
           rbd_name, "--size", "%s" % size]
    result = utils.RunCmd(cmd)
    if result.failed:
      _ThrowError("rbd creation failed (%s): %s",
                  result.fail_reason, result.output)

    return RADOSBlockDevice(unique_id, children, size, params)

  def Remove(self):
    """Remove the rbd device.

    """
    rbd_pool = self.params[constants.LDP_POOL]
    rbd_name = self.unique_id[1]

    if not self.minor and not self.Attach():
      # The rbd device doesn't exist.
      return

    # First shutdown the device (remove mappings).
    self.Shutdown()

    # Remove the actual Volume (Image) from the RADOS cluster.
    cmd = [constants.RBD_CMD, "rm", "-p", rbd_pool, rbd_name]
    result = utils.RunCmd(cmd)
    if result.failed:
      _ThrowError("Can't remove Volume from cluster with rbd rm: %s - %s",
                  result.fail_reason, result.output)

  def Rename(self, new_id):
    """Rename this device.

    """
    pass

  def Attach(self):
    """Attach to an existing rbd device.

    This method maps the rbd volume that matches our name with
    an rbd device and then attaches to this device.

    """
    self.attached = False

    # Map the rbd volume to a block device under /dev
    self.dev_path = self._MapVolumeToBlockdev(self.unique_id)

    try:
      st = os.stat(self.dev_path)
    except OSError, err:
      logging.error("Error stat()'ing %s: %s", self.dev_path, str(err))
      return False

    if not stat.S_ISBLK(st.st_mode):
      logging.error("%s is not a block device", self.dev_path)
      return False

    self.major = os.major(st.st_rdev)
    self.minor = os.minor(st.st_rdev)
    self.attached = True

    return True

  def _MapVolumeToBlockdev(self, unique_id):
    """Maps existing rbd volumes to block devices.

    This method should be idempotent if the mapping already exists.

    @rtype: string
    @return: the block device path that corresponds to the volume

    """
    pool = self.params[constants.LDP_POOL]
    name = unique_id[1]

    # Check if the mapping already exists.
    showmap_cmd = [constants.RBD_CMD, "showmapped", "-p", pool]
    result = utils.RunCmd(showmap_cmd)
    if result.failed:
      _ThrowError("rbd showmapped failed (%s): %s",
                  result.fail_reason, result.output)

    rbd_dev = self._ParseRbdShowmappedOutput(result.output, name)

    if rbd_dev:
      # The mapping exists. Return it.
      return rbd_dev

    # The mapping doesn't exist. Create it.
    map_cmd = [constants.RBD_CMD, "map", "-p", pool, name]
    result = utils.RunCmd(map_cmd)
    if result.failed:
      _ThrowError("rbd map failed (%s): %s",
                  result.fail_reason, result.output)

    # Find the corresponding rbd device.
    showmap_cmd = [constants.RBD_CMD, "showmapped", "-p", pool]
    result = utils.RunCmd(showmap_cmd)
    if result.failed:
      _ThrowError("rbd map succeeded, but showmapped failed (%s): %s",
                  result.fail_reason, result.output)

    rbd_dev = self._ParseRbdShowmappedOutput(result.output, name)

    if not rbd_dev:
      _ThrowError("rbd map succeeded, but could not find the rbd block"
                  " device in output of showmapped, for volume: %s", name)

    # The device was successfully mapped. Return it.
    return rbd_dev

  @staticmethod
  def _ParseRbdShowmappedOutput(output, volume_name):
    """Parse the output of `rbd showmapped'.

    This method parses the output of `rbd showmapped' and returns
    the rbd block device path (e.g. /dev/rbd0) that matches the
    given rbd volume.

    @type output: string
    @param output: the whole output of `rbd showmapped'
    @type volume_name: string
    @param volume_name: the name of the volume whose device we search for
    @rtype: string or None
    @return: block device path if the volume is mapped, else None

    """
    allfields = 5
    volumefield = 2
    devicefield = 4

    field_sep = "\t"

    lines = output.splitlines()
    splitted_lines = map(lambda l: l.split(field_sep), lines)

    # Check empty output.
    if not splitted_lines:
      _ThrowError("rbd showmapped returned empty output")

    # Check showmapped header line, to determine number of fields.
    field_cnt = len(splitted_lines[0])
    if field_cnt != allfields:
      _ThrowError("Cannot parse rbd showmapped output because its format"
                  " seems to have changed; expected %s fields, found %s",
                  allfields, field_cnt)

    matched_lines = \
      filter(lambda l: len(l) == allfields and l[volumefield] == volume_name,
             splitted_lines)

    if len(matched_lines) > 1:
      _ThrowError("The rbd volume %s is mapped more than once."
                  " This shouldn't happen, try to unmap the extra"
                  " devices manually.", volume_name)

    if matched_lines:
      # rbd block device found. Return it.
      rbd_dev = matched_lines[0][devicefield]
      return rbd_dev

    # The given volume is not mapped.
    return None

  def Assemble(self):
    """Assemble the device.

    """
    pass

  def Shutdown(self):
    """Shutdown the device.

    """
    if not self.minor and not self.Attach():
      # The rbd device doesn't exist.
      return

    # Unmap the block device from the Volume.
    self._UnmapVolumeFromBlockdev(self.unique_id)

    self.minor = None
    self.dev_path = None

  def _UnmapVolumeFromBlockdev(self, unique_id):
    """Unmaps the rbd device from the Volume it is mapped.

    Unmaps the rbd device from the Volume it was previously mapped to.
    This method should be idempotent if the Volume isn't mapped.

    """
    pool = self.params[constants.LDP_POOL]
    name = unique_id[1]

    # Check if the mapping already exists.
    showmap_cmd = [constants.RBD_CMD, "showmapped", "-p", pool]
    result = utils.RunCmd(showmap_cmd)
    if result.failed:
      _ThrowError("rbd showmapped failed [during unmap](%s): %s",
                  result.fail_reason, result.output)

    rbd_dev = self._ParseRbdShowmappedOutput(result.output, name)

    if rbd_dev:
      # The mapping exists. Unmap the rbd device.
      unmap_cmd = [constants.RBD_CMD, "unmap", "%s" % rbd_dev]
      result = utils.RunCmd(unmap_cmd)
      if result.failed:
        _ThrowError("rbd unmap failed (%s): %s",
                    result.fail_reason, result.output)

  def Open(self, force=False):
    """Make the device ready for I/O.

    """
    pass

  def Close(self):
    """Notifies that the device will no longer be used for I/O.

    """
    pass

  def Grow(self, amount, dryrun):
    """Grow the Volume.

    @type amount: integer
    @param amount: the amount (in mebibytes) to grow with
    @type dryrun: boolean
    @param dryrun: whether to execute the operation in simulation mode
        only, without actually increasing the size

    """
    if not self.Attach():
      _ThrowError("Can't attach to rbd device during Grow()")

    if dryrun:
      # the rbd tool does not support dry runs of resize operations.
      # Since rbd volumes are thinly provisioned, we assume
      # there is always enough free space for the operation.
      return

    rbd_pool = self.params[constants.LDP_POOL]
    rbd_name = self.unique_id[1]
    new_size = self.size + amount

    # Resize the rbd volume (Image) inside the RADOS cluster.
    cmd = [constants.RBD_CMD, "resize", "-p", rbd_pool,
           rbd_name, "--size", "%s" % new_size]
    result = utils.RunCmd(cmd)
    if result.failed:
      _ThrowError("rbd resize failed (%s): %s",
                  result.fail_reason, result.output)


DEV_MAP = {
  constants.LD_LV: LogicalVolume,
  constants.LD_DRBD8: DRBD8,
  constants.LD_BLOCKDEV: PersistentBlockDevice,
  constants.LD_RBD: RADOSBlockDevice,
  }

if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
@@ -8200,6 +8200,21 @@ class TLMigrateInstance(Tasklet):
    self._GoReconnect(False)
    self._WaitUntilSync()

    # If the instance's disk template is `rbd' and there was a successful
    # migration, unmap the device from the source node.
    if self.instance.disk_template == constants.DT_RBD:
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, disk)
        msg = result.fail_msg
        if msg:
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")

  def _ExecFailover(self):
@@ -8467,6 +8482,15 @@ def _ComputeLDParams(disk_template, disk_params):
  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])

  elif disk_template == constants.DT_RBD:
    params = {
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
                       params)
    result.append(params)

  return result
 
 
@@ -8603,6 +8627,22 @@ def _GenerateDiskTemplate(lu, template_name,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_RBD:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
                                      for i in range(disk_count)])

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_RBD,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=("rbd", names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
@@ -8843,6 +8883,7 @@ def _ComputeDiskSize(disk_template, disks):
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  }

  if disk_template not in req_size_dict:
@@ -9677,9 +9718,15 @@ class LUInstanceCreate(LogicalUnit):
    self.diskparams = group_info.diskparams

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
@@ -9992,6 +10039,14 @@ class LUInstanceCreate(LogicalUnit):
    return list(iobj.all_nodes)


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.
 
@@ -11347,7 +11402,8 @@ class LUInstanceGrowDisk(LogicalUnit):
    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
@@ -430,18 +430,20 @@ DT_DRBD8 = "drbd"
DT_FILE = "file"
DT_SHARED_FILE = "sharedfile"
DT_BLOCK = "blockdev"
DT_RBD = "rbd"

# the set of network-mirrored disk templates
DTS_INT_MIRROR = frozenset([DT_DRBD8])

# the set of externally-mirrored disk templates (e.g. SAN, NAS)
DTS_EXT_MIRROR = frozenset([DT_SHARED_FILE, DT_BLOCK])
DTS_EXT_MIRROR = frozenset([DT_SHARED_FILE, DT_BLOCK, DT_RBD])

# the set of non-lvm-based disk templates
DTS_NOT_LVM = frozenset([DT_DISKLESS, DT_FILE, DT_SHARED_FILE, DT_BLOCK])
DTS_NOT_LVM = frozenset([DT_DISKLESS, DT_FILE, DT_SHARED_FILE,
                         DT_BLOCK, DT_RBD])

# the set of disk templates which can be grown
DTS_GROWABLE = frozenset([DT_PLAIN, DT_DRBD8, DT_FILE, DT_SHARED_FILE])
DTS_GROWABLE = frozenset([DT_PLAIN, DT_DRBD8, DT_FILE, DT_SHARED_FILE, DT_RBD])

# the set of disk templates that allow adoption
DTS_MAY_ADOPT = frozenset([DT_PLAIN, DT_BLOCK])
@@ -460,14 +462,16 @@ LD_LV = "lvm"
LD_DRBD8 = "drbd8"
LD_FILE = "file"
LD_BLOCKDEV = "blockdev"
LD_RBD = "rbd"

LOGICAL_DISK_TYPES = frozenset([
  LD_LV,
  LD_DRBD8,
  LD_FILE,
  LD_BLOCKDEV,
  LD_RBD,
  ])

LDS_BLOCK = frozenset([LD_LV, LD_DRBD8, LD_BLOCKDEV])
LDS_BLOCK = frozenset([LD_LV, LD_DRBD8, LD_BLOCKDEV, LD_RBD])

# drbd constants
DRBD_HMAC_ALG = "md5"
@@ -492,6 +496,9 @@ DRBD_VALID_BARRIER_OPT = frozenset([
  frozenset([DRBD_B_DISK_BARRIERS, DRBD_B_DISK_FLUSH, DRBD_B_DISK_DRAIN]),
  ])

# rbd tool command
RBD_CMD = "rbd"

# file backend driver
FD_LOOP = "loop"
FD_BLKTAP = "blktap"
@@ -571,7 +578,8 @@ DISK_TEMPLATES = frozenset([
  DT_DRBD8,
  DT_FILE,
  DT_SHARED_FILE,
  DT_BLOCK
  DT_BLOCK,
  DT_RBD
  ])

FILE_DRIVER = frozenset([FD_LOOP, FD_BLKTAP])
@@ -973,6 +981,7 @@ LDP_FILL_TARGET = "c-fill-target"
LDP_DELAY_TARGET = "c-delay-target"
LDP_MAX_RATE = "c-max-rate"
LDP_MIN_RATE = "c-min-rate"
LDP_POOL = "pool"

DISK_LD_TYPES = {
  LDP_RESYNC_RATE: VTYPE_INT,
  LDP_STRIPES: VTYPE_INT,
@@ -987,6 +996,7 @@ DISK_LD_TYPES = {
  LDP_DELAY_TARGET: VTYPE_INT,
  LDP_MAX_RATE: VTYPE_INT,
  LDP_MIN_RATE: VTYPE_INT,
  LDP_POOL: VTYPE_STRING,
  }

DISK_LD_PARAMETERS = frozenset(DISK_LD_TYPES.keys())
@@ -1007,6 +1017,7 @@ DRBD_DELAY_TARGET = "c-delay-target"
DRBD_MAX_RATE = "c-max-rate"
DRBD_MIN_RATE = "c-min-rate"
LV_STRIPES = "stripes"
RBD_POOL = "pool"

DISK_DT_TYPES = {
  DRBD_RESYNC_RATE: VTYPE_INT,
  DRBD_DATA_STRIPES: VTYPE_INT,
@@ -1023,6 +1034,7 @@ DISK_DT_TYPES = {
  DRBD_MAX_RATE: VTYPE_INT,
  DRBD_MIN_RATE: VTYPE_INT,
  LV_STRIPES: VTYPE_INT,
  RBD_POOL: VTYPE_STRING,
  }

DISK_DT_PARAMETERS = frozenset(DISK_DT_TYPES.keys())
@@ -1822,6 +1834,9 @@ DISK_LD_DEFAULTS = {
    },
  LD_BLOCKDEV: {
    },
  LD_RBD: {
    LDP_POOL: "rbd"
    },
  }

# readability shortcuts
@@ -1856,6 +1871,9 @@ DISK_DT_DEFAULTS = {
    },
  DT_BLOCK: {
    },
  DT_RBD: {
    RBD_POOL: DISK_LD_DEFAULTS[LD_RBD][LDP_POOL]
    },
  }

# we don't want to export the shortcuts
@@ -599,6 +599,8 @@ class Disk(ConfigObject):
      return "/dev/%s/%s" % (self.logical_id[0], self.logical_id[1])
    elif self.dev_type == constants.LD_BLOCKDEV:
      return self.logical_id[1]
    elif self.dev_type == constants.LD_RBD:
      return "/dev/%s/%s" % (self.logical_id[0], self.logical_id[1])
    return None

  def ChildrenNeeded(self):
@@ -642,7 +644,7 @@ class Disk(ConfigObject):
    """
    if self.dev_type in [constants.LD_LV, constants.LD_FILE,
                         constants.LD_BLOCKDEV]:
                         constants.LD_BLOCKDEV, constants.LD_RBD]:
      result = [node]
    elif self.dev_type in constants.LDS_DRBD:
      result = [self.logical_id[0], self.logical_id[1]]
@@ -717,7 +719,8 @@ class Disk(ConfigObject):
    actual algorithms from bdev.

    """
    if self.dev_type in (constants.LD_LV, constants.LD_FILE):
    if self.dev_type in (constants.LD_LV, constants.LD_FILE,
                         constants.LD_RBD):
      self.size += amount
    elif self.dev_type == constants.LD_DRBD8:
      if self.children:
@@ -340,5 +340,37 @@ class TestDRBD8Status(testutils.GanetiTestCase):
    self.failUnless(stats.is_in_resync)
    self.failUnless(stats.sync_percent is not None)


class TestRADOSBlockDevice(testutils.GanetiTestCase):
  def test_ParseRbdShowmappedOutput(self):
    volume_name = "abc9778-8e8ace5b.rbd.disk0"
    output_ok = \
      ("0\trbd\te69f28e5-9817.rbd.disk0\t-\t/dev/rbd0\n"
       "1\t/dev/rbd0\tabc9778-8e8ace5b.rbd.disk0\t-\t/dev/rbd16\n"
       "line\twith\tfewer\tfields\n"
       "")
    output_empty = ""
    output_no_matches = \
      ("0\trbd\te69f28e5-9817.rbd.disk0\t-\t/dev/rbd0\n"
       "1\trbd\tabcdef01-9817.rbd.disk0\t-\t/dev/rbd10\n"
       "2\trbd\tcdef0123-9817.rbd.disk0\t-\t/dev/rbd12\n"
       "something\twith\tfewer\tfields"
       "")
    output_extra_matches = \
      ("0\t/dev/rbd0\tabc9778-8e8ace5b.rbd.disk0\t-\t/dev/rbd11\n"
       "1\trbd\te69f28e5-9817.rbd.disk0\t-\t/dev/rbd0\n"
       "2\t/dev/rbd0\tabc9778-8e8ace5b.rbd.disk0\t-\t/dev/rbd16\n"
       "something\twith\tfewer\tfields"
       "")

    parse_function = bdev.RADOSBlockDevice._ParseRbdShowmappedOutput

    self.assertEqual(parse_function(output_ok, volume_name), "/dev/rbd16")
    self.assertRaises(errors.BlockDeviceError, parse_function,
                      output_empty, volume_name)
    self.assertEqual(parse_function(output_no_matches, volume_name), None)
    self.assertRaises(errors.BlockDeviceError, parse_function,
                      output_extra_matches, volume_name)


if __name__ == '__main__':
  testutils.GanetiTestProgram()
@@ -460,7 +460,9 @@ class Burner(object):
                                constants.DT_FILE,
                                constants.DT_SHARED_FILE,
                                constants.DT_PLAIN,
                                constants.DT_DRBD8)
                                constants.DT_DRBD8,
                                constants.DT_RBD,
                                )

    if options.disk_template not in supported_disk_templates:
      Err("Unknown disk template '%s'" % options.disk_template)