Commit b6135bbc authored by Apollon Oikonomopoulos, committed by Iustin Pop

Shared block storage support

This patch introduces basic shared block storage support.

It introduces a new storage backend, bdev.PersistentBlockDevice, to
use as a backend for shared block storage. The new bdev requires a new
BLOCKDEV_DRIVER_MANUAL constant with the value "manual" and uses it as
the first part of the block device unique_id.

A new disk template, DT_BLOCK, is introduced as well and added to
DTS_EXT_MIRROR and DTS_MAY_ADOPT. A new DTS_MUST_ADOPT constant is also
added and used to check for the presence of the adopt keyword during LU
invocation. We enforce the /dev/disk limitation upon adoption, but we
allow block devices to reside anywhere under /dev.

This is very basic support and includes no storage manipulation (provisioning,
resizing, renaming) which will have to be implemented through a "driver"
framework.
Signed-off-by: Apollon Oikonomopoulos <apollon@noc.grnet.gr>
[iustin@google.com: slight changes to bdev.py]
Signed-off-by: Iustin Pop <iustin@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
parent 2be7273c
......@@ -24,6 +24,7 @@
import re
import time
import errno
import stat
import pyparsing as pyp
import os
import logging
......@@ -2069,9 +2070,120 @@ class FileStorage(BlockDev):
return FileStorage(unique_id, children, size)
class PersistentBlockDevice(BlockDev):
  """A block device with a persistent device node.

  The device may be either directly attached, or exposed through DM (e.g.
  dm-multipath). udev helpers are probably required to give persistent,
  human-friendly names.

  For the time being, pathnames are required to lie under /dev.

  This class does no storage manipulation of its own: creating and removing
  a device are no-ops, while renaming and growing are rejected.

  """
  def __init__(self, unique_id, children, size):
    """Attaches to a static block device.

    The unique_id must be a two-element tuple or list: the first element
    has to be constants.BLOCKDEV_DRIVER_MANUAL and the second one is the
    device path, which (after resolving symlinks) must lie under /dev.
    The children and size arguments are handed to the parent constructor
    unchanged.

    A ValueError is raised if unique_id has the wrong shape, if the
    resolved path lies outside /dev, or if the driver is not the manual
    one.

    """
    super(PersistentBlockDevice, self).__init__(unique_id, children, size)
    if not isinstance(unique_id, (tuple, list)) or len(unique_id) != 2:
      raise ValueError("Invalid configuration data %s" % str(unique_id))
    self.dev_path = unique_id[1]
    # Resolve symlinks once; the result is used both for the check and for
    # the error message
    real_path = os.path.realpath(self.dev_path)
    if not real_path.startswith('/dev/'):
      raise ValueError("Full path '%s' lies outside /dev" % real_path)
    # TODO: this is just a safety guard checking that we only deal with devices
    # we know how to handle. In the future this will be integrated with
    # external storage backends and possible values will probably be collected
    # from the cluster configuration.
    if unique_id[0] != constants.BLOCKDEV_DRIVER_MANUAL:
      raise ValueError("Got persistent block device of invalid type: %s" %
                       unique_id[0])

    self.major = self.minor = None
    self.Attach()

  @classmethod
  def Create(cls, unique_id, children, size):
    """Create a new device.

    This is a noop, we only return a PersistentBlockDevice instance. The
    size argument is not used; the instance is always built with a size
    of 0.

    """
    return PersistentBlockDevice(unique_id, children, 0)

  def Remove(self):
    """Remove a device.

    This is a noop; nothing is done to the underlying device node.

    """
    pass

  def Rename(self, new_id):
    """Rename this device.

    Renaming is not supported; the call is always reported as an error
    through _ThrowError (defined elsewhere in this module).

    """
    _ThrowError("Rename is not supported for PersistentBlockDev storage")

  def Attach(self):
    """Attach to an existing block device.

    The device path is stat()'ed; if it exists and is a block device, its
    major and minor numbers are recorded and the device is flagged as
    attached.

    Returns True on success. Returns False (after logging the reason) if
    the path cannot be stat()'ed or is not a block device.

    """
    self.attached = False
    # Forget the numbers from any earlier successful attach, so that a
    # failed re-attach does not leave stale values behind
    self.major = self.minor = None

    try:
      st = os.stat(self.dev_path)
    except OSError as err:
      logging.error("Error stat()'ing %s: %s", self.dev_path, str(err))
      return False

    if not stat.S_ISBLK(st.st_mode):
      logging.error("%s is not a block device", self.dev_path)
      return False

    self.major = os.major(st.st_rdev)
    self.minor = os.minor(st.st_rdev)
    self.attached = True

    return True

  def Assemble(self):
    """Assemble the device.

    This is a noop.

    """
    pass

  def Shutdown(self):
    """Shutdown the device.

    This is a noop.

    """
    pass

  def Open(self, force=False):
    """Make the device ready for I/O.

    This is a noop; the force argument is ignored.

    """
    pass

  def Close(self):
    """Notifies that the device will no longer be used for I/O.

    This is a noop.

    """
    pass

  def Grow(self, amount):
    """Grow the block device.

    Growing is not supported; the call is always reported as an error
    through _ThrowError (defined elsewhere in this module).

    """
    _ThrowError("Grow is not supported for PersistentBlockDev storage")
# Maps each logical disk type constant (constants.LD_*) to the class
# registered for it in this module; LD_BLOCKDEV is served by
# PersistentBlockDevice. Further entries may be added conditionally below.
DEV_MAP = {
constants.LD_LV: LogicalVolume,
constants.LD_DRBD8: DRBD8,
constants.LD_BLOCKDEV: PersistentBlockDevice,
}
if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
......
......@@ -6661,6 +6661,19 @@ def _GenerateDiskTemplate(lu, template_name,
disk_index)),
mode=disk["mode"])
disks.append(disk_dev)
elif template_name == constants.DT_BLOCK:
if len(secondary_nodes) != 0:
raise errors.ProgrammerError("Wrong template configuration")
for idx, disk in enumerate(disk_info):
disk_index = idx + base_index
disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
disk["adopt"]),
iv_name="disk/%d" % disk_index,
mode=disk["mode"])
disks.append(disk_dev)
else:
raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
return disks
......@@ -6887,6 +6900,7 @@ def _ComputeDiskSize(disk_template, disks):
constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
constants.DT_FILE: None,
constants.DT_SHARED_FILE: 0,
constants.DT_BLOCK: 0,
}
if disk_template not in req_size_dict:
......@@ -7022,6 +7036,12 @@ class LUInstanceCreate(LogicalUnit):
if self.op.mode == constants.INSTANCE_IMPORT:
raise errors.OpPrereqError("Disk adoption not allowed for"
" instance import", errors.ECODE_INVAL)
else:
if self.op.disk_template in constants.DTS_MUST_ADOPT:
raise errors.OpPrereqError("Disk template %s requires disk adoption,"
" but no 'adopt' parameter given" %
self.op.disk_template,
errors.ECODE_INVAL)
self.adopt_disks = has_adopt
......@@ -7614,7 +7634,7 @@ class LUInstanceCreate(LogicalUnit):
req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
_CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
else: # instead, we must check the adoption data
elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
if len(all_lvs) != len(self.disks):
raise errors.OpPrereqError("Duplicate volume names given for adoption",
......@@ -7650,6 +7670,34 @@ class LUInstanceCreate(LogicalUnit):
for dsk in self.disks:
dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
elif self.op.disk_template == constants.DT_BLOCK:
# Normalize and de-duplicate device paths
all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
if len(all_disks) != len(self.disks):
raise errors.OpPrereqError("Duplicate disk names given for adoption",
errors.ECODE_INVAL)
baddisks = [d for d in all_disks
if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
if baddisks:
raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
" cannot be adopted" %
(", ".join(baddisks),
constants.ADOPTABLE_BLOCKDEV_ROOT),
errors.ECODE_INVAL)
node_disks = self.rpc.call_bdev_sizes([pnode.name],
list(all_disks))[pnode.name]
node_disks.Raise("Cannot get block device information from node %s" %
pnode.name)
node_disks = node_disks.payload
delta = all_disks.difference(node_disks.keys())
if delta:
raise errors.OpPrereqError("Missing block device(s): %s" %
utils.CommaJoin(delta),
errors.ECODE_INVAL)
for dsk in self.disks:
dsk["size"] = int(float(node_disks[dsk["adopt"]]))
_CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
_CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
......@@ -7721,17 +7769,18 @@ class LUInstanceCreate(LogicalUnit):
)
if self.adopt_disks:
# rename LVs to the newly-generated names; we need to construct
# 'fake' LV disks with the old data, plus the new unique_id
tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
for t_dsk, a_dsk in zip (tmp_disks, self.disks):
rename_to.append(t_dsk.logical_id)
t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
self.cfg.SetDiskID(t_dsk, pnode_name)
result = self.rpc.call_blockdev_rename(pnode_name,
zip(tmp_disks, rename_to))
result.Raise("Failed to rename adoped LVs")
if self.op.disk_template == constants.DT_PLAIN:
# rename LVs to the newly-generated names; we need to construct
# 'fake' LV disks with the old data, plus the new unique_id
tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
for t_dsk, a_dsk in zip (tmp_disks, self.disks):
rename_to.append(t_dsk.logical_id)
t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
self.cfg.SetDiskID(t_dsk, pnode_name)
result = self.rpc.call_blockdev_rename(pnode_name,
zip(tmp_disks, rename_to))
result.Raise("Failed to rename adoped LVs")
else:
feedback_fn("* creating instance disks...")
try:
......
......@@ -120,6 +120,7 @@ CRYPTO_KEYS_DIR = RUN_GANETI_DIR + "/crypto"
CRYPTO_KEYS_DIR_MODE = SECURE_DIR_MODE
IMPORT_EXPORT_DIR = RUN_GANETI_DIR + "/import-export"
IMPORT_EXPORT_DIR_MODE = 0755
ADOPTABLE_BLOCKDEV_ROOT = "/dev/disk/"
# keep RUN_GANETI_DIR first here, to make sure all get created when the node
# daemon is started (this takes care of RUN_DIR being tmpfs)
SUB_RUN_DIRS = [ RUN_GANETI_DIR, BDEV_CACHE_DIR, DISK_LINKS_DIR ]
......@@ -363,21 +364,25 @@ DT_PLAIN = "plain"
DT_DRBD8 = "drbd"
DT_FILE = "file"
DT_SHARED_FILE = "sharedfile"
DT_BLOCK = "blockdev"
# the set of network-mirrored disk templates
DTS_NET_MIRROR = frozenset([DT_DRBD8])
# the set of externally mirrored disk templates
DTS_EXT_MIRROR = frozenset([DT_SHARED_FILE])
# the set of externally-mirrored disk templates (e.g. SAN, NAS)
DTS_EXT_MIRROR = frozenset([DT_SHARED_FILE, DT_BLOCK])
# the set of non-lvm-based disk templates
DTS_NOT_LVM = frozenset([DT_DISKLESS, DT_FILE, DT_SHARED_FILE])
DTS_NOT_LVM = frozenset([DT_DISKLESS, DT_FILE, DT_SHARED_FILE, DT_BLOCK])
# the set of disk templates which can be grown
DTS_GROWABLE = frozenset([DT_PLAIN, DT_DRBD8, DT_FILE, DT_SHARED_FILE])
# the set of disk templates that allow adoption
DTS_MAY_ADOPT = frozenset([DT_PLAIN])
DTS_MAY_ADOPT = frozenset([DT_PLAIN, DT_BLOCK])
# the set of disk templates that *must* use adoption
DTS_MUST_ADOPT = frozenset([DT_BLOCK])
# the set of disk templates that allow migrations
DTS_MIRRORED = frozenset.union(DTS_NET_MIRROR, DTS_EXT_MIRROR)
......@@ -387,7 +392,8 @@ DTS_MIRRORED = frozenset.union(DTS_NET_MIRROR, DTS_EXT_MIRROR)
LD_LV = "lvm"
LD_DRBD8 = "drbd8"
LD_FILE = "file"
LDS_BLOCK = frozenset([LD_LV, LD_DRBD8])
LD_BLOCKDEV = "blockdev"
LDS_BLOCK = frozenset([LD_LV, LD_DRBD8, LD_BLOCKDEV])
# drbd constants
DRBD_HMAC_ALG = "md5"
......@@ -460,7 +466,7 @@ RIE_CONNECT_RETRIES = 10
CHILD_LINGER_TIMEOUT = 5.0
DISK_TEMPLATES = frozenset([DT_DISKLESS, DT_PLAIN, DT_DRBD8,
DT_FILE, DT_SHARED_FILE])
DT_FILE, DT_SHARED_FILE, DT_BLOCK])
FILE_DRIVER = frozenset([FD_LOOP, FD_BLKTAP])
......@@ -1312,3 +1318,6 @@ VALID_ALLOC_POLICIES = [
ALLOC_POLICY_LAST_RESORT,
ALLOC_POLICY_UNALLOCABLE,
]
# Temporary external/shared storage parameters
BLOCKDEV_DRIVER_MANUAL = "manual"
......@@ -441,6 +441,8 @@ class Disk(ConfigObject):
"""
if self.dev_type == constants.LD_LV:
return "/dev/%s/%s" % (self.logical_id[0], self.logical_id[1])
elif self.dev_type == constants.LD_BLOCKDEV:
return self.logical_id[1]
return None
def ChildrenNeeded(self):
......@@ -483,7 +485,8 @@ class Disk(ConfigObject):
devices needs to (or can) be assembled.
"""
if self.dev_type in [constants.LD_LV, constants.LD_FILE]:
if self.dev_type in [constants.LD_LV, constants.LD_FILE,
constants.LD_BLOCKDEV]:
result = [node]
elif self.dev_type in constants.LDS_DRBD:
result = [self.logical_id[0], self.logical_id[1]]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment