Commit 6b93ec9d authored by Iustin Pop
Browse files

Forward-port DrbdNetReconfig

This is a modified forward-port of DrbdNetReconfig and its associated
RPCs. In Ganeti 2.0, these functions will be used for two things:
  - live migration (as in 1.2)
  - and for other network reconfiguration tasks, since DRBD8.Attach()
    doesn't do them anymore

Because of the Attach() changes, we can now implement the
AttachNet/DisconnectNet functions as independent entities, and we don't
need the cache anymore.

Note these functions are copies of the latest 1.2 code, and not
cherry-picks of the (many) patches that went into 1.2.

Reviewed-by: ultrotter
parent f96e3c4f
......@@ -232,6 +232,44 @@ class NodeHttpServer(http.server.HttpServer):
disks = [objects.Disk.FromDict(cf) for cf in params[1]]
return backend.CloseBlockDevices(params[0], disks)
# blockdev/drbd specific methods ----------
@staticmethod
def perspective_drbd_disconnect_net(params):
  """Disconnect the network side of a set of drbd disks.

  This is only meaningful for drbd disks, so every entry of the disk
  list must describe a drbd device.

  @param params: a two-element sequence C{(nodes_ip, disks)}, where
      C{disks} holds the serialized (dict) form of the disks
  @return: whatever C{backend.DrbdDisconnectNet} returns

  """
  nodes_ip, disk_dicts = params
  # rebuild the disk objects from their serialized form
  disk_objs = [objects.Disk.FromDict(entry) for entry in disk_dicts]
  return backend.DrbdDisconnectNet(nodes_ip, disk_objs)
@staticmethod
def perspective_drbd_attach_net(params):
  """Attach the network side of a set of drbd disks.

  This is only meaningful for drbd disks, so every entry of the disk
  list must describe a drbd device.

  @param params: a four-element sequence
      C{(nodes_ip, disks, instance_name, multimaster)}, where C{disks}
      holds the serialized (dict) form of the disks
  @return: whatever C{backend.DrbdAttachNet} returns

  """
  nodes_ip, disk_dicts, instance_name, multimaster = params
  # rebuild the disk objects from their serialized form
  disk_objs = [objects.Disk.FromDict(entry) for entry in disk_dicts]
  return backend.DrbdAttachNet(nodes_ip, disk_objs, instance_name,
                               multimaster)
@staticmethod
def perspective_drbd_wait_sync(params):
  """Query the synchronisation state of a set of drbd disks.

  This is only meaningful for drbd disks, so every entry of the disk
  list must describe a drbd device.

  @param params: a two-element sequence C{(nodes_ip, disks)}, where
      C{disks} holds the serialized (dict) form of the disks
  @return: whatever C{backend.DrbdWaitSync} returns

  """
  nodes_ip, disk_dicts = params
  # rebuild the disk objects from their serialized form
  disk_objs = [objects.Disk.FromDict(entry) for entry in disk_dicts]
  return backend.DrbdWaitSync(nodes_ip, disk_objs)
# export/import --------------------------
@staticmethod
......
......@@ -2177,6 +2177,125 @@ def DemoteFromMC():
return (True, "Done")
def _FindDisks(nodes_ip, disks):
  """Set the physical ID on the disks and look up their block devices.

  @param nodes_ip: handed unchanged to each disk's C{SetPhysicalID}
      (presumably a node-to-IP mapping; confirm against the caller)
  @param disks: the disk objects to resolve
  @return: C{(True, devices)} with one block device per disk, in the
      same order as C{disks}, or C{(False, message)} as soon as one
      disk cannot be found

  """
  hostname = utils.HostInfo().name

  # first pass: fix up the physical ID of every disk
  for disk in disks:
    disk.SetPhysicalID(hostname, nodes_ip)

  # second pass: resolve each disk, giving up on the first miss
  found = []
  for disk in disks:
    device = _RecursiveFindBD(disk)
    if device is None:
      return (False, "Can't find device %s" % disk)
    found.append(device)

  return (True, found)
def DrbdDisconnectNet(nodes_ip, disks):
"""Disconnects the network on a list of drbd devices.
"""
status, bdevs = _FindDisks(nodes_ip, disks)
if not status:
return status, bdevs
# disconnect disks
for rd in bdevs:
try:
rd.DisconnectNet()
except errors.BlockDeviceError, err:
logging.exception("Failed to go into standalone mode")
return (False, "Can't change network configuration: %s" % str(err))
return (True, "All disks are now disconnected")
def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster):
"""Attaches the network on a list of drbd devices.
"""
status, bdevs = _FindDisks(nodes_ip, disks)
if not status:
return status, bdevs
if multimaster:
for cf, rd in zip(disks, bdevs):
try:
_SymlinkBlockDev(instance_name, rd.dev_path, cf.iv_name)
except EnvironmentError, err:
return (False, "Can't create symlink: %s" % str(err))
# reconnect disks, switch to new master configuration and if
# needed primary mode
for rd in bdevs:
try:
rd.AttachNet(multimaster)
except errors.BlockDeviceError, err:
return (False, "Can't change network configuration: %s" % str(err))
# wait until the disks are connected; we need to retry the re-attach
# if the device becomes standalone, as this might happen if the one
# node disconnects and reconnects in a different mode before the
# other node reconnects; in this case, one or both of the nodes will
# decide it has wrong configuration and switch to standalone
RECONNECT_TIMEOUT = 2 * 60
sleep_time = 0.100 # start with 100 miliseconds
timeout_limit = time.time() + RECONNECT_TIMEOUT
while time.time() < timeout_limit:
all_connected = True
for rd in bdevs:
stats = rd.GetProcStatus()
if not (stats.is_connected or stats.is_in_resync):
all_connected = False
if stats.is_standalone:
# peer had different config info and this node became
# standalone, even though this should not happen with the
# new staged way of changing disk configs
try:
rd.ReAttachNet(multimaster)
except errors.BlockDeviceError, err:
return (False, "Can't change network configuration: %s" % str(err))
if all_connected:
break
time.sleep(sleep_time)
sleep_time = min(5, sleep_time * 1.5)
if not all_connected:
return (False, "Timeout in disk reconnecting")
if multimaster:
# change to primary mode
for rd in bdevs:
rd.Open()
if multimaster:
msg = "multi-master and primary"
else:
msg = "single-master"
return (True, "Disks are now configured as %s" % msg)
def DrbdWaitSync(nodes_ip, disks):
  """Report the synchronisation state of a list of drbd devices.

  The status of each device is read exactly once; no sleeping or
  retrying is done here, so callers wanting to wait have to call this
  repeatedly.

  @param nodes_ip: passed through to C{_FindDisks}
  @param disks: the disk objects to check
  @return: the failure tuple from C{_FindDisks} if the lookup fails;
      otherwise C{(ok, (all_done, min_percent))}, where C{ok} is False
      if a device is neither connected nor in resync (checking stops
      at that device), C{all_done} is True if no examined device is in
      resync, and C{min_percent} is the lowest sync percentage seen
      (100 if none was reported)

  """
  found, devices = _FindDisks(nodes_ip, disks)
  if not found:
    return found, devices

  lowest_percent = 100
  finished = True
  for device in devices:
    proc = device.GetProcStatus()
    if not proc.is_connected and not proc.is_in_resync:
      # device is in an unexpected state: report what we have so far
      return (False, (finished, lowest_percent))
    if proc.is_in_resync:
      finished = False
    if proc.sync_percent is not None:
      lowest_percent = min(lowest_percent, proc.sync_percent)

  return (True, (finished, lowest_percent))
class HooksRunner(object):
"""Hook runner.
......
......@@ -567,6 +567,8 @@ class DRBD8Status(object):
self.is_diskless = self.ldisk == "Diskless"
self.is_disk_uptodate = self.ldisk == "UpToDate"
self.is_in_resync = self.cstatus in ('SyncSource', 'SyncTarget')
m = self.SYNC_RE.match(procline)
if m:
self.sync_percent = float(m.group(1))
......
......@@ -736,6 +736,35 @@ class RpcRunner(object):
params = [instance_name, [cf.ToDict() for cf in disks]]
return self._SingleNodeCall(node, "blockdev_close", params)
def call_drbd_disconnect_net(self, node_list, nodes_ip, disks):
  """Disconnect the network of the given drbd devices.

  This is a multi-node call.

  @param node_list: the nodes on which to run the call
  @param nodes_ip: sent unchanged as the first RPC argument
  @param disks: the disk objects; they are sent in their dict form
  @return: the result of the C{drbd_disconnect_net} multi-node call

  """
  serialized = [disk.ToDict() for disk in disks]
  return self._MultiNodeCall(node_list, "drbd_disconnect_net",
                             [nodes_ip, serialized])
def call_drbd_attach_net(self, node_list, nodes_ip,
                         disks, instance_name, multimaster):
  """Attaches the network of the given drbd devices.

  This is a multi-node call.

  @param node_list: the nodes on which to run the call
  @param nodes_ip: sent unchanged as the first RPC argument
  @param disks: the disk objects; they are sent in their dict form
  @param instance_name: sent unchanged as the third RPC argument
  @param multimaster: sent unchanged as the fourth RPC argument
  @return: the result of the C{drbd_attach_net} multi-node call

  """
  return self._MultiNodeCall(node_list, "drbd_attach_net",
                             [nodes_ip, [cf.ToDict() for cf in disks],
                              instance_name, multimaster])
def call_drbd_wait_sync(self, node_list, nodes_ip, disks):
  """Query the synchronisation state of the given drbd devices.

  This is a multi-node call.

  @param node_list: the nodes on which to run the call
  @param nodes_ip: sent unchanged as the first RPC argument
  @param disks: the disk objects; they are sent in their dict form
  @return: the result of the C{drbd_wait_sync} multi-node call

  """
  serialized = [disk.ToDict() for disk in disks]
  return self._MultiNodeCall(node_list, "drbd_wait_sync",
                             [nodes_ip, serialized])
@classmethod
def call_upload_file(cls, node_list, file_name, address_list=None):
"""Upload a file.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment