Commit 942be002 authored by Michael Hanselmann
Browse files

cmdlib: Add new automatic disk replacement mode


Signed-off-by: Michael Hanselmann <hansmi@google.com>
Reviewed-by: Iustin Pop <iustin@google.com>
parent 02a00186
...@@ -5715,6 +5715,23 @@ class TLReplaceDisks(Tasklet): ...@@ -5715,6 +5715,23 @@ class TLReplaceDisks(Tasklet):
return remote_node_name return remote_node_name
def _FindFaultyDisks(self, node_name):
  """Collect the indices of the instance's disks that are faulty on a node.

  Every disk of the instance first gets its ID set for the given node, then
  the mirror status of all disks is requested from that node in one RPC call.

  @param node_name: name of the node whose view of the disks is queried
  @return: list of indices (positions in the RPC payload) whose local disk
      status is C{constants.LDS_FAULTY}; empty when none is faulty

  The RPC result is checked via C{result.Raise(..., prereq=True)}; the
  exception type raised on failure is decided there and not visible here.

  """
  instance_disks = self.instance.disks

  # The disk objects must carry the IDs valid on this node before the query
  for disk in instance_disks:
    self.cfg.SetDiskID(disk, node_name)

  status = self.rpc.call_blockdev_getmirrorstatus(node_name, instance_disks)
  status.Raise("Failed to get disk status from node %s" % node_name,
               prereq=True)

  # Entries that evaluate to false carry no status and are skipped
  return [pos for (pos, entry) in enumerate(status.payload)
          if entry and entry.ldisk_status == constants.LDS_FAULTY]
def CheckPrereq(self): def CheckPrereq(self):
"""Check prerequisites. """Check prerequisites.
...@@ -5757,35 +5774,63 @@ class TLReplaceDisks(Tasklet): ...@@ -5757,35 +5774,63 @@ class TLReplaceDisks(Tasklet):
raise errors.OpPrereqError("The specified node is already the" raise errors.OpPrereqError("The specified node is already the"
" secondary node of the instance.") " secondary node of the instance.")
if self.mode == constants.REPLACE_DISK_PRI: if self.mode == constants.REPLACE_DISK_AUTO:
self.target_node = self.instance.primary_node if self.disks:
self.other_node = secondary_node raise errors.OpPrereqError("Cannot specify disks to be replaced")
check_nodes = [self.target_node, self.other_node]
faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
faulty_secondary = self._FindFaultyDisks(secondary_node)
if faulty_primary and faulty_secondary:
raise errors.OpPrereqError("Instance %s has faulty disks on more than"
" one node and can not be repaired"
" automatically" % self.instance_name)
if faulty_primary:
self.disks = faulty_primary
self.target_node = self.instance.primary_node
self.other_node = secondary_node
check_nodes = [self.target_node, self.other_node]
elif faulty_secondary:
self.disks = faulty_secondary
self.target_node = secondary_node
self.other_node = self.instance.primary_node
check_nodes = [self.target_node, self.other_node]
else:
self.disks = []
check_nodes = []
elif self.mode == constants.REPLACE_DISK_SEC: else:
self.target_node = secondary_node # Non-automatic modes
self.other_node = self.instance.primary_node if self.mode == constants.REPLACE_DISK_PRI:
check_nodes = [self.target_node, self.other_node] self.target_node = self.instance.primary_node
self.other_node = secondary_node
check_nodes = [self.target_node, self.other_node]
elif self.mode == constants.REPLACE_DISK_CHG: elif self.mode == constants.REPLACE_DISK_SEC:
self.new_node = remote_node self.target_node = secondary_node
self.other_node = self.instance.primary_node self.other_node = self.instance.primary_node
self.target_node = secondary_node check_nodes = [self.target_node, self.other_node]
check_nodes = [self.new_node, self.other_node]
_CheckNodeNotDrained(self.lu, remote_node) elif self.mode == constants.REPLACE_DISK_CHG:
self.new_node = remote_node
self.other_node = self.instance.primary_node
self.target_node = secondary_node
check_nodes = [self.new_node, self.other_node]
else: _CheckNodeNotDrained(self.lu, remote_node)
raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode) else:
raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
# If not specified all disks should be replaced
if not self.disks:
self.disks = range(len(self.instance.disks))
for node in check_nodes: for node in check_nodes:
_CheckNodeOnline(self.lu, node) _CheckNodeOnline(self.lu, node)
# If not specified all disks should be replaced
if not self.disks:
self.disks = range(len(self.instance.disks))
# Check whether disks are valid # Check whether disks are valid
for disk_idx in self.disks: for disk_idx in self.disks:
self.instance.FindDisk(disk_idx) self.instance.FindDisk(disk_idx)
...@@ -5805,7 +5850,12 @@ class TLReplaceDisks(Tasklet): ...@@ -5805,7 +5850,12 @@ class TLReplaceDisks(Tasklet):
This dispatches the disk replacement to the appropriate handler. This dispatches the disk replacement to the appropriate handler.
""" """
feedback_fn("Replacing disks for %s" % self.instance.name) if not self.disks:
feedback_fn("No disks need replacement")
return
feedback_fn("Replacing disk(s) %s for %s" %
(", ".join([str(i) for i in self.disks]), self.instance.name))
activate_disks = (not self.instance.admin_up) activate_disks = (not self.instance.admin_up)
...@@ -5814,7 +5864,8 @@ class TLReplaceDisks(Tasklet): ...@@ -5814,7 +5864,8 @@ class TLReplaceDisks(Tasklet):
_StartInstanceDisks(self.lu, self.instance, True) _StartInstanceDisks(self.lu, self.instance, True)
try: try:
if self.mode == constants.REPLACE_DISK_CHG: # Should we replace the secondary node?
if self.new_node is not None:
return self._ExecDrbd8Secondary() return self._ExecDrbd8Secondary()
else: else:
return self._ExecDrbd8DiskOnly() return self._ExecDrbd8DiskOnly()
......
...@@ -254,6 +254,7 @@ DISK_ACCESS_SET = frozenset([DISK_RDONLY, DISK_RDWR]) ...@@ -254,6 +254,7 @@ DISK_ACCESS_SET = frozenset([DISK_RDONLY, DISK_RDWR])
REPLACE_DISK_PRI = "replace_on_primary" # replace disks on primary REPLACE_DISK_PRI = "replace_on_primary" # replace disks on primary
REPLACE_DISK_SEC = "replace_on_secondary" # replace disks on secondary REPLACE_DISK_SEC = "replace_on_secondary" # replace disks on secondary
REPLACE_DISK_CHG = "replace_new_secondary" # change secondary node REPLACE_DISK_CHG = "replace_new_secondary" # change secondary node
REPLACE_DISK_AUTO = "replace_auto" # automatically replace faulty disks
# lock recalculate mode # lock recalculate mode
LOCKS_REPLACE = 'replace' LOCKS_REPLACE = 'replace'
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment