Add support for replacing/refreshing OSDs

Adds commands to both replace an OSD disk, and refresh (reimport) an
existing OSD disk on a new node. This handles the cases where an OSD
disk should be replaced (either due to upgrades or failures) or where a
node is rebuilt in-place and an existing OSD must be re-imported to it.

This should avoid the need to do a full remove/add sequence for either
case.

Also cleans up some aspects of OSD removal that are identical between
methods (e.g. using safe-to-destroy and sleeping after stopping) and
fixes a bug if an OSD does not truly exist when the daemon starts up.
This commit is contained in:
2022-05-06 15:31:58 -04:00
parent 8027a6efdc
commit 7a40c7a55b
7 changed files with 853 additions and 37 deletions

View File

@ -255,6 +255,46 @@ def ceph_osd_add(config, node, device, weight, ext_db_flag, ext_db_ratio):
return retstatus, response.json().get("message", "")
def ceph_osd_replace(config, osdid, device, weight):
"""
Replace an existing Ceph OSD with a new device
API endpoint: POST /api/v1/storage/ceph/osd/{osdid}
API arguments: device={device}, weight={weight}
API schema: {"message":"{data}"}
"""
params = {"device": device, "weight": weight, "yes-i-really-mean-it": "yes"}
response = call_api(config, "post", f"/storage/ceph/osd/{osdid}", params=params)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get("message", "")
def ceph_osd_refresh(config, osdid, device):
"""
Refresh (reimport) an existing Ceph OSD with device {device}
API endpoint: PUT /api/v1/storage/ceph/osd/{osdid}
API arguments: device={device}
API schema: {"message":"{data}"}
"""
params = {
"device": device,
}
response = call_api(config, "put", f"/storage/ceph/osd/{osdid}", params=params)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get("message", "")
def ceph_osd_remove(config, osdid, force_flag):
"""
Remove Ceph OSD

View File

@ -3371,6 +3371,74 @@ def ceph_osd_add(node, device, weight, ext_db_flag, ext_db_ratio, confirm_flag):
cleanup(retcode, retmsg)
###############################################################################
# pvc storage osd replace
###############################################################################
@click.command(name="replace", short_help="Replace OSD block device.")
@click.argument("osdid")
@click.argument("device")
@click.option(
"-w",
"--weight",
"weight",
default=1.0,
show_default=True,
help="New weight of the OSD within the CRUSH map.",
)
@click.option(
"-y",
"--yes",
"confirm_flag",
is_flag=True,
default=False,
help="Confirm the removal",
)
@cluster_req
def ceph_osd_replace(osdid, device, weight, confirm_flag):
"""
Replace the block device of an existing OSD with ID OSDID with DEVICE. Use this command to replace a failed or smaller OSD block device with a new one.
DEVICE must be a valid raw block device (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'.
The weight of an OSD should reflect the ratio of the OSD to other OSDs in the storage cluster. For details, see 'pvc storage osd add --help'. Note that the current weight must be explicitly specified if it differs from the default.
Existing IDs, external DB devices, etc. of the OSD will be preserved; data will be lost and rebuilt from the remaining healthy OSDs.
"""
if not confirm_flag and not config["unsafe"]:
try:
click.confirm(
"Replace OSD {} with block device {}".format(osdid, device),
prompt_suffix="? ",
abort=True,
)
except Exception:
exit(0)
retcode, retmsg = pvc_ceph.ceph_osd_replace(config, osdid, device, weight)
cleanup(retcode, retmsg)
###############################################################################
# pvc storage osd refresh
###############################################################################
@click.command(name="refresh", short_help="Refresh (reimport) OSD device.")
@click.argument("osdid")
@click.argument("device")
@cluster_req
def ceph_osd_refresh(osdid, device):
"""
Refresh (reimport) the block DEVICE of an existing OSD with ID OSDID. Use this command to reimport a working OSD into a rebuilt/replaced node.
DEVICE must be a valid raw block device (e.g. '/dev/sda', '/dev/nvme0n1', '/dev/disk/by-path/...', '/dev/disk/by-id/...') or a "detect" string. Using partitions is not supported. A "detect" string is a string in the form "detect:<NAME>:<HUMAN-SIZE>:<ID>". For details, see 'pvc storage osd add --help'.
Existing data, IDs, weights, etc. of the OSD will be preserved.
NOTE: If a device had an external DB device, this is not automatically handled at this time. It is best to remove and re-add the OSD instead.
"""
retcode, retmsg = pvc_ceph.ceph_osd_refresh(config, osdid, device)
cleanup(retcode, retmsg)
###############################################################################
# pvc storage osd remove
###############################################################################
@ -5872,6 +5940,8 @@ ceph_benchmark.add_command(ceph_benchmark_list)
ceph_osd.add_command(ceph_osd_create_db_vg)
ceph_osd.add_command(ceph_osd_add)
ceph_osd.add_command(ceph_osd_replace)
ceph_osd.add_command(ceph_osd_refresh)
ceph_osd.add_command(ceph_osd_remove)
ceph_osd.add_command(ceph_osd_in)
ceph_osd.add_command(ceph_osd_out)