Support adding and removing Ceph pools

This commit is contained in:
2018-10-31 23:38:17 -04:00
parent 922a3a9ee1
commit 99fcb21e3b
6 changed files with 681 additions and 64 deletions

View File

@ -22,8 +22,9 @@
import re
import click
import ast
import json
import time
import math
import client_lib.ansiprint as ansiprint
import client_lib.zkhandler as zkhandler
@ -33,6 +34,22 @@ import client_lib.common as common
# Supplemental functions
#
# Verify OSD is valid in cluster
def verifyOSD(zk_conn, osd_id):
    """Report whether an OSD with the given ID is recorded in the cluster.

    Checks for the `/ceph/osds/<osd_id>` key through zkhandler and returns
    True when it exists, False otherwise.
    """
    osd_key = '/ceph/osds/{}'.format(osd_id)
    return True if zkhandler.exists(zk_conn, osd_key) else False
# Verify OSD path is valid in cluster
def verifyOSDBlock(zk_conn, node, device):
    """Find the OSD recorded against a given node and block device.

    Walks every child of `/ceph/osds`, reading its `node` and `device` keys,
    and returns the first OSD whose pair equals the one requested. Returns
    None when no recorded OSD matches.
    """
    wanted = (node, device)
    for candidate in zkhandler.listchildren(zk_conn, '/ceph/osds'):
        base_key = '/ceph/osds/{}'.format(candidate)
        # Both keys are read for every candidate before comparing
        recorded_node = zkhandler.readdata(zk_conn, base_key + '/node')
        recorded_device = zkhandler.readdata(zk_conn, base_key + '/device')
        if wanted == (recorded_node, recorded_device):
            return candidate
    return None
#
# Cluster search functions
#
@ -44,7 +61,7 @@ def getClusterOSDList(zk_conn):
def getOSDInformation(zk_conn, osd_id):
    """Fetch and decode the statistics recorded for one OSD.

    Reads the `/ceph/osds/<osd_id>/stats` key and decodes its contents as
    JSON.

    Returns the decoded statistics as a dict.
    """
    osd_stats_raw = zkhandler.readdata(zk_conn, '/ceph/osds/{}/stats'.format(osd_id))
    # Decode once, as JSON. A preceding ast.literal_eval() pass was dropped:
    # its result was immediately overwritten, and it raises on JSON-only
    # literals such as true/false/null.
    osd_stats = dict(json.loads(osd_stats_raw))
    return osd_stats
def getCephOSDs(zk_conn):
@ -53,15 +70,19 @@ def getCephOSDs(zk_conn):
def formatOSDList(zk_conn, osd_list):
osd_list_output = []
osd_uuid = dict()
osd_up = dict()
osd_up_colour = dict()
osd_in = dict()
osd_in_colour = dict()
osd_weight = dict()
osd_pgs = dict()
osd_node = dict()
osd_used = dict()
osd_free = dict()
osd_util = dict()
osd_var= dict()
osd_wrops = dict()
osd_wrdata = dict()
osd_rdops = dict()
@ -73,9 +94,12 @@ def formatOSDList(zk_conn, osd_list):
osd_up_length = 4
osd_in_length = 4
osd_weight_length = 7
osd_pgs_length = 4
osd_node_length = 5
osd_used_length = 5
osd_free_length = 6
osd_util_length = 6
osd_var_length = 6
osd_wrops_length = 4
osd_wrdata_length = 5
osd_rdops_length = 4
@ -125,7 +149,13 @@ def formatOSDList(zk_conn, osd_list):
if _osd_weight_length > osd_weight_length:
osd_weight_length = _osd_weight_length
# Set the used/available space and length
# Set the pgs and length
osd_pgs[osd] = osd_stats['pgs']
_osd_pgs_length = len(str(osd_pgs[osd])) + 1
if _osd_pgs_length > osd_pgs_length:
osd_pgs_length = _osd_pgs_length
# Set the used/available/utilization%/variance and lengths
osd_used[osd] = osd_stats['used']
_osd_used_length = len(osd_used[osd]) + 1
if _osd_used_length > osd_used_length:
@ -134,6 +164,14 @@ def formatOSDList(zk_conn, osd_list):
_osd_free_length = len(osd_free[osd]) + 1
if _osd_free_length > osd_free_length:
osd_free_length = _osd_free_length
osd_util[osd] = round(osd_stats['utilization'], 2)
_osd_util_length = len(str(osd_util[osd])) + 1
if _osd_util_length > osd_util_length:
osd_util_length = _osd_util_length
osd_var[osd] = round(osd_stats['var'], 2)
_osd_var_length = len(str(osd_var[osd])) + 1
if _osd_var_length > osd_var_length:
osd_var_length = _osd_var_length
# Set the write IOPS/data and length
osd_wrops[osd] = osd_stats['wr_ops']
@ -162,8 +200,11 @@ def formatOSDList(zk_conn, osd_list):
{osd_up: <{osd_up_length}} \
{osd_in: <{osd_in_length}} \
{osd_weight: <{osd_weight_length}} \
{osd_pgs: <{osd_pgs_length}} \
Space: {osd_used: <{osd_used_length}} \
{osd_free: <{osd_free_length}} \
{osd_util: <{osd_util_length}} \
{osd_var: <{osd_var_length}} \
Write: {osd_wrops: <{osd_wrops_length}} \
{osd_wrdata: <{osd_wrdata_length}} \
Read: {osd_rdops: <{osd_rdops_length}} \
@ -176,8 +217,11 @@ Read: {osd_rdops: <{osd_rdops_length}} \
osd_up_length=osd_up_length,
osd_in_length=osd_in_length,
osd_weight_length=osd_weight_length,
osd_pgs_length=osd_pgs_length,
osd_used_length=osd_used_length,
osd_free_length=osd_free_length,
osd_util_length=osd_util_length,
osd_var_length=osd_var_length,
osd_wrops_length=osd_wrops_length,
osd_wrdata_length=osd_wrdata_length,
osd_rdops_length=osd_rdops_length,
@ -187,8 +231,11 @@ Read: {osd_rdops: <{osd_rdops_length}} \
osd_up='Up',
osd_in='In',
osd_weight='Weight',
osd_pgs='PGs',
osd_used='Used',
osd_free='Free',
osd_util='Util%',
osd_var='Var',
osd_wrops='OPS',
osd_wrdata='Data',
osd_rdops='OPS',
@ -203,8 +250,11 @@ Read: {osd_rdops: <{osd_rdops_length}} \
{osd_up_colour}{osd_up: <{osd_up_length}}{end_colour} \
{osd_in_colour}{osd_in: <{osd_in_length}}{end_colour} \
{osd_weight: <{osd_weight_length}} \
{osd_pgs: <{osd_pgs_length}} \
{osd_used: <{osd_used_length}} \
{osd_free: <{osd_free_length}} \
{osd_util: <{osd_util_length}} \
{osd_var: <{osd_var_length}} \
{osd_wrops: <{osd_wrops_length}} \
{osd_wrdata: <{osd_wrdata_length}} \
{osd_rdops: <{osd_rdops_length}} \
@ -218,8 +268,11 @@ Read: {osd_rdops: <{osd_rdops_length}} \
osd_up_length=osd_up_length,
osd_in_length=osd_in_length,
osd_weight_length=osd_weight_length,
osd_pgs_length=osd_pgs_length,
osd_used_length=osd_used_length,
osd_free_length=osd_free_length,
osd_util_length=osd_util_length,
osd_var_length=osd_var_length,
osd_wrops_length=osd_wrops_length,
osd_wrdata_length=osd_wrdata_length,
osd_rdops_length=osd_rdops_length,
@ -231,8 +284,11 @@ Read: {osd_rdops: <{osd_rdops_length}} \
osd_in_colour=osd_in_colour[osd],
osd_in=osd_in[osd],
osd_weight=osd_weight[osd],
osd_pgs=osd_pgs[osd],
osd_used=osd_used[osd],
osd_free=osd_free[osd],
osd_util=osd_util[osd],
osd_var=osd_var[osd],
osd_wrops=osd_wrops[osd],
osd_wrdata=osd_wrdata[osd],
osd_rdops=osd_rdops[osd],
@ -243,6 +299,232 @@ Read: {osd_rdops: <{osd_rdops_length}} \
output_string = osd_list_output_header + '\n' + '\n'.join(sorted(osd_list_output))
return output_string
def getClusterPoolList(zk_conn):
    """Return the children of the `/ceph/pools` key, as listed by zkhandler."""
    pools_key = '/ceph/pools'
    return zkhandler.listchildren(zk_conn, pools_key)
def getPoolInformation(zk_conn, name):
    """Load a pool's statistics and add human-readable size fields.

    Reads the `/ceph/pools/<name>/stats` key and decodes it as JSON. For each
    of `size_bytes`, `read_bytes` and `write_bytes` that is present, a
    matching `size_formatted` / `read_formatted` / `write_formatted` string is
    added: values up to 9999 are shown as `<n>B`; larger values are divided by
    the smallest binary unit (K, M, G, T, P) that brings the rounded-up result
    to four digits or fewer.

    Returns the statistics dict including the added `*_formatted` entries.
    """
    pool_stats_raw = zkhandler.readdata(zk_conn, '/ceph/pools/{}/stats'.format(name))
    pool_stats = dict(json.loads(pool_stats_raw))

    # Binary units, smallest first, so the first one that fits is the answer
    units = (
        ('K', 1024),
        ('M', 1024 ** 2),
        ('G', 1024 ** 3),
        ('T', 1024 ** 4),
        ('P', 1024 ** 5),
    )

    for datatype in 'size_bytes', 'read_bytes', 'write_bytes':
        if datatype not in pool_stats:
            # Incomplete stats: leave the field out instead of raising here,
            # so callers can decide how to treat a partially-reported pool
            continue

        databytes = pool_stats[datatype]
        if databytes > 9999:
            for unit, factor in units:
                scaled = int(math.ceil(databytes / factor))
                if scaled <= 9999:
                    break
            # If even the largest unit needs 5+ digits, the loop ends without
            # breaking and the value is shown in that largest unit
            databytes_formatted = '{}{}'.format(scaled, unit)
        else:
            databytes_formatted = '{}B'.format(databytes)

        pool_stats[datatype.replace('bytes', 'formatted')] = databytes_formatted

    return pool_stats
def getCephPools(zk_conn):
    """List the children of `/ceph/pools` (one entry per recorded pool)."""
    return zkhandler.listchildren(zk_conn, '/ceph/pools')
def formatPoolList(zk_conn, pool_list):
    """Render a table of pool statistics as a single string.

    For every pool in `pool_list` the statistics are fetched with
    getPoolInformation(). Pools whose stats have no `id` entry (or a falsy
    one) are left out of the table instead of raising KeyError when the rows
    are rendered. Column widths grow to fit the widest value, and rows are
    padded under the header's `Objects:`, `Write:` and `Read:` group labels so
    the columns line up.

    Returns the header line followed by the row lines in sorted order,
    joined with newlines.
    """
    # Column layout: (stats key, header label, minimum width, header prefix).
    # A stats key of None marks the pool-name column.
    columns = (
        ('id', 'ID', 3, ''),
        (None, 'Name', 5, ''),
        ('size_formatted', 'Used', 5, ''),
        ('num_objects', 'Count', 6, 'Objects: '),
        ('num_object_clones', 'Clones', 7, ''),
        ('num_object_copies', 'Copies', 7, ''),
        ('num_objects_degraded', 'Degraded', 9, ''),
        ('write_ops', 'OPS', 4, 'Write: '),
        ('write_formatted', 'Data', 5, ''),
        ('read_ops', 'OPS', 4, 'Read: '),
        ('read_formatted', 'Data', 5, ''),
    )
    name_column = 1

    widths = [minimum for _, _, minimum, _ in columns]
    rows = []

    for pool in pool_list:
        # The name column is sized from every listed pool, shown or not
        widths[name_column] = max(widths[name_column], len(pool) + 1)

        pool_stats = getPoolInformation(zk_conn, pool)

        # No usable id means the pool hasn't been fully reported yet; skip it
        # here so that it is also absent from the rendered rows
        if not pool_stats.get('id'):
            continue

        values = [pool if key is None else pool_stats[key] for key, _, _, _ in columns]
        widths = [max(width, len(str(value)) + 1) for width, value in zip(widths, values)]
        rows.append(values)

    def render(values, with_labels):
        # Each cell is left-aligned to its column width and followed by one
        # space; rows replace the group labels with equal-width blanks
        cells = []
        for (_, _, _, prefix), width, value in zip(columns, widths, values):
            lead = prefix if with_labels else ' ' * len(prefix)
            cells.append('{}{: <{}} '.format(lead, value, width))
        # NOTE(review): data rows are wrapped in bold just like the header,
        # as in the original code -- confirm whether rows should be plain
        return '{}{}{}'.format(ansiprint.bold(), ''.join(cells), ansiprint.end())

    # Format the output header
    pool_list_output_header = render([label for _, label, _, _ in columns], True)

    # Format one output line per displayable pool
    pool_list_output = [render(values, False) for values in rows]

    output_string = pool_list_output_header + '\n' + '\n'.join(sorted(pool_list_output))
    return output_string
#
# Direct functions
#
@ -259,49 +541,60 @@ def add_osd(zk_conn, node, device):
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Verify target block device isn't in use
block_osd = verifyOSDBlock(zk_conn, node, device)
if block_osd:
return False, 'ERROR: Block device {} on node {} is used by OSD {}'.format(device, node, block_osd)
# Tell the cluster to create a new OSD for the host
add_osd_string = 'add {},{}'.format(node, device)
zkhandler.writedata(zk_conn, {'/ceph/osd_cmd': add_osd_string})
add_osd_string = 'osd_add {},{}'.format(node, device)
zkhandler.writedata(zk_conn, {'/ceph/cmd': add_osd_string})
# Wait 1/2 second for the cluster to get the message and start working
time.sleep(0.5)
# Acquire a read lock, so we get the return exclusively
lock = zkhandler.readlock(zk_conn, '/ceph/osd_cmd')
lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
with lock:
result = zkhandler.readdata(zk_conn, '/ceph/osd_cmd').split()[0]
if result == 'success-add':
success = True
else:
try:
result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
if result == 'success-osd_add':
message = 'Created new OSD with block device {} on node {}.'.format(device, node)
success = True
else:
message = 'ERROR: Failed to create new OSD; check node logs for details.'
success = False
except:
message = 'ERROR: Command ignored by node.'
success = False
zkhandler.writedata(zk_conn, {'/ceph/osd_cmd': ''})
if success:
return True, 'Created new OSD with block device {} on node {}.'.format(device, node)
else:
return False, 'Failed to create new OSD; check node logs for details.'
zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
return success, message
def remove_osd(zk_conn, osd_id):
    """Ask the cluster to remove an OSD and report the outcome.

    After checking that the OSD exists, writes an `osd_remove <osd_id>`
    command to the `/ceph/cmd` key, waits half a second, then reads the key
    back under a read lock and treats its first word as the result. The key
    is cleared afterwards.

    Returns a `(success, message)` tuple; `success` is True only when the
    result read back is `success-osd_remove`.
    """
    if not verifyOSD(zk_conn, osd_id):
        return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)

    # Tell the cluster to remove an OSD
    remove_osd_string = 'osd_remove {}'.format(osd_id)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': remove_osd_string})

    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # e.g. an empty key: split() yields no words and [0] raises
            message = 'ERROR: Command ignored by node.'
            success = False
        else:
            if result == 'success-osd_remove':
                message = 'Removed OSD {} from the cluster.'.format(osd_id)
                success = True
            else:
                message = 'ERROR: Failed to remove OSD; check node logs for details.'
                success = False

    # Clear the command key for the next request
    # NOTE(review): issued after the read lock is released -- confirm placement
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def get_list_osd(zk_conn, limit):
osd_list = []
@ -332,3 +625,82 @@ def get_list_osd(zk_conn, limit):
click.echo(output_string)
return True, ''
def add_pool(zk_conn, name, pgs):
    """Ask the cluster to create a pool and report the outcome.

    Writes a `pool_add <name>,<pgs>` command to the `/ceph/cmd` key, waits
    half a second, then reads the key back under a read lock and treats its
    first word as the result. The key is cleared afterwards.

    Returns a `(success, message)` tuple; `success` is True only when the
    result read back is `success-pool_add`.
    """
    # Tell the cluster to create a new pool
    add_pool_string = 'pool_add {},{}'.format(name, pgs)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': add_pool_string})

    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        # Only the read/parse can legitimately fail, so only that is guarded
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # e.g. an empty key: split() yields no words and [0] raises
            message = 'ERROR: Command ignored by node.'
            success = False
        else:
            if result == 'success-pool_add':
                message = 'Created new RBD pool {} with {} PGs.'.format(name, pgs)
                success = True
            else:
                message = 'ERROR: Failed to create new pool; check node logs for details.'
                success = False

    # Clear the command key for the next request
    # NOTE(review): issued after the read lock is released -- confirm placement
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def remove_pool(zk_conn, name):
    """Ask the cluster to remove a pool and report the outcome.

    Writes a `pool_remove <name>` command to the `/ceph/cmd` key, waits half
    a second, then reads the key back under a read lock and treats its first
    word as the result. The key is cleared afterwards.

    Returns a `(success, message)` tuple; `success` is True only when the
    result read back is `success-pool_remove`.
    """
    # Tell the cluster to remove the pool
    remove_pool_string = 'pool_remove {}'.format(name)
    zkhandler.writedata(zk_conn, {'/ceph/cmd': remove_pool_string})

    # Wait 1/2 second for the cluster to get the message and start working
    time.sleep(0.5)

    # Acquire a read lock, so we get the return exclusively
    lock = zkhandler.readlock(zk_conn, '/ceph/cmd')
    with lock:
        # Only the read/parse is guarded; building the messages must not be
        # able to turn a successful removal into a reported failure
        try:
            result = zkhandler.readdata(zk_conn, '/ceph/cmd').split()[0]
        except Exception:
            # e.g. an empty key: split() yields no words and [0] raises
            message = 'ERROR: Command ignored by node.'
            success = False
        else:
            if result == 'success-pool_remove':
                # Previously formatted with an undefined `pgs`, whose
                # NameError was swallowed and reported as a failure
                message = 'Removed RBD pool {} and all volumes.'.format(name)
                success = True
            else:
                message = 'ERROR: Failed to remove pool; check node logs for details.'
                success = False

    # Clear the command key for the next request
    # NOTE(review): issued after the read lock is released -- confirm placement
    zkhandler.writedata(zk_conn, {'/ceph/cmd': ''})
    return success, message
def get_list_pool(zk_conn, limit):
    """Print a table of pools, optionally filtered by a regex on their names.

    `limit`, when given, is treated as a regular expression matched against
    each entry returned by getCephPools(). Unless it already starts with `^`
    or ends with `$`, the corresponding side is made fuzzy by adding `.*`.

    Returns `(True, '')` after echoing the table, or `(False, <message>)`
    when `limit` is not a valid regular expression.
    """
    full_pool_list = getCephPools(zk_conn)

    if limit:
        # Implicitly assume fuzzy limits
        if not limit.startswith('^'):
            limit = '.*' + limit
        if not limit.endswith('$'):
            limit = limit + '.*'

        # Compile up front so an invalid pattern is reported, not raised
        try:
            limit_re = re.compile(limit)
        except re.error as e:
            return False, 'Regex Error: {}'.format(e)

        # Entries are plain strings, so the pattern is matched on them directly
        pool_list = [pool for pool in full_pool_list if limit_re.match(pool) is not None]
    else:
        pool_list = list(full_pool_list)

    output_string = formatPoolList(zk_conn, pool_list)
    click.echo(output_string)

    return True, ''