diff --git a/api-daemon/pvcapid/flaskapi.py b/api-daemon/pvcapid/flaskapi.py
index 81b08bb5..0112d32f 100755
--- a/api-daemon/pvcapid/flaskapi.py
+++ b/api-daemon/pvcapid/flaskapi.py
@@ -268,7 +268,7 @@ class API_Initialize(Resource):
         """
         Initialize a new PVC cluster

-        If the 'overwrite' option is not True, the cluster will return 400 if the `/primary_node` key is found. If 'overwrite' is True, the existing cluster
+        If the 'overwrite' option is not True, this endpoint will return 400 if the `/config/primary_node` key is found. If 'overwrite' is True, the existing cluster
         data will be erased and new, empty data written in its place.

         All node daemons should be stopped before running this command, and the API daemon started manually to avoid undefined behavior.
diff --git a/api-daemon/pvcapid/helper.py b/api-daemon/pvcapid/helper.py
index 3f3f5b0a..94fb3d41 100755
--- a/api-daemon/pvcapid/helper.py
+++ b/api-daemon/pvcapid/helper.py
@@ -46,15 +46,16 @@ def initialize_cluster(zkhandler, overwrite=False):
     Initialize a new cluster
     """
     # Abort if we've initialized the cluster before
-    if zkhandler.exists('/primary_node') and not overwrite:
+    if zkhandler.exists('/config/primary_node') and not overwrite:
         return False

     if overwrite:
         # Delete the existing keys; ignore any errors
         status = zkhandler.delete([
-            '/primary_node',
-            '/upstream_ip',
-            '/maintenance',
+            '/config',
+            '/config/primary_node',
+            '/config/upstream_ip',
+            '/config/maintenance',
             '/nodes',
             '/domains',
             '/networks',
@@ -76,9 +77,10 @@ def initialize_cluster(zkhandler, overwrite=False):

     # Create the root keys
     status = zkhandler.write([
-        ('/primary_node', 'none'),
-        ('/upstream_ip', 'none'),
-        ('/maintenance', 'False'),
+        ('/config', ''),
+        ('/config/primary_node', 'none'),
+        ('/config/upstream_ip', 'none'),
+        ('/config/maintenance', 'False'),
         ('/nodes', ''),
         ('/domains', ''),
         ('/networks', ''),
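The ordering of the new `zkhandler.write` list in the second hunk matters: ZooKeeper requires a znode's parent to exist before a child can be created, which is why `('/config', '')` is written before the three child keys. A minimal sketch of the same constraint in raw kazoo (the connection string is an assumption for illustration; PVC itself goes through its zkhandler wrapper):

```python
from kazoo.client import KazooClient
from kazoo.exceptions import NoNodeError

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

try:
    # Fails with NoNodeError while the '/config' parent does not exist
    zk.create('/config/primary_node', b'none')
except NoNodeError:
    # Create the parent first, as the write list above does with ('/config', ''),
    # or pass makepath=True to have kazoo create intermediate znodes
    zk.create('/config', b'')
    zk.create('/config/primary_node', b'none')

zk.stop()
```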
diff --git a/daemon-common/ceph.py b/daemon-common/ceph.py
index c1ce33c3..c566d30f 100644
--- a/daemon-common/ceph.py
+++ b/daemon-common/ceph.py
@@ -144,7 +144,7 @@ def format_pct_tohuman(datapct):
 # Status functions
 #
 def get_status(zkhandler):
-    primary_node = zkhandler.read('/primary_node')
+    primary_node = zkhandler.read('/config/primary_node')
     ceph_status = zkhandler.read('/ceph').rstrip()

     # Create a data structure for the information
@@ -157,7 +157,7 @@


 def get_util(zkhandler):
-    primary_node = zkhandler.read('/primary_node')
+    primary_node = zkhandler.read('/config/primary_node')
     ceph_df = zkhandler.read('/ceph/util').rstrip()

     # Create a data structure for the information
diff --git a/daemon-common/cluster.py b/daemon-common/cluster.py
index 23309fde..ef118545 100644
--- a/daemon-common/cluster.py
+++ b/daemon-common/cluster.py
@@ -32,12 +32,12 @@ def set_maintenance(zkhandler, maint_state):
     try:
         if maint_state == 'true':
             zkhandler.write([
-                ('/maintenance', 'true')
+                ('/config/maintenance', 'true')
             ])
             return True, 'Successfully set cluster in maintenance mode'
         else:
             zkhandler.write([
-                ('/maintenance', 'false')
+                ('/config/maintenance', 'false')
             ])
             return True, 'Successfully set cluster in normal mode'
     except Exception:
@@ -47,7 +47,7 @@
 def getClusterInformation(zkhandler):
     # Get cluster maintenance state
     try:
-        maint_state = zkhandler.read('/maintenance')
+        maint_state = zkhandler.read('/config/maintenance')
     except Exception:
         maint_state = 'false'

@@ -238,7 +238,7 @@
        'storage_health': storage_health,
        'storage_health_msg': storage_health_msg,
        'primary_node': common.getPrimaryNode(zkhandler),
-       'upstream_ip': zkhandler.read('/upstream_ip'),
+       'upstream_ip': zkhandler.read('/config/upstream_ip'),
        'nodes': formatted_node_states,
        'vms': formatted_vm_states,
        'networks': network_count,
diff --git a/daemon-common/common.py b/daemon-common/common.py
index 1938f949..7414e502 100644
--- a/daemon-common/common.py
+++ b/daemon-common/common.py
@@ -396,7 +396,7 @@ def getPrimaryNode(zkhandler):
     failcount = 0
     while True:
         try:
-            primary_node = zkhandler.read('/primary_node')
+            primary_node = zkhandler.read('/config/primary_node')
         except Exception:
             primary_node == 'none'

diff --git a/daemon-common/node.py b/daemon-common/node.py
index 90dd72a7..7b6167cc 100644
--- a/daemon-common/node.py
+++ b/daemon-common/node.py
@@ -98,7 +98,7 @@ def secondary_node(zkhandler, node):
     if current_state == 'primary':
         retmsg = 'Setting node {} in secondary router mode.'.format(node)
         zkhandler.write([
-            ('/primary_node', 'none')
+            ('/config/primary_node', 'none')
         ])
     else:
         return False, 'Node "{}" is already in secondary router mode.'.format(node)
@@ -126,7 +126,7 @@ def primary_node(zkhandler, node):
     if current_state == 'secondary':
         retmsg = 'Setting node {} in primary router mode.'.format(node)
         zkhandler.write([
-            ('/primary_node', node)
+            ('/config/primary_node', node)
         ])
     else:
         return False, 'Node "{}" is already in primary router mode.'.format(node)
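One pre-existing wart is visible in the common.py context above: the except branch reads `primary_node == 'none'`, a comparison where an assignment was intended, so a failed read leaves `primary_node` unbound. A corrected sketch of that retry-read pattern (the helper name and retry bound are hypothetical; the read call and key path come from this diff):

```python
def get_primary_node(zkhandler, retries=5):
    # Retry the read a few times, assigning (not comparing) the fallback
    # value so a failed read can never leave primary_node unbound
    failcount = 0
    while failcount < retries:
        try:
            primary_node = zkhandler.read('/config/primary_node')
        except Exception:
            primary_node = 'none'
        if primary_node != 'none':
            return primary_node
        failcount += 1
    return None
```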
diff --git a/node-daemon/pvcnoded/Daemon.py b/node-daemon/pvcnoded/Daemon.py
index 7cee2a7b..591b23a1 100755
--- a/node-daemon/pvcnoded/Daemon.py
+++ b/node-daemon/pvcnoded/Daemon.py
@@ -529,6 +529,39 @@ except Exception as e:
     logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
     exit(1)

+# Create the /config key if it does not exist
+try:
+    zkhandler.read('/config')
+except Exception:
+    zkhandler.write([
+        ('/config', ''),
+        ('/config/primary_node', 'none'),
+        ('/config/upstream_ip', 'none'),
+        ('/config/maintenance', 'False'),
+    ])
+
+# MIGRATION - populate the keys from their old values
+try:
+    primary_node = zkhandler.read('/primary_node')
+    zkhandler.write([
+        ('/config/primary_node', primary_node)
+    ])
+except Exception:
+    pass
+try:
+    upstream_ip = zkhandler.read('/upstream_ip')
+    zkhandler.write([
+        ('/config/upstream_ip', upstream_ip)
+    ])
+except Exception:
+    pass
+try:
+    maintenance = zkhandler.read('/maintenance')
+    zkhandler.write([
+        ('/config/maintenance', maintenance)
+    ])
+except Exception:
+    pass

 ###############################################################################
 # PHASE 5 - Gracefully handle termination
@@ -566,7 +599,7 @@ def cleanup():
     try:
         if this_node.router_state == 'primary':
             zkhandler.write([
-                ('/primary_node', 'none')
+                ('/config/primary_node', 'none')
             ])
             logger.out('Waiting for primary migration', state='s')
             while this_node.router_state != 'secondary':
@@ -673,7 +706,7 @@

 # Check that the primary key exists, and create it with us as master if not
 try:
-    current_primary = zkhandler.read('/primary_node')
+    current_primary = zkhandler.read('/config/primary_node')
 except kazoo.exceptions.NoNodeError:
     current_primary = 'none'

@@ -683,7 +716,7 @@
     if config['daemon_mode'] == 'coordinator':
         logger.out('No primary node found; creating with us as primary.', state='i')
         zkhandler.write([
-            ('/primary_node', myhostname)
+            ('/config/primary_node', myhostname)
         ])

 ###############################################################################
@@ -819,7 +852,7 @@ this_node = d_node[myhostname]


 # Maintenance mode
-@zkhandler.zk_conn.DataWatch('/maintenance')
+@zkhandler.zk_conn.DataWatch('/config/maintenance')
 def set_maintenance(_maintenance, stat, event=''):
     global maintenance
     try:
@@ -829,7 +862,7 @@ def set_maintenance(_maintenance, stat, event=''):


 # Primary node
-@zkhandler.zk_conn.DataWatch('/primary_node')
+@zkhandler.zk_conn.DataWatch('/config/primary_node')
 def update_primary(new_primary, stat, event=''):
     try:
         new_primary = new_primary.decode('ascii')
@@ -844,7 +877,7 @@ def update_primary(new_primary, stat, event=''):
         if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
             logger.out('Contending for primary coordinator state', state='i')
             # Acquire an exclusive lock on the primary_node key
-            primary_lock = zkhandler.exclusivelock('/primary_node')
+            primary_lock = zkhandler.exclusivelock('/config/primary_node')
             try:
                 # This lock times out after 0.4s, which is 0.1s less than the pre-takeover
                 # timeout below, thus ensuring that a primary takeover will not deadlock
@@ -852,9 +885,9 @@
                 primary_lock.acquire(timeout=0.4)
                 # Ensure when we get the lock that the versions are still consistent and that
                 # another node hasn't already acquired primary state
-                if key_version == zkhandler.zk_conn.get('/primary_node')[1].version:
+                if key_version == zkhandler.zk_conn.get('/config/primary_node')[1].version:
                     zkhandler.write([
-                        ('/primary_node', myhostname)
+                        ('/config/primary_node', myhostname)
                     ])
                 # Cleanly release the lock
                 primary_lock.release()
@@ -1475,11 +1508,11 @@ def node_keepalive():
     if config['enable_networking']:
         if this_node.router_state == 'primary':
             try:
-                if zkhandler.read('/upstream_ip') != config['upstream_floating_ip']:
+                if zkhandler.read('/config/upstream_ip') != config['upstream_floating_ip']:
                     raise
             except Exception:
                 zkhandler.write([
-                    ('/upstream_ip', config['upstream_floating_ip'])
+                    ('/config/upstream_ip', config['upstream_floating_ip'])
                 ])

     # Get past state and update if needed
@@ -1498,9 +1531,9 @@ def node_keepalive():
         if debug:
             logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
         if this_node.router_state == 'primary':
-            if zkhandler.read('/primary_node') != this_node.name:
+            if zkhandler.read('/config/primary_node') != this_node.name:
                 zkhandler.write([
-                    ('/primary_node', this_node.name)
+                    ('/config/primary_node', this_node.name)
                 ])

     # Run VM statistics collection in separate thread for parallelization
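The three migration try blocks added to Daemon.py follow a single pattern: read the legacy flat key, copy its value under /config, and ignore a missing source. A condensed equivalent (hypothetical helper; it uses only the zkhandler.read/write signatures that appear in this diff):

```python
def migrate_legacy_keys(zkhandler):
    # Copy each legacy flat key to its new home under /config; a missing
    # old key (e.g. a freshly initialized cluster) is silently skipped
    for old_key, new_key in (
        ('/primary_node', '/config/primary_node'),
        ('/upstream_ip', '/config/upstream_ip'),
        ('/maintenance', '/config/maintenance'),
    ):
        try:
            value = zkhandler.read(old_key)
        except Exception:
            continue
        zkhandler.write([
            (new_key, value)
        ])
```

Note that neither this block nor the updated delete list in helper.py removes the legacy flat keys, so they linger in ZooKeeper after an upgrade.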
diff --git a/node-daemon/pvcnoded/NodeInstance.py b/node-daemon/pvcnoded/NodeInstance.py
index 3d6fc698..72bcc878 100644
--- a/node-daemon/pvcnoded/NodeInstance.py
+++ b/node-daemon/pvcnoded/NodeInstance.py
@@ -323,7 +323,7 @@ class NodeInstance(object):
        Acquire primary coordinator status from a peer node
        """
        # Lock the primary node until transition is complete
-       primary_lock = zkhandler.exclusivelock(self.zk_conn, '/primary_node')
+       primary_lock = zkhandler.exclusivelock(self.zk_conn, '/config/primary_node')
        primary_lock.acquire()

        # Ensure our lock key is populated
diff --git a/node-daemon/pvcnoded/fencing.py b/node-daemon/pvcnoded/fencing.py
index 7a940a43..b2a0d597 100644
--- a/node-daemon/pvcnoded/fencing.py
+++ b/node-daemon/pvcnoded/fencing.py
@@ -63,8 +63,8 @@ def fenceNode(node_name, zk_conn, config, logger):
     if node_name in config['coordinators']:
         logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
         zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(node_name): 'secondary'})
-        if zkhandler.readdata(zk_conn, '/primary_node') == node_name:
-            zkhandler.writedata(zk_conn, {'/primary_node': 'none'})
+        if zkhandler.readdata(zk_conn, '/config/primary_node') == node_name:
+            zkhandler.writedata(zk_conn, {'/config/primary_node': 'none'})

     # If the fence succeeded and successful_fence is migrate
     if fence_status and config['successful_fence'] == 'migrate':
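The contention logic in update_primary pairs an exclusive lock with a znode version check, so a node that was waiting on the lock does not overwrite a primary claim made in the meantime. The same pattern in raw kazoo (the hostname and connection string are assumptions; PVC wraps this in zkhandler.exclusivelock):

```python
from kazoo.client import KazooClient
from kazoo.exceptions import LockTimeout

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

# Remember the key's version before contending
key_version = zk.get('/config/primary_node')[1].version

lock = zk.Lock('/config/primary_node', 'hv1')
try:
    # Time out quickly so a deliberate takeover elsewhere cannot deadlock
    lock.acquire(timeout=0.4)
    # Claim primary only if nobody changed the key while we waited
    if zk.get('/config/primary_node')[1].version == key_version:
        zk.set('/config/primary_node', b'hv1')
    lock.release()
except LockTimeout:
    pass  # another node won the contention

zk.stop()
```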