Tie fence timers to keepalive_interval

Also wait 2 full keepalive intervals after fencing before doing anything
else, to give the Ceph cluster a chance to recover.
This commit is contained in:
2020-08-15 12:38:03 -04:00
parent 4afb288429
commit 0a01d84290
2 changed files with 6 additions and 6 deletions

View File

@@ -35,7 +35,7 @@ def fenceNode(node_name, zk_conn, config, logger):
failcount = 0
while failcount < failcount_limit:
# Wait 5 seconds
-time.sleep(5)
+time.sleep(config.keepalive_interval)
# Get the state
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
# Is it still 'dead'
@@ -56,8 +56,8 @@ def fenceNode(node_name, zk_conn, config, logger):
# Shoot it in the head
fence_status = rebootViaIPMI(ipmi_hostname, ipmi_username, ipmi_password, logger)
-# Hold to ensure the fence takes effect
-time.sleep(3)
+# Hold to ensure the fence takes effect and system stabilizes
+time.sleep(config.keepalive_interval * 2)
# Force into secondary network state if needed
if node_name in config['coordinators']: