From 80f04ce8eebbdc2bfddabe1274526fe0df890feb Mon Sep 17 00:00:00 2001 From: "Joshua M. Boniface" Date: Wed, 7 Jul 2021 11:50:14 -0400 Subject: [PATCH] Remove connection renewal in state handler Regenerating the ZK connection was fraught with issues, including duplicate connections, strange failures to reconnect, and various other wonkiness. Instead let Kazoo handle states sensibly. Kazoo moves to SUSPENDED state when it loses connectivity, and stays there indefinitely (based on cursory tests). And Kazoo seems to always resume from this just fine on its own. Thus all that hackery did nothing but complicate reconnection. This therefore turns the listener into a purely informational function, providing logs of when/why it failed, and we also add some additional output messages during initial connection and final disconnection. --- daemon-common/zkhandler.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/daemon-common/zkhandler.py b/daemon-common/zkhandler.py index 0389757e..0a3abbac 100644 --- a/daemon-common/zkhandler.py +++ b/daemon-common/zkhandler.py @@ -125,23 +125,9 @@ class ZKHandler(object): # def listener(self, state): if state == KazooState.CONNECTED: - self.log('Connection to Zookeeper started', state='o') + self.log('Connection to Zookeeper resumed', state='o') else: - self.log('Connection to Zookeeper lost', state='w') - - while True: - time.sleep(0.5) - - _zk_conn = KazooClient(hosts=self.coordinators) - try: - _zk_conn.start() - except Exception: - del _zk_conn - continue - - self.zk_conn = _zk_conn - self.zk_conn.add_listener(self.listener) - break + self.log('Connection to Zookeeper lost with state {}'.format(state), state='w') def connect(self, persistent=False): """ @@ -149,6 +135,7 @@ class ZKHandler(object): """ try: self.zk_conn.start() + self.log('Connection to Zookeeper started', state='o') if persistent: self.zk_conn.add_listener(self.listener) except Exception as e: @@ -162,6 +149,7 @@ class ZKHandler(object): """ self.zk_conn.stop() self.zk_conn.close() + self.log('Connection to Zookeeper terminated', state='o') # # Schema helper actions