Reformat code with Black code formatter
Unify the code style according to PEP 8 and Black conventions by running the Black tool.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -35,67 +35,77 @@ import yaml
|
||||
def get_zookeeper_key():
    """Build the Zookeeper key that holds the DHCPv4 leases of this network.

    The bridge interface name is read from the DNSMASQ_BRIDGE_INTERFACE
    environment variable; the first run of digits in that name is used as the
    network ID.

    Returns:
        The key "/networks/<id>/dhcp4_leases" as a string.

    Raises:
        SystemExit: with status 1, after printing an error to stderr, when the
            variable is unset or the interface name contains no digits.
    """
    # Get the interface from environment (passed by dnsmasq)
    try:
        interface = os.environ["DNSMASQ_BRIDGE_INTERFACE"]
    except KeyError as e:
        print(
            "ERROR: DNSMASQ_BRIDGE_INTERFACE environment variable not found: {}".format(
                e
            ),
            file=sys.stderr,
        )
        sys.exit(1)

    # Get the ID of the interface (the digits); bail out cleanly instead of
    # failing with an IndexError when the name carries no digits at all
    vni_match = re.search(r"\d+", interface)
    if vni_match is None:
        print(
            "ERROR: No network ID found in interface name: {}".format(interface),
            file=sys.stderr,
        )
        sys.exit(1)
    network_vni = vni_match.group(0)

    # Create the key
    return "/networks/{}/dhcp4_leases".format(network_vni)
|
||||
|
||||
|
||||
def get_lease_expiry():
    """Return the lease expiry from DNSMASQ_LEASE_EXPIRES, or "0" when unset."""
    return os.environ.get("DNSMASQ_LEASE_EXPIRES", "0")
|
||||
|
||||
|
||||
def get_client_id():
    """Return the client ID from DNSMASQ_CLIENT_ID, or "*" when unset."""
    return os.environ.get("DNSMASQ_CLIENT_ID", "*")
|
||||
|
||||
|
||||
def connect_zookeeper():
    """Open and start a Zookeeper connection described by the node config file.

    The config file path is taken from the PVCD_CONFIG_FILE environment
    variable, falling back to /etc/pvc/pvcnoded.yaml. The coordinator hosts
    are read from its pvc -> cluster -> coordinators entry.

    Returns:
        A started kazoo.client.KazooClient.

    Raises:
        SystemExit: with status 1, after printing an error to stderr, when the
            config file cannot be read or parsed, or the connection fails.
    """
    # We expect the environ to contain the config file; otherwise use the
    # default place
    pvcnoded_config_file = os.environ.get("PVCD_CONFIG_FILE", "/etc/pvc/pvcnoded.yaml")

    # Opening the file is inside the try so a missing or unreadable file gets
    # the same clean error exit as a malformed one
    try:
        with open(pvcnoded_config_file, "r") as cfgfile:
            # safe_load: yaml.load() without an explicit Loader can construct
            # arbitrary objects and is rejected by current PyYAML releases
            o_config = yaml.safe_load(cfgfile)
    except Exception as e:
        print(
            "ERROR: Failed to parse configuration file: {}".format(e),
            file=sys.stderr,
        )
        sys.exit(1)

    try:
        zk_conn = kazoo.client.KazooClient(
            hosts=o_config["pvc"]["cluster"]["coordinators"]
        )
        zk_conn.start()
    except Exception as e:
        print("ERROR: Failed to connect to Zookeeper: {}".format(e), file=sys.stderr)
        sys.exit(1)

    return zk_conn
|
||||
|
||||
|
||||
def read_data(zk_conn, key):
    """Return the data stored at *key*, decoded as ASCII text.

    Args:
        zk_conn: Connection object whose get(key) returns a sequence with the
            raw node data as its first element.
        key: The Zookeeper key to read.

    Returns:
        The decoded string, or "" when the node holds no decodable data.
    """
    data = zk_conn.get(key)[0]
    try:
        return data.decode("ascii")
    except AttributeError:
        # Data without a decode() method (e.g. None) is treated as empty
        return ""
|
||||
|
||||
|
||||
def get_lease(zk_conn, zk_leases_key, macaddr):
    """Read the stored fields of one lease from Zookeeper.

    Args:
        zk_conn: Zookeeper connection passed through to read_data().
        zk_leases_key: Base key of the lease database.
        macaddr: MAC address identifying the lease below the base key.

    Returns:
        A tuple (expiry, ipaddr, hostname, clientid) of strings.
    """
    lease_key = "{}/{}".format(zk_leases_key, macaddr)
    # Read each field exactly once, in the order of the returned tuple
    expiry, ipaddr, hostname, clientid = (
        read_data(zk_conn, "{}/{}".format(lease_key, field))
        for field in ("expiry", "ipaddr", "hostname", "clientid")
    )
    return expiry, ipaddr, hostname, clientid
|
||||
|
||||
|
||||
@ -107,38 +117,50 @@ def read_lease_database(zk_conn, zk_leases_key):
|
||||
output_list = []
|
||||
for macaddr in leases_list:
|
||||
expiry, ipaddr, hostname, clientid = get_lease(zk_conn, zk_leases_key, macaddr)
|
||||
data_string = '{} {} {} {} {}'.format(expiry, macaddr, ipaddr, hostname, clientid)
|
||||
print('Reading lease from Zookeeper: {}'.format(data_string), file=sys.stderr)
|
||||
output_list.append('{}'.format(data_string))
|
||||
data_string = "{} {} {} {} {}".format(
|
||||
expiry, macaddr, ipaddr, hostname, clientid
|
||||
)
|
||||
print("Reading lease from Zookeeper: {}".format(data_string), file=sys.stderr)
|
||||
output_list.append("{}".format(data_string))
|
||||
|
||||
# Output list
|
||||
print('\n'.join(output_list))
|
||||
print("\n".join(output_list))
|
||||
|
||||
|
||||
def add_lease(zk_conn, zk_leases_key, expiry, macaddr, ipaddr, hostname, clientid):
    """Store a new lease in Zookeeper in a single transaction.

    Creates the key <zk_leases_key>/<macaddr> with empty data and one child
    key each for expiry, ipaddr, hostname and clientid.

    Args:
        zk_conn: Connection object providing transaction().
        zk_leases_key: Base key of the lease database.
        expiry: Lease expiry, as a string.
        macaddr: MAC address of the client; names the lease key.
        ipaddr: Leased IP address, as a string.
        hostname: Client hostname; a falsy value is stored as "".
        clientid: Client ID, as a string.
    """
    if not hostname:
        hostname = ""

    lease_key = "{}/{}".format(zk_leases_key, macaddr)
    fields = (
        ("expiry", expiry),
        ("ipaddr", ipaddr),
        ("hostname", hostname),
        ("clientid", clientid),
    )

    transaction = zk_conn.transaction()
    transaction.create(lease_key, "".encode("ascii"))
    for field, value in fields:
        transaction.create("{}/{}".format(lease_key, field), value.encode("ascii"))
    results = transaction.commit()

    # NOTE(review): per the kazoo documentation commit() returns one result per
    # operation and reports failures as exception objects rather than raising;
    # surface them on stderr instead of silently dropping them - confirm.
    for result in results or []:
        if isinstance(result, Exception):
            print(
                "ERROR: Failed to add lease for {}: {!r}".format(macaddr, result),
                file=sys.stderr,
            )
|
||||
|
||||
|
||||
def del_lease(zk_conn, zk_leases_key, macaddr, expiry):
    """Delete the lease key of *macaddr*, including all of its child keys.

    Args:
        zk_conn: Connection object providing delete(path, recursive=...).
        zk_leases_key: Base key of the lease database.
        macaddr: MAC address identifying the lease below the base key.
        expiry: Unused; kept so existing call sites keep working.
    """
    zk_conn.delete("{}/{}".format(zk_leases_key, macaddr), recursive=True)
|
||||
|
||||
|
||||
#
|
||||
# Instantiate the parser
|
||||
#
|
||||
parser = argparse.ArgumentParser(description='Store or retrieve dnsmasq leases in Zookeeper')
|
||||
parser.add_argument('action', type=str, help='Action')
|
||||
parser.add_argument('macaddr', type=str, help='MAC Address', nargs='?', default=None)
|
||||
parser.add_argument('ipaddr', type=str, help='IP Address', nargs='?', default=None)
|
||||
parser.add_argument('hostname', type=str, help='Hostname', nargs='?', default=None)
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Store or retrieve dnsmasq leases in Zookeeper"
|
||||
)
|
||||
parser.add_argument("action", type=str, help="Action")
|
||||
parser.add_argument("macaddr", type=str, help="MAC Address", nargs="?", default=None)
|
||||
parser.add_argument("ipaddr", type=str, help="IP Address", nargs="?", default=None)
|
||||
parser.add_argument("hostname", type=str, help="Hostname", nargs="?", default=None)
|
||||
args = parser.parse_args()
|
||||
|
||||
action = args.action
|
||||
@ -149,7 +171,7 @@ hostname = args.hostname
|
||||
zk_conn = connect_zookeeper()
|
||||
zk_leases_key = get_zookeeper_key()
|
||||
|
||||
if action == 'init':
|
||||
if action == "init":
|
||||
read_lease_database(zk_conn, zk_leases_key)
|
||||
exit(0)
|
||||
|
||||
@ -159,10 +181,13 @@ clientid = get_client_id()
|
||||
#
|
||||
# Choose action
|
||||
#
|
||||
print('Lease action - {} {} {} {}'.format(action, macaddr, ipaddr, hostname), file=sys.stderr)
|
||||
if action == 'add':
|
||||
print(
|
||||
"Lease action - {} {} {} {}".format(action, macaddr, ipaddr, hostname),
|
||||
file=sys.stderr,
|
||||
)
|
||||
if action == "add":
|
||||
add_lease(zk_conn, zk_leases_key, expiry, macaddr, ipaddr, hostname, clientid)
|
||||
elif action == 'del':
|
||||
elif action == "del":
|
||||
del_lease(zk_conn, zk_leases_key, macaddr, expiry)
|
||||
elif action == 'old':
|
||||
elif action == "old":
|
||||
pass
|
||||
|
@ -38,63 +38,73 @@ class CephOSDInstance(object):
|
||||
self.size = None
|
||||
self.stats = dict()
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.node', self.osd_id))
|
||||
def watch_osd_node(data, stat, event=''):
|
||||
if event and event.type == 'DELETED':
|
||||
@self.zkhandler.zk_conn.DataWatch(
|
||||
self.zkhandler.schema.path("osd.node", self.osd_id)
|
||||
)
|
||||
def watch_osd_node(data, stat, event=""):
|
||||
if event and event.type == "DELETED":
|
||||
# The key has been deleted after existing before; terminate this watcher
|
||||
# because this class instance is about to be reaped in Daemon.py
|
||||
return False
|
||||
|
||||
try:
|
||||
data = data.decode('ascii')
|
||||
data = data.decode("ascii")
|
||||
except AttributeError:
|
||||
data = ''
|
||||
data = ""
|
||||
|
||||
if data and data != self.node:
|
||||
self.node = data
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('osd.stats', self.osd_id))
|
||||
def watch_osd_stats(data, stat, event=''):
|
||||
if event and event.type == 'DELETED':
|
||||
@self.zkhandler.zk_conn.DataWatch(
|
||||
self.zkhandler.schema.path("osd.stats", self.osd_id)
|
||||
)
|
||||
def watch_osd_stats(data, stat, event=""):
|
||||
if event and event.type == "DELETED":
|
||||
# The key has been deleted after existing before; terminate this watcher
|
||||
# because this class instance is about to be reaped in Daemon.py
|
||||
return False
|
||||
|
||||
try:
|
||||
data = data.decode('ascii')
|
||||
data = data.decode("ascii")
|
||||
except AttributeError:
|
||||
data = ''
|
||||
data = ""
|
||||
|
||||
if data and data != self.stats:
|
||||
self.stats = json.loads(data)
|
||||
|
||||
@staticmethod
|
||||
def add_osd(zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05):
|
||||
def add_osd(
|
||||
zkhandler, logger, node, device, weight, ext_db_flag=False, ext_db_ratio=0.05
|
||||
):
|
||||
# We are ready to create a new OSD on this node
|
||||
logger.out('Creating new OSD disk on block device {}'.format(device), state='i')
|
||||
logger.out("Creating new OSD disk on block device {}".format(device), state="i")
|
||||
try:
|
||||
# 1. Create an OSD; we do this so we know what ID will be gen'd
|
||||
retcode, stdout, stderr = common.run_os_command('ceph osd create')
|
||||
retcode, stdout, stderr = common.run_os_command("ceph osd create")
|
||||
if retcode:
|
||||
print('ceph osd create')
|
||||
print("ceph osd create")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
osd_id = stdout.rstrip()
|
||||
|
||||
# 2. Remove that newly-created OSD
|
||||
retcode, stdout, stderr = common.run_os_command('ceph osd rm {}'.format(osd_id))
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph osd rm {}".format(osd_id)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph osd rm')
|
||||
print("ceph osd rm")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 3a. Zap the disk to ensure it is ready to go
|
||||
logger.out('Zapping disk {}'.format(device), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(device))
|
||||
logger.out("Zapping disk {}".format(device), state="i")
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph-volume lvm zap --destroy {}".format(device)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph-volume lvm zap')
|
||||
print("ceph-volume lvm zap")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
@ -103,9 +113,13 @@ class CephOSDInstance(object):
|
||||
|
||||
# 3b. Prepare the logical volume if ext_db_flag
|
||||
if ext_db_flag:
|
||||
_, osd_size_bytes, _ = common.run_os_command('blockdev --getsize64 {}'.format(device))
|
||||
_, osd_size_bytes, _ = common.run_os_command(
|
||||
"blockdev --getsize64 {}".format(device)
|
||||
)
|
||||
osd_size_bytes = int(osd_size_bytes)
|
||||
result = CephOSDInstance.create_osd_db_lv(zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes)
|
||||
result = CephOSDInstance.create_osd_db_lv(
|
||||
zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes
|
||||
)
|
||||
if not result:
|
||||
raise Exception
|
||||
db_device = "osd-db/osd-{}".format(osd_id)
|
||||
@ -114,63 +128,67 @@ class CephOSDInstance(object):
|
||||
db_device = ""
|
||||
|
||||
# 3c. Create the OSD for real
|
||||
logger.out('Preparing LVM for new OSD disk with ID {} on {}'.format(osd_id, device), state='i')
|
||||
logger.out(
|
||||
"Preparing LVM for new OSD disk with ID {} on {}".format(
|
||||
osd_id, device
|
||||
),
|
||||
state="i",
|
||||
)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
'ceph-volume lvm prepare --bluestore {devices}'.format(
|
||||
osdid=osd_id,
|
||||
devices=dev_flags
|
||||
"ceph-volume lvm prepare --bluestore {devices}".format(
|
||||
osdid=osd_id, devices=dev_flags
|
||||
)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph-volume lvm prepare')
|
||||
print("ceph-volume lvm prepare")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 4a. Get OSD FSID
|
||||
logger.out('Getting OSD FSID for ID {} on {}'.format(osd_id, device), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
'ceph-volume lvm list {device}'.format(
|
||||
osdid=osd_id,
|
||||
device=device
|
||||
)
|
||||
logger.out(
|
||||
"Getting OSD FSID for ID {} on {}".format(osd_id, device), state="i"
|
||||
)
|
||||
for line in stdout.split('\n'):
|
||||
if 'osd fsid' in line:
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph-volume lvm list {device}".format(osdid=osd_id, device=device)
|
||||
)
|
||||
for line in stdout.split("\n"):
|
||||
if "osd fsid" in line:
|
||||
osd_fsid = line.split()[-1]
|
||||
|
||||
if not osd_fsid:
|
||||
print('ceph-volume lvm list')
|
||||
print('Could not find OSD fsid in data:')
|
||||
print("ceph-volume lvm list")
|
||||
print("Could not find OSD fsid in data:")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 4b. Activate the OSD
|
||||
logger.out('Activating new OSD disk with ID {}'.format(osd_id, device), state='i')
|
||||
logger.out(
|
||||
"Activating new OSD disk with ID {}".format(osd_id, device), state="i"
|
||||
)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
'ceph-volume lvm activate --bluestore {osdid} {osdfsid}'.format(
|
||||
osdid=osd_id,
|
||||
osdfsid=osd_fsid
|
||||
"ceph-volume lvm activate --bluestore {osdid} {osdfsid}".format(
|
||||
osdid=osd_id, osdfsid=osd_fsid
|
||||
)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph-volume lvm activate')
|
||||
print("ceph-volume lvm activate")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 5. Add it to the crush map
|
||||
logger.out('Adding new OSD disk with ID {} to CRUSH map'.format(osd_id), state='i')
|
||||
logger.out(
|
||||
"Adding new OSD disk with ID {} to CRUSH map".format(osd_id), state="i"
|
||||
)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
'ceph osd crush add osd.{osdid} {weight} root=default host={node}'.format(
|
||||
osdid=osd_id,
|
||||
weight=weight,
|
||||
node=node
|
||||
"ceph osd crush add osd.{osdid} {weight} root=default host={node}".format(
|
||||
osdid=osd_id, weight=weight, node=node
|
||||
)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph osd crush add')
|
||||
print("ceph osd crush add")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
@ -178,65 +196,73 @@ class CephOSDInstance(object):
|
||||
|
||||
# 6. Verify it started
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
'systemctl status ceph-osd@{osdid}'.format(
|
||||
osdid=osd_id
|
||||
)
|
||||
"systemctl status ceph-osd@{osdid}".format(osdid=osd_id)
|
||||
)
|
||||
if retcode:
|
||||
print('systemctl status')
|
||||
print("systemctl status")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 7. Add the new OSD to the list
|
||||
logger.out('Adding new OSD disk with ID {} to Zookeeper'.format(osd_id), state='i')
|
||||
zkhandler.write([
|
||||
(('osd', osd_id), ''),
|
||||
(('osd.node', osd_id), node),
|
||||
(('osd.device', osd_id), device),
|
||||
(('osd.db_device', osd_id), db_device),
|
||||
(('osd.stats', osd_id), '{}'),
|
||||
])
|
||||
logger.out(
|
||||
"Adding new OSD disk with ID {} to Zookeeper".format(osd_id), state="i"
|
||||
)
|
||||
zkhandler.write(
|
||||
[
|
||||
(("osd", osd_id), ""),
|
||||
(("osd.node", osd_id), node),
|
||||
(("osd.device", osd_id), device),
|
||||
(("osd.db_device", osd_id), db_device),
|
||||
(("osd.stats", osd_id), "{}"),
|
||||
]
|
||||
)
|
||||
|
||||
# Log it
|
||||
logger.out('Created new OSD disk with ID {}'.format(osd_id), state='o')
|
||||
logger.out("Created new OSD disk with ID {}".format(osd_id), state="o")
|
||||
return True
|
||||
except Exception as e:
|
||||
# Log it
|
||||
logger.out('Failed to create new OSD disk: {}'.format(e), state='e')
|
||||
logger.out("Failed to create new OSD disk: {}".format(e), state="e")
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def remove_osd(zkhandler, logger, osd_id, osd_obj):
|
||||
logger.out('Removing OSD disk {}'.format(osd_id), state='i')
|
||||
logger.out("Removing OSD disk {}".format(osd_id), state="i")
|
||||
try:
|
||||
# 1. Verify the OSD is present
|
||||
retcode, stdout, stderr = common.run_os_command('ceph osd ls')
|
||||
osd_list = stdout.split('\n')
|
||||
retcode, stdout, stderr = common.run_os_command("ceph osd ls")
|
||||
osd_list = stdout.split("\n")
|
||||
if osd_id not in osd_list:
|
||||
logger.out('Could not find OSD {} in the cluster'.format(osd_id), state='e')
|
||||
logger.out(
|
||||
"Could not find OSD {} in the cluster".format(osd_id), state="e"
|
||||
)
|
||||
return True
|
||||
|
||||
# 1. Set the OSD out so it will flush
|
||||
logger.out('Setting out OSD disk with ID {}'.format(osd_id), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
|
||||
logger.out("Setting out OSD disk with ID {}".format(osd_id), state="i")
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph osd out {}".format(osd_id)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph osd out')
|
||||
print("ceph osd out")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 2. Wait for the OSD to flush
|
||||
logger.out('Flushing OSD disk with ID {}'.format(osd_id), state='i')
|
||||
logger.out("Flushing OSD disk with ID {}".format(osd_id), state="i")
|
||||
osd_string = str()
|
||||
while True:
|
||||
try:
|
||||
retcode, stdout, stderr = common.run_os_command('ceph pg dump osds --format json')
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph pg dump osds --format json"
|
||||
)
|
||||
dump_string = json.loads(stdout)
|
||||
for osd in dump_string:
|
||||
if str(osd['osd']) == osd_id:
|
||||
if str(osd["osd"]) == osd_id:
|
||||
osd_string = osd
|
||||
num_pgs = osd_string['num_pgs']
|
||||
num_pgs = osd_string["num_pgs"]
|
||||
if num_pgs > 0:
|
||||
time.sleep(5)
|
||||
else:
|
||||
@ -245,10 +271,12 @@ class CephOSDInstance(object):
|
||||
break
|
||||
|
||||
# 3. Stop the OSD process and wait for it to be terminated
|
||||
logger.out('Stopping OSD disk with ID {}'.format(osd_id), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command('systemctl stop ceph-osd@{}'.format(osd_id))
|
||||
logger.out("Stopping OSD disk with ID {}".format(osd_id), state="i")
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"systemctl stop ceph-osd@{}".format(osd_id)
|
||||
)
|
||||
if retcode:
|
||||
print('systemctl stop')
|
||||
print("systemctl stop")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
@ -257,161 +285,213 @@ class CephOSDInstance(object):
|
||||
while True:
|
||||
is_osd_up = False
|
||||
# Find if there is a process named ceph-osd with arg '--id {id}'
|
||||
for p in psutil.process_iter(attrs=['name', 'cmdline']):
|
||||
if 'ceph-osd' == p.info['name'] and '--id {}'.format(osd_id) in ' '.join(p.info['cmdline']):
|
||||
for p in psutil.process_iter(attrs=["name", "cmdline"]):
|
||||
if "ceph-osd" == p.info["name"] and "--id {}".format(
|
||||
osd_id
|
||||
) in " ".join(p.info["cmdline"]):
|
||||
is_osd_up = True
|
||||
# If there isn't, continue
|
||||
if not is_osd_up:
|
||||
break
|
||||
|
||||
# 4. Determine the block devices
|
||||
retcode, stdout, stderr = common.run_os_command('readlink /var/lib/ceph/osd/ceph-{}/block'.format(osd_id))
|
||||
vg_name = stdout.split('/')[-2] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
|
||||
retcode, stdout, stderr = common.run_os_command('vgs --separator , --noheadings -o pv_name {}'.format(vg_name))
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"readlink /var/lib/ceph/osd/ceph-{}/block".format(osd_id)
|
||||
)
|
||||
vg_name = stdout.split("/")[-2] # e.g. /dev/ceph-<uuid>/osd-block-<uuid>
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"vgs --separator , --noheadings -o pv_name {}".format(vg_name)
|
||||
)
|
||||
pv_block = stdout.strip()
|
||||
|
||||
# 5. Zap the volumes
|
||||
logger.out('Zapping OSD disk with ID {} on {}'.format(osd_id, pv_block), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command('ceph-volume lvm zap --destroy {}'.format(pv_block))
|
||||
logger.out(
|
||||
"Zapping OSD disk with ID {} on {}".format(osd_id, pv_block), state="i"
|
||||
)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph-volume lvm zap --destroy {}".format(pv_block)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph-volume lvm zap')
|
||||
print("ceph-volume lvm zap")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 6. Purge the OSD from Ceph
|
||||
logger.out('Purging OSD disk with ID {}'.format(osd_id), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command('ceph osd purge {} --yes-i-really-mean-it'.format(osd_id))
|
||||
logger.out("Purging OSD disk with ID {}".format(osd_id), state="i")
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"ceph osd purge {} --yes-i-really-mean-it".format(osd_id)
|
||||
)
|
||||
if retcode:
|
||||
print('ceph osd purge')
|
||||
print("ceph osd purge")
|
||||
print(stdout)
|
||||
print(stderr)
|
||||
raise Exception
|
||||
|
||||
# 7. Remove the DB device
|
||||
if zkhandler.exists(('osd.db_device', osd_id)):
|
||||
db_device = zkhandler.read(('osd.db_device', osd_id))
|
||||
logger.out('Removing OSD DB logical volume "{}"'.format(db_device), state='i')
|
||||
retcode, stdout, stderr = common.run_os_command('lvremove --yes --force {}'.format(db_device))
|
||||
if zkhandler.exists(("osd.db_device", osd_id)):
|
||||
db_device = zkhandler.read(("osd.db_device", osd_id))
|
||||
logger.out(
|
||||
'Removing OSD DB logical volume "{}"'.format(db_device), state="i"
|
||||
)
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
"lvremove --yes --force {}".format(db_device)
|
||||
)
|
||||
|
||||
# 8. Delete OSD from ZK
|
||||
logger.out('Deleting OSD disk with ID {} from Zookeeper'.format(osd_id), state='i')
|
||||
zkhandler.delete(('osd', osd_id), recursive=True)
|
||||
logger.out(
|
||||
"Deleting OSD disk with ID {} from Zookeeper".format(osd_id), state="i"
|
||||
)
|
||||
zkhandler.delete(("osd", osd_id), recursive=True)
|
||||
|
||||
# Log it
|
||||
logger.out('Removed OSD disk with ID {}'.format(osd_id), state='o')
|
||||
logger.out("Removed OSD disk with ID {}".format(osd_id), state="o")
|
||||
return True
|
||||
except Exception as e:
|
||||
# Log it
|
||||
logger.out('Failed to purge OSD disk with ID {}: {}'.format(osd_id, e), state='e')
|
||||
logger.out(
|
||||
"Failed to purge OSD disk with ID {}: {}".format(osd_id, e), state="e"
|
||||
)
|
||||
return False
|
||||
|
||||
@staticmethod
def add_db_vg(zkhandler, logger, device):
    """Create the "osd-db" volume group for OSD databases on a block device.

    The device gets a fresh partition table with one partition of type 8e00,
    which is then turned into a PV and used to create the VG.

    Args:
        zkhandler: Not used by this method.
        logger: Object whose out(message, state=...) receives progress and
            error messages.
        device: Path of the block device to partition and use.

    Returns:
        True on success; False when the VG already exists or a command fails.
    """
    logger.out(
        "Creating new OSD database volume group on block device {}".format(device),
        state="i",
    )
    try:
        # 0. Check if an existing volume group exists; only exit status 5 is
        # taken to mean "no such VG"
        retcode, stdout, stderr = common.run_os_command("vgdisplay osd-db")
        if retcode != 5:
            logger.out('Ceph OSD database VG "osd-db" already exists', state="e")
            return False

        # 1. Create an empty partition table
        logger.out(
            "Creating partitions on block device {}".format(device), state="i"
        )
        retcode, stdout, stderr = common.run_os_command(
            "sgdisk --clear {}".format(device)
        )
        if retcode:
            print("sgdisk create partition table")
            print(stdout)
            print(stderr)
            # Give the exception a message so the failure log is not empty
            raise Exception(
                "sgdisk failed to create a partition table (exit code {})".format(
                    retcode
                )
            )

        retcode, stdout, stderr = common.run_os_command(
            "sgdisk --new 1:: --typecode 1:8e00 {}".format(device)
        )
        if retcode:
            print("sgdisk create pv partition")
            print(stdout)
            print(stderr)
            raise Exception(
                "sgdisk failed to create the PV partition (exit code {})".format(
                    retcode
                )
            )

        # Handle the partition ID portion
        if "by-path" in device or "by-id" in device:
            # /dev/disk/by-path/pci-0000:03:00.0-scsi-0:1:0:0 -> pci-0000:03:00.0-scsi-0:1:0:0-part1
            partition = "{}-part1".format(device)
        elif "nvme" in device:
            # /dev/nvme0n1 -> nvme0n1p1
            partition = "{}p1".format(device)
        else:
            # /dev/sda -> sda1
            # No other '/dev/disk/by-*' types are valid for raw block devices anyway
            partition = "{}1".format(device)

        # 2. Create the PV
        logger.out("Creating PV on block device {}".format(partition), state="i")
        retcode, stdout, stderr = common.run_os_command(
            "pvcreate --force {}".format(partition)
        )
        if retcode:
            print("pv creation")
            print(stdout)
            print(stderr)
            raise Exception("pvcreate failed (exit code {})".format(retcode))

        # 3. Create the VG (named 'osd-db')
        logger.out(
            'Creating VG "osd-db" on block device {}'.format(partition), state="i"
        )
        retcode, stdout, stderr = common.run_os_command(
            "vgcreate --force osd-db {}".format(partition)
        )
        if retcode:
            print("vg creation")
            print(stdout)
            print(stderr)
            raise Exception("vgcreate failed (exit code {})".format(retcode))

        # Log it
        logger.out(
            "Created new OSD database volume group on block device {}".format(
                device
            ),
            state="o",
        )
        return True
    except Exception as e:
        # Log it
        logger.out(
            "Failed to create OSD database volume group: {}".format(e), state="e"
        )
        return False
|
||||
|
||||
@staticmethod
def create_osd_db_lv(zkhandler, logger, osd_id, ext_db_ratio, osd_size_bytes):
    """Create the database logical volume "osd-db/osd-<osd_id>" for an OSD.

    The LV size is osd_size_bytes * ext_db_ratio, converted to whole
    megabytes.

    Args:
        zkhandler: Not used by this method.
        logger: Object whose out(message, state=...) receives progress and
            error messages.
        osd_id: ID of the OSD the volume belongs to; part of the LV name.
        ext_db_ratio: Fraction of the OSD size to allocate to the DB volume.
        osd_size_bytes: Size of the OSD block device in bytes.

    Returns:
        True on success; False when the LV already exists or creation fails.
    """
    logger.out(
        "Creating new OSD database logical volume for OSD ID {}".format(osd_id),
        state="i",
    )
    try:
        # 0. Check if an existing logical volume exists. The name probed here
        # must match the name created below ("osd-<id>"); only exit status 5
        # is taken to mean "no such LV"
        retcode, stdout, stderr = common.run_os_command(
            "lvdisplay osd-db/osd-{}".format(osd_id)
        )
        if retcode != 5:
            logger.out(
                'Ceph OSD database LV "osd-db/osd-{}" already exists'.format(osd_id),
                state="e",
            )
            return False

        # 1. Determine LV sizing
        osd_db_size = int(osd_size_bytes * ext_db_ratio / 1024 / 1024)

        # 2. Create the LV
        logger.out(
            'Creating DB LV "osd-db/osd-{}" of {}M ({} * {})'.format(
                osd_id, osd_db_size, osd_size_bytes, ext_db_ratio
            ),
            state="i",
        )
        # NOTE(review): the size is passed without a unit suffix; lvcreate(8)
        # documents megabytes as the default unit, matching the "M" logged
        # above - confirm.
        retcode, stdout, stderr = common.run_os_command(
            "lvcreate --yes --name osd-{} --size {} osd-db".format(
                osd_id, osd_db_size
            )
        )
        if retcode:
            print("db lv creation")
            print(stdout)
            print(stderr)
            # Give the exception a message so the failure log is not empty
            raise Exception("lvcreate failed (exit code {})".format(retcode))

        # Log it
        logger.out(
            'Created new OSD database logical volume "osd-db/osd-{}"'.format(
                osd_id
            ),
            state="o",
        )
        return True
    except Exception as e:
        # Log it
        logger.out(
            "Failed to create OSD database logical volume: {}".format(e), state="e"
        )
        return False
|
||||
|
||||
|
||||
@ -420,35 +500,39 @@ class CephPoolInstance(object):
|
||||
self.zkhandler = zkhandler
|
||||
self.this_node = this_node
|
||||
self.name = name
|
||||
self.pgs = ''
|
||||
self.pgs = ""
|
||||
self.stats = dict()
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.pgs', self.name))
|
||||
def watch_pool_node(data, stat, event=''):
|
||||
if event and event.type == 'DELETED':
|
||||
@self.zkhandler.zk_conn.DataWatch(
|
||||
self.zkhandler.schema.path("pool.pgs", self.name)
|
||||
)
|
||||
def watch_pool_node(data, stat, event=""):
|
||||
if event and event.type == "DELETED":
|
||||
# The key has been deleted after existing before; terminate this watcher
|
||||
# because this class instance is about to be reaped in Daemon.py
|
||||
return False
|
||||
|
||||
try:
|
||||
data = data.decode('ascii')
|
||||
data = data.decode("ascii")
|
||||
except AttributeError:
|
||||
data = ''
|
||||
data = ""
|
||||
|
||||
if data and data != self.pgs:
|
||||
self.pgs = data
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('pool.stats', self.name))
|
||||
def watch_pool_stats(data, stat, event=''):
|
||||
if event and event.type == 'DELETED':
|
||||
@self.zkhandler.zk_conn.DataWatch(
|
||||
self.zkhandler.schema.path("pool.stats", self.name)
|
||||
)
|
||||
def watch_pool_stats(data, stat, event=""):
|
||||
if event and event.type == "DELETED":
|
||||
# The key has been deleted after existing before; terminate this watcher
|
||||
# because this class instance is about to be reaped in Daemon.py
|
||||
return False
|
||||
|
||||
try:
|
||||
data = data.decode('ascii')
|
||||
data = data.decode("ascii")
|
||||
except AttributeError:
|
||||
data = ''
|
||||
data = ""
|
||||
|
||||
if data and data != self.stats:
|
||||
self.stats = json.loads(data)
|
||||
@ -462,17 +546,19 @@ class CephVolumeInstance(object):
|
||||
self.name = name
|
||||
self.stats = dict()
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('volume.stats', f'{self.pool}/{self.name}'))
|
||||
def watch_volume_stats(data, stat, event=''):
|
||||
if event and event.type == 'DELETED':
|
||||
@self.zkhandler.zk_conn.DataWatch(
|
||||
self.zkhandler.schema.path("volume.stats", f"{self.pool}/{self.name}")
|
||||
)
|
||||
def watch_volume_stats(data, stat, event=""):
|
||||
if event and event.type == "DELETED":
|
||||
# The key has been deleted after existing before; terminate this watcher
|
||||
# because this class instance is about to be reaped in Daemon.py
|
||||
return False
|
||||
|
||||
try:
|
||||
data = data.decode('ascii')
|
||||
data = data.decode("ascii")
|
||||
except AttributeError:
|
||||
data = ''
|
||||
data = ""
|
||||
|
||||
if data and data != self.stats:
|
||||
self.stats = json.loads(data)
|
||||
@ -487,17 +573,21 @@ class CephSnapshotInstance(object):
|
||||
self.name = name
|
||||
self.stats = dict()
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(self.zkhandler.schema.path('snapshot.stats', f'{self.pool}/{self.volume}/{self.name}'))
|
||||
def watch_snapshot_stats(data, stat, event=''):
|
||||
if event and event.type == 'DELETED':
|
||||
@self.zkhandler.zk_conn.DataWatch(
|
||||
self.zkhandler.schema.path(
|
||||
"snapshot.stats", f"{self.pool}/{self.volume}/{self.name}"
|
||||
)
|
||||
)
|
||||
def watch_snapshot_stats(data, stat, event=""):
|
||||
if event and event.type == "DELETED":
|
||||
# The key has been deleted after existing before; terminate this watcher
|
||||
# because this class instance is about to be reaped in Daemon.py
|
||||
return False
|
||||
|
||||
try:
|
||||
data = data.decode('ascii')
|
||||
data = data.decode("ascii")
|
||||
except AttributeError:
|
||||
data = ''
|
||||
data = ""
|
||||
|
||||
if data and data != self.stats:
|
||||
self.stats = json.loads(data)
|
||||
@ -510,77 +600,69 @@ def ceph_command(zkhandler, logger, this_node, data, d_osd):
|
||||
command, args = data.split()
|
||||
|
||||
# Adding a new OSD
|
||||
if command == 'osd_add':
|
||||
node, device, weight, ext_db_flag, ext_db_ratio = args.split(',')
|
||||
if command == "osd_add":
|
||||
node, device, weight, ext_db_flag, ext_db_ratio = args.split(",")
|
||||
ext_db_flag = bool(strtobool(ext_db_flag))
|
||||
ext_db_ratio = float(ext_db_ratio)
|
||||
if node == this_node.name:
|
||||
# Lock the command queue
|
||||
zk_lock = zkhandler.writelock('base.cmd.ceph')
|
||||
zk_lock = zkhandler.writelock("base.cmd.ceph")
|
||||
with zk_lock:
|
||||
# Add the OSD
|
||||
result = CephOSDInstance.add_osd(zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio)
|
||||
result = CephOSDInstance.add_osd(
|
||||
zkhandler, logger, node, device, weight, ext_db_flag, ext_db_ratio
|
||||
)
|
||||
# Command succeeded
|
||||
if result:
|
||||
# Update the command queue
|
||||
zkhandler.write([
|
||||
('base.cmd.ceph', 'success-{}'.format(data))
|
||||
])
|
||||
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
|
||||
# Command failed
|
||||
else:
|
||||
# Update the command queue
|
||||
zkhandler.write([
|
||||
('base.cmd.ceph', 'failure-{}'.format(data))
|
||||
])
|
||||
zkhandler.write([("base.cmd.ceph", "failure-{}".format(data))])
|
||||
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
||||
time.sleep(1)
|
||||
|
||||
# Removing an OSD
|
||||
elif command == 'osd_remove':
|
||||
elif command == "osd_remove":
|
||||
osd_id = args
|
||||
|
||||
# Verify osd_id is in the list
|
||||
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
|
||||
# Lock the command queue
|
||||
zk_lock = zkhandler.writelock('base.cmd.ceph')
|
||||
zk_lock = zkhandler.writelock("base.cmd.ceph")
|
||||
with zk_lock:
|
||||
# Remove the OSD
|
||||
result = CephOSDInstance.remove_osd(zkhandler, logger, osd_id, d_osd[osd_id])
|
||||
result = CephOSDInstance.remove_osd(
|
||||
zkhandler, logger, osd_id, d_osd[osd_id]
|
||||
)
|
||||
# Command succeeded
|
||||
if result:
|
||||
# Update the command queue
|
||||
zkhandler.write([
|
||||
('base.cmd.ceph', 'success-{}'.format(data))
|
||||
])
|
||||
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
|
||||
# Command failed
|
||||
else:
|
||||
# Update the command queue
|
||||
zkhandler.write([
|
||||
('base.cmd.ceph', 'failure-{}'.format(data))
|
||||
])
|
||||
zkhandler.write([("base.cmd.ceph", "failure-{}".format(data))])
|
||||
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
||||
time.sleep(1)
|
||||
|
||||
# Adding a new DB VG
|
||||
elif command == 'db_vg_add':
|
||||
node, device = args.split(',')
|
||||
elif command == "db_vg_add":
|
||||
node, device = args.split(",")
|
||||
if node == this_node.name:
|
||||
# Lock the command queue
|
||||
zk_lock = zkhandler.writelock('base.cmd.ceph')
|
||||
zk_lock = zkhandler.writelock("base.cmd.ceph")
|
||||
with zk_lock:
|
||||
# Add the VG
|
||||
result = CephOSDInstance.add_db_vg(zkhandler, logger, device)
|
||||
# Command succeeded
|
||||
if result:
|
||||
# Update the command queue
|
||||
zkhandler.write([
|
||||
('base.cmd.ceph', 'success-{}'.format(data))
|
||||
])
|
||||
zkhandler.write([("base.cmd.ceph", "success-{}".format(data))])
|
||||
# Command failed
|
||||
else:
|
||||
# Update the command queue
|
||||
zkhandler.write([
|
||||
('base.cmd.ceph', 'failure={}'.format(data))
|
||||
])
|
||||
zkhandler.write([("base.cmd.ceph", "failure={}".format(data))])
|
||||
# Wait 1 seconds before we free the lock, to ensure the client hits the lock
|
||||
time.sleep(1)
|
||||
|
@ -74,69 +74,71 @@ class PowerDNSInstance(object):
|
||||
self.dns_server_daemon = None
|
||||
|
||||
# Floating upstreams
|
||||
self.cluster_floatingipaddr, self.cluster_cidrnetmask = self.config['cluster_floating_ip'].split('/')
|
||||
self.upstream_floatingipaddr, self.upstream_cidrnetmask = self.config['upstream_floating_ip'].split('/')
|
||||
self.cluster_floatingipaddr, self.cluster_cidrnetmask = self.config[
|
||||
"cluster_floating_ip"
|
||||
].split("/")
|
||||
self.upstream_floatingipaddr, self.upstream_cidrnetmask = self.config[
|
||||
"upstream_floating_ip"
|
||||
].split("/")
|
||||
|
||||
def start(self):
|
||||
self.logger.out(
|
||||
'Starting PowerDNS zone aggregator',
|
||||
state='i'
|
||||
)
|
||||
self.logger.out("Starting PowerDNS zone aggregator", state="i")
|
||||
# Define the PowerDNS config
|
||||
dns_configuration = [
|
||||
# Option # Explanation
|
||||
'--no-config',
|
||||
'--daemon=no', # Start directly
|
||||
'--guardian=yes', # Use a guardian
|
||||
'--disable-syslog=yes', # Log only to stdout (which is then captured)
|
||||
'--disable-axfr=no', # Allow AXFRs
|
||||
'--allow-axfr-ips=0.0.0.0/0', # Allow AXFRs to anywhere
|
||||
'--local-address={},{}'.format(self.cluster_floatingipaddr, self.upstream_floatingipaddr), # Listen on floating IPs
|
||||
'--local-port=53', # On port 53
|
||||
'--log-dns-details=on', # Log details
|
||||
'--loglevel=3', # Log info
|
||||
'--master=yes', # Enable master mode
|
||||
'--slave=yes', # Enable slave mode
|
||||
'--slave-renotify=yes', # Renotify out for our slaved zones
|
||||
'--version-string=powerdns', # Set the version string
|
||||
'--default-soa-name=dns.pvc.local', # Override dnsmasq's invalid name
|
||||
'--socket-dir={}'.format(self.config['pdns_dynamic_directory']), # Standard socket directory
|
||||
'--launch=gpgsql', # Use the PostgreSQL backend
|
||||
'--gpgsql-host={}'.format(self.config['pdns_postgresql_host']), # PostgreSQL instance
|
||||
'--gpgsql-port={}'.format(self.config['pdns_postgresql_port']), # Default port
|
||||
'--gpgsql-dbname={}'.format(self.config['pdns_postgresql_dbname']), # Database name
|
||||
'--gpgsql-user={}'.format(self.config['pdns_postgresql_user']), # User name
|
||||
'--gpgsql-password={}'.format(self.config['pdns_postgresql_password']), # User password
|
||||
'--gpgsql-dnssec=no', # Do DNSSEC elsewhere
|
||||
"--no-config",
|
||||
"--daemon=no", # Start directly
|
||||
"--guardian=yes", # Use a guardian
|
||||
"--disable-syslog=yes", # Log only to stdout (which is then captured)
|
||||
"--disable-axfr=no", # Allow AXFRs
|
||||
"--allow-axfr-ips=0.0.0.0/0", # Allow AXFRs to anywhere
|
||||
"--local-address={},{}".format(
|
||||
self.cluster_floatingipaddr, self.upstream_floatingipaddr
|
||||
), # Listen on floating IPs
|
||||
"--local-port=53", # On port 53
|
||||
"--log-dns-details=on", # Log details
|
||||
"--loglevel=3", # Log info
|
||||
"--master=yes", # Enable master mode
|
||||
"--slave=yes", # Enable slave mode
|
||||
"--slave-renotify=yes", # Renotify out for our slaved zones
|
||||
"--version-string=powerdns", # Set the version string
|
||||
"--default-soa-name=dns.pvc.local", # Override dnsmasq's invalid name
|
||||
"--socket-dir={}".format(
|
||||
self.config["pdns_dynamic_directory"]
|
||||
), # Standard socket directory
|
||||
"--launch=gpgsql", # Use the PostgreSQL backend
|
||||
"--gpgsql-host={}".format(
|
||||
self.config["pdns_postgresql_host"]
|
||||
), # PostgreSQL instance
|
||||
"--gpgsql-port={}".format(
|
||||
self.config["pdns_postgresql_port"]
|
||||
), # Default port
|
||||
"--gpgsql-dbname={}".format(
|
||||
self.config["pdns_postgresql_dbname"]
|
||||
), # Database name
|
||||
"--gpgsql-user={}".format(self.config["pdns_postgresql_user"]), # User name
|
||||
"--gpgsql-password={}".format(
|
||||
self.config["pdns_postgresql_password"]
|
||||
), # User password
|
||||
"--gpgsql-dnssec=no", # Do DNSSEC elsewhere
|
||||
]
|
||||
# Start the pdns process in a thread
|
||||
self.dns_server_daemon = common.run_os_daemon(
|
||||
'/usr/sbin/pdns_server {}'.format(
|
||||
' '.join(dns_configuration)
|
||||
),
|
||||
"/usr/sbin/pdns_server {}".format(" ".join(dns_configuration)),
|
||||
environment=None,
|
||||
logfile='{}/pdns-aggregator.log'.format(self.config['pdns_log_directory'])
|
||||
logfile="{}/pdns-aggregator.log".format(self.config["pdns_log_directory"]),
|
||||
)
|
||||
if self.dns_server_daemon:
|
||||
self.logger.out(
|
||||
'Successfully started PowerDNS zone aggregator',
|
||||
state='o'
|
||||
)
|
||||
self.logger.out("Successfully started PowerDNS zone aggregator", state="o")
|
||||
|
||||
def stop(self):
|
||||
if self.dns_server_daemon:
|
||||
self.logger.out(
|
||||
'Stopping PowerDNS zone aggregator',
|
||||
state='i'
|
||||
)
|
||||
self.logger.out("Stopping PowerDNS zone aggregator", state="i")
|
||||
# Terminate, then kill
|
||||
self.dns_server_daemon.signal('term')
|
||||
self.dns_server_daemon.signal("term")
|
||||
time.sleep(0.2)
|
||||
self.dns_server_daemon.signal('kill')
|
||||
self.logger.out(
|
||||
'Successfully stopped PowerDNS zone aggregator',
|
||||
state='o'
|
||||
)
|
||||
self.dns_server_daemon.signal("kill")
|
||||
self.logger.out("Successfully stopped PowerDNS zone aggregator", state="o")
|
||||
|
||||
|
||||
class DNSNetworkInstance(object):
|
||||
@ -153,29 +155,24 @@ class DNSNetworkInstance(object):
|
||||
network_domain = self.network.domain
|
||||
|
||||
self.logger.out(
|
||||
'Adding entry for client domain {}'.format(
|
||||
network_domain
|
||||
),
|
||||
prefix='DNS aggregator',
|
||||
state='o'
|
||||
"Adding entry for client domain {}".format(network_domain),
|
||||
prefix="DNS aggregator",
|
||||
state="o",
|
||||
)
|
||||
|
||||
# Connect to the database
|
||||
self.sql_conn = psycopg2.connect(
|
||||
"host='{}' port='{}' dbname='{}' user='{}' password='{}' sslmode='disable'".format(
|
||||
self.config['pdns_postgresql_host'],
|
||||
self.config['pdns_postgresql_port'],
|
||||
self.config['pdns_postgresql_dbname'],
|
||||
self.config['pdns_postgresql_user'],
|
||||
self.config['pdns_postgresql_password']
|
||||
self.config["pdns_postgresql_host"],
|
||||
self.config["pdns_postgresql_port"],
|
||||
self.config["pdns_postgresql_dbname"],
|
||||
self.config["pdns_postgresql_user"],
|
||||
self.config["pdns_postgresql_password"],
|
||||
)
|
||||
)
|
||||
sql_curs = self.sql_conn.cursor()
|
||||
# Try to access the domains entry
|
||||
sql_curs.execute(
|
||||
"SELECT * FROM domains WHERE name=%s",
|
||||
(network_domain,)
|
||||
)
|
||||
sql_curs.execute("SELECT * FROM domains WHERE name=%s", (network_domain,))
|
||||
results = sql_curs.fetchone()
|
||||
|
||||
# If we got back a result, don't try to add the domain to the DB
|
||||
@ -188,14 +185,11 @@ class DNSNetworkInstance(object):
|
||||
if self.aggregator.is_active and write_domain:
|
||||
sql_curs.execute(
|
||||
"INSERT INTO domains (name, type, account, notified_serial) VALUES (%s, 'MASTER', 'internal', 0)",
|
||||
(network_domain,)
|
||||
(network_domain,),
|
||||
)
|
||||
self.sql_conn.commit()
|
||||
|
||||
sql_curs.execute(
|
||||
"SELECT id FROM domains WHERE name=%s",
|
||||
(network_domain,)
|
||||
)
|
||||
sql_curs.execute("SELECT id FROM domains WHERE name=%s", (network_domain,))
|
||||
domain_id = sql_curs.fetchone()
|
||||
|
||||
sql_curs.execute(
|
||||
@ -203,13 +197,22 @@ class DNSNetworkInstance(object):
|
||||
INSERT INTO records (domain_id, name, content, type, ttl, prio) VALUES
|
||||
(%s, %s, %s, %s, %s, %s)
|
||||
""",
|
||||
(domain_id, network_domain, 'nsX.{d} root.{d} 1 10800 1800 86400 86400'.format(d=self.config['upstream_domain']), 'SOA', 86400, 0)
|
||||
(
|
||||
domain_id,
|
||||
network_domain,
|
||||
"nsX.{d} root.{d} 1 10800 1800 86400 86400".format(
|
||||
d=self.config["upstream_domain"]
|
||||
),
|
||||
"SOA",
|
||||
86400,
|
||||
0,
|
||||
),
|
||||
)
|
||||
|
||||
if self.network.name_servers:
|
||||
ns_servers = self.network.name_servers
|
||||
else:
|
||||
ns_servers = ['pvc-dns.{}'.format(self.config['upstream_domain'])]
|
||||
ns_servers = ["pvc-dns.{}".format(self.config["upstream_domain"])]
|
||||
|
||||
for ns_server in ns_servers:
|
||||
sql_curs.execute(
|
||||
@ -217,7 +220,7 @@ class DNSNetworkInstance(object):
|
||||
INSERT INTO records (domain_id, name, content, type, ttl, prio) VALUES
|
||||
(%s, %s, %s, %s, %s, %s)
|
||||
""",
|
||||
(domain_id, network_domain, ns_server, 'NS', 86400, 0)
|
||||
(domain_id, network_domain, ns_server, "NS", 86400, 0),
|
||||
)
|
||||
|
||||
self.sql_conn.commit()
|
||||
@ -229,42 +232,31 @@ class DNSNetworkInstance(object):
|
||||
network_domain = self.network.domain
|
||||
|
||||
self.logger.out(
|
||||
'Removing entry for client domain {}'.format(
|
||||
network_domain
|
||||
),
|
||||
prefix='DNS aggregator',
|
||||
state='o'
|
||||
"Removing entry for client domain {}".format(network_domain),
|
||||
prefix="DNS aggregator",
|
||||
state="o",
|
||||
)
|
||||
|
||||
# Connect to the database
|
||||
self.sql_conn = psycopg2.connect(
|
||||
"host='{}' port='{}' dbname='{}' user='{}' password='{}' sslmode='disable'".format(
|
||||
self.config['pdns_postgresql_host'],
|
||||
self.config['pdns_postgresql_port'],
|
||||
self.config['pdns_postgresql_dbname'],
|
||||
self.config['pdns_postgresql_user'],
|
||||
self.config['pdns_postgresql_password']
|
||||
self.config["pdns_postgresql_host"],
|
||||
self.config["pdns_postgresql_port"],
|
||||
self.config["pdns_postgresql_dbname"],
|
||||
self.config["pdns_postgresql_user"],
|
||||
self.config["pdns_postgresql_password"],
|
||||
)
|
||||
)
|
||||
sql_curs = self.sql_conn.cursor()
|
||||
|
||||
# Get the domain ID
|
||||
sql_curs.execute(
|
||||
"SELECT id FROM domains WHERE name=%s",
|
||||
(network_domain,)
|
||||
)
|
||||
sql_curs.execute("SELECT id FROM domains WHERE name=%s", (network_domain,))
|
||||
domain_id = sql_curs.fetchone()
|
||||
|
||||
# Delete the domain from the database if we're active
|
||||
if self.aggregator.is_active and domain_id:
|
||||
sql_curs.execute(
|
||||
"DELETE FROM domains WHERE id=%s",
|
||||
(domain_id,)
|
||||
)
|
||||
sql_curs.execute(
|
||||
"DELETE FROM records WHERE domain_id=%s",
|
||||
(domain_id,)
|
||||
)
|
||||
sql_curs.execute("DELETE FROM domains WHERE id=%s", (domain_id,))
|
||||
sql_curs.execute("DELETE FROM records WHERE domain_id=%s", (domain_id,))
|
||||
|
||||
self.sql_conn.commit()
|
||||
self.sql_conn.close()
|
||||
@ -295,11 +287,11 @@ class AXFRDaemonInstance(object):
|
||||
# after the leader transitions
|
||||
self.sql_conn = psycopg2.connect(
|
||||
"host='{}' port='{}' dbname='{}' user='{}' password='{}' sslmode='disable'".format(
|
||||
self.config['pdns_postgresql_host'],
|
||||
self.config['pdns_postgresql_port'],
|
||||
self.config['pdns_postgresql_dbname'],
|
||||
self.config['pdns_postgresql_user'],
|
||||
self.config['pdns_postgresql_password']
|
||||
self.config["pdns_postgresql_host"],
|
||||
self.config["pdns_postgresql_port"],
|
||||
self.config["pdns_postgresql_dbname"],
|
||||
self.config["pdns_postgresql_user"],
|
||||
self.config["pdns_postgresql_password"],
|
||||
)
|
||||
)
|
||||
|
||||
@ -328,7 +320,7 @@ class AXFRDaemonInstance(object):
|
||||
|
||||
# Set up our basic variables
|
||||
domain = network.domain
|
||||
if network.ip4_gateway != 'None':
|
||||
if network.ip4_gateway != "None":
|
||||
dnsmasq_ip = network.ip4_gateway
|
||||
else:
|
||||
dnsmasq_ip = network.ip6_gateway
|
||||
@ -341,53 +333,67 @@ class AXFRDaemonInstance(object):
|
||||
z = dns.zone.from_xfr(axfr)
|
||||
records_raw = [z[n].to_text(n) for n in z.nodes.keys()]
|
||||
except Exception as e:
|
||||
if self.config['debug']:
|
||||
self.logger.out('{} {} ({})'.format(e, dnsmasq_ip, domain), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"{} {} ({})".format(e, dnsmasq_ip, domain),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
continue
|
||||
|
||||
# Fix the formatting because it's useless
|
||||
# reference: ['@ 600 IN SOA . . 4 1200 180 1209600 600\n@ 600 IN NS .', 'test3 600 IN A 10.1.1.203\ntest3 600 IN AAAA 2001:b23e:1113:0:5054:ff:fe5c:f131', etc.]
|
||||
# We don't really care about dnsmasq's terrible SOA or NS records which are in [0]
|
||||
string_records = '\n'.join(records_raw[1:])
|
||||
string_records = "\n".join(records_raw[1:])
|
||||
# Split into individual records
|
||||
records_new = list()
|
||||
for element in string_records.split('\n'):
|
||||
for element in string_records.split("\n"):
|
||||
if element:
|
||||
record = element.split()
|
||||
# Handle space-containing data elements
|
||||
if domain not in record[0]:
|
||||
name = '{}.{}'.format(record[0], domain)
|
||||
name = "{}.{}".format(record[0], domain)
|
||||
else:
|
||||
name = record[0]
|
||||
entry = '{} {} IN {} {}'.format(name, record[1], record[3], ' '.join(record[4:]))
|
||||
entry = "{} {} IN {} {}".format(
|
||||
name, record[1], record[3], " ".join(record[4:])
|
||||
)
|
||||
records_new.append(entry)
|
||||
|
||||
#
|
||||
# Get the current zone from the database
|
||||
#
|
||||
try:
|
||||
sql_curs.execute(
|
||||
"SELECT id FROM domains WHERE name=%s",
|
||||
(domain,)
|
||||
)
|
||||
sql_curs.execute("SELECT id FROM domains WHERE name=%s", (domain,))
|
||||
domain_id = sql_curs.fetchone()
|
||||
sql_curs.execute(
|
||||
"SELECT * FROM records WHERE domain_id=%s",
|
||||
(domain_id,)
|
||||
"SELECT * FROM records WHERE domain_id=%s", (domain_id,)
|
||||
)
|
||||
results = list(sql_curs.fetchall())
|
||||
if self.config['debug']:
|
||||
self.logger.out('SQL query results: {}'.format(results), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"SQL query results: {}".format(results),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
except Exception as e:
|
||||
self.logger.out('ERROR: Failed to obtain DNS records from database: {}'.format(e))
|
||||
self.logger.out(
|
||||
"ERROR: Failed to obtain DNS records from database: {}".format(
|
||||
e
|
||||
)
|
||||
)
|
||||
|
||||
# Fix the formatting because it's useless for comparison
|
||||
# reference: ((10, 28, 'testnet01.i.bonilan.net', 'SOA', 'nsX.pvc.local root.pvc.local 1 10800 1800 86400 86400', 86400, 0, None, 0, None, 1), etc.)
|
||||
records_old = list()
|
||||
records_old_ids = list()
|
||||
if not results:
|
||||
if self.config['debug']:
|
||||
self.logger.out('No results found, skipping.', state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"No results found, skipping.",
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
continue
|
||||
for record in results:
|
||||
# Skip the non-A
|
||||
@ -397,14 +403,24 @@ class AXFRDaemonInstance(object):
|
||||
r_type = record[3]
|
||||
r_data = record[4]
|
||||
# Assemble a list element in the same format as the AXFR data
|
||||
entry = '{} {} IN {} {}'.format(r_name, r_ttl, r_type, r_data)
|
||||
if self.config['debug']:
|
||||
self.logger.out('Found record: {}'.format(entry), state='d', prefix='dns-aggregator')
|
||||
entry = "{} {} IN {} {}".format(r_name, r_ttl, r_type, r_data)
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Found record: {}".format(entry),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
|
||||
# Skip non-A or AAAA records
|
||||
if r_type != 'A' and r_type != 'AAAA':
|
||||
if self.config['debug']:
|
||||
self.logger.out('Skipping record {}, not A or AAAA: "{}"'.format(entry, r_type), state='d', prefix='dns-aggregator')
|
||||
if r_type != "A" and r_type != "AAAA":
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
'Skipping record {}, not A or AAAA: "{}"'.format(
|
||||
entry, r_type
|
||||
),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
continue
|
||||
|
||||
records_old.append(entry)
|
||||
@ -413,9 +429,17 @@ class AXFRDaemonInstance(object):
|
||||
records_new.sort()
|
||||
records_old.sort()
|
||||
|
||||
if self.config['debug']:
|
||||
self.logger.out('New: {}'.format(records_new), state='d', prefix='dns-aggregator')
|
||||
self.logger.out('Old: {}'.format(records_old), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"New: {}".format(records_new),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
self.logger.out(
|
||||
"Old: {}".format(records_old),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
|
||||
# Find the differences between the lists
|
||||
# Basic check one: are they completely equal
|
||||
@ -426,9 +450,17 @@ class AXFRDaemonInstance(object):
|
||||
in_new_not_in_old = in_new - in_old
|
||||
in_old_not_in_new = in_old - in_new
|
||||
|
||||
if self.config['debug']:
|
||||
self.logger.out('New but not old: {}'.format(in_new_not_in_old), state='d', prefix='dns-aggregator')
|
||||
self.logger.out('Old but not new: {}'.format(in_old_not_in_new), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"New but not old: {}".format(in_new_not_in_old),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
self.logger.out(
|
||||
"Old but not new: {}".format(in_old_not_in_new),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
|
||||
# Go through the old list
|
||||
remove_records = list() # list of database IDs
|
||||
@ -445,18 +477,24 @@ class AXFRDaemonInstance(object):
|
||||
for newrecord in in_new_not_in_old:
|
||||
splitnewrecord = newrecord.split()
|
||||
# If there's a name and type match with different content, remove the old one
|
||||
if splitrecord[0] == splitnewrecord[0] and splitrecord[3] == splitnewrecord[3]:
|
||||
if (
|
||||
splitrecord[0] == splitnewrecord[0]
|
||||
and splitrecord[3] == splitnewrecord[3]
|
||||
):
|
||||
remove_records.append(record_id)
|
||||
|
||||
changed = False
|
||||
if len(remove_records) > 0:
|
||||
# Remove the invalid old records
|
||||
for record_id in remove_records:
|
||||
if self.config['debug']:
|
||||
self.logger.out('Removing record: {}'.format(record_id), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Removing record: {}".format(record_id),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
sql_curs.execute(
|
||||
"DELETE FROM records WHERE id=%s",
|
||||
(record_id,)
|
||||
"DELETE FROM records WHERE id=%s", (record_id,)
|
||||
)
|
||||
changed = True
|
||||
|
||||
@ -469,53 +507,81 @@ class AXFRDaemonInstance(object):
|
||||
r_ttl = record[1]
|
||||
r_type = record[3]
|
||||
r_data = record[4]
|
||||
if self.config['debug']:
|
||||
self.logger.out('Add record: {}'.format(name), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Add record: {}".format(name),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
try:
|
||||
sql_curs.execute(
|
||||
"INSERT INTO records (domain_id, name, ttl, type, prio, content) VALUES (%s, %s, %s, %s, %s, %s)",
|
||||
(domain_id, r_name, r_ttl, r_type, 0, r_data)
|
||||
(domain_id, r_name, r_ttl, r_type, 0, r_data),
|
||||
)
|
||||
changed = True
|
||||
except psycopg2.IntegrityError as e:
|
||||
if self.config['debug']:
|
||||
self.logger.out('Failed to add record due to {}: {}'.format(e, name), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Failed to add record due to {}: {}".format(
|
||||
e, name
|
||||
),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
except psycopg2.errors.InFailedSqlTransaction as e:
|
||||
if self.config['debug']:
|
||||
self.logger.out('Failed to add record due to {}: {}'.format(e, name), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Failed to add record due to {}: {}".format(
|
||||
e, name
|
||||
),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
|
||||
if changed:
|
||||
# Increase SOA serial
|
||||
sql_curs.execute(
|
||||
"SELECT content FROM records WHERE domain_id=%s AND type='SOA'",
|
||||
(domain_id,)
|
||||
(domain_id,),
|
||||
)
|
||||
soa_record = list(sql_curs.fetchone())[0].split()
|
||||
current_serial = int(soa_record[2])
|
||||
new_serial = current_serial + 1
|
||||
soa_record[2] = str(new_serial)
|
||||
if self.config['debug']:
|
||||
self.logger.out('Records changed; bumping SOA: {}'.format(new_serial), state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Records changed; bumping SOA: {}".format(new_serial),
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
sql_curs.execute(
|
||||
"UPDATE records SET content=%s WHERE domain_id=%s AND type='SOA'",
|
||||
(' '.join(soa_record), domain_id)
|
||||
(" ".join(soa_record), domain_id),
|
||||
)
|
||||
|
||||
# Commit all the previous changes
|
||||
if self.config['debug']:
|
||||
self.logger.out('Committing database changes and reloading PDNS', state='d', prefix='dns-aggregator')
|
||||
if self.config["debug"]:
|
||||
self.logger.out(
|
||||
"Committing database changes and reloading PDNS",
|
||||
state="d",
|
||||
prefix="dns-aggregator",
|
||||
)
|
||||
try:
|
||||
self.sql_conn.commit()
|
||||
except Exception as e:
|
||||
self.logger.out('ERROR: Failed to commit DNS aggregator changes: {}'.format(e), state='e')
|
||||
self.logger.out(
|
||||
"ERROR: Failed to commit DNS aggregator changes: {}".format(
|
||||
e
|
||||
),
|
||||
state="e",
|
||||
)
|
||||
|
||||
# Reload the domain
|
||||
common.run_os_command(
|
||||
'/usr/bin/pdns_control --socket-dir={} reload {}'.format(
|
||||
self.config['pdns_dynamic_directory'],
|
||||
domain
|
||||
"/usr/bin/pdns_control --socket-dir={} reload {}".format(
|
||||
self.config["pdns_dynamic_directory"], domain
|
||||
),
|
||||
background=False
|
||||
background=False,
|
||||
)
|
||||
|
||||
# Wait for 10 seconds
|
||||
|
@ -46,45 +46,52 @@ class MetadataAPIInstance(object):
|
||||
|
||||
# Add flask routes inside our instance
|
||||
def add_routes(self):
|
||||
@self.mdapi.route('/', methods=['GET'])
|
||||
@self.mdapi.route("/", methods=["GET"])
|
||||
def api_root():
|
||||
return flask.jsonify({"message": "PVC Provisioner Metadata API version 1"}), 209
|
||||
return (
|
||||
flask.jsonify({"message": "PVC Provisioner Metadata API version 1"}),
|
||||
209,
|
||||
)
|
||||
|
||||
@self.mdapi.route('/<version>/meta-data/', methods=['GET'])
|
||||
@self.mdapi.route("/<version>/meta-data/", methods=["GET"])
|
||||
def api_metadata_root(version):
|
||||
metadata = """instance-id\nname\nprofile"""
|
||||
return metadata, 200
|
||||
|
||||
@self.mdapi.route('/<version>/meta-data/instance-id', methods=['GET'])
|
||||
@self.mdapi.route("/<version>/meta-data/instance-id", methods=["GET"])
|
||||
def api_metadata_instanceid(version):
|
||||
source_address = flask.request.__dict__['environ']['REMOTE_ADDR']
|
||||
source_address = flask.request.__dict__["environ"]["REMOTE_ADDR"]
|
||||
vm_details = self.get_vm_details(source_address)
|
||||
instance_id = vm_details.get('uuid', None)
|
||||
instance_id = vm_details.get("uuid", None)
|
||||
return instance_id, 200
|
||||
|
||||
@self.mdapi.route('/<version>/meta-data/name', methods=['GET'])
|
||||
@self.mdapi.route("/<version>/meta-data/name", methods=["GET"])
|
||||
def api_metadata_hostname(version):
|
||||
source_address = flask.request.__dict__['environ']['REMOTE_ADDR']
|
||||
source_address = flask.request.__dict__["environ"]["REMOTE_ADDR"]
|
||||
vm_details = self.get_vm_details(source_address)
|
||||
vm_name = vm_details.get('name', None)
|
||||
vm_name = vm_details.get("name", None)
|
||||
return vm_name, 200
|
||||
|
||||
@self.mdapi.route('/<version>/meta-data/profile', methods=['GET'])
|
||||
@self.mdapi.route("/<version>/meta-data/profile", methods=["GET"])
|
||||
def api_metadata_profile(version):
|
||||
source_address = flask.request.__dict__['environ']['REMOTE_ADDR']
|
||||
source_address = flask.request.__dict__["environ"]["REMOTE_ADDR"]
|
||||
vm_details = self.get_vm_details(source_address)
|
||||
vm_profile = vm_details.get('profile', None)
|
||||
vm_profile = vm_details.get("profile", None)
|
||||
return vm_profile, 200
|
||||
|
||||
@self.mdapi.route('/<version>/user-data', methods=['GET'])
|
||||
@self.mdapi.route("/<version>/user-data", methods=["GET"])
|
||||
def api_userdata(version):
|
||||
source_address = flask.request.__dict__['environ']['REMOTE_ADDR']
|
||||
source_address = flask.request.__dict__["environ"]["REMOTE_ADDR"]
|
||||
vm_details = self.get_vm_details(source_address)
|
||||
vm_profile = vm_details.get('profile', None)
|
||||
vm_profile = vm_details.get("profile", None)
|
||||
# Get the userdata
|
||||
if vm_profile:
|
||||
userdata = self.get_profile_userdata(vm_profile)
|
||||
self.logger.out("Returning userdata for profile {}".format(vm_profile), state='i', prefix='Metadata API')
|
||||
self.logger.out(
|
||||
"Returning userdata for profile {}".format(vm_profile),
|
||||
state="i",
|
||||
prefix="Metadata API",
|
||||
)
|
||||
else:
|
||||
userdata = None
|
||||
return flask.Response(userdata)
|
||||
@ -92,46 +99,46 @@ class MetadataAPIInstance(object):
|
||||
def launch_wsgi(self):
|
||||
try:
|
||||
self.md_http_server = gevent.pywsgi.WSGIServer(
|
||||
('169.254.169.254', 80),
|
||||
("169.254.169.254", 80),
|
||||
self.mdapi,
|
||||
log=sys.stdout,
|
||||
error_log=sys.stdout
|
||||
error_log=sys.stdout,
|
||||
)
|
||||
self.md_http_server.serve_forever()
|
||||
except Exception as e:
|
||||
self.logger.out('Error starting Metadata API: {}'.format(e), state='e')
|
||||
self.logger.out("Error starting Metadata API: {}".format(e), state="e")
|
||||
|
||||
# WSGI start/stop
|
||||
def start(self):
|
||||
# Launch Metadata API
|
||||
self.logger.out('Starting Metadata API at 169.254.169.254:80', state='i')
|
||||
self.logger.out("Starting Metadata API at 169.254.169.254:80", state="i")
|
||||
self.thread = Thread(target=self.launch_wsgi)
|
||||
self.thread.start()
|
||||
self.logger.out('Successfully started Metadata API thread', state='o')
|
||||
self.logger.out("Successfully started Metadata API thread", state="o")
|
||||
|
||||
def stop(self):
    """
    Stop and close the Metadata API server, if one is present.

    Does nothing when self.md_http_server is unset. On success the attribute
    is reset to None; any exception during shutdown is logged, not raised.
    """
    server = self.md_http_server
    if not server:
        return

    self.logger.out("Stopping Metadata API at 169.254.169.254:80", state="i")
    try:
        # Each shutdown step is followed by a short pause
        server.stop()
        time.sleep(0.1)
        server.close()
        time.sleep(0.1)
        self.md_http_server = None
        self.logger.out("Successfully stopped Metadata API", state="o")
    except Exception as e:
        message = "Error stopping Metadata API: {}".format(e)
        self.logger.out(message, state="e")
|
||||
|
||||
# Helper functions
|
||||
def open_database(self):
    """
    Open a PostgreSQL connection using the metadata_postgresql_* settings.

    Returns a (connection, cursor) tuple; the cursor is created with
    RealDictCursor as its cursor factory.
    """
    settings = self.config
    connection = psycopg2.connect(
        host=settings["metadata_postgresql_host"],
        port=settings["metadata_postgresql_port"],
        dbname=settings["metadata_postgresql_dbname"],
        user=settings["metadata_postgresql_user"],
        password=settings["metadata_postgresql_password"],
    )
    cursor = connection.cursor(cursor_factory=RealDictCursor)
    return connection, cursor
|
||||
@ -153,7 +160,7 @@ class MetadataAPIInstance(object):
|
||||
data_raw = cur.fetchone()
|
||||
self.close_database(conn, cur)
|
||||
if data_raw is not None:
|
||||
data = data_raw.get('userdata', None)
|
||||
data = data_raw.get("userdata", None)
|
||||
return data
|
||||
else:
|
||||
return None
|
||||
@ -165,27 +172,31 @@ class MetadataAPIInstance(object):
|
||||
|
||||
# Figure out which server this is via the DHCP address
|
||||
host_information = dict()
|
||||
networks_managed = (x for x in networks if x.get('type') == 'managed')
|
||||
networks_managed = (x for x in networks if x.get("type") == "managed")
|
||||
for network in networks_managed:
|
||||
network_leases = pvc_network.getNetworkDHCPLeases(self.zkhandler, network.get('vni'))
|
||||
network_leases = pvc_network.getNetworkDHCPLeases(
|
||||
self.zkhandler, network.get("vni")
|
||||
)
|
||||
for network_lease in network_leases:
|
||||
information = pvc_network.getDHCPLeaseInformation(self.zkhandler, network.get('vni'), network_lease)
|
||||
information = pvc_network.getDHCPLeaseInformation(
|
||||
self.zkhandler, network.get("vni"), network_lease
|
||||
)
|
||||
try:
|
||||
if information.get('ip4_address', None) == source_address:
|
||||
if information.get("ip4_address", None) == source_address:
|
||||
host_information = information
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Get our real information on the host; now we can start querying about it
|
||||
client_macaddr = host_information.get('mac_address', None)
|
||||
client_macaddr = host_information.get("mac_address", None)
|
||||
|
||||
# Find the VM with that MAC address - we can't assume that the hostname is actually right
|
||||
_discard, vm_list = pvc_vm.get_list(self.zkhandler, None, None, None, None)
|
||||
vm_details = dict()
|
||||
for vm in vm_list:
|
||||
try:
|
||||
for network in vm.get('networks'):
|
||||
if network.get('mac', None) == client_macaddr:
|
||||
for network in vm.get("networks"):
|
||||
if network.get("mac", None) == client_macaddr:
|
||||
vm_details = vm
|
||||
except Exception:
|
||||
pass
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -23,10 +23,10 @@ import daemon_lib.common as common
|
||||
|
||||
|
||||
def boolToOnOff(state):
    """
    Map a boolean-like value to the strings "on" or "off".

    Only a truthy value whose string form is exactly "True" yields "on";
    everything else yields "off".
    """
    enabled = bool(state) and str(state) == "True"
    return "on" if enabled else "off"
|
||||
|
||||
|
||||
class SRIOVVFInstance(object):
|
||||
@ -39,12 +39,20 @@ class SRIOVVFInstance(object):
|
||||
self.this_node = this_node
|
||||
self.myhostname = self.this_node.name
|
||||
|
||||
self.pf = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.pf', self.vf))
|
||||
self.mtu = self.zkhandler.read(('node.sriov.vf', self.myhostname, 'sriov_vf.mtu', self.vf))
|
||||
self.vfid = self.vf.replace('{}v'.format(self.pf), '')
|
||||
self.pf = self.zkhandler.read(
|
||||
("node.sriov.vf", self.myhostname, "sriov_vf.pf", self.vf)
|
||||
)
|
||||
self.mtu = self.zkhandler.read(
|
||||
("node.sriov.vf", self.myhostname, "sriov_vf.mtu", self.vf)
|
||||
)
|
||||
self.vfid = self.vf.replace("{}v".format(self.pf), "")
|
||||
|
||||
self.logger.out('Setting MTU to {}'.format(self.mtu), state='i', prefix='SR-IOV VF {}'.format(self.vf))
|
||||
common.run_os_command('ip link set {} mtu {}'.format(self.vf, self.mtu))
|
||||
self.logger.out(
|
||||
"Setting MTU to {}".format(self.mtu),
|
||||
state="i",
|
||||
prefix="SR-IOV VF {}".format(self.vf),
|
||||
)
|
||||
common.run_os_command("ip link set {} mtu {}".format(self.vf, self.mtu))
|
||||
|
||||
# These properties are set via the DataWatch functions, to ensure they are configured on the system
|
||||
self.mac = None
|
||||
@ -58,153 +66,244 @@ class SRIOVVFInstance(object):
|
||||
self.query_rss = None
|
||||
|
||||
# Zookeeper handlers for changed configs
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.mac", self.vf)
)
def watch_vf_mac(data, stat, event=""):
    """Mirror the VF MAC address key into self.mac."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to the all-zero MAC
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "00:00:00:00:00:00"

    if value != self.mac:
        self.mac = value
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.vlan_id", self.vf)
)
def watch_vf_vlan_id(data, stat, event=""):
    """Apply a changed vLAN ID to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "0"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "0"

    # Nothing to do when the stored value is already applied
    if value == self.vlan_id:
        return

    self.vlan_id = value
    self.logger.out(
        "Setting vLAN ID to {}".format(self.vlan_id),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    # The vLAN ID and QOS are always set together in one command
    command = "ip link set {} vf {} vlan {} qos {}".format(
        self.pf, self.vfid, self.vlan_id, self.vlan_qos
    )
    common.run_os_command(command)
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.vlan_qos", self.vf)
)
def watch_vf_vlan_qos(data, stat, event=""):
    """Apply a changed vLAN QOS value to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "0"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "0"

    # Nothing to do when the stored value is already applied
    if value == self.vlan_qos:
        return

    self.vlan_qos = value
    self.logger.out(
        "Setting vLAN QOS to {}".format(self.vlan_qos),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    # The vLAN ID and QOS are always set together in one command
    command = "ip link set {} vf {} vlan {} qos {}".format(
        self.pf, self.vfid, self.vlan_id, self.vlan_qos
    )
    common.run_os_command(command)
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.tx_rate_min", self.vf)
)
def watch_vf_tx_rate_min(data, stat, event=""):
    """Apply a changed minimum TX rate to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "0"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "0"

    # Nothing to do when the stored value is already applied
    if value == self.tx_rate_min:
        return

    self.tx_rate_min = value
    self.logger.out(
        "Setting minimum TX rate to {}".format(self.tx_rate_min),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    command = "ip link set {} vf {} min_tx_rate {}".format(
        self.pf, self.vfid, self.tx_rate_min
    )
    common.run_os_command(command)
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.tx_rate_max", self.vf)
)
def watch_vf_tx_rate_max(data, stat, event=""):
    """Apply a changed maximum TX rate to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "0"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "0"

    # Nothing to do when the stored value is already applied
    if value == self.tx_rate_max:
        return

    self.tx_rate_max = value
    self.logger.out(
        "Setting maximum TX rate to {}".format(self.tx_rate_max),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    command = "ip link set {} vf {} max_tx_rate {}".format(
        self.pf, self.vfid, self.tx_rate_max
    )
    common.run_os_command(command)
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.spoof_check", self.vf)
)
def watch_vf_spoof_check(data, stat, event=""):
    """Apply a changed spoof-check setting to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "0"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "0"

    # Nothing to do when the stored value is already applied
    if value == self.spoof_check:
        return

    self.spoof_check = value
    self.logger.out(
        "Setting spoof checking {}".format(boolToOnOff(self.spoof_check)),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    # The stored value is translated to on/off for the command
    command = "ip link set {} vf {} spoofchk {}".format(
        self.pf, self.vfid, boolToOnOff(self.spoof_check)
    )
    common.run_os_command(command)
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.link_state", self.vf)
)
def watch_vf_link_state(data, stat, event=""):
    """Apply a changed link state to the VF with "ip link set"."""
    if event and event.type == "DELETED":
        # The key has been deleted after existing before; terminate this watcher
        # because this class instance is about to be reaped in Daemon.py
        return False

    # Data without a decode() method falls back to "on"
    try:
        data = data.decode("ascii")
    except AttributeError:
        data = "on"

    if data != self.link_state:
        self.link_state = data
        # Log the value exactly as it is passed to the command below; running
        # it through boolToOnOff() reported every value other than "True" as
        # "off", regardless of what was actually applied
        self.logger.out(
            "Setting link state to {}".format(self.link_state),
            state="i",
            prefix="SR-IOV VF {}".format(self.vf),
        )
        common.run_os_command(
            "ip link set {} vf {} state {}".format(
                self.pf, self.vfid, self.link_state
            )
        )
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.trust", self.vf)
)
def watch_vf_trust(data, stat, event=""):
    """Apply a changed trust setting to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "off"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "off"

    # Nothing to do when the stored value is already applied
    if value == self.trust:
        return

    self.trust = value
    self.logger.out(
        "Setting trust mode {}".format(boolToOnOff(self.trust)),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    # The stored value is translated to on/off for the command
    command = "ip link set {} vf {} trust {}".format(
        self.pf, self.vfid, boolToOnOff(self.trust)
    )
    common.run_os_command(command)
|
||||
|
||||
@self.zkhandler.zk_conn.DataWatch(
    self.zkhandler.schema.path("node.sriov.vf", self.myhostname)
    + self.zkhandler.schema.path("sriov_vf.config.query_rss", self.vf)
)
def watch_vf_query_rss(data, stat, event=""):
    """Apply a changed RSS query setting to the VF with "ip link set"."""
    # The key was deleted after having existed: return False to terminate
    # this watcher, since this class instance is about to be reaped in
    # Daemon.py
    if event and event.type == "DELETED":
        return False

    # Data without a decode() method falls back to "off"
    try:
        value = data.decode("ascii")
    except AttributeError:
        value = "off"

    # Nothing to do when the stored value is already applied
    if value == self.query_rss:
        return

    self.query_rss = value
    self.logger.out(
        "Setting RSS query ability {}".format(boolToOnOff(self.query_rss)),
        state="i",
        prefix="SR-IOV VF {}".format(self.vf),
    )
    # The stored value is translated to on/off for the command
    command = "ip link set {} vf {} query_rss {}".format(
        self.pf, self.vfid, boolToOnOff(self.query_rss)
    )
    common.run_os_command(command)
|
||||
|
@ -33,22 +33,26 @@ class VMConsoleWatcherInstance(object):
|
||||
self.domname = domname
|
||||
self.zkhandler = zkhandler
|
||||
self.config = config
|
||||
self.logfile = '{}/{}.log'.format(config['console_log_directory'], self.domname)
|
||||
self.console_log_lines = config['console_log_lines']
|
||||
self.logfile = "{}/{}.log".format(config["console_log_directory"], self.domname)
|
||||
self.console_log_lines = config["console_log_lines"]
|
||||
self.logger = logger
|
||||
self.this_node = this_node
|
||||
|
||||
# Try to append (create) the logfile and set its permissions
|
||||
open(self.logfile, 'a').close()
|
||||
open(self.logfile, "a").close()
|
||||
os.chmod(self.logfile, 0o600)
|
||||
|
||||
try:
|
||||
self.logdeque = deque(open(self.logfile), self.console_log_lines)
|
||||
except UnicodeDecodeError:
|
||||
# There is corruption in the log file; overwrite it
|
||||
self.logger.out('Failed to decode console log file; clearing existing file', state='w', prefix='Domain {}'.format(self.domuuid))
|
||||
with open(self.logfile, 'w') as lfh:
|
||||
lfh.write('\n')
|
||||
self.logger.out(
|
||||
"Failed to decode console log file; clearing existing file",
|
||||
state="w",
|
||||
prefix="Domain {}".format(self.domuuid),
|
||||
)
|
||||
with open(self.logfile, "w") as lfh:
|
||||
lfh.write("\n")
|
||||
self.logdeque = deque(open(self.logfile), self.console_log_lines)
|
||||
|
||||
self.stamp = None
|
||||
@ -66,13 +70,19 @@ class VMConsoleWatcherInstance(object):
|
||||
def start(self):
    """
    Clear the stop flag and start the log parser thread running self.run.

    The thread object is kept in self.thread.
    """
    self.thread_stopper.clear()
    parser_thread = Thread(target=self.run, args=(), kwargs={})
    self.thread = parser_thread
    domain_prefix = "Domain {}".format(self.domuuid)
    self.logger.out("Starting VM log parser", state="i", prefix=domain_prefix)
    parser_thread.start()
|
||||
|
||||
# Stop execution thread
|
||||
def stop(self):
|
||||
if self.thread and self.thread.is_alive():
|
||||
self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}'.format(self.domuuid))
|
||||
self.logger.out(
|
||||
"Stopping VM log parser",
|
||||
state="i",
|
||||
prefix="Domain {}".format(self.domuuid),
|
||||
)
|
||||
self.thread_stopper.set()
|
||||
# Do one final flush
|
||||
self.update()
|
||||
@ -91,11 +101,11 @@ class VMConsoleWatcherInstance(object):
|
||||
self.fetch_lines()
|
||||
# Update Zookeeper with the new loglines if they changed
|
||||
if self.loglines != self.last_loglines:
|
||||
self.zkhandler.write([
|
||||
(('domain.console.log', self.domuuid), self.loglines)
|
||||
])
|
||||
self.zkhandler.write(
|
||||
[(("domain.console.log", self.domuuid), self.loglines)]
|
||||
)
|
||||
self.last_loglines = self.loglines
|
||||
|
||||
def fetch_lines(self):
    """
    Reload the tail of the console log file.

    Stores the last self.console_log_lines lines of self.logfile in
    self.logdeque and their concatenation in self.loglines.
    """
    # Read through a context manager so the file handle is closed on every
    # call instead of being left for the garbage collector
    with open(self.logfile) as logfile:
        self.logdeque = deque(logfile, self.console_log_lines)
    self.loglines = "".join(self.logdeque)
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -36,8 +36,9 @@ class MalformedConfigurationError(Exception):
|
||||
"""
|
||||
An exception raised when parsing the PVC Node daemon configuration file fails
|
||||
"""
|
||||
|
||||
def __init__(self, error=None):
|
||||
self.msg = f'ERROR: Configuration file is malformed: {error}'
|
||||
self.msg = f"ERROR: Configuration file is malformed: {error}"
|
||||
|
||||
def __str__(self):
|
||||
return str(self.msg)
|
||||
@ -50,19 +51,19 @@ def get_static_data():
|
||||
staticdata = list()
|
||||
staticdata.append(str(cpu_count())) # CPU count
|
||||
staticdata.append(
|
||||
subprocess.run(
|
||||
['uname', '-r'], stdout=subprocess.PIPE
|
||||
).stdout.decode('ascii').strip()
|
||||
subprocess.run(["uname", "-r"], stdout=subprocess.PIPE)
|
||||
.stdout.decode("ascii")
|
||||
.strip()
|
||||
)
|
||||
staticdata.append(
|
||||
subprocess.run(
|
||||
['uname', '-o'], stdout=subprocess.PIPE
|
||||
).stdout.decode('ascii').strip()
|
||||
subprocess.run(["uname", "-o"], stdout=subprocess.PIPE)
|
||||
.stdout.decode("ascii")
|
||||
.strip()
|
||||
)
|
||||
staticdata.append(
|
||||
subprocess.run(
|
||||
['uname', '-m'], stdout=subprocess.PIPE
|
||||
).stdout.decode('ascii').strip()
|
||||
subprocess.run(["uname", "-m"], stdout=subprocess.PIPE)
|
||||
.stdout.decode("ascii")
|
||||
.strip()
|
||||
)
|
||||
|
||||
return staticdata
|
||||
@ -70,7 +71,7 @@ def get_static_data():
|
||||
|
||||
def get_configuration_path():
    """
    Return the configuration file path from the PVCD_CONFIG_FILE variable.

    When the variable is not set, an error is printed and the process exits
    immediately with status 1.
    """
    config_path = os.environ.get("PVCD_CONFIG_FILE")
    if config_path is None:
        print('ERROR: The "PVCD_CONFIG_FILE" environment variable must be set.')
        os._exit(1)
    return config_path
|
||||
@ -78,10 +79,10 @@ def get_configuration_path():
|
||||
|
||||
def get_hostname():
|
||||
node_fqdn = gethostname()
|
||||
node_hostname = node_fqdn.split('.', 1)[0]
|
||||
node_domain = ''.join(node_fqdn.split('.', 1)[1:])
|
||||
node_hostname = node_fqdn.split(".", 1)[0]
|
||||
node_domain = "".join(node_fqdn.split(".", 1)[1:])
|
||||
try:
|
||||
node_id = findall(r'\d+', node_hostname)[-1]
|
||||
node_id = findall(r"\d+", node_hostname)[-1]
|
||||
except IndexError:
|
||||
node_id = 0
|
||||
|
||||
@ -89,27 +90,33 @@ def get_hostname():
|
||||
|
||||
|
||||
def validate_floating_ip(config, network):
    """
    Check the floating IP configured for one of the node's networks.

    config is the configuration mapping and network is one of "cluster",
    "storage" or "upstream". The "<network>_network" entry must parse as an
    IP network and the "<network>_floating_ip" entry (any "/prefix" suffix is
    ignored) must be one of that network's host addresses.

    Returns a (valid, message) tuple; message is empty when valid is True.
    """
    if network not in ["cluster", "storage", "upstream"]:
        return False, f'Specified network type "{network}" is not valid'

    floating_key = f"{network}_floating_ip"
    network_key = f"{network}_network"

    # Verify the network provided is valid
    try:
        parsed_network = ip_network(config[network_key])
    except Exception:
        # config.get() so that a missing key is reported as invalid instead
        # of raising a second, uncaught KeyError while building the message
        return (
            False,
            f"Network address {config.get(network_key)} for {network_key} is not valid",
        )

    # Verify that the floating IP is valid (and in the network)
    try:
        floating_address = ip_address(config[floating_key].split("/")[0])
        # Reject addresses outside the network first, then test membership
        # against the lazily generated host addresses rather than building a
        # list of every host in the network
        if (
            floating_address not in parsed_network
            or floating_address not in parsed_network.hosts()
        ):
            raise ValueError("floating address is not a host of the network")
    except Exception:
        return (
            False,
            f"Floating address {config.get(floating_key)} for {floating_key} is not valid",
        )

    return True, ""
|
||||
|
||||
|
||||
def get_configuration():
|
||||
@ -120,11 +127,11 @@ def get_configuration():
|
||||
|
||||
print('Loading configuration from file "{}"'.format(pvcnoded_config_file))
|
||||
|
||||
with open(pvcnoded_config_file, 'r') as cfgfile:
|
||||
with open(pvcnoded_config_file, "r") as cfgfile:
|
||||
try:
|
||||
o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
|
||||
except Exception as e:
|
||||
print('ERROR: Failed to parse configuration file: {}'.format(e))
|
||||
print("ERROR: Failed to parse configuration file: {}".format(e))
|
||||
os._exit(1)
|
||||
|
||||
node_fqdn, node_hostname, node_domain, node_id = get_hostname()
|
||||
@ -134,263 +141,287 @@ def get_configuration():
|
||||
|
||||
# Get the initial base configuration
|
||||
try:
|
||||
o_base = o_config['pvc']
|
||||
o_cluster = o_config['pvc']['cluster']
|
||||
o_base = o_config["pvc"]
|
||||
o_cluster = o_config["pvc"]["cluster"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_general = {
|
||||
'node': o_base.get('node', node_hostname),
|
||||
'node_hostname': node_hostname,
|
||||
'node_fqdn': node_fqdn,
|
||||
'node_domain': node_domain,
|
||||
'node_id': node_id,
|
||||
'coordinators': o_cluster.get('coordinators', list()),
|
||||
'debug': o_base.get('debug', False),
|
||||
"node": o_base.get("node", node_hostname),
|
||||
"node_hostname": node_hostname,
|
||||
"node_fqdn": node_fqdn,
|
||||
"node_domain": node_domain,
|
||||
"node_id": node_id,
|
||||
"coordinators": o_cluster.get("coordinators", list()),
|
||||
"debug": o_base.get("debug", False),
|
||||
}
|
||||
|
||||
config = {**config, **config_general}
|
||||
|
||||
# Get the functions configuration
|
||||
try:
|
||||
o_functions = o_config['pvc']['functions']
|
||||
o_functions = o_config["pvc"]["functions"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_functions = {
|
||||
'enable_hypervisor': o_functions.get('enable_hypervisor', False),
|
||||
'enable_networking': o_functions.get('enable_networking', False),
|
||||
'enable_storage': o_functions.get('enable_storage', False),
|
||||
'enable_api': o_functions.get('enable_api', False),
|
||||
"enable_hypervisor": o_functions.get("enable_hypervisor", False),
|
||||
"enable_networking": o_functions.get("enable_networking", False),
|
||||
"enable_storage": o_functions.get("enable_storage", False),
|
||||
"enable_api": o_functions.get("enable_api", False),
|
||||
}
|
||||
|
||||
config = {**config, **config_functions}
|
||||
|
||||
# Get the directory configuration
|
||||
try:
|
||||
o_directories = o_config['pvc']['system']['configuration']['directories']
|
||||
o_directories = o_config["pvc"]["system"]["configuration"]["directories"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_directories = {
|
||||
'dynamic_directory': o_directories.get('dynamic_directory', None),
|
||||
'log_directory': o_directories.get('log_directory', None),
|
||||
'console_log_directory': o_directories.get('console_log_directory', None),
|
||||
"dynamic_directory": o_directories.get("dynamic_directory", None),
|
||||
"log_directory": o_directories.get("log_directory", None),
|
||||
"console_log_directory": o_directories.get("console_log_directory", None),
|
||||
}
|
||||
|
||||
# Define our dynamic directory schema
|
||||
config_directories['dnsmasq_dynamic_directory'] = config_directories['dynamic_directory'] + '/dnsmasq'
|
||||
config_directories['pdns_dynamic_directory'] = config_directories['dynamic_directory'] + '/pdns'
|
||||
config_directories['nft_dynamic_directory'] = config_directories['dynamic_directory'] + '/nft'
|
||||
config_directories["dnsmasq_dynamic_directory"] = (
|
||||
config_directories["dynamic_directory"] + "/dnsmasq"
|
||||
)
|
||||
config_directories["pdns_dynamic_directory"] = (
|
||||
config_directories["dynamic_directory"] + "/pdns"
|
||||
)
|
||||
config_directories["nft_dynamic_directory"] = (
|
||||
config_directories["dynamic_directory"] + "/nft"
|
||||
)
|
||||
|
||||
# Define our log directory schema
|
||||
config_directories['dnsmasq_log_directory'] = config_directories['log_directory'] + '/dnsmasq'
|
||||
config_directories['pdns_log_directory'] = config_directories['log_directory'] + '/pdns'
|
||||
config_directories['nft_log_directory'] = config_directories['log_directory'] + '/nft'
|
||||
config_directories["dnsmasq_log_directory"] = (
|
||||
config_directories["log_directory"] + "/dnsmasq"
|
||||
)
|
||||
config_directories["pdns_log_directory"] = (
|
||||
config_directories["log_directory"] + "/pdns"
|
||||
)
|
||||
config_directories["nft_log_directory"] = (
|
||||
config_directories["log_directory"] + "/nft"
|
||||
)
|
||||
|
||||
config = {**config, **config_directories}
|
||||
|
||||
# Get the logging configuration
|
||||
try:
|
||||
o_logging = o_config['pvc']['system']['configuration']['logging']
|
||||
o_logging = o_config["pvc"]["system"]["configuration"]["logging"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_logging = {
|
||||
'file_logging': o_logging.get('file_logging', False),
|
||||
'stdout_logging': o_logging.get('stdout_logging', False),
|
||||
'zookeeper_logging': o_logging.get('zookeeper_logging', False),
|
||||
'log_colours': o_logging.get('log_colours', False),
|
||||
'log_dates': o_logging.get('log_dates', False),
|
||||
'log_keepalives': o_logging.get('log_keepalives', False),
|
||||
'log_keepalive_cluster_details': o_logging.get('log_keepalive_cluster_details', False),
|
||||
'log_keepalive_storage_details': o_logging.get('log_keepalive_storage_details', False),
|
||||
'console_log_lines': o_logging.get('console_log_lines', False),
|
||||
'node_log_lines': o_logging.get('node_log_lines', False),
|
||||
"file_logging": o_logging.get("file_logging", False),
|
||||
"stdout_logging": o_logging.get("stdout_logging", False),
|
||||
"zookeeper_logging": o_logging.get("zookeeper_logging", False),
|
||||
"log_colours": o_logging.get("log_colours", False),
|
||||
"log_dates": o_logging.get("log_dates", False),
|
||||
"log_keepalives": o_logging.get("log_keepalives", False),
|
||||
"log_keepalive_cluster_details": o_logging.get(
|
||||
"log_keepalive_cluster_details", False
|
||||
),
|
||||
"log_keepalive_storage_details": o_logging.get(
|
||||
"log_keepalive_storage_details", False
|
||||
),
|
||||
"console_log_lines": o_logging.get("console_log_lines", False),
|
||||
"node_log_lines": o_logging.get("node_log_lines", False),
|
||||
}
|
||||
|
||||
config = {**config, **config_logging}
|
||||
|
||||
# Get the interval configuration
|
||||
try:
|
||||
o_intervals = o_config['pvc']['system']['intervals']
|
||||
o_intervals = o_config["pvc"]["system"]["intervals"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_intervals = {
|
||||
'vm_shutdown_timeout': int(o_intervals.get('vm_shutdown_timeout', 60)),
|
||||
'keepalive_interval': int(o_intervals.get('keepalive_interval', 5)),
|
||||
'fence_intervals': int(o_intervals.get('fence_intervals', 6)),
|
||||
'suicide_intervals': int(o_intervals.get('suicide_interval', 0)),
|
||||
"vm_shutdown_timeout": int(o_intervals.get("vm_shutdown_timeout", 60)),
|
||||
"keepalive_interval": int(o_intervals.get("keepalive_interval", 5)),
|
||||
"fence_intervals": int(o_intervals.get("fence_intervals", 6)),
|
||||
"suicide_intervals": int(o_intervals.get("suicide_interval", 0)),
|
||||
}
|
||||
|
||||
config = {**config, **config_intervals}
|
||||
|
||||
# Get the fencing configuration
|
||||
try:
|
||||
o_fencing = o_config['pvc']['system']['fencing']
|
||||
o_fencing_actions = o_fencing['actions']
|
||||
o_fencing_ipmi = o_fencing['ipmi']
|
||||
o_fencing = o_config["pvc"]["system"]["fencing"]
|
||||
o_fencing_actions = o_fencing["actions"]
|
||||
o_fencing_ipmi = o_fencing["ipmi"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_fencing = {
|
||||
'successful_fence': o_fencing_actions.get('successful_fence', None),
|
||||
'failed_fence': o_fencing_actions.get('failed_fence', None),
|
||||
'ipmi_hostname': o_fencing_ipmi.get('host', f'{node_hostname}-lom.{node_domain}'),
|
||||
'ipmi_username': o_fencing_ipmi.get('user', 'null'),
|
||||
'ipmi_password': o_fencing_ipmi.get('pass', 'null'),
|
||||
"successful_fence": o_fencing_actions.get("successful_fence", None),
|
||||
"failed_fence": o_fencing_actions.get("failed_fence", None),
|
||||
"ipmi_hostname": o_fencing_ipmi.get(
|
||||
"host", f"{node_hostname}-lom.{node_domain}"
|
||||
),
|
||||
"ipmi_username": o_fencing_ipmi.get("user", "null"),
|
||||
"ipmi_password": o_fencing_ipmi.get("pass", "null"),
|
||||
}
|
||||
|
||||
config = {**config, **config_fencing}
|
||||
|
||||
# Get the migration configuration
|
||||
try:
|
||||
o_migration = o_config['pvc']['system']['migration']
|
||||
o_migration = o_config["pvc"]["system"]["migration"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_migration = {
|
||||
'migration_target_selector': o_migration.get('target_selector', 'mem'),
|
||||
"migration_target_selector": o_migration.get("target_selector", "mem"),
|
||||
}
|
||||
|
||||
config = {**config, **config_migration}
|
||||
|
||||
if config['enable_networking']:
|
||||
if config["enable_networking"]:
|
||||
# Get the node networks configuration
|
||||
try:
|
||||
o_networks = o_config['pvc']['cluster']['networks']
|
||||
o_network_cluster = o_networks['cluster']
|
||||
o_network_storage = o_networks['storage']
|
||||
o_network_upstream = o_networks['upstream']
|
||||
o_sysnetworks = o_config['pvc']['system']['configuration']['networking']
|
||||
o_sysnetwork_cluster = o_sysnetworks['cluster']
|
||||
o_sysnetwork_storage = o_sysnetworks['storage']
|
||||
o_sysnetwork_upstream = o_sysnetworks['upstream']
|
||||
o_networks = o_config["pvc"]["cluster"]["networks"]
|
||||
o_network_cluster = o_networks["cluster"]
|
||||
o_network_storage = o_networks["storage"]
|
||||
o_network_upstream = o_networks["upstream"]
|
||||
o_sysnetworks = o_config["pvc"]["system"]["configuration"]["networking"]
|
||||
o_sysnetwork_cluster = o_sysnetworks["cluster"]
|
||||
o_sysnetwork_storage = o_sysnetworks["storage"]
|
||||
o_sysnetwork_upstream = o_sysnetworks["upstream"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_networks = {
|
||||
'cluster_domain': o_network_cluster.get('domain', None),
|
||||
'cluster_network': o_network_cluster.get('network', None),
|
||||
'cluster_floating_ip': o_network_cluster.get('floating_ip', None),
|
||||
'cluster_dev': o_sysnetwork_cluster.get('device', None),
|
||||
'cluster_mtu': o_sysnetwork_cluster.get('mtu', None),
|
||||
'cluster_dev_ip': o_sysnetwork_cluster.get('address', None),
|
||||
'storage_domain': o_network_storage.get('domain', None),
|
||||
'storage_network': o_network_storage.get('network', None),
|
||||
'storage_floating_ip': o_network_storage.get('floating_ip', None),
|
||||
'storage_dev': o_sysnetwork_storage.get('device', None),
|
||||
'storage_mtu': o_sysnetwork_storage.get('mtu', None),
|
||||
'storage_dev_ip': o_sysnetwork_storage.get('address', None),
|
||||
'upstream_domain': o_network_upstream.get('domain', None),
|
||||
'upstream_network': o_network_upstream.get('network', None),
|
||||
'upstream_floating_ip': o_network_upstream.get('floating_ip', None),
|
||||
'upstream_gateway': o_network_upstream.get('gateway', None),
|
||||
'upstream_dev': o_sysnetwork_upstream.get('device', None),
|
||||
'upstream_mtu': o_sysnetwork_upstream.get('mtu', None),
|
||||
'upstream_dev_ip': o_sysnetwork_upstream.get('address', None),
|
||||
'bridge_dev': o_sysnetworks.get('bridge_device', None),
|
||||
'bridge_mtu': o_sysnetworks.get('bridge_mtu', None),
|
||||
'enable_sriov': o_sysnetworks.get('sriov_enable', False),
|
||||
'sriov_device': o_sysnetworks.get('sriov_device', list())
|
||||
"cluster_domain": o_network_cluster.get("domain", None),
|
||||
"cluster_network": o_network_cluster.get("network", None),
|
||||
"cluster_floating_ip": o_network_cluster.get("floating_ip", None),
|
||||
"cluster_dev": o_sysnetwork_cluster.get("device", None),
|
||||
"cluster_mtu": o_sysnetwork_cluster.get("mtu", None),
|
||||
"cluster_dev_ip": o_sysnetwork_cluster.get("address", None),
|
||||
"storage_domain": o_network_storage.get("domain", None),
|
||||
"storage_network": o_network_storage.get("network", None),
|
||||
"storage_floating_ip": o_network_storage.get("floating_ip", None),
|
||||
"storage_dev": o_sysnetwork_storage.get("device", None),
|
||||
"storage_mtu": o_sysnetwork_storage.get("mtu", None),
|
||||
"storage_dev_ip": o_sysnetwork_storage.get("address", None),
|
||||
"upstream_domain": o_network_upstream.get("domain", None),
|
||||
"upstream_network": o_network_upstream.get("network", None),
|
||||
"upstream_floating_ip": o_network_upstream.get("floating_ip", None),
|
||||
"upstream_gateway": o_network_upstream.get("gateway", None),
|
||||
"upstream_dev": o_sysnetwork_upstream.get("device", None),
|
||||
"upstream_mtu": o_sysnetwork_upstream.get("mtu", None),
|
||||
"upstream_dev_ip": o_sysnetwork_upstream.get("address", None),
|
||||
"bridge_dev": o_sysnetworks.get("bridge_device", None),
|
||||
"bridge_mtu": o_sysnetworks.get("bridge_mtu", None),
|
||||
"enable_sriov": o_sysnetworks.get("sriov_enable", False),
|
||||
"sriov_device": o_sysnetworks.get("sriov_device", list()),
|
||||
}
|
||||
|
||||
if config_networks['bridge_mtu'] is None:
|
||||
if config_networks["bridge_mtu"] is None:
|
||||
# Read the current MTU of bridge_dev and set bridge_mtu to it; avoids weird resets
|
||||
retcode, stdout, stderr = common.run_os_command(f"ip -json link show dev {config_networks['bridge_dev']}")
|
||||
current_bridge_mtu = loads(stdout)[0]['mtu']
|
||||
print(f"Config key bridge_mtu not explicitly set; using live MTU {current_bridge_mtu} from {config_networks['bridge_dev']}")
|
||||
config_networks['bridge_mtu'] = current_bridge_mtu
|
||||
retcode, stdout, stderr = common.run_os_command(
|
||||
f"ip -json link show dev {config_networks['bridge_dev']}"
|
||||
)
|
||||
current_bridge_mtu = loads(stdout)[0]["mtu"]
|
||||
print(
|
||||
f"Config key bridge_mtu not explicitly set; using live MTU {current_bridge_mtu} from {config_networks['bridge_dev']}"
|
||||
)
|
||||
config_networks["bridge_mtu"] = current_bridge_mtu
|
||||
|
||||
config = {**config, **config_networks}
|
||||
|
||||
for network_type in ['cluster', 'storage', 'upstream']:
|
||||
for network_type in ["cluster", "storage", "upstream"]:
|
||||
result, msg = validate_floating_ip(config, network_type)
|
||||
if not result:
|
||||
raise MalformedConfigurationError(msg)
|
||||
|
||||
address_key = '{}_dev_ip'.format(network_type)
|
||||
network_key = f'{network_type}_network'
|
||||
address_key = "{}_dev_ip".format(network_type)
|
||||
network_key = f"{network_type}_network"
|
||||
network = ip_network(config[network_key])
|
||||
# With autoselection of addresses, construct an IP from the relevant network
|
||||
if config[address_key] == 'by-id':
|
||||
if config[address_key] == "by-id":
|
||||
# The NodeID starts at 1, but indexes start at 0
|
||||
address_id = int(config['node_id']) - 1
|
||||
address_id = int(config["node_id"]) - 1
|
||||
# Grab the nth address from the network
|
||||
config[address_key] = '{}/{}'.format(list(network.hosts())[address_id], network.prefixlen)
|
||||
config[address_key] = "{}/{}".format(
|
||||
list(network.hosts())[address_id], network.prefixlen
|
||||
)
|
||||
# Validate the provided IP instead
|
||||
else:
|
||||
try:
|
||||
address = ip_address(config[address_key].split('/')[0])
|
||||
address = ip_address(config[address_key].split("/")[0])
|
||||
if address not in list(network.hosts()):
|
||||
raise
|
||||
except Exception:
|
||||
raise MalformedConfigurationError(
|
||||
f'IP address {config[address_key]} for {address_key} is not valid'
|
||||
f"IP address {config[address_key]} for {address_key} is not valid"
|
||||
)
|
||||
|
||||
# Get the PowerDNS aggregator database configuration
|
||||
try:
|
||||
o_pdnsdb = o_config['pvc']['coordinator']['dns']['database']
|
||||
o_pdnsdb = o_config["pvc"]["coordinator"]["dns"]["database"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_pdnsdb = {
|
||||
'pdns_postgresql_host': o_pdnsdb.get('host', None),
|
||||
'pdns_postgresql_port': o_pdnsdb.get('port', None),
|
||||
'pdns_postgresql_dbname': o_pdnsdb.get('name', None),
|
||||
'pdns_postgresql_user': o_pdnsdb.get('user', None),
|
||||
'pdns_postgresql_password': o_pdnsdb.get('pass', None),
|
||||
"pdns_postgresql_host": o_pdnsdb.get("host", None),
|
||||
"pdns_postgresql_port": o_pdnsdb.get("port", None),
|
||||
"pdns_postgresql_dbname": o_pdnsdb.get("name", None),
|
||||
"pdns_postgresql_user": o_pdnsdb.get("user", None),
|
||||
"pdns_postgresql_password": o_pdnsdb.get("pass", None),
|
||||
}
|
||||
|
||||
config = {**config, **config_pdnsdb}
|
||||
|
||||
# Get the Cloud-Init Metadata database configuration
|
||||
try:
|
||||
o_metadatadb = o_config['pvc']['coordinator']['metadata']['database']
|
||||
o_metadatadb = o_config["pvc"]["coordinator"]["metadata"]["database"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_metadatadb = {
|
||||
'metadata_postgresql_host': o_metadatadb.get('host', None),
|
||||
'metadata_postgresql_port': o_metadatadb.get('port', None),
|
||||
'metadata_postgresql_dbname': o_metadatadb.get('name', None),
|
||||
'metadata_postgresql_user': o_metadatadb.get('user', None),
|
||||
'metadata_postgresql_password': o_metadatadb.get('pass', None),
|
||||
"metadata_postgresql_host": o_metadatadb.get("host", None),
|
||||
"metadata_postgresql_port": o_metadatadb.get("port", None),
|
||||
"metadata_postgresql_dbname": o_metadatadb.get("name", None),
|
||||
"metadata_postgresql_user": o_metadatadb.get("user", None),
|
||||
"metadata_postgresql_password": o_metadatadb.get("pass", None),
|
||||
}
|
||||
|
||||
config = {**config, **config_metadatadb}
|
||||
|
||||
if config['enable_storage']:
|
||||
if config["enable_storage"]:
|
||||
# Get the storage configuration
|
||||
try:
|
||||
o_storage = o_config['pvc']['system']['configuration']['storage']
|
||||
o_storage = o_config["pvc"]["system"]["configuration"]["storage"]
|
||||
except Exception as e:
|
||||
raise MalformedConfigurationError(e)
|
||||
|
||||
config_storage = {
|
||||
'ceph_config_file': o_storage.get('ceph_config_file', None),
|
||||
'ceph_admin_keyring': o_storage.get('ceph_admin_keyring', None),
|
||||
"ceph_config_file": o_storage.get("ceph_config_file", None),
|
||||
"ceph_admin_keyring": o_storage.get("ceph_admin_keyring", None),
|
||||
}
|
||||
|
||||
config = {**config, **config_storage}
|
||||
|
||||
# Add our node static data to the config
|
||||
config['static_data'] = get_static_data()
|
||||
config["static_data"] = get_static_data()
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def validate_directories(config):
    """Ensure the dynamic and log directory trees used by the daemon exist.

    Args:
        config: Mapping containing the "dynamic_directory" and "log_directory"
            paths plus their "dnsmasq_", "pdns_" and "nft_" prefixed
            sub-directory paths.

    Every directory is created independently, so a pre-existing parent with a
    missing sub-directory is repaired, and repeated calls are harmless.
    """
    directory_keys = (
        "dynamic_directory",
        "dnsmasq_dynamic_directory",
        "pdns_dynamic_directory",
        "nft_dynamic_directory",
        "log_directory",
        "dnsmasq_log_directory",
        "pdns_log_directory",
        "nft_log_directory",
    )

    for directory_key in directory_keys:
        # exist_ok avoids both the check-then-create race and a failure when
        # only part of the tree is already present
        os.makedirs(config[directory_key], exist_ok=True)
|
||||
|
@ -35,74 +35,89 @@ def fence_node(node_name, zkhandler, config, logger):
|
||||
failcount = 0
|
||||
while failcount < failcount_limit:
|
||||
# Wait 5 seconds
|
||||
time.sleep(config['keepalive_interval'])
|
||||
time.sleep(config["keepalive_interval"])
|
||||
# Get the state
|
||||
node_daemon_state = zkhandler.read(('node.state.daemon', node_name))
|
||||
node_daemon_state = zkhandler.read(("node.state.daemon", node_name))
|
||||
# Is it still 'dead'
|
||||
if node_daemon_state == 'dead':
|
||||
if node_daemon_state == "dead":
|
||||
failcount += 1
|
||||
logger.out('Node "{}" failed {}/{} saving throws'.format(node_name, failcount, failcount_limit), state='s')
|
||||
logger.out(
|
||||
'Node "{}" failed {}/{} saving throws'.format(
|
||||
node_name, failcount, failcount_limit
|
||||
),
|
||||
state="s",
|
||||
)
|
||||
# It changed back to something else so it must be alive
|
||||
else:
|
||||
logger.out('Node "{}" passed a saving throw; canceling fence'.format(node_name), state='o')
|
||||
logger.out(
|
||||
'Node "{}" passed a saving throw; canceling fence'.format(node_name),
|
||||
state="o",
|
||||
)
|
||||
return
|
||||
|
||||
logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state='s')
|
||||
logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state="s")
|
||||
|
||||
# Get IPMI information
|
||||
ipmi_hostname = zkhandler.read(('node.ipmi.hostname', node_name))
|
||||
ipmi_username = zkhandler.read(('node.ipmi.username', node_name))
|
||||
ipmi_password = zkhandler.read(('node.ipmi.password', node_name))
|
||||
ipmi_hostname = zkhandler.read(("node.ipmi.hostname", node_name))
|
||||
ipmi_username = zkhandler.read(("node.ipmi.username", node_name))
|
||||
ipmi_password = zkhandler.read(("node.ipmi.password", node_name))
|
||||
|
||||
# Shoot it in the head
|
||||
fence_status = reboot_via_ipmi(ipmi_hostname, ipmi_username, ipmi_password, logger)
|
||||
|
||||
# Hold to ensure the fence takes effect and system stabilizes
|
||||
logger.out('Waiting {}s for fence of node "{}" to take effect'.format(config['keepalive_interval'], node_name), state='i')
|
||||
time.sleep(config['keepalive_interval'])
|
||||
logger.out(
|
||||
'Waiting {}s for fence of node "{}" to take effect'.format(
|
||||
config["keepalive_interval"], node_name
|
||||
),
|
||||
state="i",
|
||||
)
|
||||
time.sleep(config["keepalive_interval"])
|
||||
|
||||
if fence_status:
|
||||
logger.out('Marking node "{}" as fenced'.format(node_name), state='i')
|
||||
logger.out('Marking node "{}" as fenced'.format(node_name), state="i")
|
||||
while True:
|
||||
try:
|
||||
zkhandler.write([
|
||||
(('node.state.daemon', node_name), 'fenced')
|
||||
])
|
||||
zkhandler.write([(("node.state.daemon", node_name), "fenced")])
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Force into secondary network state if needed
|
||||
if node_name in config['coordinators']:
|
||||
logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
|
||||
zkhandler.write([
|
||||
(('node.state.router', node_name), 'secondary')
|
||||
])
|
||||
if zkhandler.read('base.config.primary_node') == node_name:
|
||||
zkhandler.write([
|
||||
('base.config.primary_node', 'none')
|
||||
])
|
||||
if node_name in config["coordinators"]:
|
||||
logger.out(
|
||||
'Forcing secondary status for node "{}"'.format(node_name), state="i"
|
||||
)
|
||||
zkhandler.write([(("node.state.router", node_name), "secondary")])
|
||||
if zkhandler.read("base.config.primary_node") == node_name:
|
||||
zkhandler.write([("base.config.primary_node", "none")])
|
||||
|
||||
# If the fence succeeded and successful_fence is migrate
|
||||
if fence_status and config['successful_fence'] == 'migrate':
|
||||
if fence_status and config["successful_fence"] == "migrate":
|
||||
migrateFromFencedNode(zkhandler, node_name, config, logger)
|
||||
|
||||
# If the fence failed and failed_fence is migrate
|
||||
if not fence_status and config['failed_fence'] == 'migrate' and config['suicide_intervals'] != '0':
|
||||
if (
|
||||
not fence_status
|
||||
and config["failed_fence"] == "migrate"
|
||||
and config["suicide_intervals"] != "0"
|
||||
):
|
||||
migrateFromFencedNode(zkhandler, node_name, config, logger)
|
||||
|
||||
|
||||
# Migrate hosts away from a fenced node
|
||||
def migrateFromFencedNode(zkhandler, node_name, config, logger):
|
||||
logger.out('Migrating VMs from dead node "{}" to new hosts'.format(node_name), state='i')
|
||||
logger.out(
|
||||
'Migrating VMs from dead node "{}" to new hosts'.format(node_name), state="i"
|
||||
)
|
||||
|
||||
# Get the list of VMs
|
||||
dead_node_running_domains = zkhandler.read(('node.running_domains', node_name)).split()
|
||||
dead_node_running_domains = zkhandler.read(
|
||||
("node.running_domains", node_name)
|
||||
).split()
|
||||
|
||||
# Set the node to a custom domainstate so we know what's happening
|
||||
zkhandler.write([
|
||||
(('node.state.domain', node_name), 'fence-flush')
|
||||
])
|
||||
zkhandler.write([(("node.state.domain", node_name), "fence-flush")])
|
||||
|
||||
# Migrate a VM after a flush
|
||||
def fence_migrate_vm(dom_uuid):
|
||||
@ -111,28 +126,40 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
|
||||
target_node = common.findTargetNode(zkhandler, dom_uuid)
|
||||
|
||||
if target_node is not None:
|
||||
logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
|
||||
zkhandler.write([
|
||||
(('domain.state', dom_uuid), 'start'),
|
||||
(('domain.node', dom_uuid), target_node),
|
||||
(('domain.last_node', dom_uuid), node_name),
|
||||
])
|
||||
logger.out(
|
||||
'Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node),
|
||||
state="i",
|
||||
)
|
||||
zkhandler.write(
|
||||
[
|
||||
(("domain.state", dom_uuid), "start"),
|
||||
(("domain.node", dom_uuid), target_node),
|
||||
(("domain.last_node", dom_uuid), node_name),
|
||||
]
|
||||
)
|
||||
else:
|
||||
logger.out('No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(dom_uuid), state='i')
|
||||
zkhandler.write({
|
||||
(('domain.state', dom_uuid), 'stopped'),
|
||||
(('domain.meta.autostart', dom_uuid), 'True'),
|
||||
})
|
||||
logger.out(
|
||||
'No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(
|
||||
dom_uuid
|
||||
),
|
||||
state="i",
|
||||
)
|
||||
zkhandler.write(
|
||||
{
|
||||
(("domain.state", dom_uuid), "stopped"),
|
||||
(("domain.meta.autostart", dom_uuid), "True"),
|
||||
}
|
||||
)
|
||||
|
||||
# Loop through the VMs
|
||||
for dom_uuid in dead_node_running_domains:
|
||||
fence_migrate_vm(dom_uuid)
|
||||
|
||||
# Set node in flushed state for easy remigrating when it comes back
|
||||
zkhandler.write([
|
||||
(('node.state.domain', node_name), 'flushed')
|
||||
])
|
||||
logger.out('All VMs flushed from dead node "{}" to new hosts'.format(node_name), state='i')
|
||||
zkhandler.write([(("node.state.domain", node_name), "flushed")])
|
||||
logger.out(
|
||||
'All VMs flushed from dead node "{}" to new hosts'.format(node_name), state="i"
|
||||
)
|
||||
|
||||
|
||||
#
|
||||
@ -140,68 +167,100 @@ def migrateFromFencedNode(zkhandler, node_name, config, logger):
|
||||
#
|
||||
def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger):
    """Power-cycle a dead node through IPMI and report whether the fence holds.

    The node is sent a chassis power off, its power state is logged, it is
    sent a chassis power on, and the power state is queried once more.

    Args:
        ipmi_hostname: Host passed to ipmitool via -H.
        ipmi_user: User passed to ipmitool via -U.
        ipmi_password: Password passed to ipmitool via -P.
        logger: Object whose out(message, state=...) method receives progress
            and result messages.

    Returns:
        True when the final power state is confirmed "off", or confirmed "on"
        after a successful power-off command; False in every other case.
    """

    def chassis_power(action):
        # Run "ipmitool ... chassis power <action>"; returns (retcode, stdout, stderr)
        # NOTE(review): the credentials are interpolated into a single command
        # string; how it is tokenised depends on common.run_os_command - confirm
        # that values containing whitespace or shell metacharacters are safe.
        command = "/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power {}".format(
            ipmi_hostname, ipmi_user, ipmi_password, action
        )
        return common.run_os_command(command)

    # Power off the node
    logger.out("Sending power off to dead node", state="i")
    stop_retcode, _, stop_stderr = chassis_power("off")
    if stop_retcode != 0:
        logger.out(f"Failed to power off dead node: {stop_stderr}", state="e")

    time.sleep(5)

    # Check the chassis power state (informational only at this point)
    logger.out("Checking power state of dead node", state="i")
    status_retcode, status_stdout, _ = chassis_power("status")
    if status_retcode == 0:
        logger.out(
            f"Current chassis power state is: {status_stdout.strip()}", state="i"
        )
    else:
        logger.out("Current chassis power state is: Unknown", state="w")

    # Power on the node
    logger.out("Sending power on to dead node", state="i")
    start_retcode, _, start_stderr = chassis_power("on")
    if start_retcode != 0:
        logger.out(f"Failed to power on dead node: {start_stderr}", state="w")

    time.sleep(2)

    # Check the chassis power state again; this reading decides the result
    logger.out("Checking power state of dead node", state="i")
    _, status_stdout, _ = chassis_power("status")
    power_state = status_stdout.strip()
    stop_succeeded = stop_retcode == 0

    if stop_succeeded and power_state == "Chassis Power is on":
        # The power-off command succeeded and the node is powered on again;
        # this is a successful fence
        logger.out("Successfully rebooted dead node", state="o")
        return True

    if power_state == "Chassis Power is off":
        # The node is confirmed off, so the fence is safe whether or not the
        # power-off command itself reported success
        if stop_succeeded:
            message = "Chassis power is in confirmed off state after successfuly IPMI reboot; proceeding with fence-flush"
        else:
            message = "Chassis power is in confirmed off state after failed IPMI reboot; proceeding with fence-flush"
        logger.out(message, state="o")
        return True

    # Any other power state might hide a silent failure, so it is treated as a
    # failed fence
    if stop_succeeded:
        logger.out(
            "Chassis power is in an unknown state ({}) after successful IPMI reboot; not performing fence-flush".format(
                power_state
            ),
            state="e",
        )
    else:
        logger.out(
            "Chassis power is not in confirmed off state after failed IPMI reboot; not performing fence-flush",
            state="e",
        )
    return False
|
||||
|
||||
|
||||
@ -209,7 +268,7 @@ def reboot_via_ipmi(ipmi_hostname, ipmi_user, ipmi_password, logger):
|
||||
# Verify that IPMI connectivity to this host exists (used during node init)
|
||||
#
|
||||
def verify_ipmi(ipmi_hostname, ipmi_user, ipmi_password):
|
||||
ipmi_command = f'/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status'
|
||||
ipmi_command = f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
|
||||
retcode, stdout, stderr = common.run_os_command(ipmi_command, timeout=2)
|
||||
if retcode == 0 and stdout.strip() == "Chassis Power is on":
|
||||
return True
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -23,14 +23,14 @@ import libvirt
|
||||
|
||||
|
||||
def validate_libvirtd(logger, config):
|
||||
if config['enable_hypervisor']:
|
||||
if config["enable_hypervisor"]:
|
||||
libvirt_check_name = f'qemu+tcp://{config["node_hostname"]}/system'
|
||||
logger.out(f'Connecting to Libvirt daemon at {libvirt_check_name}', state='i')
|
||||
logger.out(f"Connecting to Libvirt daemon at {libvirt_check_name}", state="i")
|
||||
try:
|
||||
lv_conn = libvirt.open(libvirt_check_name)
|
||||
lv_conn.close()
|
||||
except Exception as e:
|
||||
logger.out(f'Failed to connect to Libvirt daemon: {e}', state='e')
|
||||
logger.out(f"Failed to connect to Libvirt daemon: {e}", state="e")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
@ -26,141 +26,192 @@ from os import makedirs
|
||||
|
||||
|
||||
def setup_sriov(logger, config):
|
||||
logger.out('Setting up SR-IOV device support', state='i')
|
||||
logger.out("Setting up SR-IOV device support", state="i")
|
||||
|
||||
# Enable unsafe interrupts for the vfio_iommu_type1 kernel module
|
||||
try:
|
||||
common.run_os_command('modprobe vfio_iommu_type1 allow_unsafe_interrupts=1')
|
||||
with open('/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts', 'w') as mfh:
|
||||
mfh.write('Y')
|
||||
common.run_os_command("modprobe vfio_iommu_type1 allow_unsafe_interrupts=1")
|
||||
with open(
|
||||
"/sys/module/vfio_iommu_type1/parameters/allow_unsafe_interrupts", "w"
|
||||
) as mfh:
|
||||
mfh.write("Y")
|
||||
except Exception:
|
||||
logger.out('Failed to enable vfio_iommu_type1 kernel module; SR-IOV may fail', state='w')
|
||||
logger.out(
|
||||
"Failed to enable vfio_iommu_type1 kernel module; SR-IOV may fail",
|
||||
state="w",
|
||||
)
|
||||
|
||||
# Loop through our SR-IOV NICs and enable the numvfs for each
|
||||
for device in config['sriov_device']:
|
||||
logger.out(f'Preparing SR-IOV PF {device["phy"]} with {device["vfcount"]} VFs', state='i')
|
||||
for device in config["sriov_device"]:
|
||||
logger.out(
|
||||
f'Preparing SR-IOV PF {device["phy"]} with {device["vfcount"]} VFs',
|
||||
state="i",
|
||||
)
|
||||
try:
|
||||
with open(f'/sys/class/net/{device["phy"]}/device/sriov_numvfs', 'r') as vfh:
|
||||
with open(
|
||||
f'/sys/class/net/{device["phy"]}/device/sriov_numvfs', "r"
|
||||
) as vfh:
|
||||
current_vf_count = vfh.read().strip()
|
||||
with open(f'/sys/class/net/{device["phy"]}/device/sriov_numvfs', 'w') as vfh:
|
||||
vfh.write(str(device['vfcount']))
|
||||
with open(
|
||||
f'/sys/class/net/{device["phy"]}/device/sriov_numvfs', "w"
|
||||
) as vfh:
|
||||
vfh.write(str(device["vfcount"]))
|
||||
except FileNotFoundError:
|
||||
logger.out(f'Failed to open SR-IOV configuration for PF {device["phy"]}; device may not support SR-IOV', state='w')
|
||||
logger.out(
|
||||
f'Failed to open SR-IOV configuration for PF {device["phy"]}; device may not support SR-IOV',
|
||||
state="w",
|
||||
)
|
||||
except OSError:
|
||||
logger.out(f'Failed to set SR-IOV VF count for PF {device["phy"]} to {device["vfcount"]}; already set to {current_vf_count}', state='w')
|
||||
logger.out(
|
||||
f'Failed to set SR-IOV VF count for PF {device["phy"]} to {device["vfcount"]}; already set to {current_vf_count}',
|
||||
state="w",
|
||||
)
|
||||
|
||||
if device.get('mtu', None) is not None:
|
||||
logger.out(f'Setting SR-IOV PF {device["phy"]} to MTU {device["mtu"]}', state='i')
|
||||
if device.get("mtu", None) is not None:
|
||||
logger.out(
|
||||
f'Setting SR-IOV PF {device["phy"]} to MTU {device["mtu"]}', state="i"
|
||||
)
|
||||
common.run_os_command(f'ip link set {device["phy"]} mtu {device["mtu"]} up')
|
||||
|
||||
|
||||
def setup_interfaces(logger, config):
|
||||
# Set up the Cluster interface
|
||||
cluster_dev = config['cluster_dev']
|
||||
cluster_mtu = config['cluster_mtu']
|
||||
cluster_dev_ip = config['cluster_dev_ip']
|
||||
cluster_dev = config["cluster_dev"]
|
||||
cluster_mtu = config["cluster_mtu"]
|
||||
cluster_dev_ip = config["cluster_dev_ip"]
|
||||
|
||||
logger.out(f'Setting up Cluster network interface {cluster_dev} with MTU {cluster_mtu}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Cluster network interface {cluster_dev} with MTU {cluster_mtu}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'ip link set {cluster_dev} mtu {cluster_mtu} up')
|
||||
common.run_os_command(f"ip link set {cluster_dev} mtu {cluster_mtu} up")
|
||||
|
||||
logger.out(f'Setting up Cluster network bridge on interface {cluster_dev} with IP {cluster_dev_ip}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Cluster network bridge on interface {cluster_dev} with IP {cluster_dev_ip}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'brctl addbr brcluster')
|
||||
common.run_os_command(f'brctl addif brcluster {cluster_dev}')
|
||||
common.run_os_command(f'ip link set brcluster mtu {cluster_mtu} up')
|
||||
common.run_os_command(f'ip address add {cluster_dev_ip} dev brcluster')
|
||||
common.run_os_command(f"brctl addbr brcluster")
|
||||
common.run_os_command(f"brctl addif brcluster {cluster_dev}")
|
||||
common.run_os_command(f"ip link set brcluster mtu {cluster_mtu} up")
|
||||
common.run_os_command(f"ip address add {cluster_dev_ip} dev brcluster")
|
||||
|
||||
# Set up the Storage interface
|
||||
storage_dev = config['storage_dev']
|
||||
storage_mtu = config['storage_mtu']
|
||||
storage_dev_ip = config['storage_dev_ip']
|
||||
storage_dev = config["storage_dev"]
|
||||
storage_mtu = config["storage_mtu"]
|
||||
storage_dev_ip = config["storage_dev_ip"]
|
||||
|
||||
logger.out(f'Setting up Storage network interface {storage_dev} with MTU {storage_mtu}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Storage network interface {storage_dev} with MTU {storage_mtu}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'ip link set {storage_dev} mtu {storage_mtu} up')
|
||||
common.run_os_command(f"ip link set {storage_dev} mtu {storage_mtu} up")
|
||||
|
||||
if storage_dev == cluster_dev:
|
||||
if storage_dev_ip != cluster_dev_ip:
|
||||
logger.out(f'Setting up Storage network on Cluster network bridge with IP {storage_dev_ip}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Storage network on Cluster network bridge with IP {storage_dev_ip}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'ip address add {storage_dev_ip} dev brcluster')
|
||||
common.run_os_command(f"ip address add {storage_dev_ip} dev brcluster")
|
||||
else:
|
||||
logger.out(f'Setting up Storage network bridge on interface {storage_dev} with IP {storage_dev_ip}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Storage network bridge on interface {storage_dev} with IP {storage_dev_ip}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'brctl addbr brstorage')
|
||||
common.run_os_command(f'brctl addif brstorage {storage_dev}')
|
||||
common.run_os_command(f'ip link set brstorage mtu {storage_mtu} up')
|
||||
common.run_os_command(f'ip address add {storage_dev_ip} dev brstorage')
|
||||
common.run_os_command(f"brctl addbr brstorage")
|
||||
common.run_os_command(f"brctl addif brstorage {storage_dev}")
|
||||
common.run_os_command(f"ip link set brstorage mtu {storage_mtu} up")
|
||||
common.run_os_command(f"ip address add {storage_dev_ip} dev brstorage")
|
||||
|
||||
# Set up the Upstream interface
|
||||
upstream_dev = config['upstream_dev']
|
||||
upstream_mtu = config['upstream_mtu']
|
||||
upstream_dev_ip = config['upstream_dev_ip']
|
||||
upstream_dev = config["upstream_dev"]
|
||||
upstream_mtu = config["upstream_mtu"]
|
||||
upstream_dev_ip = config["upstream_dev_ip"]
|
||||
|
||||
logger.out(f'Setting up Upstream network interface {upstream_dev} with MTU {upstream_mtu}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Upstream network interface {upstream_dev} with MTU {upstream_mtu}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
if upstream_dev == cluster_dev:
|
||||
if upstream_dev_ip != cluster_dev_ip:
|
||||
logger.out(f'Setting up Upstream network on Cluster network bridge with IP {upstream_dev_ip}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Upstream network on Cluster network bridge with IP {upstream_dev_ip}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'ip address add {upstream_dev_ip} dev brcluster')
|
||||
common.run_os_command(f"ip address add {upstream_dev_ip} dev brcluster")
|
||||
else:
|
||||
logger.out(f'Setting up Upstream network bridge on interface {upstream_dev} with IP {upstream_dev_ip}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Upstream network bridge on interface {upstream_dev} with IP {upstream_dev_ip}",
|
||||
state="i",
|
||||
)
|
||||
|
||||
common.run_os_command(f'brctl addbr brupstream')
|
||||
common.run_os_command(f'brctl addif brupstream {upstream_dev}')
|
||||
common.run_os_command(f'ip link set brupstream mtu {upstream_mtu} up')
|
||||
common.run_os_command(f'ip address add {upstream_dev_ip} dev brupstream')
|
||||
common.run_os_command(f"brctl addbr brupstream")
|
||||
common.run_os_command(f"brctl addif brupstream {upstream_dev}")
|
||||
common.run_os_command(f"ip link set brupstream mtu {upstream_mtu} up")
|
||||
common.run_os_command(f"ip address add {upstream_dev_ip} dev brupstream")
|
||||
|
||||
upstream_gateway = config['upstream_gateway']
|
||||
upstream_gateway = config["upstream_gateway"]
|
||||
if upstream_gateway is not None:
|
||||
logger.out(f'Setting up Upstream network default gateway IP {upstream_gateway}', state='i')
|
||||
logger.out(
|
||||
f"Setting up Upstream network default gateway IP {upstream_gateway}",
|
||||
state="i",
|
||||
)
|
||||
if upstream_dev == cluster_dev:
|
||||
common.run_os_command(f'ip route add default via {upstream_gateway} dev brcluster')
|
||||
common.run_os_command(
|
||||
f"ip route add default via {upstream_gateway} dev brcluster"
|
||||
)
|
||||
else:
|
||||
common.run_os_command(f'ip route add default via {upstream_gateway} dev brupstream')
|
||||
common.run_os_command(
|
||||
f"ip route add default via {upstream_gateway} dev brupstream"
|
||||
)
|
||||
|
||||
# Set up sysctl tweaks to optimize networking
|
||||
# Enable routing functions
|
||||
common.run_os_command('sysctl net.ipv4.ip_forward=1')
|
||||
common.run_os_command('sysctl net.ipv6.ip_forward=1')
|
||||
common.run_os_command("sysctl net.ipv4.ip_forward=1")
|
||||
common.run_os_command("sysctl net.ipv6.ip_forward=1")
|
||||
# Enable send redirects
|
||||
common.run_os_command('sysctl net.ipv4.conf.all.send_redirects=1')
|
||||
common.run_os_command('sysctl net.ipv4.conf.default.send_redirects=1')
|
||||
common.run_os_command('sysctl net.ipv6.conf.all.send_redirects=1')
|
||||
common.run_os_command('sysctl net.ipv6.conf.default.send_redirects=1')
|
||||
common.run_os_command("sysctl net.ipv4.conf.all.send_redirects=1")
|
||||
common.run_os_command("sysctl net.ipv4.conf.default.send_redirects=1")
|
||||
common.run_os_command("sysctl net.ipv6.conf.all.send_redirects=1")
|
||||
common.run_os_command("sysctl net.ipv6.conf.default.send_redirects=1")
|
||||
# Accept source routes
|
||||
common.run_os_command('sysctl net.ipv4.conf.all.accept_source_route=1')
|
||||
common.run_os_command('sysctl net.ipv4.conf.default.accept_source_route=1')
|
||||
common.run_os_command('sysctl net.ipv6.conf.all.accept_source_route=1')
|
||||
common.run_os_command('sysctl net.ipv6.conf.default.accept_source_route=1')
|
||||
common.run_os_command("sysctl net.ipv4.conf.all.accept_source_route=1")
|
||||
common.run_os_command("sysctl net.ipv4.conf.default.accept_source_route=1")
|
||||
common.run_os_command("sysctl net.ipv6.conf.all.accept_source_route=1")
|
||||
common.run_os_command("sysctl net.ipv6.conf.default.accept_source_route=1")
|
||||
# Disable RP filtering on Cluster and Upstream interfaces (to allow traffic pivoting)
|
||||
common.run_os_command(f'sysctl net.ipv4.conf.{cluster_dev}.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv4.conf.brcluster.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv4.conf.{upstream_dev}.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv4.conf.brupstream.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv6.conf.{cluster_dev}.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv6.conf.brcluster.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv6.conf.{upstream_dev}.rp_filter=0')
|
||||
common.run_os_command(f'sysctl net.ipv6.conf.brupstream.rp_filter=0')
|
||||
common.run_os_command(f"sysctl net.ipv4.conf.{cluster_dev}.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv4.conf.brcluster.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv4.conf.{upstream_dev}.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv4.conf.brupstream.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv6.conf.{cluster_dev}.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv6.conf.brcluster.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv6.conf.{upstream_dev}.rp_filter=0")
|
||||
common.run_os_command(f"sysctl net.ipv6.conf.brupstream.rp_filter=0")
|
||||
|
||||
# Stop DNSMasq if it is running
|
||||
common.run_os_command('systemctl stop dnsmasq.service')
|
||||
common.run_os_command("systemctl stop dnsmasq.service")
|
||||
|
||||
logger.out('Waiting 3 seconds for networking to come up', state='s')
|
||||
logger.out("Waiting 3 seconds for networking to come up", state="s")
|
||||
sleep(3)
|
||||
|
||||
|
||||
def create_nft_configuration(logger, config):
|
||||
if config['enable_networking']:
|
||||
logger.out('Creating NFT firewall configuration', state='i')
|
||||
if config["enable_networking"]:
|
||||
logger.out("Creating NFT firewall configuration", state="i")
|
||||
|
||||
dynamic_directory = config['nft_dynamic_directory']
|
||||
dynamic_directory = config["nft_dynamic_directory"]
|
||||
|
||||
# Create directories
|
||||
makedirs(f'{dynamic_directory}/networks', exist_ok=True)
|
||||
makedirs(f'{dynamic_directory}/static', exist_ok=True)
|
||||
makedirs(f"{dynamic_directory}/networks", exist_ok=True)
|
||||
makedirs(f"{dynamic_directory}/static", exist_ok=True)
|
||||
|
||||
# Set up the base rules
|
||||
nftables_base_rules = f"""# Base rules
|
||||
@ -175,7 +226,7 @@ def create_nft_configuration(logger, config):
|
||||
"""
|
||||
|
||||
# Write the base firewall config
|
||||
nftables_base_filename = f'{dynamic_directory}/base.nft'
|
||||
with open(nftables_base_filename, 'w') as nftfh:
|
||||
nftables_base_filename = f"{dynamic_directory}/base.nft"
|
||||
with open(nftables_base_filename, "w") as nftfh:
|
||||
nftfh.write(nftables_base_rules)
|
||||
common.reload_firewall_rules(nftables_base_filename, logger)
|
||||
|
@ -24,45 +24,49 @@ from time import sleep
|
||||
|
||||
|
||||
def start_zookeeper(logger, config):
|
||||
if config['daemon_mode'] == 'coordinator':
|
||||
logger.out('Starting Zookeeper daemon', state='i')
|
||||
if config["daemon_mode"] == "coordinator":
|
||||
logger.out("Starting Zookeeper daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command('systemctl start zookeeper.service')
|
||||
common.run_os_command("systemctl start zookeeper.service")
|
||||
|
||||
|
||||
def start_libvirtd(logger, config):
|
||||
if config['enable_hypervisor']:
|
||||
logger.out('Starting Libvirt daemon', state='i')
|
||||
if config["enable_hypervisor"]:
|
||||
logger.out("Starting Libvirt daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command('systemctl start libvirtd.service')
|
||||
common.run_os_command("systemctl start libvirtd.service")
|
||||
|
||||
|
||||
def start_patroni(logger, config):
|
||||
if config['enable_networking'] and config['daemon_mode'] == 'coordinator':
|
||||
logger.out('Starting Patroni daemon', state='i')
|
||||
if config["enable_networking"] and config["daemon_mode"] == "coordinator":
|
||||
logger.out("Starting Patroni daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command('systemctl start patroni.service')
|
||||
common.run_os_command("systemctl start patroni.service")
|
||||
|
||||
|
||||
def start_frrouting(logger, config):
|
||||
if config['enable_networking'] and config['daemon_mode'] == 'coordinator':
|
||||
logger.out('Starting FRRouting daemon', state='i')
|
||||
if config["enable_networking"] and config["daemon_mode"] == "coordinator":
|
||||
logger.out("Starting FRRouting daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command('systemctl start frr.service')
|
||||
common.run_os_command("systemctl start frr.service")
|
||||
|
||||
|
||||
def start_ceph_mon(logger, config):
|
||||
if config['enable_storage'] and config['daemon_mode'] == 'coordinator':
|
||||
logger.out('Starting Ceph Monitor daemon', state='i')
|
||||
if config["enable_storage"] and config["daemon_mode"] == "coordinator":
|
||||
logger.out("Starting Ceph Monitor daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command(f'systemctl start ceph-mon@{config["node_hostname"]}.service')
|
||||
common.run_os_command(
|
||||
f'systemctl start ceph-mon@{config["node_hostname"]}.service'
|
||||
)
|
||||
|
||||
|
||||
def start_ceph_mgr(logger, config):
|
||||
if config['enable_storage'] and config['daemon_mode'] == 'coordinator':
|
||||
logger.out('Starting Ceph Manager daemon', state='i')
|
||||
if config["enable_storage"] and config["daemon_mode"] == "coordinator":
|
||||
logger.out("Starting Ceph Manager daemon", state="i")
|
||||
# TODO: Move our handling out of Systemd and integrate it directly as a subprocess?
|
||||
common.run_os_command(f'systemctl start ceph-mgr@{config["node_hostname"]}.service')
|
||||
common.run_os_command(
|
||||
f'systemctl start ceph-mgr@{config["node_hostname"]}.service'
|
||||
)
|
||||
|
||||
|
||||
def start_system_services(logger, config):
|
||||
@ -73,5 +77,5 @@ def start_system_services(logger, config):
|
||||
start_ceph_mon(logger, config)
|
||||
start_ceph_mgr(logger, config)
|
||||
|
||||
logger.out('Waiting 3 seconds for daemons to start', state='s')
|
||||
logger.out("Waiting 3 seconds for daemons to start", state="s")
|
||||
sleep(3)
|
||||
|
@ -31,45 +31,61 @@ def connect(logger, config):
|
||||
zkhandler = ZKHandler(config, logger)
|
||||
|
||||
try:
|
||||
logger.out('Connecting to Zookeeper on coordinator nodes {}'.format(config['coordinators']), state='i')
|
||||
logger.out(
|
||||
"Connecting to Zookeeper on coordinator nodes {}".format(
|
||||
config["coordinators"]
|
||||
),
|
||||
state="i",
|
||||
)
|
||||
# Start connection
|
||||
zkhandler.connect(persistent=True)
|
||||
except Exception as e:
|
||||
logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
|
||||
logger.out(
|
||||
"ERROR: Failed to connect to Zookeeper cluster: {}".format(e), state="e"
|
||||
)
|
||||
os._exit(1)
|
||||
|
||||
logger.out('Validating Zookeeper schema', state='i')
|
||||
logger.out("Validating Zookeeper schema", state="i")
|
||||
|
||||
try:
|
||||
node_schema_version = int(zkhandler.read(('node.data.active_schema', config['node_hostname'])))
|
||||
node_schema_version = int(
|
||||
zkhandler.read(("node.data.active_schema", config["node_hostname"]))
|
||||
)
|
||||
except Exception:
|
||||
node_schema_version = int(zkhandler.read('base.schema.version'))
|
||||
zkhandler.write([
|
||||
(('node.data.active_schema', config['node_hostname']), node_schema_version)
|
||||
])
|
||||
node_schema_version = int(zkhandler.read("base.schema.version"))
|
||||
zkhandler.write(
|
||||
[
|
||||
(
|
||||
("node.data.active_schema", config["node_hostname"]),
|
||||
node_schema_version,
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# Load in the current node schema version
|
||||
zkhandler.schema.load(node_schema_version)
|
||||
|
||||
# Record the latest installed schema version
|
||||
latest_schema_version = zkhandler.schema.find_latest()
|
||||
logger.out('Latest installed schema is {}'.format(latest_schema_version), state='i')
|
||||
zkhandler.write([
|
||||
(('node.data.latest_schema', config['node_hostname']), latest_schema_version)
|
||||
])
|
||||
logger.out("Latest installed schema is {}".format(latest_schema_version), state="i")
|
||||
zkhandler.write(
|
||||
[(("node.data.latest_schema", config["node_hostname"]), latest_schema_version)]
|
||||
)
|
||||
|
||||
# If we are the last node to get a schema update, fire the master update
|
||||
if latest_schema_version > node_schema_version:
|
||||
node_latest_schema_version = list()
|
||||
for node in zkhandler.children('base.node'):
|
||||
node_latest_schema_version.append(int(zkhandler.read(('node.data.latest_schema', node))))
|
||||
for node in zkhandler.children("base.node"):
|
||||
node_latest_schema_version.append(
|
||||
int(zkhandler.read(("node.data.latest_schema", node)))
|
||||
)
|
||||
|
||||
# This is true if all elements of the latest schema version are identical to the latest version,
|
||||
# i.e. they have all had the latest schema installed and ready to load.
|
||||
if node_latest_schema_version.count(latest_schema_version) == len(node_latest_schema_version):
|
||||
zkhandler.write([
|
||||
('base.schema.version', latest_schema_version)
|
||||
])
|
||||
if node_latest_schema_version.count(latest_schema_version) == len(
|
||||
node_latest_schema_version
|
||||
):
|
||||
zkhandler.write([("base.schema.version", latest_schema_version)])
|
||||
|
||||
return zkhandler, node_schema_version
|
||||
|
||||
@ -77,56 +93,95 @@ def connect(logger, config):
|
||||
def validate_schema(logger, zkhandler):
|
||||
# Validate our schema against the active version
|
||||
if not zkhandler.schema.validate(zkhandler, logger):
|
||||
logger.out('Found schema violations, applying', state='i')
|
||||
logger.out("Found schema violations, applying", state="i")
|
||||
zkhandler.schema.apply(zkhandler)
|
||||
else:
|
||||
logger.out('Schema successfully validated', state='o')
|
||||
logger.out("Schema successfully validated", state="o")
|
||||
|
||||
|
||||
def setup_node(logger, config, zkhandler):
|
||||
# Check if our node exists in Zookeeper, and create it if not
|
||||
if config['daemon_mode'] == 'coordinator':
|
||||
init_routerstate = 'secondary'
|
||||
if config["daemon_mode"] == "coordinator":
|
||||
init_routerstate = "secondary"
|
||||
else:
|
||||
init_routerstate = 'client'
|
||||
init_routerstate = "client"
|
||||
|
||||
if zkhandler.exists(('node', config['node_hostname'])):
|
||||
logger.out(f'Node is {logger.fmt_green}present{logger.fmt_end} in Zookeeper', state='i')
|
||||
if zkhandler.exists(("node", config["node_hostname"])):
|
||||
logger.out(
|
||||
f"Node is {logger.fmt_green}present{logger.fmt_end} in Zookeeper", state="i"
|
||||
)
|
||||
# Update static data just in case it's changed
|
||||
zkhandler.write([
|
||||
(('node', config['node_hostname']), config['daemon_mode']),
|
||||
(('node.mode', config['node_hostname']), config['daemon_mode']),
|
||||
(('node.state.daemon', config['node_hostname']), 'init'),
|
||||
(('node.state.router', config['node_hostname']), init_routerstate),
|
||||
(('node.data.static', config['node_hostname']), ' '.join(config['static_data'])),
|
||||
(('node.data.pvc_version', config['node_hostname']), config['pvcnoded_version']),
|
||||
(('node.ipmi.hostname', config['node_hostname']), config['ipmi_hostname']),
|
||||
(('node.ipmi.username', config['node_hostname']), config['ipmi_username']),
|
||||
(('node.ipmi.password', config['node_hostname']), config['ipmi_password']),
|
||||
])
|
||||
zkhandler.write(
|
||||
[
|
||||
(("node", config["node_hostname"]), config["daemon_mode"]),
|
||||
(("node.mode", config["node_hostname"]), config["daemon_mode"]),
|
||||
(("node.state.daemon", config["node_hostname"]), "init"),
|
||||
(("node.state.router", config["node_hostname"]), init_routerstate),
|
||||
(
|
||||
("node.data.static", config["node_hostname"]),
|
||||
" ".join(config["static_data"]),
|
||||
),
|
||||
(
|
||||
("node.data.pvc_version", config["node_hostname"]),
|
||||
config["pvcnoded_version"],
|
||||
),
|
||||
(
|
||||
("node.ipmi.hostname", config["node_hostname"]),
|
||||
config["ipmi_hostname"],
|
||||
),
|
||||
(
|
||||
("node.ipmi.username", config["node_hostname"]),
|
||||
config["ipmi_username"],
|
||||
),
|
||||
(
|
||||
("node.ipmi.password", config["node_hostname"]),
|
||||
config["ipmi_password"],
|
||||
),
|
||||
]
|
||||
)
|
||||
else:
|
||||
logger.out(f'Node is {logger.fmt_red}absent{logger.fmt_end} in Zookeeper; adding new node', state='i')
|
||||
logger.out(
|
||||
f"Node is {logger.fmt_red}absent{logger.fmt_end} in Zookeeper; adding new node",
|
||||
state="i",
|
||||
)
|
||||
keepalive_time = int(time.time())
|
||||
zkhandler.write([
|
||||
(('node', config['node_hostname']), config['daemon_mode']),
|
||||
(('node.keepalive', config['node_hostname']), str(keepalive_time)),
|
||||
(('node.mode', config['node_hostname']), config['daemon_mode']),
|
||||
(('node.state.daemon', config['node_hostname']), 'init'),
|
||||
(('node.state.domain', config['node_hostname']), 'flushed'),
|
||||
(('node.state.router', config['node_hostname']), init_routerstate),
|
||||
(('node.data.static', config['node_hostname']), ' '.join(config['static_data'])),
|
||||
(('node.data.pvc_version', config['node_hostname']), config['pvcnoded_version']),
|
||||
(('node.ipmi.hostname', config['node_hostname']), config['ipmi_hostname']),
|
||||
(('node.ipmi.username', config['node_hostname']), config['ipmi_username']),
|
||||
(('node.ipmi.password', config['node_hostname']), config['ipmi_password']),
|
||||
(('node.memory.total', config['node_hostname']), '0'),
|
||||
(('node.memory.used', config['node_hostname']), '0'),
|
||||
(('node.memory.free', config['node_hostname']), '0'),
|
||||
(('node.memory.allocated', config['node_hostname']), '0'),
|
||||
(('node.memory.provisioned', config['node_hostname']), '0'),
|
||||
(('node.vcpu.allocated', config['node_hostname']), '0'),
|
||||
(('node.cpu.load', config['node_hostname']), '0.0'),
|
||||
(('node.running_domains', config['node_hostname']), '0'),
|
||||
(('node.count.provisioned_domains', config['node_hostname']), '0'),
|
||||
(('node.count.networks', config['node_hostname']), '0'),
|
||||
])
|
||||
zkhandler.write(
|
||||
[
|
||||
(("node", config["node_hostname"]), config["daemon_mode"]),
|
||||
(("node.keepalive", config["node_hostname"]), str(keepalive_time)),
|
||||
(("node.mode", config["node_hostname"]), config["daemon_mode"]),
|
||||
(("node.state.daemon", config["node_hostname"]), "init"),
|
||||
(("node.state.domain", config["node_hostname"]), "flushed"),
|
||||
(("node.state.router", config["node_hostname"]), init_routerstate),
|
||||
(
|
||||
("node.data.static", config["node_hostname"]),
|
||||
" ".join(config["static_data"]),
|
||||
),
|
||||
(
|
||||
("node.data.pvc_version", config["node_hostname"]),
|
||||
config["pvcnoded_version"],
|
||||
),
|
||||
(
|
||||
("node.ipmi.hostname", config["node_hostname"]),
|
||||
config["ipmi_hostname"],
|
||||
),
|
||||
(
|
||||
("node.ipmi.username", config["node_hostname"]),
|
||||
config["ipmi_username"],
|
||||
),
|
||||
(
|
||||
("node.ipmi.password", config["node_hostname"]),
|
||||
config["ipmi_password"],
|
||||
),
|
||||
(("node.memory.total", config["node_hostname"]), "0"),
|
||||
(("node.memory.used", config["node_hostname"]), "0"),
|
||||
(("node.memory.free", config["node_hostname"]), "0"),
|
||||
(("node.memory.allocated", config["node_hostname"]), "0"),
|
||||
(("node.memory.provisioned", config["node_hostname"]), "0"),
|
||||
(("node.vcpu.allocated", config["node_hostname"]), "0"),
|
||||
(("node.cpu.load", config["node_hostname"]), "0.0"),
|
||||
(("node.running_domains", config["node_hostname"]), "0"),
|
||||
(("node.count.provisioned_domains", config["node_hostname"]), "0"),
|
||||
(("node.count.networks", config["node_hostname"]), "0"),
|
||||
]
|
||||
)
|
||||
|
Reference in New Issue
Block a user