Files
pvc/client-common/node.py
Joshua Boniface f198f62563 Massive rejigger into single daemon
Completely restructure the daemon code to move the 4 discrete daemons
into a single daemon that can be run on every hypervisor. Introduce the
idea of a static list of "coordinator" nodes which are configured at
install time to run Zookeeper and FRR in router mode, and which are
allowed to take on client network management duties (gateway, DHCP, DNS,
etc.) while also allowing them to run VMs (i.e. no dedicated "router"
nodes required).
2018-10-14 02:40:54 -04:00

376 lines
16 KiB
Python

#!/usr/bin/env python3
# node.py - PVC client function library, node management
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import socket
import time
import uuid
import re
import tempfile
import subprocess
import difflib
import colorama
import click
import lxml.objectify
import configparser
import kazoo.client
import client_lib.ansiiprint as ansiiprint
import client_lib.zkhandler as zkhandler
import client_lib.common as common
import client_lib.vm as pvc_vm
def getInformationFromNode(zk_conn, node_name, long_output):
    """Build a coloured, human-readable description of a single node.

    The node's values are read from Zookeeper keys under
    ``/nodes/<node_name>/`` via ``zkhandler.readdata`` and formatted as one
    ``Label: value`` entry per line.

    Args:
        zk_conn: Zookeeper connection handle, passed through to
            ``zkhandler.readdata``.
        node_name: Name of the node; used to build the ``/nodes/<name>/...``
            keys and shown in the output.
        long_output: When truthy, architecture, operating system, kernel,
            CPU, load and memory details are appended to the basic summary.

    Returns:
        The formatted lines joined with newlines, as a single string.

    Raises:
        IndexError: if the ``staticdata`` value has fewer than four
            whitespace-separated fields.
        ValueError: if one of the memory values is not an integer string.
    """
    def read(key):
        # Every per-node value lives under /nodes/<node_name>/<key>
        return zkhandler.readdata(zk_conn, '/nodes/{}/{}'.format(node_name, key))

    def field(label, value):
        # Purple label, colour reset, one space, then the value
        return '{}{}{} {}'.format(ansiiprint.purple(), label, ansiiprint.end(), value)

    def coloured(colour, text):
        # Wrap a state string in its colour and reset afterwards
        return '{}{}{}'.format(colour, text, ansiiprint.end())

    node_daemon_state = read('daemonstate')
    node_router_state = read('routerstate')
    node_domain_state = read('domainstate')

    # staticdata is a whitespace-separated record: CPU count, kernel, OS, arch
    node_static_data = read('staticdata').split()
    node_cpu_count = node_static_data[0]
    node_kernel = node_static_data[1]
    node_os = node_static_data[2]
    node_arch = node_static_data[3]

    node_mem_allocated = int(read('memalloc'))
    node_mem_used = int(read('memused'))
    node_mem_free = int(read('memfree'))
    # Total memory is not stored; it is derived from used + free
    node_mem_total = node_mem_used + node_mem_free
    node_load = read('cpuload')
    node_domains_count = read('domainscount')

    # State -> colour lookup tables; values are callables so that only the
    # selected colour function is actually invoked.
    daemon_colours = {
        'run': ansiiprint.green,
        'stop': ansiiprint.red,
        'init': ansiiprint.yellow,
        'dead': lambda: ansiiprint.red() + ansiiprint.bold(),
    }
    router_colours = {
        'primary': ansiiprint.green,
        'secondary': ansiiprint.blue,
    }
    domain_colours = {
        'ready': ansiiprint.green,
    }
    daemon_state_colour = daemon_colours.get(node_daemon_state, ansiiprint.blue)()
    router_state_colour = router_colours.get(node_router_state, ansiiprint.purple)()
    domain_state_colour = domain_colours.get(node_domain_state, ansiiprint.blue)()

    # Format a nice output; do this line-by-line then concat the elements at the end
    ainformation = [
        '{}Hypervisor Node information:{}'.format(ansiiprint.bold(), ansiiprint.end()),
        '',
        # Basic information
        field('Name:', node_name),
        field('Daemon State:', coloured(daemon_state_colour, node_daemon_state)),
        field('Router State:', coloured(router_state_colour, node_router_state)),
        field('Domain State:', coloured(domain_state_colour, node_domain_state)),
        field('Active VM Count:', node_domains_count),
    ]

    if long_output:
        ainformation.extend([
            '',
            field('Architecture:', node_arch),
            field('Operating System:', node_os),
            field('Kernel Version:', node_kernel),
            '',
            field('CPUs:', node_cpu_count),
            field('Load:', node_load),
            field('Total RAM (MiB):', node_mem_total),
            field('Used RAM (MiB):', node_mem_used),
            field('Free RAM (MiB):', node_mem_free),
            field('Allocated RAM (MiB):', node_mem_allocated),
        ])

    # Join it all together
    return '\n'.join(ainformation)
#
# Direct Functions
#
def secondary_node(zk_conn, node):
    """Take a coordinator node out of the primary router role.

    If the node's ``routerstate`` is ``primary``, the cluster-wide
    ``/primary_node`` key is set to ``'none'``; otherwise only a notice is
    printed and nothing is written.

    Args:
        zk_conn: Zookeeper connection handle.
        node: Name of the node to act on.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False with an error
        message when the node does not exist or its ``daemonmode`` is
        ``hypervisor``; otherwise ``(True, '')``.
    """
    # Verify node is valid
    node_exists = common.verifyNode(zk_conn, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    # Ensure node is a coordinator
    mode_key = '/nodes/{}/daemonmode'.format(node)
    if zkhandler.readdata(zk_conn, mode_key) == 'hypervisor':
        return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)

    # Anything other than 'primary' is reported as already secondary
    state_key = '/nodes/{}/routerstate'.format(node)
    router_state = zkhandler.readdata(zk_conn, state_key)
    if router_state != 'primary':
        click.echo('Node {} is already in secondary router mode.'.format(node))
        return True, ''

    click.echo('Setting node {} in secondary router mode.'.format(node))
    # Clearing the primary is done by writing the literal string 'none'
    new_values = {'/primary_node': 'none'}
    zkhandler.writedata(zk_conn, new_values)
    return True, ''
def primary_node(zk_conn, node):
    """Make a coordinator node the primary router.

    If the node's ``routerstate`` is ``secondary``, the cluster-wide
    ``/primary_node`` key is set to the node's name; otherwise only a notice
    is printed and nothing is written.

    Args:
        zk_conn: Zookeeper connection handle.
        node: Name of the node to act on.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False with an error
        message when the node does not exist or its ``daemonmode`` is
        ``hypervisor``; otherwise ``(True, '')``.
    """
    # Verify node is valid
    node_exists = common.verifyNode(zk_conn, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    # Ensure node is a coordinator
    mode_key = '/nodes/{}/daemonmode'.format(node)
    if zkhandler.readdata(zk_conn, mode_key) == 'hypervisor':
        return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)

    # Anything other than 'secondary' is reported as already primary
    state_key = '/nodes/{}/routerstate'.format(node)
    router_state = zkhandler.readdata(zk_conn, state_key)
    if router_state != 'secondary':
        click.echo('Node {} is already in primary router mode.'.format(node))
        return True, ''

    click.echo('Setting node {} in primary router mode.'.format(node))
    # The primary is recorded by name in a single cluster-wide key
    new_values = {'/primary_node': node}
    zkhandler.writedata(zk_conn, new_values)
    return True, ''
def flush_node(zk_conn, node, wait):
    """Request that a node be flushed of its running VMs.

    The request is made by writing ``'flush'`` to the node's ``domainstate``
    key in Zookeeper. This function does not move any VMs itself.

    Args:
        zk_conn: Zookeeper connection handle.
        node: Name of the node to flush.
        wait: When truthy, block until the node's ``domainstate`` reads
            ``'flushed'``.

    Returns:
        A ``(success, message)`` tuple: ``(False, <error>)`` when the node
        does not exist, otherwise ``(True, '')``.
    """
    # Verify node is valid
    if not common.verifyNode(zk_conn, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    click.echo('Flushing hypervisor {} of running VMs.'.format(node))

    # Request the flush by setting the node's domain state
    domain_state_key = '/nodes/{}/domainstate'.format(node)
    zkhandler.writedata(zk_conn, {
        domain_state_key: 'flush'
    })

    if wait:
        # Poll once per second until the state reads 'flushed'.
        # NOTE(review): there is no timeout; if the state never becomes
        # 'flushed' this loop does not return.
        while True:
            time.sleep(1)
            node_state = zkhandler.readdata(zk_conn, domain_state_key)
            if node_state == 'flushed':
                break

    return True, ''
def ready_node(zk_conn, node):
    """Request that a node be restored to active service.

    The request is made by writing ``'unflush'`` to the node's
    ``domainstate`` key in Zookeeper.

    Args:
        zk_conn: Zookeeper connection handle.
        node: Name of the node to restore.

    Returns:
        A ``(success, message)`` tuple: ``(False, <error>)`` when the node
        does not exist, otherwise ``(True, '')``.
    """
    # Verify node is valid
    node_exists = common.verifyNode(zk_conn, node)
    if not node_exists:
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    click.echo('Restoring hypervisor {} to active service.'.format(node))

    # Request the unflush by setting the node's domain state
    domain_state_key = '/nodes/{}/domainstate'.format(node)
    new_values = {domain_state_key: 'unflush'}
    zkhandler.writedata(zk_conn, new_values)
    return True, ''
def get_info(zk_conn, node, long_output):
    """Print detailed information about one node.

    Args:
        zk_conn: Zookeeper connection handle.
        node: Name of the node to describe.
        long_output: When truthy, the extended node details are printed,
            followed by the list of VMs on the node.

    Returns:
        A ``(success, message)`` tuple: ``(False, <error>)`` when the node
        does not exist or no information could be produced, otherwise
        ``(True, '')``.
    """
    # Verify node is valid
    if not common.verifyNode(zk_conn, node):
        return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)

    # Get information about node in a pretty format
    information = getInformationFromNode(zk_conn, node, long_output)
    if information is None:
        return False, 'ERROR: Could not find a node matching that name.'

    click.echo(information)

    if long_output:
        click.echo('')
        click.echo('{}Virtual machines on node:{}'.format(ansiiprint.bold(), ansiiprint.end()))
        click.echo('')
        # List all VMs on this node. The call's return value is not
        # inspected here, matching the existing behaviour.
        pvc_vm.get_list(zk_conn, node, None)

    click.echo('')

    return True, ''
def get_list(zk_conn, limit):
    """Print a table of cluster nodes, optionally filtered by a regex.

    Node names are the children of ``/nodes`` in Zookeeper. For each
    selected node the daemon, router and domain states, VM count, CPU count,
    load and memory figures are read and printed as one row, sorted by node
    name beneath a bold header.

    Args:
        zk_conn: Zookeeper connection handle; must provide ``get_children``
            and is passed through to ``zkhandler.readdata``.
        limit: Regular expression used to select nodes, or None for all
            nodes. Unless it starts with ``^`` / ends with ``$`` it is
            treated as a fuzzy (substring) match.

    Returns:
        A ``(success, message)`` tuple: ``(False, 'Regex Error: ...')`` when
        ``limit`` is not a valid regular expression, otherwise ``(True, '')``.
    """
    full_node_list = zk_conn.get_children('/nodes')

    # Match our limit
    if limit is None:
        node_list = list(full_node_list)
    else:
        # Implicitly assume fuzzy limits. The pattern is built and compiled
        # once, before the loop, so it does not grow with every node.
        if not limit.startswith('^'):
            limit = '.*' + limit
        if not limit.endswith('$'):
            limit = limit + '.*'
        try:
            matcher = re.compile(limit)
        except (re.error, OverflowError) as e:
            return False, 'Regex Error: {}'.format(e)
        node_list = [node for node in full_node_list if matcher.match(node)]

    # State -> colour lookup tables; values are callables so that only the
    # selected colour function is actually invoked.
    daemon_colours = {
        'run': ansiiprint.green,
        'stop': ansiiprint.red,
        'init': ansiiprint.yellow,
        'dead': lambda: ansiiprint.red() + ansiiprint.bold(),
    }
    router_colours = {
        'primary': ansiiprint.green,
        'secondary': ansiiprint.blue,
    }

    # Gather information for printing, one record per node, in name order
    records = []
    for node_name in sorted(node_list):
        def read(key, _node=node_name):
            # Every per-node value lives under /nodes/<node_name>/<key>
            return zkhandler.readdata(zk_conn, '/nodes/{}/{}'.format(_node, key))

        daemon_state = read('daemonstate')
        router_state = read('routerstate')
        domain_state = read('domainstate')
        # The first whitespace-separated field of staticdata is the CPU count
        cpu_count = read('staticdata').split()[0]
        mem_allocated = int(read('memalloc'))
        mem_used = int(read('memused'))
        mem_free = int(read('memfree'))
        # Total memory is not stored; it is derived from used + free
        mem_total = mem_used + mem_free
        load = read('cpuload')
        domains_count = read('domainscount')

        # A node whose VMs are allocated at least as much memory as it has
        # is displayed as 'overprov' regardless of its stored domain state.
        # This is decided before the column widths are measured so that the
        # replacement text is accounted for.
        if mem_allocated != 0 and mem_allocated >= mem_total:
            domain_state = 'overprov'
            domain_state_colour = ansiiprint.yellow()
        elif domain_state == 'ready':
            domain_state_colour = ansiiprint.green()
        else:
            domain_state_colour = ansiiprint.blue()

        records.append({
            'node_name': node_name,
            'node_daemon_state': daemon_state,
            'node_router_state': router_state,
            'node_domain_state': domain_state,
            'daemon_state_colour': daemon_colours.get(daemon_state, ansiiprint.blue)(),
            'router_state_colour': router_colours.get(router_state, ansiiprint.purple)(),
            'domain_state_colour': domain_state_colour,
            'node_domains_count': domains_count,
            'node_cpu_count': cpu_count,
            'node_load': load,
            'node_mem_total': mem_total,
            'node_mem_used': mem_used,
            'node_mem_free': mem_free,
            'node_mem_allocated': mem_allocated,
        })

    # Determine optimal column widths
    # Dynamic columns: node_name, daemon_state, router_state, domain_state
    # Each is the longest value plus one, with a minimum that fits the header.
    def column_width(minimum, key):
        return max([minimum] + [len(record[key]) + 1 for record in records])

    widths = {
        'node_name_length': column_width(5, 'node_name'),
        'daemon_state_length': column_width(7, 'node_daemon_state'),
        'router_state_length': column_width(7, 'node_router_state'),
        'domain_state_length': column_width(7, 'node_domain_state'),
    }

    # One template serves the header and the rows. The section labels are
    # fixed-width fields: the header fills them with text and the rows leave
    # them blank, which keeps every column lined up under its heading.
    line_format = (
        '{bold}{node_name: <{node_name_length}} '
        '{state_label: <7}'
        '{daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} '
        '{router_state_colour}{node_router_state: <{router_state_length}}{end_colour} '
        '{domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} '
        '{resources_label: <11}'
        '{node_domains_count: <4} {node_cpu_count: <5} {node_load: <6} '
        '{ram_label: <11}'
        '{node_mem_total: <6} {node_mem_used: <6} {node_mem_free: <6} {node_mem_allocated: <6}'
        '{end_bold}'
    )

    # Format the string (header)
    node_list_output = [
        line_format.format(
            bold=ansiiprint.bold(),
            end_bold=ansiiprint.end(),
            state_label='State:',
            resources_label='Resources:',
            ram_label='RAM (MiB):',
            daemon_state_colour='',
            router_state_colour='',
            domain_state_colour='',
            end_colour='',
            node_name='Name',
            node_daemon_state='Daemon',
            node_router_state='Router',
            node_domain_state='Domain',
            node_domains_count='VMs',
            node_cpu_count='CPUs',
            node_load='Load',
            node_mem_total='Total',
            node_mem_used='Used',
            node_mem_free='Free',
            node_mem_allocated='VMs',
            **widths
        )
    ]

    # Format the string (elements)
    for record in records:
        row_fields = dict(record)
        row_fields.update(widths)
        node_list_output.append(
            line_format.format(
                bold='',
                end_bold='',
                state_label='',
                resources_label='',
                ram_label='',
                end_colour=ansiiprint.end(),
                **row_fields
            )
        )

    # The header is emitted first explicitly and the rows are already in
    # name order, so no sorting of the formatted strings is needed.
    click.echo('\n'.join(node_list_output))

    return True, ''