Compare commits
21 Commits
SHA1:

* 2e9f6ac201
* f09849bedf
* 8c975e5c46
* c76149141f
* f00c4d07f4
* 20b66c10e1
* cfeba50b17
* 0699c48d10
* 551bae2518
* 4832245d9c
* 2138f2f59f
* d1d355a96b
* 2b5dc286ab
* c0c9327a7d
* 5ffabcfef5
* 330cf14638
* 9d0eb20197
* 3f5b7045a2
* 80fe96b24d
* 80f04ce8ee
* 65d14ccd92

README.md (16 lines changed)

@@ -42,6 +42,22 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r

 ## Changelog

+#### v0.9.25
+
+* [Node Daemon] Returns to Rados library calls for Ceph due to performance problems
+* [Node Daemon] Adds a date output to keepalive messages
+* [Daemons] Configures ZK connection logging only for persistent connections
+* [API Provisioner] Add context manager-based chroot to Debootstrap example script
+* [Node Daemon] Fixes a bug where shutdown daemon state was overwritten
+
+#### v0.9.24
+
+* [Node Daemon] Removes Rados module polling of Ceph cluster and returns to command-based polling for timeout purposes, and removes some flaky return statements
+* [Node Daemon] Removes flaky Zookeeper connection renewals that caused problems
+* [CLI Client] Allow raw lists of clusters from `pvc cluster list`
+* [API Daemon] Fixes several issues when getting VM data without stats
+* [API Daemon] Fixes issues with removing VMs while disks are still in use (failed provisioning, etc.)
+
 #### v0.9.23

 * [Daemons] Fixes a critical overwriting bug in zkhandler when schema paths are not yet valid

API provisioner Debootstrap example script, which gains a reusable chroot context manager:

@@ -34,6 +34,29 @@
 # with that.

 import os
+from contextlib import contextmanager
+
+
+# Create a chroot context manager
+# This can be used later in the script to chroot to the destination directory
+# for instance to run commands within the target.
+@contextmanager
+def chroot_target(destination):
+    try:
+        real_root = os.open("/", os.O_RDONLY)
+        os.chroot(destination)
+        fake_root = os.open("/", os.O_RDONLY)
+        os.fchdir(fake_root)
+        yield
+    finally:
+        os.fchdir(real_root)
+        os.chroot(".")
+        os.fchdir(real_root)
+        os.close(fake_root)
+        os.close(real_root)
+        del fake_root
+        del real_root
+

 # Installation function - performs a debootstrap install of a Debian system
 # Note that the only arguments are keyword arguments.

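The escape logic in the `finally` block relies on `real_root` having been opened before `os.chroot()` was called: an `fchdir()` to that saved descriptor followed by `os.chroot(".")` restores the original root, and because it sits in a `finally` clause it runs even when the wrapped block raises.
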
@@ -193,40 +216,25 @@ GRUB_DISABLE_LINUX_UUID=false
     fh.write(data)

 # Chroot, do some in-root tasks, then exit the chroot
-# EXITING THE CHROOT IS VERY IMPORTANT OR THE FOLLOWING STAGES OF THE PROVISIONER
-# WILL FAIL IN UNEXPECTED WAYS! Keep this in mind when using chroot in your scripts.
-real_root = os.open("/", os.O_RDONLY)
-os.chroot(temporary_directory)
-fake_root = os.open("/", os.O_RDONLY)
-os.fchdir(fake_root)
-
-# Install and update GRUB
-os.system(
-    "grub-install --force /dev/rbd/{}/{}_{}".format(root_disk['pool'], vm_name, root_disk['disk_id'])
-)
-os.system(
-    "update-grub"
-)
-# Set a really dumb root password [TEMPORARY]
-os.system(
-    "echo root:test123 | chpasswd"
-)
-# Enable cloud-init target on (first) boot
-# NOTE: Your user-data should handle this and disable it once done, or things get messy.
-# That cloud-init won't run without this hack seems like a bug... but even the official
-# Debian cloud images are affected, so who knows.
-os.system(
-    "systemctl enable cloud-init.target"
-)
-
-# Restore our original root/exit the chroot
-# EXITING THE CHROOT IS VERY IMPORTANT OR THE FOLLOWING STAGES OF THE PROVISIONER
-# WILL FAIL IN UNEXPECTED WAYS! Keep this in mind when using chroot in your scripts.
-os.fchdir(real_root)
-os.chroot(".")
-os.fchdir(real_root)
-os.close(fake_root)
-os.close(real_root)
+with chroot_target(temporary_directory):
+    # Install and update GRUB
+    os.system(
+        "grub-install --force /dev/rbd/{}/{}_{}".format(root_disk['pool'], vm_name, root_disk['disk_id'])
+    )
+    os.system(
+        "update-grub"
+    )
+    # Set a really dumb root password [TEMPORARY]
+    os.system(
+        "echo root:test123 | chpasswd"
+    )
+    # Enable cloud-init target on (first) boot
+    # NOTE: Your user-data should handle this and disable it once done, or things get messy.
+    # That cloud-init won't run without this hack seems like a bug... but even the official
+    # Debian cloud images are affected, so who knows.
+    os.system(
+        "systemctl enable cloud-init.target"
+    )

 # Unmount the bound devfs
 os.system(

@@ -235,8 +243,4 @@ GRUB_DISABLE_LINUX_UUID=false
     )
 )

-# Clean up file handles so paths can be unmounted
-del fake_root
-del real_root
-
 # Everything else is done via cloud-init user-data

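With this refactor the old warning ("EXITING THE CHROOT IS VERY IMPORTANT OR THE FOLLOWING STAGES OF THE PROVISIONER WILL FAIL IN UNEXPECTED WAYS") is enforced structurally rather than by convention: the context manager's `finally` block always restores the original root, and the manual `del fake_root`/`del real_root` cleanup becomes unnecessary because the descriptors are closed inside `chroot_target()` itself.
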
Another provisioner example script (its comment is corrected to match its no-op behaviour):

@@ -29,7 +29,7 @@
 # This script will run under root privileges as the provisioner does. Be careful
 # with that.

-# Installation function - performs a debootstrap install of a Debian system
+# Installation function - performs no actions then returns
 # Note that the only arguments are keyword arguments.
 def install(**kwargs):
     # The provisioner has already mounted the disks on kwargs['temporary_directory'].

API daemon version bump:

@@ -25,7 +25,7 @@ import yaml
 from distutils.util import strtobool as dustrtobool

 # Daemon version
-version = '0.9.23'
+version = '0.9.25'

 # API version
 API_VERSION = 1.0

CLI client, adding the `--raw` flag to `pvc cluster list`:

@@ -251,7 +251,11 @@ def cluster_remove(name):
 # pvc cluster list
 ###############################################################################
 @click.command(name='list', short_help='List all available clusters.')
-def cluster_list():
+@click.option(
+    '-r', '--raw', 'raw', is_flag=True, default=False,
+    help='Display the raw list of cluster names only.'
+)
+def cluster_list(raw):
     """
     List all the available PVC clusters configured in this CLI instance.
     """

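For readers unfamiliar with Click's flag handling, here is a minimal self-contained sketch of the same pattern; the cluster names are placeholders, not part of PVC:

```python
import click

@click.command(name='list', short_help='List all available clusters.')
@click.option(
    '-r', '--raw', 'raw', is_flag=True, default=False,
    help='Display the raw list of cluster names only.'
)
def cluster_list(raw):
    # Stand-in for the CLI's configured cluster store
    clusters = ['cluster1', 'cluster2']
    if not raw:
        click.echo("Available clusters:")
    for name in clusters:
        click.echo(name)

if __name__ == '__main__':
    cluster_list()
```
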
@@ -302,27 +306,28 @@ def cluster_list():
         if _api_key_length > api_key_length:
             api_key_length = _api_key_length

-    # Display the data nicely
-    click.echo("Available clusters:")
-    click.echo()
-    click.echo(
-        '{bold}{name: <{name_length}} {description: <{description_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
-            bold=ansiprint.bold(),
-            end_bold=ansiprint.end(),
-            name="Name",
-            name_length=name_length,
-            description="Description",
-            description_length=description_length,
-            address="Address",
-            address_length=address_length,
-            port="Port",
-            port_length=port_length,
-            scheme="Scheme",
-            scheme_length=scheme_length,
-            api_key="API Key",
-            api_key_length=api_key_length
-        )
-    )
+    if not raw:
+        # Display the data nicely
+        click.echo("Available clusters:")
+        click.echo()
+        click.echo(
+            '{bold}{name: <{name_length}} {description: <{description_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
+                bold=ansiprint.bold(),
+                end_bold=ansiprint.end(),
+                name="Name",
+                name_length=name_length,
+                description="Description",
+                description_length=description_length,
+                address="Address",
+                address_length=address_length,
+                port="Port",
+                port_length=port_length,
+                scheme="Scheme",
+                scheme_length=scheme_length,
+                api_key="API Key",
+                api_key_length=api_key_length
+            )
+        )

     for cluster in clusters:
         cluster_details = clusters[cluster]

@@ -341,24 +346,27 @@ def cluster_list():
         if not api_key:
             api_key = 'N/A'

-        click.echo(
-            '{bold}{name: <{name_length}} {description: <{description_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
-                bold='',
-                end_bold='',
-                name=cluster,
-                name_length=name_length,
-                description=description,
-                description_length=description_length,
-                address=address,
-                address_length=address_length,
-                port=port,
-                port_length=port_length,
-                scheme=scheme,
-                scheme_length=scheme_length,
-                api_key=api_key,
-                api_key_length=api_key_length
-            )
-        )
+        if not raw:
+            click.echo(
+                '{bold}{name: <{name_length}} {description: <{description_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
+                    bold='',
+                    end_bold='',
+                    name=cluster,
+                    name_length=name_length,
+                    description=description,
+                    description_length=description_length,
+                    address=address,
+                    address_length=address_length,
+                    port=port,
+                    port_length=port_length,
+                    scheme=scheme,
+                    scheme_length=scheme_length,
+                    api_key=api_key,
+                    api_key_length=api_key_length
+                )
+            )
+        else:
+            click.echo(cluster)


 # Validate that the cluster is set for a given command

@@ -1652,7 +1660,7 @@ def vm_dump(filename, domain):
 @cluster_req
 def vm_list(target_node, target_state, limit, raw):
     """
-    List all virtual machines; optionally only match names matching regex LIMIT.
+    List all virtual machines; optionally only match names or full UUIDs matching regex LIMIT.

     NOTE: Red-coloured network lists indicate one or more configured networks are missing/invalid.
     """

@@ -2,7 +2,7 @@ from setuptools import setup

 setup(
     name='pvc',
-    version='0.9.23',
+    version='0.9.25',
     packages=['pvc', 'pvc.cli_lib'],
     install_requires=[
         'Click',

@@ -343,8 +343,13 @@ def getInformationFromXML(zkhandler, uuid):

     parsed_xml = getDomainXML(zkhandler, uuid)

-    stats_data = loads(zkhandler.read(('domain.stats', uuid)))
-    if stats_data is None:
+    stats_data = zkhandler.read(('domain.stats', uuid))
+    if stats_data is not None:
+        try:
+            stats_data = loads(stats_data)
+        except Exception:
+            stats_data = {}
+    else:
         stats_data = {}

     domain_uuid, domain_name, domain_description, domain_memory, domain_vcpu, domain_vcputopo = getDomainMainDetails(parsed_xml)

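The new code splits the read from the parse so a missing or malformed stats key degrades to an empty dict instead of raising. A minimal sketch of the pattern, with a hypothetical `read_key` callable standing in for `zkhandler.read()`:

```python
from json import loads

def parse_stats(read_key):
    # read_key() stands in for zkhandler.read(('domain.stats', uuid))
    stats_data = read_key()
    if stats_data is not None:
        try:
            stats_data = loads(stats_data)
        except Exception:
            stats_data = {}
    else:
        stats_data = {}
    return stats_data

print(parse_stats(lambda: '{"cpu": 2}'))  # {'cpu': 2}
print(parse_stats(lambda: None))          # {}
print(parse_stats(lambda: 'not json'))    # {}
```
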
@@ -449,14 +449,6 @@ def remove_vm(zkhandler, domain):
     if current_vm_state != 'stop':
         change_state(zkhandler, dom_uuid, 'stop')

-    # Gracefully terminate the class instances
-    change_state(zkhandler, dom_uuid, 'delete')
-
-    # Delete the configurations
-    zkhandler.delete([
-        ('domain', dom_uuid)
-    ])
-
     # Wait for 1 second to allow state to flow to all nodes
     time.sleep(1)

@@ -465,11 +457,28 @@ def remove_vm(zkhandler, domain):
         # vmpool/vmname_volume
         try:
             disk_pool, disk_name = disk.split('/')
-            retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
         except ValueError:
             continue

-    return True, 'Removed VM "{}" and disks from the cluster.'.format(domain)
+        retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
+        if not retcode:
+            if re.match('^ERROR: No volume with name', message):
+                continue
+            else:
+                return False, message
+
+    # Gracefully terminate the class instances
+    change_state(zkhandler, dom_uuid, 'delete')
+
+    # Wait for 1/2 second to allow state to flow to all nodes
+    time.sleep(0.5)
+
+    # Delete the VM configuration from Zookeeper
+    zkhandler.delete([
+        ('domain', dom_uuid)
+    ])
+
+    return True, 'Removed VM "{}" and its disks from the cluster.'.format(domain)


 def start_vm(zkhandler, domain):

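The reordering here is the substance of the fix: disk volumes are now removed, and their removal verified, before the VM is marked for deletion and its configuration erased from Zookeeper. A "No volume with name" error is tolerated (the disk was already gone, e.g. after failed provisioning), while any other removal failure aborts with the error message instead of leaving orphaned volumes behind.
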
@@ -789,7 +798,10 @@ def get_console_log(zkhandler, domain, lines=1000):
         return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)

     # Get the data from ZK
-    console_log = zkhandler.read(('domain.log.console', dom_uuid))
+    console_log = zkhandler.read(('domain.console.log', dom_uuid))
+
+    if console_log is None:
+        return True, ''

     # Shrink the log buffer to length lines
     shrunk_log = console_log.split('\n')[-lines:]

@@ -897,6 +909,9 @@ def get_list(zkhandler, node, state, limit, is_fuzzy=True):
         for vm_uuid in vm_execute_list:
             futures.append(executor.submit(common.getInformationFromXML, zkhandler, vm_uuid))
         for future in futures:
-            vm_data_list.append(future.result())
+            try:
+                vm_data_list.append(future.result())
+            except Exception:
+                pass

     return True, vm_data_list

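`future.result()` re-raises any exception the worker thread hit, so one failed VM lookup previously aborted the whole listing; the try/except skips just that entry. A small self-contained sketch, assuming the `concurrent.futures` executor the surrounding code appears to use:

```python
from concurrent.futures import ThreadPoolExecutor

def get_vm_info(n):
    # Hypothetical worker; one lookup fails to show the skip behaviour
    if n == 2:
        raise RuntimeError('unreadable VM data')
    return 'vm-{}'.format(n)

vm_data_list = []
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(get_vm_info, n) for n in range(4)]
    for future in futures:
        try:
            vm_data_list.append(future.result())  # re-raises worker errors
        except Exception:
            pass  # skip entries whose data could not be gathered

print(vm_data_list)  # ['vm-0', 'vm-1', 'vm-3']
```
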
@@ -124,37 +124,29 @@ class ZKHandler(object):
     # State/connection management
     #
     def listener(self, state):
+        """
+        Listen for KazooState changes and log accordingly.
+
+        This function does not do anything except for log the state, and Kazoo handles the rest.
+        """
         if state == KazooState.CONNECTED:
-            self.log('Connection to Zookeeper started', state='o')
+            self.log('Connection to Zookeeper resumed', state='o')
         else:
-            self.log('Connection to Zookeeper lost', state='w')
-
-            while True:
-                time.sleep(0.5)
-
-                _zk_conn = KazooClient(hosts=self.coordinators)
-                try:
-                    _zk_conn.start()
-                except Exception:
-                    del _zk_conn
-                    continue
-
-                self.zk_conn = _zk_conn
-                self.zk_conn.add_listener(self.listener)
-                break
+            self.log('Connection to Zookeeper lost with state {}'.format(state), state='w')

     def connect(self, persistent=False):
         """
-        Start the zk_conn object and connect to the cluster, then load the current schema version
+        Start the zk_conn object and connect to the cluster
         """
         try:
             self.zk_conn.start()
             if persistent:
+                self.log('Connection to Zookeeper started', state='o')
                 self.zk_conn.add_listener(self.listener)
         except Exception as e:
             raise ZKConnectionException(self, e)

-    def disconnect(self):
+    def disconnect(self, persistent=False):
         """
         Stop and close the zk_conn object and disconnect from the cluster

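This is the removal of the "flaky Zookeeper connection renewals" called out in the v0.9.24 changelog: the listener no longer tries to rebuild the connection itself in a blocking loop, it only logs the state change and leaves reconnection to Kazoo's own machinery. Pairing with this, `connect()` and `disconnect()` now log (and register the listener) only for persistent connections, per the v0.9.25 entry.
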
@@ -162,11 +154,27 @@ class ZKHandler(object):
         """
         self.zk_conn.stop()
         self.zk_conn.close()
+        if persistent:
+            self.log('Connection to Zookeeper terminated', state='o')

     #
     # Schema helper actions
     #
     def get_schema_path(self, key):
+        """
+        Get the Zookeeper path for {key} from the current schema based on its format.
+
+        If {key} is a tuple of length 2, it's treated as a path plus an item instance of that path (e.g. a node, a VM, etc.).
+
+        If {key} is a tuple of length 4, it is treated as a path plus an item instance, as well as another item instance of the subpath.
+
+        If {key} is just a string, it's treated as a lone path (mostly used for the 'base' schema group).
+
+        Otherwise, returns None since this is not a valid key.
+
+        This function also handles the special case where a string that looks like an existing path (i.e. starts with '/') is passed;
+        in that case it will silently return the same path back. This was mostly a migration functionality and is deprecated.
+        """
         if isinstance(key, tuple):
             # This is a key tuple with both an ipath and an item
             if len(key) == 2:

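As a shape-only illustration of the key formats the new docstring describes (the actual schema lookup is done by the handler's schema object and is not reproduced here):

```python
def classify_key(key):
    # Mirrors the dispatch described in get_schema_path()'s docstring
    if isinstance(key, str):
        if key.startswith('/'):
            return 'raw path, passed through (deprecated migration case)'
        return "lone path (mostly the 'base' schema group)"
    if isinstance(key, tuple) and len(key) == 2:
        return 'path plus an item instance'
    if isinstance(key, tuple) and len(key) == 4:
        return 'path plus item, plus subpath plus sub-item'
    return None  # not a valid key

print(classify_key(('domain.stats', 'some-vm-uuid')))  # path plus an item instance
```
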
debian/changelog (vendored, 20 lines changed)

@@ -1,3 +1,23 @@
+pvc (0.9.25-0) unstable; urgency=high
+
+  * [Node Daemon] Returns to Rados library calls for Ceph due to performance problems
+  * [Node Daemon] Adds a date output to keepalive messages
+  * [Daemons] Configures ZK connection logging only for persistent connections
+  * [API Provisioner] Add context manager-based chroot to Debootstrap example script
+  * [Node Daemon] Fixes a bug where shutdown daemon state was overwritten
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Sun, 11 Jul 2021 23:19:09 -0400
+
+pvc (0.9.24-0) unstable; urgency=high
+
+  * [Node Daemon] Removes Rados module polling of Ceph cluster and returns to command-based polling for timeout purposes, and removes some flaky return statements
+  * [Node Daemon] Removes flaky Zookeeper connection renewals that caused problems
+  * [CLI Client] Allow raw lists of clusters from `pvc cluster list`
+  * [API Daemon] Fixes several issues when getting VM data without stats
+  * [API Daemon] Fixes issues with removing VMs while disks are still in use (failed provisioning, etc.)
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Fri, 09 Jul 2021 15:58:36 -0400
+
 pvc (0.9.23-0) unstable; urgency=high

   * [Daemons] Fixes a critical overwriting bug in zkhandler when schema paths are not yet valid

Node daemon:

@@ -32,6 +32,7 @@ import yaml
 import json

 from socket import gethostname
+from datetime import datetime
 from threading import Thread
 from ipaddress import ip_address, ip_network
 from apscheduler.schedulers.background import BackgroundScheduler

@@ -55,7 +56,7 @@ import pvcnoded.MetadataAPIInstance as MetadataAPIInstance
 import pvcnoded.MetadataAPIInstance as MetadataAPIInstance

 # Version string for startup output
-version = '0.9.23'
+version = '0.9.25'

 ###############################################################################
 # PVCD - node daemon startup program

@@ -658,7 +659,7 @@ def update_schema(new_schema_version, stat, event=''):
     # Restart ourselves with the new schema
     logger.out('Reloading node daemon', state='s')
     try:
-        zkhandler.disconnect()
+        zkhandler.disconnect(persistent=True)
         del zkhandler
     except Exception:
         pass

@@ -751,7 +752,7 @@ def cleanup():

     # Close the Zookeeper connection
     try:
-        zkhandler.disconnect()
+        zkhandler.disconnect(persistent=True)
         del zkhandler
     except Exception:
         pass

@@ -1334,11 +1335,13 @@ def collect_ceph_stats(queue):
         ceph_health = health_status['status']
     except Exception as e:
         logger.out('Failed to obtain Ceph health data: {}'.format(e), state='e')
-        return
+        ceph_health = 'HEALTH_UNKN'

-    if ceph_health == 'HEALTH_OK':
+    if ceph_health in ['HEALTH_OK']:
         ceph_health_colour = fmt_green
-    elif ceph_health == 'HEALTH_WARN':
+    elif ceph_health in ['HEALTH_UNKN']:
+        ceph_health_colour = fmt_cyan
+    elif ceph_health in ['HEALTH_WARN']:
         ceph_health_colour = fmt_yellow
     else:
         ceph_health_colour = fmt_red

@@ -1356,7 +1359,6 @@ def collect_ceph_stats(queue):
         ])
     except Exception as e:
         logger.out('Failed to set Ceph status data: {}'.format(e), state='e')
-        return

     if debug:
         logger.out("Set ceph rados df information in zookeeper (primary only)", state='d', prefix='ceph-thread')

@@ -1370,15 +1372,15 @@ def collect_ceph_stats(queue):
         ])
     except Exception as e:
         logger.out('Failed to set Ceph utilization data: {}'.format(e), state='e')
-        return

     if debug:
         logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread')

     # Get pool info
-    retcode, stdout, stderr = common.run_os_command('ceph df --format json', timeout=1)
+    command = {"prefix": "df", "format": "json"}
+    ceph_df_output = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
     try:
-        ceph_pool_df_raw = json.loads(stdout)['pools']
+        ceph_pool_df_raw = json.loads(ceph_df_output)['pools']
     except Exception as e:
         logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w')
         ceph_pool_df_raw = []

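`ceph_conn` here is a connected `rados.Rados` handle set up elsewhere in the daemon (outside this hunk). A minimal standalone sketch of the same monitor-command round trip, assuming the standard librados Python bindings and a default ceph.conf location:

```python
import json
import rados

# Assumed configuration path; PVC's actual connection setup is not shown in this diff
ceph_conn = rados.Rados(conffile='/etc/ceph/ceph.conf')
ceph_conn.connect()
try:
    command = {"prefix": "df", "format": "json"}
    # mon_command() returns a (return-code, output-buffer, status-string) tuple
    retcode, outbuf, outs = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)
    pools = json.loads(outbuf.decode('ascii'))['pools']
    print([pool['name'] for pool in pools])
finally:
    ceph_conn.shutdown()
```
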
@@ -1449,9 +1451,9 @@ def collect_ceph_stats(queue):
         osd_dump = dict()

         command = {"prefix": "osd dump", "format": "json"}
+        osd_dump_output = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
         try:
-            retcode, stdout, stderr = common.run_os_command('ceph osd dump --format json --connect-timeout 2', timeout=2)
-            osd_dump_raw = json.loads(stdout)['osds']
+            osd_dump_raw = json.loads(osd_dump_output)['osds']
         except Exception as e:
             logger.out('Failed to obtain OSD data: {}'.format(e), state='w')
             osd_dump_raw = []

@@ -1608,7 +1610,6 @@ def collect_vm_stats(queue):
     lv_conn = libvirt.open(libvirt_name)
     if lv_conn is None:
         logger.out('Failed to open connection to "{}"'.format(libvirt_name), state='e')
-        return

     memalloc = 0
     memprov = 0

@@ -1778,8 +1779,9 @@ def node_keepalive():
     # Get past state and update if needed
     if debug:
         logger.out("Get past state and update if needed", state='d', prefix='main-thread')

     past_state = zkhandler.read(('node.state.daemon', this_node.name))
-    if past_state != 'run':
+    if past_state != 'run' and past_state != 'shutdown':
         this_node.daemon_state = 'run'
         zkhandler.write([
             (('node.state.daemon', this_node.name), 'run')

@@ -1868,7 +1870,6 @@ def node_keepalive():
         ])
     except Exception:
         logger.out('Failed to set keepalive data', state='e')
-        return

     # Display node information to the terminal
     if config['log_keepalives']:

@@ -1879,9 +1880,10 @@ def node_keepalive():
     else:
         cst_colour = fmt_cyan
     logger.out(
-        '{}{} keepalive{} [{}{}{}]'.format(
+        '{}{} keepalive @ {}{} [{}{}{}]'.format(
            fmt_purple,
            myhostname,
+           datetime.now(),
            fmt_end,
            fmt_bold + cst_colour,
            this_node.router_state,

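The `datetime.now()` argument is the "date output" from the changelog entry, and is what the new `from datetime import datetime` import earlier in the file exists to support.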