Compare commits

...

6 Commits

Author SHA1 Message Date
df40b779af Bump version to 0.9.92 2024-01-29 09:39:10 -05:00
db4f0881a2 Improve error handling and retries
1. Use the actual response code from the server on error, or 504 on
timeouts instead of 500.
2. Retry GET requests 3 times and only error if the last fails
2024-01-29 09:35:14 -05:00
9b51fe9f10 Use get() for newer keys in client 2024-01-29 09:21:02 -05:00
a66449541d Improve script error handling and variables 2024-01-26 15:41:34 -05:00
d28fb71f57 Fix incorrect variable set 2024-01-24 14:40:40 -05:00
e5e9c7086a Add missing restore state to colours 2024-01-24 09:34:59 -05:00
14 changed files with 144 additions and 37 deletions

View File

@ -1 +1 @@
0.9.91
0.9.92

View File

@ -1,5 +1,13 @@
## PVC Changelog
###### [v0.9.92](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.92)
* [CLI Client] Adds the new restore state to the colours list for VM status
* [API Daemon] Fixes an incorrect variable assignment
* [Provisioner] Improves the error handling of various steps in the debootstrap and rinse example scripts
* [CLI Client] Fixes two bugs around missing keys that were added recently (uses get() instead direct dictionary refs)
* [CLI Client] Improves API error handling via GET retries (x3) and better server status code handling
###### [v0.9.91](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.91)
* [Client CLI] Fixes a bug and improves output during cluster task events.

View File

@ -150,6 +150,10 @@
from daemon_lib.vmbuilder import VMBuilder
# These are some global variables used below
default_root_password = "test123"
# The VMBuilderScript class must be named as such, and extend VMBuilder.
class VMBuilderScript(VMBuilder):
def setup(self):
@ -498,11 +502,15 @@ class VMBuilderScript(VMBuilder):
ret = os.system(
f"debootstrap --include={','.join(deb_packages)} {deb_release} {temp_dir} {deb_mirror}"
)
ret = int(ret >> 8)
if ret > 0:
self.fail("Failed to run debootstrap")
self.fail(f"Debootstrap failed with exit code {ret}")
# Bind mount the devfs so we can grub-install later
os.system("mount --bind /dev {}/dev".format(temp_dir))
ret = os.system("mount --bind /dev {}/dev".format(temp_dir))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/dev bind mount failed with exit code {ret}")
# Create an fstab entry for each volume
fstab_file = "{}/etc/fstab".format(temp_dir)
@ -688,23 +696,36 @@ GRUB_DISABLE_LINUX_UUID=false
# Do some tasks inside the chroot using the provided context manager
with chroot(temp_dir):
# Install and update GRUB
os.system(
ret = os.system(
"grub-install --force /dev/rbd/{}/{}_{}".format(
root_volume["pool"], vm_name, root_volume["disk_id"]
)
)
os.system("update-grub")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB install failed with exit code {ret}")
ret = os.system("update-grub")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB update failed with exit code {ret}")
# Set a really dumb root password so the VM can be debugged
# EITHER CHANGE THIS YOURSELF, here or in Userdata, or run something after install
# to change the root password: don't leave it like this on an Internet-facing machine!
os.system("echo root:test123 | chpasswd")
ret = os.system(f"echo root:{default_root_password} | chpasswd")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Root password change failed with exit code {ret}")
# Enable cloud-init target on (first) boot
# Your user-data should handle this and disable it once done, or things get messy.
# That cloud-init won't run without this hack seems like a bug... but even the official
# Debian cloud images are affected, so who knows.
os.system("systemctl enable cloud-init.target")
ret = os.system("systemctl enable cloud-init.target")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of cloud-init failed with exit code {ret}")
def cleanup(self):
"""
@ -729,7 +750,7 @@ GRUB_DISABLE_LINUX_UUID=false
temp_dir = "/tmp/target"
# Unmount the bound devfs
os.system("umount {}/dev".format(temp_dir))
os.system("umount -f {}/dev".format(temp_dir))
# Use this construct for reversing the list, as the normal reverse() messes with the list
for volume in list(reversed(self.vm_data["volumes"])):
@ -746,7 +767,7 @@ GRUB_DISABLE_LINUX_UUID=false
):
# Unmount filesystem
retcode, stdout, stderr = pvc_common.run_os_command(
f"umount {mount_path}"
f"umount -f {mount_path}"
)
if retcode:
self.log_err(

View File

@ -150,6 +150,11 @@
from daemon_lib.vmbuilder import VMBuilder
# These are some global variables used below
default_root_password = "test123"
default_local_time = "UTC"
# The VMBuilderScript class must be named as such, and extend VMBuilder.
class VMBuilderScript(VMBuilder):
def setup(self):
@ -524,13 +529,23 @@ class VMBuilderScript(VMBuilder):
ret = os.system(
f"rinse --arch {rinse_architecture} --directory {temporary_directory} --distribution {rinse_release} --cache-dir {rinse_cache} --add-pkg-list /tmp/addpkg --verbose {mirror_arg}"
)
ret = int(ret >> 8)
if ret > 0:
self.fail("Failed to run rinse")
self.fail(f"Rinse failed with exit code {ret}")
# Bind mount the devfs, sysfs, and procfs so we can grub-install later
os.system("mount --bind /dev {}/dev".format(temporary_directory))
os.system("mount --bind /sys {}/sys".format(temporary_directory))
os.system("mount --bind /proc {}/proc".format(temporary_directory))
ret = os.system("mount --bind /dev {}/dev".format(temporary_directory))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/dev bind mount failed with exit code {ret}")
ret = os.system("mount --bind /sys {}/sys".format(temporary_directory))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/sys bind mount failed with exit code {ret}")
ret = os.system("mount --bind /proc {}/proc".format(temporary_directory))
ret = int(ret >> 8)
if ret > 0:
self.fail(f"/proc bind mount failed with exit code {ret}")
# Create an fstab entry for each volume
fstab_file = "{}/etc/fstab".format(temporary_directory)
@ -642,41 +657,76 @@ GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=
# Do some tasks inside the chroot using the provided context manager
with chroot(temporary_directory):
# Fix the broken kernel from rinse by setting a systemd machine ID and running the post scripts
os.system("systemd-machine-id-setup")
os.system(
ret = os.system("systemd-machine-id-setup")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Machine ID setup failed with exit code {ret}")
ret = os.system(
"rpm -q --scripts kernel-core | grep -A20 'posttrans scriptlet' | tail -n+2 | bash -x"
)
ret = int(ret >> 8)
if ret > 0:
self.fail(f"RPM kernel reinstall failed with exit code {ret}")
# Install any post packages
os.system(f"dnf install -y {' '.join(post_packages)}")
if len(post_packages) > 0:
ret = os.system(f"dnf install -y {' '.join(post_packages)}")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"DNF install failed with exit code {ret}")
# Install and update GRUB config
os.system(
ret = os.system(
"grub2-install --force /dev/rbd/{}/{}_{}".format(
root_volume["pool"], vm_name, root_volume["disk_id"]
)
)
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB install failed with exit code {ret}")
os.system("grub2-mkconfig -o /boot/grub2/grub.cfg")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"GRUB update failed with exit code {ret}")
# Set a really dumb root password so the VM can be debugged
# EITHER CHANGE THIS YOURSELF, here or in Userdata, or run something after install
# to change the root password: don't leave it like this on an Internet-facing machine!
os.system("echo root:test123 | chpasswd")
ret = os.system(f"echo root:{default_root_password} | chpasswd")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Root password change failed with exit code {ret}")
# Enable dbus-broker
os.system("systemctl enable dbus-broker.service")
ret = os.system("systemctl enable dbus-broker.service")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of dbus-broker failed with exit code {ret}")
# Enable NetworkManager
os.system("systemctl enable NetworkManager.service")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of NetworkManager failed with exit code {ret}")
# Enable cloud-init target on (first) boot
# Your user-data should handle this and disable it once done, or things get messy.
# That cloud-init won't run without this hack seems like a bug... but even the official
# Debian cloud images are affected, so who knows.
os.system("systemctl enable cloud-init.target")
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Enable of cloud-init failed with exit code {ret}")
# Set the timezone to UTC
os.system("ln -sf ../usr/share/zoneinfo/UTC /etc/localtime")
ret = os.system(
f"ln -sf ../usr/share/zoneinfo/{default_local_time} /etc/localtime"
)
ret = int(ret >> 8)
if ret > 0:
self.fail(f"Localtime update failed with exit code {ret}")
def cleanup(self):
"""

View File

@ -27,7 +27,7 @@ from distutils.util import strtobool as dustrtobool
import daemon_lib.config as cfg
# Daemon version
version = "0.9.91"
version = "0.9.92"
# API version
API_VERSION = 1.0

View File

@ -140,7 +140,12 @@ def call_api(
# Determine the request type and hit the API
disable_warnings()
try:
response = None
if operation == "get":
retry_on_code = [429, 500, 502, 503, 504]
for i in range(3):
failed = False
try:
response = requests.get(
uri,
timeout=timeout,
@ -149,6 +154,17 @@ def call_api(
data=data,
verify=config["verify_ssl"],
)
if response.status_code in retry_on_code:
failed = True
continue
except requests.exceptions.ConnectionError:
failed = True
pass
if failed:
error = f"Code {response.status_code}" if response else "Timeout"
raise requests.exceptions.ConnectionError(
f"Failed to connect after 3 tries ({error})"
)
if operation == "post":
response = requests.post(
uri,
@ -189,7 +205,8 @@ def call_api(
)
except Exception as e:
message = "Failed to connect to the API: {}".format(e)
response = ErrorResponse({"message": message}, 500)
code = response.status_code if response else 504
response = ErrorResponse({"message": message}, code)
# Display debug output
if config["debug"]:

View File

@ -430,7 +430,7 @@ def format_list_osd(config, osd_list):
)
continue
if osd_information["is_split"]:
if osd_information.get("is_split") is not None:
osd_information["device"] = f"{osd_information['device']} [s]"
# Deal with the size to human readable

View File

@ -1632,6 +1632,7 @@ def format_info(config, domain_information, long_output):
"migrate": ansiprint.blue(),
"unmigrate": ansiprint.blue(),
"provision": ansiprint.blue(),
"restore": ansiprint.blue(),
}
ainformation.append(
"{}State:{} {}{}{}".format(
@ -1716,7 +1717,7 @@ def format_info(config, domain_information, long_output):
"{}Max live downtime:{} {}".format(
ansiprint.purple(),
ansiprint.end(),
f"{domain_information['migration_max_downtime']} ms",
f"{domain_information.get('migration_max_downtime')} ms",
)
)

View File

@ -2,7 +2,7 @@ from setuptools import setup
setup(
name="pvc",
version="0.9.91",
version="0.9.92",
packages=["pvc.cli", "pvc.lib"],
install_requires=[
"Click",

View File

@ -1201,7 +1201,7 @@ def get_resource_metrics(zkhandler):
try:
user_time = vm["vcpu_stats"]["user_time"] / 1000000
except Exception:
cpu_time = 0
user_time = 0
output_lines.append(
f"pvc_vm_vcpus_user_time{{vm=\"{vm['name']}\"}} {user_time}"
)

10
debian/changelog vendored
View File

@ -1,3 +1,13 @@
pvc (0.9.92-0) unstable; urgency=high
* [CLI Client] Adds the new restore state to the colours list for VM status
* [API Daemon] Fixes an incorrect variable assignment
* [Provisioner] Improves the error handling of various steps in the debootstrap and rinse example scripts
* [CLI Client] Fixes two bugs around missing keys that were added recently (uses get() instead direct dictionary refs)
* [CLI Client] Improves API error handling via GET retries (x3) and better server status code handling
-- Joshua M. Boniface <joshua@boniface.me> Mon, 29 Jan 2024 09:39:10 -0500
pvc (0.9.91-0) unstable; urgency=high
* [Client CLI] Fixes a bug and improves output during cluster task events.

View File

@ -33,7 +33,7 @@ import os
import signal
# Daemon version
version = "0.9.91"
version = "0.9.92"
##########################################################

View File

@ -49,7 +49,7 @@ import re
import json
# Daemon version
version = "0.9.91"
version = "0.9.92"
##########################################################

View File

@ -44,7 +44,7 @@ from daemon_lib.vmbuilder import (
)
# Daemon version
version = "0.9.91"
version = "0.9.92"
config = cfg.get_configuration()