Compare commits

17 commits:

1aa5999109, 570460e5ee, 7a99e0e524, 234d6ae83b, 5d0e7931d1, dcb9c0d12c, f6e856bf98, f1fe0c63f5, ab944f9b95, 9714ac20b2, 79ad09ae59, 4c6aabec6a, 559400ed90, 78c774b607, a461791ce8, 9fdb6d8708, 2fb7c40497
CHANGELOG.md (+21)
@@ -1,5 +1,26 @@
 ## PVC Changelog
 
+###### [v0.9.98](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.98)
+
+  * [CLI Client] Fixed output when API call times out
+  * [Node Daemon] Improves the handling of fence states
+  * [API Daemon/CLI Client] Adds support for storage snapshot rollback
+  * [CLI Client] Adds additional warning messages about snapshot consistency to help output
+  * [API Daemon] Fixes a bug listing snapshots by pool/volume
+  * [Node Daemon] Adds a --version flag for information gathering by update-motd.sh
+
+###### [v0.9.97](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.97)
+
+  * [Client CLI] Ensures --lines is always an integer value
+  * [Node Daemon] Fixes a bug if d_network changes during iteration
+  * [Node Daemon] Moves to using allocated instead of free memory for node reporting
+  * [API Daemon] Fixes a bug if lingering RBD snapshots exist when removing a volume (#180)
+
+###### [v0.9.96](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.96)
+
+  * [API Daemon] Fixes a bug when reporting node stats
+  * [API Daemon] Fixes a bug deleting successful benchmark results
+
 ###### [v0.9.95](https://github.com/parallelvirtualcluster/pvc/releases/tag/v0.9.95)
 
   * [API Daemon/CLI Client] Adds a flag to allow duplicate VNIs in network templates
@@ -27,7 +27,7 @@ from distutils.util import strtobool as dustrtobool
 import daemon_lib.config as cfg
 
 # Daemon version
-version = "0.9.95"
+version = "0.9.98"
 
 # API version
 API_VERSION = 1.0
@@ -6362,6 +6362,59 @@ api.add_resource(
 )
 
 
+# /storage/ceph/snapshot/<pool>/<volume>/<snapshot>/rollback
+class API_Storage_Ceph_Snapshot_Rollback_Element(Resource):
+    @Authenticator
+    def post(self, pool, volume, snapshot):
+        """
+        Roll back an RBD volume {volume} in pool {pool} to snapshot {snapshot}
+
+        WARNING: This action cannot be done on an active RBD volume. All IO MUST be stopped first.
+        ---
+        tags:
+          - storage / ceph
+        parameters:
+          - in: query
+            name: snapshot
+            type: string
+            required: true
+            description: The name of the snapshot
+          - in: query
+            name: volume
+            type: string
+            required: true
+            description: The name of the volume
+          - in: query
+            name: pool
+            type: string
+            required: true
+            description: The name of the pool
+        responses:
+          200:
+            description: OK
+            schema:
+              type: object
+              id: Message
+          404:
+            description: Not found
+            schema:
+              type: object
+              id: Message
+          400:
+            description: Bad request
+            schema:
+              type: object
+              id: Message
+        """
+        return api_helper.ceph_volume_snapshot_rollback(pool, volume, snapshot)
+
+
+api.add_resource(
+    API_Storage_Ceph_Snapshot_Rollback_Element,
+    "/storage/ceph/snapshot/<pool>/<volume>/<snapshot>/rollback",
+)
+
+
 ##########################################################
 # Provisioner API
 ##########################################################
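A minimal way to exercise the new endpoint from a client, assuming a reachable PVC API; the base URL, port, and `X-Api-Key` header value here are deployment-specific placeholders, as are the pool/volume/snapshot names:

```python
# Hypothetical smoke test for the new rollback endpoint. The host, port,
# and API key are deployment-specific assumptions, not fixed values.
import requests

API_BASE = "http://pvc-api.local:7370/api/v1"
resp = requests.post(
    f"{API_BASE}/storage/ceph/snapshot/vms/test-volume/snap1/rollback",
    headers={"X-Api-Key": "supersecretkey"},
)
print(resp.status_code, resp.json().get("message"))
```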
@@ -2183,6 +2183,22 @@ def ceph_volume_snapshot_rename(zkhandler, pool, volume, name, new_name):
     return output, retcode
 
 
+@ZKConnection(config)
+def ceph_volume_snapshot_rollback(zkhandler, pool, volume, name):
+    """
+    Roll back a Ceph RBD volume to a given snapshot in the PVC Ceph storage cluster.
+    """
+    retflag, retdata = pvc_ceph.rollback_snapshot(zkhandler, pool, volume, name)
+
+    if retflag:
+        retcode = 200
+    else:
+        retcode = 400
+
+    output = {"message": retdata.replace('"', "'")}
+    return output, retcode
+
+
 @ZKConnection(config)
 def ceph_volume_snapshot_remove(zkhandler, pool, volume, name):
     """
@@ -671,9 +671,9 @@ def cli_cluster_maintenance_off():
 @format_opt(
     {
         "pretty": cli_cluster_task_format_pretty,
-        "raw": lambda d: "\n".join([t["id"] for t in d])
-        if isinstance(d, list)
-        else d["state"],
+        "raw": lambda d: (
+            "\n".join([t["id"] for t in d]) if isinstance(d, list) else d["state"]
+        ),
         "json": lambda d: jdumps(d),
         "json-pretty": lambda d: jdumps(d, indent=2),
     }
@@ -892,6 +892,7 @@ def cli_node_ready(
     "--lines",
     "lines",
     default=None,
+    type=int,
     show_default=False,
     help="Display this many log lines from the end of the log buffer. [default: 1000; with follow: 10]",
 )
@@ -2516,6 +2517,7 @@ def cli_vm_volume_remove(domain, volume, live_flag, restart_flag):
     "--lines",
     "lines",
     default=None,
+    type=int,
     show_default=False,
     help="Display this many log lines from the end of the log buffer. [default: 1000; with follow: 10]",
 )
@@ -4323,6 +4325,10 @@ def cli_storage_volume_snapshot():
 def cli_storage_volume_snapshot_add(pool, volume, name):
     """
     Add a snapshot with name NAME of Ceph RBD volume VOLUME in pool POOL.
+
+    WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken
+    of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the
+    VM at the moment of the snapshot.
     """
 
     retcode, retmsg = pvc.lib.storage.ceph_snapshot_add(CLI_CONFIG, pool, volume, name)
@@ -4370,6 +4376,36 @@ def cli_storage_volume_snapshot_remove(pool, volume, name):
     finish(retcode, retmsg)
 
 
+###############################################################################
+# > pvc storage volume snapshot rollback
+###############################################################################
+@click.command(name="rollback", short_help="Roll back RBD volume to snapshot.")
+@connection_req
+@click.argument("pool")
+@click.argument("volume")
+@click.argument("name")
+@confirm_opt("Roll back to snapshot {name} for volume {pool}/{volume}")
+def cli_storage_volume_snapshot_rollback(pool, volume, name):
+    """
+    Roll back the Ceph RBD volume VOLUME in pool POOL to the snapshot NAME.
+
+    DANGER: All data written to the volume since the given snapshot will be permanently lost.
+
+    WARNING: A rollback cannot be performed on an RBD volume with active I/O. Doing so will cause
+    undefined behaviour and possible corruption. Ensure that any VM(s) using this RBD volume are
+    stopped or disabled before attempting a snapshot rollback.
+
+    WARNING: RBD snapshots are crash-consistent but not filesystem-aware. If a snapshot was taken
+    of a running VM, restoring that snapshot will be equivalent to having forcibly restarted the
+    VM at the moment of the snapshot.
+    """
+
+    retcode, retmsg = pvc.lib.storage.ceph_snapshot_rollback(
+        CLI_CONFIG, pool, volume, name
+    )
+    finish(retcode, retmsg)
+
+
 ###############################################################################
 # > pvc storage volume snapshot list
 ###############################################################################
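With the command registered below, a typical invocation (hypothetical names) is `pvc storage volume snapshot rollback vms test-volume snap1`; the `@confirm_opt` decorator prompts for confirmation before the API call is made, and per the docstring any VM using the volume must be stopped or disabled first.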
@@ -6347,6 +6383,7 @@ cli_storage_volume.add_command(cli_storage_volume_list)
 cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_add)
 cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_rename)
 cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_remove)
+cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_rollback)
 cli_storage_volume_snapshot.add_command(cli_storage_volume_snapshot_list)
 cli_storage_volume.add_command(cli_storage_volume_snapshot)
 cli_storage.add_command(cli_storage_volume)
@@ -580,9 +580,11 @@ def cli_cluster_fault_list_format_long(CLI_CONFIG, fault_data):
             fault_id=fault["id"],
             fault_status=fault["status"].title(),
             fault_health_delta=f"-{fault['health_delta']}%",
-            fault_acknowledged_at=fault["acknowledged_at"]
-            if fault["acknowledged_at"] != ""
-            else "N/A",
+            fault_acknowledged_at=(
+                fault["acknowledged_at"]
+                if fault["acknowledged_at"] != ""
+                else "N/A"
+            ),
             fault_last_reported=fault["last_reported"],
             fault_first_reported=fault["first_reported"],
         )
@@ -108,9 +108,10 @@ class UploadProgressBar(object):
 
 
 class ErrorResponse(requests.Response):
-    def __init__(self, json_data, status_code):
+    def __init__(self, json_data, status_code, headers):
         self.json_data = json_data
         self.status_code = status_code
+        self.headers = headers
 
     def json(self):
         return self.json_data
@@ -206,7 +207,7 @@ def call_api(
     except Exception as e:
         message = "Failed to connect to the API: {}".format(e)
         code = response.status_code if response else 504
-        response = ErrorResponse({"message": message}, code)
+        response = ErrorResponse({"message": message}, code, None)
 
     # Display debug output
     if config["debug"]:
@@ -1544,6 +1544,30 @@ def ceph_snapshot_add(config, pool, volume, snapshot):
     return retstatus, response.json().get("message", "")
 
 
+def ceph_snapshot_rollback(config, pool, volume, snapshot):
+    """
+    Roll back Ceph volume to snapshot
+
+    API endpoint: POST /api/v1/storage/ceph/snapshot/{pool}/{volume}/{snapshot}/rollback
+    API arguments:
+    API schema: {"message":"{data}"}
+    """
+    response = call_api(
+        config,
+        "post",
+        "/storage/ceph/snapshot/{pool}/{volume}/{snapshot}/rollback".format(
+            snapshot=snapshot, volume=volume, pool=pool
+        ),
+    )
+
+    if response.status_code == 200:
+        retstatus = True
+    else:
+        retstatus = False
+
+    return retstatus, response.json().get("message", "")
+
+
 def ceph_snapshot_remove(config, pool, volume, snapshot):
     """
     Remove Ceph snapshot
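For completeness, a sketch of calling this helper directly from Python rather than through the CLI; `config` stands in for the loaded configuration dict that the PVC CLI normally passes to `pvc.lib` functions, and the names are placeholders:

```python
# Hypothetical sketch; "config" must be the loaded PVC client configuration
# dict (e.g. the CLI's CLI_CONFIG), and the names below are placeholders.
from pvc.lib.storage import ceph_snapshot_rollback

ok, message = ceph_snapshot_rollback(config, "vms", "test-volume", "snap1")  # noqa: F821
if not ok:
    raise SystemExit(f"Rollback failed: {message}")
print(message)
```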
@@ -1765,9 +1765,9 @@ def format_info(config, domain_information, long_output):
                     tags_name=tag["name"],
                     tags_type=tag["type"],
                     tags_protected=str(tag["protected"]),
-                    tags_protected_colour=ansiprint.green()
-                    if tag["protected"]
-                    else ansiprint.blue(),
+                    tags_protected_colour=(
+                        ansiprint.green() if tag["protected"] else ansiprint.blue()
+                    ),
                     end=ansiprint.end(),
                 )
             )
@@ -2,7 +2,7 @@ from setuptools import setup
 
 setup(
     name="pvc",
-    version="0.9.95",
+    version="0.9.98",
     packages=["pvc.cli", "pvc.lib"],
     install_requires=[
         "Click",
|
@ -115,12 +115,13 @@ class BenchmarkError(Exception):
|
||||
#
|
||||
|
||||
|
||||
def cleanup(job_name, db_conn=None, db_cur=None, zkhandler=None):
|
||||
def cleanup(job_name, db_conn=None, db_cur=None, zkhandler=None, final=False):
|
||||
if db_conn is not None and db_cur is not None:
|
||||
# Clean up our dangling result
|
||||
query = "DELETE FROM storage_benchmarks WHERE job = %s;"
|
||||
args = (job_name,)
|
||||
db_cur.execute(query, args)
|
||||
if not final:
|
||||
# Clean up our dangling result (non-final runs only)
|
||||
query = "DELETE FROM storage_benchmarks WHERE job = %s;"
|
||||
args = (job_name,)
|
||||
db_cur.execute(query, args)
|
||||
db_conn.commit()
|
||||
# Close the database connections cleanly
|
||||
close_database(db_conn, db_cur)
|
||||
@@ -410,6 +411,7 @@ def worker_run_benchmark(zkhandler, celery, config, pool):
         db_conn=db_conn,
         db_cur=db_cur,
         zkhandler=zkhandler,
+        final=True,
     )
 
     current_stage += 1
@@ -320,7 +320,11 @@ def get_list_osd(zkhandler, limit=None, is_fuzzy=True):
 #
 def getPoolInformation(zkhandler, pool):
     # Parse the stats data
-    (pool_stats_raw, tier, pgs,) = zkhandler.read_many(
+    (
+        pool_stats_raw,
+        tier,
+        pgs,
+    ) = zkhandler.read_many(
         [
             ("pool.stats", pool),
             ("pool.tier", pool),
@@ -536,7 +540,10 @@ def getCephVolumes(zkhandler, pool):
         pool_list = [pool]
 
     for pool_name in pool_list:
-        for volume_name in zkhandler.children(("volume", pool_name)):
+        children = zkhandler.children(("volume", pool_name))
+        if children is None:
+            continue
+        for volume_name in children:
             volume_list.append("{}/{}".format(pool_name, volume_name))
 
     return volume_list
@@ -824,10 +831,22 @@ def remove_volume(zkhandler, pool, name):
             name, pool
         )
 
-    # 1. Remove volume snapshots
+    # 1a. Remove PVC-managed volume snapshots
     for snapshot in zkhandler.children(("snapshot", f"{pool}/{name}")):
         remove_snapshot(zkhandler, pool, name, snapshot)
 
+    # 1b. Purge any remaining volume snapshots
+    retcode, stdout, stderr = common.run_os_command(
+        "rbd snap purge {}/{}".format(pool, name)
+    )
+    if retcode:
+        return (
+            False,
+            'ERROR: Failed to purge snapshots from RBD volume "{}" in pool "{}": {}'.format(
+                name, pool, stderr
+            ),
+        )
+
     # 2. Remove the volume
     retcode, stdout, stderr = common.run_os_command("rbd rm {}/{}".format(pool, name))
     if retcode:
@@ -1066,6 +1085,36 @@ def rename_snapshot(zkhandler, pool, volume, name, new_name):
     )
 
 
+def rollback_snapshot(zkhandler, pool, volume, name):
+    if not verifyVolume(zkhandler, pool, volume):
+        return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
+            volume, pool
+        )
+    if not verifySnapshot(zkhandler, pool, volume, name):
+        return (
+            False,
+            'ERROR: No snapshot with name "{}" is present for volume "{}" in pool "{}".'.format(
+                name, volume, pool
+            ),
+        )
+
+    # 1. Roll back the snapshot
+    retcode, stdout, stderr = common.run_os_command(
+        "rbd snap rollback {}/{}@{}".format(pool, volume, name)
+    )
+    if retcode:
+        return (
+            False,
+            'ERROR: Failed to roll back RBD volume "{}" in pool "{}" to snapshot "{}": {}'.format(
+                volume, pool, name, stderr
+            ),
+        )
+
+    return True, 'Rolled back RBD volume "{}" in pool "{}" to snapshot "{}".'.format(
+        volume, pool, name
+    )
+
+
 def remove_snapshot(zkhandler, pool, volume, name):
     if not verifyVolume(zkhandler, pool, volume):
         return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(
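Since `rbd snap rollback` against an in-use image risks corruption (hence the CLI warnings above), a cautious wrapper could first verify that the image has no watchers. This sketch is illustrative only and not part of the PVC code; it relies on the stock `rbd status --format json` output, which includes a `watchers` list:

```python
# Illustrative pre-flight check (not part of PVC): refuse to roll back while
# any client still has the RBD image open (i.e. it has active watchers).
import json
import subprocess

def has_watchers(pool: str, volume: str) -> bool:
    out = subprocess.run(
        ["rbd", "status", f"{pool}/{volume}", "--format", "json"],
        capture_output=True, text=True, check=True,
    ).stdout
    return len(json.loads(out).get("watchers", [])) > 0

if has_watchers("vms", "test-volume"):  # hypothetical names
    raise SystemExit("Volume is in use; stop the VM before rolling back.")
```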
@@ -1107,20 +1156,9 @@ def remove_snapshot(zkhandler, pool, volume, name):
     )
 
 
-def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
+def get_list_snapshot(zkhandler, target_pool, target_volume, limit=None, is_fuzzy=True):
     snapshot_list = []
-    if pool and not verifyPool(zkhandler, pool):
-        return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(
-            pool
-        )
-
-    if volume and not verifyPool(zkhandler, volume):
-        return (
-            False,
-            'ERROR: No volume with name "{}" is present in the cluster.'.format(volume),
-        )
-
-    full_snapshot_list = getCephSnapshots(zkhandler, pool, volume)
+    full_snapshot_list = getCephSnapshots(zkhandler, target_pool, target_volume)
 
     if is_fuzzy and limit:
         # Implicitly assume fuzzy limits
@@ -1132,6 +1170,10 @@ def get_list_snapshot(zkhandler, pool, volume, limit=None, is_fuzzy=True):
     for snapshot in full_snapshot_list:
         volume, snapshot_name = snapshot.split("@")
         pool_name, volume_name = volume.split("/")
+        if target_pool and pool_name != target_pool:
+            continue
+        if target_volume and volume_name != target_volume:
+            continue
         if limit:
             try:
                 if re.fullmatch(limit, snapshot_name):
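A standalone illustration of the new filtering semantics, using hypothetical snapshot data; only the pool filter is set here, so entries from other pools are skipped while the `limit` regex is matched against the snapshot name alone:

```python
import re

# Hypothetical data in "pool/volume@snapshot" form
full_snapshot_list = [
    "vms/test-volume@snap1",
    "vms/other-volume@snap2",
    "data/test-volume@snap1",
]
target_pool, target_volume, limit = "vms", None, "snap.*"

for snapshot in full_snapshot_list:
    volume, snapshot_name = snapshot.split("@")
    pool_name, volume_name = volume.split("/")
    if target_pool and pool_name != target_pool:
        continue  # wrong pool
    if target_volume and volume_name != target_volume:
        continue  # wrong volume
    if limit and not re.fullmatch(limit, snapshot_name):
        continue  # name does not match the fuzzy limit
    print(snapshot)  # prints only the two "vms/" entries
```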
@@ -244,9 +244,9 @@ def get_parsed_configuration(config_file):
             ]
         ][0]
 
-        config_cluster_networks_specific[
-            f"{network_type}_dev_ip"
-        ] = f"{list(network.hosts())[address_id]}/{network.prefixlen}"
+        config_cluster_networks_specific[f"{network_type}_dev_ip"] = (
+            f"{list(network.hosts())[address_id]}/{network.prefixlen}"
+        )
 
         config = {**config, **config_cluster_networks_specific}
@@ -69,6 +69,8 @@ def getNodeHealthDetails(zkhandler, node_name, node_health_plugins):
             plugin_message,
             plugin_data,
         ) = tuple(all_plugin_data[pos_start:pos_end])
+        if plugin_data is None:
+            continue
         plugin_output = {
             "name": plugin,
             "last_run": int(plugin_last_run) if plugin_last_run is not None else None,
@@ -156,9 +158,9 @@ def getNodeInformation(zkhandler, node_name):
         zkhandler, node_name, node_health_plugins
     )
 
-    if _node_network_stats is not None:
+    try:
         node_network_stats = json.loads(_node_network_stats)
-    else:
+    except Exception:
         node_network_stats = dict()
 
     # Construct a data structure to represent the data
debian/changelog (vendored, +27)
@@ -1,3 +1,30 @@
+pvc (0.9.98-0) unstable; urgency=high
+
+  * [CLI Client] Fixed output when API call times out
+  * [Node Daemon] Improves the handling of fence states
+  * [API Daemon/CLI Client] Adds support for storage snapshot rollback
+  * [CLI Client] Adds additional warning messages about snapshot consistency to help output
+  * [API Daemon] Fixes a bug listing snapshots by pool/volume
+  * [Node Daemon] Adds a --version flag for information gathering by update-motd.sh
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Wed, 05 Jun 2024 12:01:31 -0400
+
+pvc (0.9.97-0) unstable; urgency=high
+
+  * [Client CLI] Ensures --lines is always an integer value
+  * [Node Daemon] Fixes a bug if d_network changes during iteration
+  * [Node Daemon] Moves to using allocated instead of free memory for node reporting
+  * [API Daemon] Fixes a bug if lingering RBD snapshots exist when removing a volume (#180)
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Fri, 19 Apr 2024 10:32:16 -0400
+
+pvc (0.9.96-0) unstable; urgency=high
+
+  * [API Daemon] Fixes a bug when reporting node stats
+  * [API Daemon] Fixes a bug deleting successful benchmark results
+
+ -- Joshua M. Boniface <joshua@boniface.me>  Fri, 08 Mar 2024 14:23:06 -0500
+
 pvc (0.9.95-0) unstable; urgency=high
 
   * [API Daemon/CLI Client] Adds a flag to allow duplicate VNIs in network templates
@@ -33,7 +33,7 @@ import os
 import signal
 
 # Daemon version
-version = "0.9.95"
+version = "0.9.98"
 
 
 ##########################################################
@@ -19,6 +19,11 @@
 #
 ###############################################################################
 
+from sys import argv
 import pvcnoded.Daemon  # noqa: F401
 
+if "--version" in argv:
+    print(pvcnoded.Daemon.version)
+    exit(0)
+
 pvcnoded.Daemon.entrypoint()
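A sketch of how an update-motd.sh-style consumer could read the version non-interactively; the entrypoint path used here is an assumed example and will depend on how the node daemon is packaged:

```python
# Hypothetical consumer of the new --version flag; the entrypoint path is an
# assumed example, not the canonical installed location.
import subprocess

result = subprocess.run(
    ["python3", "/usr/share/pvc/pvcnoded.py", "--version"],
    capture_output=True,
    text=True,
)
print(f"PVC node daemon version: {result.stdout.strip()}")
```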
@@ -49,7 +49,7 @@ import re
 import json
 
 # Daemon version
-version = "0.9.95"
+version = "0.9.98"
 
 
 ##########################################################
@@ -231,7 +231,7 @@ class NetstatsInstance(object):
         # Get a list of all active interfaces
         net_root_path = "/sys/class/net"
         all_ifaces = list()
-        for (_, dirnames, _) in walk(net_root_path):
+        for _, dirnames, _ in walk(net_root_path):
             all_ifaces.extend(dirnames)
         all_ifaces.sort()
@@ -521,7 +521,7 @@ class NodeInstance(object):
         self.logger.out("Acquired write lock for synchronization phase F", state="o")
         time.sleep(0.2)  # Time for reader to acquire the lock
         # 4. Add gateway IPs
-        for network in self.d_network:
+        for network in self.d_network.copy():
             self.d_network[network].createGateways()
         self.logger.out("Releasing write lock for synchronization phase F", state="i")
         self.zkhandler.write([("base.config.primary_node.sync_lock", "")])
@@ -253,12 +253,16 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
         state="i",
         prefix=f"fencing {node_name}",
     )
-    ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(
+    (
+        ipmi_intermediate_status_retcode,
+        ipmi_intermediate_status_stdout,
+        ipmi_intermediate_status_stderr,
+    ) = common.run_os_command(
         f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
     )
-    if ipmi_status_retcode == 0:
+    if ipmi_intermediate_status_retcode == 0:
         logger.out(
-            f"Current chassis power state is: {ipmi_status_stdout.strip()}",
+            f"Current chassis power state is: {ipmi_intermediate_status_stdout.strip()}",
             state="i",
             prefix=f"fencing {node_name}",
         )
@@ -299,12 +303,14 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
         state="i",
         prefix=f"fencing {node_name}",
     )
-    ipmi_status_retcode, ipmi_status_stdout, ipmi_status_stderr = common.run_os_command(
-        f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
-    )
+    ipmi_final_status_retcode, ipmi_final_status_stdout, ipmi_final_status_stderr = (
+        common.run_os_command(
+            f"/usr/bin/ipmitool -I lanplus -H {ipmi_hostname} -U {ipmi_user} -P {ipmi_password} chassis power status"
+        )
+    )
 
     if ipmi_stop_retcode == 0:
-        if ipmi_status_stdout.strip() == "Chassis Power is on":
+        if ipmi_intermediate_status_stdout.strip() == "Chassis power is off":
+            if ipmi_final_status_stdout.strip() == "Chassis Power is on":
                # We successfully rebooted the node and it is powered on; this is a successful fence
                logger.out(
                    "Successfully rebooted dead node; proceeding with fence recovery action",
@@ -312,7 +318,7 @@ def reboot_via_ipmi(node_name, ipmi_hostname, ipmi_user, ipmi_password, logger):
                    prefix=f"fencing {node_name}",
                )
                return True
-        elif ipmi_status_stdout.strip() == "Chassis Power is off":
+            elif ipmi_final_status_stdout.strip() == "Chassis Power is off":
                # We successfully rebooted the node but it is powered off; this might be expected or not, but the node is confirmed off so we can call it a successful fence
                logger.out(
                    "Chassis power is in confirmed off state after successful IPMI reboot; proceeding with fence recovery action",
|
||||
else:
|
||||
# We successfully rebooted the node but it is in some unknown power state; since this might indicate a silent failure, we must call it a failed fence
|
||||
logger.out(
|
||||
f"Chassis power is in an unknown state ({ipmi_status_stdout.strip()}) after successful IPMI reboot; NOT proceeding fence recovery action",
|
||||
f"Chassis power is in an unknown state ({ipmi_final_status_stdout.strip()}) after successful IPMI reboot; NOT proceeding fence recovery action",
|
||||
state="e",
|
||||
prefix=f"fencing {node_name}",
|
||||
)
|
||||
return False
|
||||
else:
|
||||
if ipmi_status_stdout.strip() == "Chassis Power is off":
|
||||
if ipmi_final_status_stdout.strip() == "Chassis Power is off":
|
||||
# We failed to reboot the node but it is powered off; it has probably suffered a serious hardware failure, but the node is confirmed off so we can call it a successful fence
|
||||
logger.out(
|
||||
"Chassis power is in confirmed off state after failed IPMI reboot; proceeding with fence recovery action",
|
||||
|
@@ -743,7 +743,7 @@ def node_keepalive(logger, config, zkhandler, this_node, netstats):
     # Get node performance statistics
     this_node.memtotal = int(psutil.virtual_memory().total / 1024 / 1024)
     this_node.memused = int(psutil.virtual_memory().used / 1024 / 1024)
-    this_node.memfree = int(psutil.virtual_memory().free / 1024 / 1024)
+    this_node.memfree = int(psutil.virtual_memory().available / 1024 / 1024)
     this_node.cpuload = round(os.getloadavg()[0], 2)
 
     # Get node network statistics via netstats instance
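The rationale: `free` counts only fully unused pages, while `available` also includes reclaimable page cache, so `total - available` reflects actually allocated memory far better on a busy hypervisor. A standalone comparison:

```python
# Standalone illustration of why the keepalive now uses "available": on a
# busy host, free is tiny while available stays large, because page cache
# is reclaimable and should not count as allocated memory.
import psutil

vm = psutil.virtual_memory()
mib = 1024 * 1024
print(f"total:     {vm.total // mib} MiB")
print(f"free:      {vm.free // mib} MiB   (fully unused pages only)")
print(f"available: {vm.available // mib} MiB   (unused plus reclaimable cache)")
print(f"allocated: {(vm.total - vm.available) // mib} MiB")
```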
@@ -44,7 +44,7 @@ from daemon_lib.vmbuilder import (
 )
 
 # Daemon version
-version = "0.9.95"
+version = "0.9.98"
 
 
 config = cfg.get_configuration()