Add live migration max downtime meta field

Adds a new field to VM metadata to allow setting the VM live migration
max downtime. This allows the value to be raised for very busy VMs whose
live migrations would otherwise hang.
This commit is contained in:
2024-01-10 16:13:31 -05:00
parent 38eeb78423
commit 09269f182c
17 changed files with 283 additions and 30 deletions

View File

@ -46,7 +46,7 @@ from flask_sqlalchemy import SQLAlchemy
app = flask.Flask(__name__)
# Set up SQLAlchemy backend
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = True
app.config["SQLALCHEMY_DATABASE_URI"] = "postgresql://{}:{}@{}:{}/{}".format(
config["api_postgresql_user"],
config["api_postgresql_password"],
@ -1591,6 +1591,9 @@ class API_VM_Root(Resource):
migration_method:
type: string
description: The preferred migration method (live, shutdown, none)
migration_max_downtime:
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
tags:
type: array
description: The tag(s) of the VM
@ -1843,6 +1846,10 @@ class API_VM_Root(Resource):
"choices": ("live", "shutdown", "none"),
"helptext": "A valid migration_method must be specified",
},
{
"name": "migration_max_downtime",
"helptext": "A valid migration_max_downtime must be specified",
},
{"name": "user_tags", "action": "append"},
{"name": "protected_tags", "action": "append"},
{
@ -1903,6 +1910,12 @@ class API_VM_Root(Resource):
- live
- shutdown
- none
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
default: 300
- in: query
name: user_tags
type: array
@ -1943,6 +1956,7 @@ class API_VM_Root(Resource):
reqargs.get("selector", "none"),
bool(strtobool(reqargs.get("autostart", "false"))),
reqargs.get("migration_method", "none"),
reqargs.get("migration_max_downtime", 300),
user_tags,
protected_tags,
)
@ -1990,6 +2004,10 @@ class API_VM_Element(Resource):
"choices": ("live", "shutdown", "none"),
"helptext": "A valid migration_method must be specified",
},
{
"name": "migration_max_downtime",
"helptext": "A valid migration_max_downtime must be specified",
},
{"name": "user_tags", "action": "append"},
{"name": "protected_tags", "action": "append"},
{
@ -2052,6 +2070,12 @@ class API_VM_Element(Resource):
- live
- shutdown
- none
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
default: 300
- in: query
name: user_tags
type: array
@ -2092,6 +2116,7 @@ class API_VM_Element(Resource):
reqargs.get("selector", "none"),
bool(strtobool(reqargs.get("autostart", "false"))),
reqargs.get("migration_method", "none"),
reqargs.get("migration_max_downtime", 300),
user_tags,
protected_tags,
)
@ -2218,6 +2243,9 @@ class API_VM_Metadata(Resource):
migration_method:
type: string
description: The preferred migration method (live, shutdown, none)
migration_max_downtime:
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
404:
description: VM not found
schema:
@ -2241,6 +2269,10 @@ class API_VM_Metadata(Resource):
"choices": ("live", "shutdown", "none"),
"helptext": "A valid migration_method must be specified",
},
{
"name": "migration_max_downtime",
"helptext": "A valid migration_max_downtime must be specified",
},
]
)
@Authenticator
@ -2288,6 +2320,12 @@ class API_VM_Metadata(Resource):
- live
- shutdown
- none
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
default: none
responses:
200:
description: OK
@ -2312,6 +2350,7 @@ class API_VM_Metadata(Resource):
reqargs.get("autostart", None),
reqargs.get("profile", None),
reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
)
@ -6387,6 +6426,9 @@ class API_Provisioner_Template_System_Root(Resource):
migration_method:
type: string
description: The preferred migration method (live, shutdown, none)
migration_max_downtime:
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
parameters:
- in: query
name: limit
@ -6431,6 +6473,7 @@ class API_Provisioner_Template_System_Root(Resource):
{"name": "node_selector"},
{"name": "node_autostart"},
{"name": "migration_method"},
{"name": "migration_max_downtime"},
]
)
@Authenticator
@ -6491,6 +6534,11 @@ class API_Provisioner_Template_System_Root(Resource):
type: string
required: false
description: The preferred migration method (live, shutdown, none)
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
responses:
200:
description: OK
@ -6541,6 +6589,7 @@ class API_Provisioner_Template_System_Root(Resource):
reqargs.get("node_selector", None),
node_autostart,
reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
)
@ -6596,6 +6645,7 @@ class API_Provisioner_Template_System_Element(Resource):
{"name": "node_selector"},
{"name": "node_autostart"},
{"name": "migration_method"},
{"name": "migration_max_downtime"},
]
)
@Authenticator
@ -6651,6 +6701,11 @@ class API_Provisioner_Template_System_Element(Resource):
type: string
required: false
description: The preferred migration method (live, shutdown, none)
- in: query
name: migration_max_downtime
type: integer
required: false
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
responses:
200:
description: OK
@ -6701,6 +6756,7 @@ class API_Provisioner_Template_System_Element(Resource):
reqargs.get("node_selector", None),
node_autostart,
reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
)
@RequestParser(
@ -6714,6 +6770,7 @@ class API_Provisioner_Template_System_Element(Resource):
{"name": "node_selector"},
{"name": "node_autostart"},
{"name": "migration_method"},
{"name": "migration_max_downtime"},
]
)
@Authenticator
@ -6760,6 +6817,10 @@ class API_Provisioner_Template_System_Element(Resource):
name: migration_method
type: string
description: The preferred migration method (live, shutdown, none)
- in: query
name: migration_max_downtime
type: integer
description: The maximum time in milliseconds that a VM can be down for during a live migration; busy VMs may require a larger max_downtime
responses:
200:
description: OK
@ -6783,6 +6844,7 @@ class API_Provisioner_Template_System_Element(Resource):
reqargs.get("node_selector", None),
reqargs.get("node_autostart", None),
reqargs.get("migration_method", None),
reqargs.get("migration_max_downtime", None),
)
@Authenticator

View File

@ -641,6 +641,7 @@ def vm_define(
selector,
autostart,
migration_method,
migration_max_downtime=300,
user_tags=[],
protected_tags=[],
):
@ -668,6 +669,7 @@ def vm_define(
selector,
autostart,
migration_method,
migration_max_downtime,
profile=None,
tags=tags,
)
@ -826,6 +828,7 @@ def get_vm_meta(zkhandler, vm):
domain_node_selector,
domain_node_autostart,
domain_migrate_method,
domain_migrate_max_downtime,
) = pvc_common.getDomainMetadata(zkhandler, dom_uuid)
retcode = 200
@ -835,6 +838,7 @@ def get_vm_meta(zkhandler, vm):
"node_selector": domain_node_selector.lower(),
"node_autostart": domain_node_autostart,
"migration_method": domain_migrate_method.lower(),
"migration_max_downtime": int(domain_migrate_max_downtime),
}
return retdata, retcode
@ -842,7 +846,14 @@ def get_vm_meta(zkhandler, vm):
@ZKConnection(config)
def update_vm_meta(
zkhandler, vm, limit, selector, autostart, provisioner_profile, migration_method
zkhandler,
vm,
limit,
selector,
autostart,
provisioner_profile,
migration_method,
migration_max_downtime,
):
"""
Update metadata of a VM.
@ -858,7 +869,14 @@ def update_vm_meta(
autostart = False
retflag, retdata = pvc_vm.modify_vm_metadata(
zkhandler, vm, limit, selector, autostart, provisioner_profile, migration_method
zkhandler,
vm,
limit,
selector,
autostart,
provisioner_profile,
migration_method,
migration_max_downtime,
)
if retflag:

View File

@ -36,6 +36,7 @@ class DBSystemTemplate(db.Model):
node_selector = db.Column(db.Text)
node_autostart = db.Column(db.Boolean, nullable=False)
migration_method = db.Column(db.Text)
migration_max_downtime = db.Column(db.Integer, default=300, server_default="300")
ova = db.Column(db.Integer, db.ForeignKey("ova.id"), nullable=True)
def __init__(
@ -50,6 +51,7 @@ class DBSystemTemplate(db.Model):
node_selector,
node_autostart,
migration_method,
migration_max_downtime,
ova=None,
):
self.name = name
@ -62,6 +64,7 @@ class DBSystemTemplate(db.Model):
self.node_selector = node_selector
self.node_autostart = node_autostart
self.migration_method = migration_method
self.migration_max_downtime = migration_max_downtime
self.ova = ova
def __repr__(self):

View File

@ -221,6 +221,7 @@ def create_template_system(
node_selector=None,
node_autostart=False,
migration_method=None,
migration_max_downtime=None,
ova=None,
):
if list_template_system(name, is_fuzzy=False)[-1] != 404:
@ -231,7 +232,7 @@ def create_template_system(
if node_selector == "none":
node_selector = None
query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, migration_max_downtime, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
args = (
name,
vcpu_count,
@ -243,6 +244,7 @@ def create_template_system(
node_selector,
node_autostart,
migration_method,
migration_max_downtime,
ova,
)
@ -438,6 +440,7 @@ def modify_template_system(
node_selector=None,
node_autostart=None,
migration_method=None,
migration_max_downtime=None,
):
if list_template_system(name, is_fuzzy=False)[-1] != 200:
retmsg = {"message": 'The system template "{}" does not exist.'.format(name)}
@ -505,6 +508,11 @@ def modify_template_system(
if migration_method is not None:
fields.append({"field": "migration_method", "data": migration_method})
if migration_max_downtime is not None:
fields.append(
{"field": "migration_max_downtime", "data": int(migration_max_downtime)}
)
conn, cur = open_database(config)
try:
for field in fields: