Implement VM metadata and use it
Implements the storing of three VM metadata attributes:

1. Node limits - allows specifying a list of hosts on which the VM must run. This limit influences the migration behaviour of VMs.
2. Per-VM node selectors - allows each VM to have its own migration autoselection method, so the administrator can choose a different method per VM.
3. VM autorestart - allows a VM to be automatically restarted from a stopped state, usually after a failure to find a target node (whether due to limits or otherwise) during a flush/fence recovery, on the next unflush/ready state of its home hypervisor. Useful mostly in conjunction with limits, to ensure that VMs which were shut down because no valid migration target existed are started back up when their node becomes ready again.

Includes the full client interaction with these metadata options, including printing, as well as a new function to modify this metadata. For the CLI it is set/modified either on `vm define` or via the `vm meta` command. For the API it is set/modified either on a POST to the `/vm` endpoint (during VM definition) or on a POST to the `/vm/<vm>` endpoint. For the API this replaces the previous reserved word for VM creation from scratch, as that will no longer be implemented in-daemon (see #22).

Closes #52
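As a concrete illustration of the API interaction described above, here is a minimal sketch that POSTs the new metadata to the `/vm/<vm>` endpoint. The endpoint path comes from this commit message; the base URL and the form field names (`node_limit`, `node_selector`, `node_autostart`) are assumptions inferred from the Zookeeper keys in the diff below, not confirmed API parameters:

```python
# Hypothetical sketch: modifying the new VM metadata via the HTTP API.
# The endpoint path is from the commit message; the base URL and the
# field names are assumptions based on the Zookeeper keys in this commit.
import requests

API_BASE = 'http://pvc-api.local:7370/api/v1'  # assumed address and prefix

response = requests.post(
    '{}/vm/{}'.format(API_BASE, 'testvm'),
    data={
        'node_limit': 'hv1,hv2',    # hosts the VM must run on
        'node_selector': 'load',    # per-VM migration autoselection method
        'node_autostart': 'True',   # restart on next unflush/ready of its node
    },
)
response.raise_for_status()
```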
@@ -382,7 +382,7 @@ class NodeInstance(object):
             self.logger.out('Selecting target to migrate VM "{}"'.format(dom_uuid), state='i')
-            target_node = common.findTargetHypervisor(self.zk_conn, 'mem', dom_uuid)
+            target_node = common.findTargetHypervisor(self.zk_conn, self.config, dom_uuid)

             # Don't replace the previous node if the VM is already migrated
             if zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid)):
@@ -390,9 +390,10 @@ class NodeInstance(object):
             else:
                 current_node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid))

-            if target_node == None:
-                self.logger.out('Failed to find migration target for VM "{}"; shutting down'.format(dom_uuid), state='e')
+            if target_node is None:
+                self.logger.out('Failed to find migration target for VM "{}"; shutting down and setting autostart flag'.format(dom_uuid), state='e')
                 zkhandler.writedata(self.zk_conn, { '/domains/{}/state'.format(dom_uuid): 'shutdown' })
+                zkhandler.writedata(self.zk_conn, { '/domains/{}/node_autostart'.format(dom_uuid): 'True' })

             # Wait for the VM to shut down
             while zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(dom_uuid)) != 'stop':
@@ -427,6 +428,19 @@ class NodeInstance(object):
                 self.flush_stopper = False
                 return

+            # Handle autostarts
+            autostart = zkhandler.readdata(self.zk_conn, '/domains/{}/node_autostart'.format(dom_uuid))
+            node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid))
+            if autostart == 'True' and node == self.name:
+                self.logger.out('Starting autostart VM "{}"'.format(dom_uuid), state='i')
+                zkhandler.writedata(self.zk_conn, {
+                    '/domains/{}/state'.format(dom_uuid): 'start',
+                    '/domains/{}/node'.format(dom_uuid): self.name,
+                    '/domains/{}/lastnode'.format(dom_uuid): '',
+                    '/domains/{}/node_autostart'.format(dom_uuid): 'False'
+                })
+                continue
+
             try:
                 last_node = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid))
             except:
@@ -105,6 +105,10 @@ class VMInstance(object):
         self.domname = zkhandler.readdata(zk_conn, '/domains/{}'.format(domuuid))
         self.state = zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
         self.node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
+        try:
+            self.pinpolicy = zkhandler.readdata(self.zk_conn, '/domains/{}/pinpolicy'.format(self.domuuid))
+        except:
+            self.pinpolicy = "None"

         # These will all be set later
         self.instart = False
@@ -139,24 +139,44 @@ def removeIPAddress(ipaddr, cidrnetmask, dev):
 #
 # Find a migration target
 #
-def findTargetHypervisor(zk_conn, search_field, dom_uuid):
+def findTargetHypervisor(zk_conn, config, dom_uuid):
+    # Determine VM node limits; set config value if read fails
+    try:
+        node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(dom_uuid)).split(',')
+    except:
+        node_limit = None
+        zkhandler.writedata(zk_conn, { '/domains/{}/node_limit'.format(dom_uuid): 'None' })
+
+    # Determine VM search field or use the default; set config value if read fails
+    try:
+        search_field = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(dom_uuid))
+    except:
+        search_field = config.migration_target_selector
+        zkhandler.writedata(zk_conn, { '/domains/{}/node_selector'.format(dom_uuid): config.migration_target_selector })
+
+    # Execute the search
     if search_field == 'mem':
-        return findTargetHypervisorMem(zk_conn, dom_uuid)
+        return findTargetHypervisorMem(zk_conn, node_limit, dom_uuid)
     if search_field == 'load':
-        return findTargetHypervisorLoad(zk_conn, dom_uuid)
+        return findTargetHypervisorLoad(zk_conn, node_limit, dom_uuid)
     if search_field == 'vcpus':
-        return findTargetHypervisorVCPUs(zk_conn, dom_uuid)
+        return findTargetHypervisorVCPUs(zk_conn, node_limit, dom_uuid)
     if search_field == 'vms':
-        return findTargetHypervisorVMs(zk_conn, dom_uuid)
+        return findTargetHypervisorVMs(zk_conn, node_limit, dom_uuid)

     # Nothing was found
     return None

 # Get the list of valid target nodes
-def getHypervisors(zk_conn, dom_uuid):
+def getHypervisors(zk_conn, node_limit, dom_uuid):
     valid_node_list = []
     full_node_list = zkhandler.listchildren(zk_conn, '/nodes')
     current_node = zkhandler.readdata(zk_conn, '/domains/{}/node'.format(dom_uuid))

     for node in full_node_list:
+        if node_limit and node not in node_limit:
+            continue
+
         daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
         domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node))
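The node-limit filtering added to getHypervisors above is a plain membership test against the candidate list. Below is a minimal standalone sketch of the same selection flow, with a dict standing in for the Zookeeper node data; all node names, states, and values here are illustrative, not taken from a real cluster:

```python
# Minimal sketch of limit-aware target selection, mirroring getHypervisors.
# Node data is stubbed with a dict; in the daemon it comes from Zookeeper.
nodes = {
    'hv1': {'daemonstate': 'run', 'domainstate': 'ready'},
    'hv2': {'daemonstate': 'run', 'domainstate': 'ready'},
    'hv3': {'daemonstate': 'run', 'domainstate': 'flushed'},
}

def get_valid_nodes(node_limit, current_node):
    valid = []
    for name, data in nodes.items():
        if node_limit and name not in node_limit:
            continue  # skip nodes outside the VM's limit
        if name == current_node:
            continue  # never select the node the VM is already on
        if data['daemonstate'] != 'run' or data['domainstate'] != 'ready':
            continue  # skip unhealthy or flushed nodes
        valid.append(name)
    return valid

# With a limit of hv1,hv3 and the VM currently on hv1: hv2 is excluded by
# the limit, hv3 by its flushed state, so no valid target remains.
print(get_valid_nodes(['hv1', 'hv3'], 'hv1'))  # -> []
```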
@@ -171,11 +191,11 @@ def getHypervisors(zk_conn, dom_uuid):
     return valid_node_list

 # via free memory (relative to allocated memory)
-def findTargetHypervisorMem(zk_conn, dom_uuid):
+def findTargetHypervisorMem(zk_conn, node_limit, dom_uuid):
     most_allocfree = 0
     target_node = None

-    node_list = getHypervisors(zk_conn, dom_uuid)
+    node_list = getHypervisors(zk_conn, node_limit, dom_uuid)
     for node in node_list:
         memalloc = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(node)))
         memused = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node)))
@@ -190,11 +210,11 @@ def findTargetHypervisorMem(zk_conn, dom_uuid):
     return target_node

 # via load average
-def findTargetHypervisorLoad(zk_conn, dom_uuid):
+def findTargetHypervisorLoad(zk_conn, node_limit, dom_uuid):
     least_load = 9999
     target_node = None

-    node_list = getHypervisors(zk_conn, dom_uuid)
+    node_list = getHypervisors(zk_conn, node_limit, dom_uuid)
     for node in node_list:
         load = int(zkhandler.readdata(zk_conn, '/nodes/{}/load'.format(node)))
@@ -205,11 +225,11 @@ def findTargetHypervisorLoad(zk_conn, dom_uuid):
     return target_node

 # via total vCPUs
-def findTargetHypervisorVCPUs(zk_conn, dom_uuid):
+def findTargetHypervisorVCPUs(zk_conn, node_limit, dom_uuid):
     least_vcpus = 9999
     target_node = None

-    node_list = getHypervisors(zk_conn, dom_uuid)
+    node_list = getHypervisors(zk_conn, node_limit, dom_uuid)
     for node in node_list:
         vcpus = int(zkhandler.readdata(zk_conn, '/nodes/{}/vcpualloc'.format(node)))
@@ -220,11 +240,11 @@ def findTargetHypervisorVCPUs(zk_conn, dom_uuid):
     return target_node

 # via total VMs
-def findTargetHypervisorVMs(zk_conn, dom_uuid):
+def findTargetHypervisorVMs(zk_conn, node_limit, dom_uuid):
     least_vms = 9999
     target_node = None

-    node_list = getHypervisors(zk_conn, dom_uuid)
+    node_list = getHypervisors(zk_conn, node_limit, dom_uuid)
     for node in node_list:
         vms = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node)))
@@ -67,27 +67,34 @@ def fenceNode(node_name, zk_conn, config, logger):

     # If the fence succeeded and successful_fence is migrate
     if fence_status == True and config['successful_fence'] == 'migrate':
-        migrateFromFencedNode(zk_conn, node_name, logger)
+        migrateFromFencedNode(zk_conn, node_name, config, logger)

     # If the fence failed and failed_fence is migrate
     if fence_status == False and config['failed_fence'] == 'migrate' and config['suicide_intervals'] != '0':
-        migrateFromFencedNode(zk_conn, node_name, logger)
+        migrateFromFencedNode(zk_conn, node_name, config, logger)

 # Migrate hosts away from a fenced node
-def migrateFromFencedNode(zk_conn, node_name, logger):
+def migrateFromFencedNode(zk_conn, node_name, config, logger):
     logger.out('Migrating VMs from dead node "{}" to new hosts'.format(node_name), state='i')
     dead_node_running_domains = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split()
     for dom_uuid in dead_node_running_domains:
         VMInstance.flush_locks(zk_conn, logger, dom_uuid)

-        target_node = common.findTargetHypervisor(zk_conn, 'mem', dom_uuid)
+        target_node = common.findTargetHypervisor(zk_conn, config, dom_uuid)

-        logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
-        zkhandler.writedata(zk_conn, {
-            '/domains/{}/state'.format(dom_uuid): 'start',
-            '/domains/{}/node'.format(dom_uuid): target_node,
-            '/domains/{}/lastnode'.format(dom_uuid): node_name
-        })
+        if target_node is not None:
+            logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
+            zkhandler.writedata(zk_conn, {
+                '/domains/{}/state'.format(dom_uuid): 'start',
+                '/domains/{}/node'.format(dom_uuid): target_node,
+                '/domains/{}/lastnode'.format(dom_uuid): node_name
+            })
+        else:
+            logger.out('No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(dom_uuid), state='i')
+            zkhandler.writedata(zk_conn, {
+                '/domains/{}/state'.format(dom_uuid): 'stopped',
+                '/domains/{}/node_autostart'.format(dom_uuid): 'True'
+            })

     # Set node in flushed state for easy remigrating when it comes back
     zkhandler.writedata(zk_conn, { '/nodes/{}/domainstate'.format(node_name): 'flushed' })
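Together with the NodeInstance changes above, this gives node_autostart a simple round trip: a fence (or flush) that finds no valid target parks the VM as stopped with the flag set, and the next unflush/ready of its home node consumes the flag and starts the VM. Here is a toy sketch of that life cycle, with a plain dict standing in for the Zookeeper handler (function names and the sample UUID are illustrative only):

```python
# Toy round trip of the node_autostart flag, using a dict in place of Zookeeper.
store = {}

def fence_with_no_target(dom_uuid):
    # No valid migration target: park the VM and set the autostart flag
    store['/domains/{}/state'.format(dom_uuid)] = 'stopped'
    store['/domains/{}/node_autostart'.format(dom_uuid)] = 'True'

def on_unflush(dom_uuid, node_name):
    # On the home node's next unflush/ready, consume the flag and start the VM
    if store.get('/domains/{}/node_autostart'.format(dom_uuid)) == 'True':
        store['/domains/{}/state'.format(dom_uuid)] = 'start'
        store['/domains/{}/node'.format(dom_uuid)] = node_name
        store['/domains/{}/node_autostart'.format(dom_uuid)] = 'False'

fence_with_no_target('test-uuid')
on_unflush('test-uuid', 'hv1')
assert store['/domains/test-uuid/state'] == 'start'
```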