Compare commits

...

76 Commits

Author SHA1 Message Date
cf96bb009f Bump version to 0.9.19 2021-06-06 01:47:41 -04:00
719954b70b Fix missing list comma 2021-06-06 01:39:43 -04:00
f0dc0fc782 Avoid duplicating maintenance state change
This makes no functional difference, but is technically more correct.
2021-06-05 01:36:40 -04:00
5d88e92acc Avoid returning errors with duplicate router mode
Like the previous (new) flush change, these shouldn't be errors, but
simply informational "what you want is already done" messages.
2021-06-05 01:14:31 -04:00
505c109875 Avoid re-flush or re-ready nodes if unnecessary 2021-06-05 01:08:32 -04:00
3eedfaa7d5 Collect database model error 2021-06-03 00:22:48 -04:00
7de7e1bc71 Properly handle cluster networks in provisioner 2021-06-02 15:57:46 -04:00
34ef055954 Adjust VNI column for provisioner to text
Allows the storing of the textual cluster labels (e.g. 'upstream') as
valid VNI values in the template.
2021-06-02 15:45:22 -04:00
7dea5d2fac Move logger to common, fix buffering 2021-06-01 18:50:26 -04:00
3a5226b893 Add missing flushed output 2021-06-01 18:30:18 -04:00
de2ff2e01b Fix removed function args 2021-06-01 17:02:36 -04:00
cd75413667 Increase initial lock timer
With the new library the reader seems to be a little too quick, so hold
the write lock for 1 second instead of 1/2 second to ensure it is
caught.
2021-06-01 17:00:11 -04:00
9764090d6d Merge node common with daemon common 2021-06-01 12:22:11 -04:00
f73c433fc7 Remove useless try and import 2021-06-01 12:05:17 -04:00
12ac3686de Convert missed elements to new zkhandler 2021-06-01 11:57:21 -04:00
5740d0f2d5 Remove obsolete zkhandler.py 2021-06-01 11:55:44 -04:00
889f4cdf47 Convert common to new zkhandler 2021-06-01 11:55:32 -04:00
8f66a8d00e Fix missed zkhandler conversion 2021-06-01 11:53:33 -04:00
6beea0693c Convert fencing to new zkhandler 2021-06-01 11:53:21 -04:00
1c9a7a6479 Convert VXNetworkInstance to new zkhandler 2021-06-01 11:49:39 -04:00
790098f181 Convert VMInstance to new zkhandler 2021-06-01 11:46:27 -04:00
8a4a41e092 Convert NodeInstance to new zkhandler 2021-06-01 11:27:35 -04:00
a48bf2d71e More gracefully handle none selectors
Allow selection of "none" as the node selector, and handle this by
always using the cluster default instead of writing it in.
2021-06-01 11:13:13 -04:00
a0b9087167 Set Daemon migration selector in zookeeper 2021-06-01 10:52:41 -04:00
33a54cf7f2 Move configuration keys to /config tree 2021-06-01 10:48:55 -04:00
d6a8cf9780 Convert MetadataAPIInstance to new zkhandler 2021-05-31 19:55:09 -04:00
abd619a3c1 Convert DNSAggregatorInstance to new zkhandler 2021-05-31 19:55:01 -04:00
ef5fe78125 Convert CephInstance to new zkhandler 2021-05-31 19:51:27 -04:00
f6d0e89568 Properly add absent node type 2021-05-31 19:26:27 -04:00
d3b5b5236a Remove transactional delete
This just doesn't work due to the darn limit on recursive deletes in
transactions.
2021-05-31 19:22:01 -04:00
8625e9bd3e Update Delete to recursive method 2021-05-31 03:14:09 -04:00
ede3e88cd7 Modify node daemon root to use updated zkhandler 2021-05-31 03:14:09 -04:00
ed4f84a3ec Add log handling and persistent listener 2021-05-31 03:14:09 -04:00
a1969eb981 Allow overwrite during init command 2021-05-31 00:12:28 -04:00
c7992000eb Explicitly output JSON cluster data 2021-05-30 23:50:42 -04:00
a1e8cc5867 Skip patroni tree during backups 2021-05-30 23:39:37 -04:00
ac0c3b0ec9 Ensure temp_dir exists before starting
Otherwise some failures throw the wrong error.
2021-05-30 16:04:38 -04:00
60db800d9c Use full ZKHandler in provisioner
Required due to references to self from Celery that are replaced by the
ZKConnection self instance.
2021-05-30 15:59:37 -04:00
9be426507a Fix erroneous lock calls 2021-05-30 15:31:17 -04:00
58a5b00aa1 Remove extraneous zkhandler reference 2021-05-30 01:01:40 -04:00
73407e245f Move startup code to an entrypoint function
Prevents further issues with startup.
2021-05-30 00:18:04 -04:00
25f80a4478 Move API version string location to Daemon
Prevents a startup bug with pvcapid-manage.py.
2021-05-30 00:11:24 -04:00
c23a53d082 Add daemon_lib symlink to pvcnoded 2021-05-30 00:00:07 -04:00
b4f2cf879e Rework vm library for new zkhandler 2021-05-29 21:17:19 -04:00
3603b782c0 Rework node library for new zkhandler 2021-05-29 20:56:21 -04:00
62cb72b62f Rework network library for new zkhandler 2021-05-29 20:53:42 -04:00
b186a75b4e Rework common library for new zkhandler 2021-05-29 20:35:28 -04:00
6205dba451 Rework cluster library for new zkhandler 2021-05-29 20:32:20 -04:00
688d1a6ae2 Rework ceph library for new zkhandler 2021-05-29 20:29:51 -04:00
163015bd4a Port remaining helper functions to ZKConnection 2021-05-29 00:30:42 -04:00
49bbad8021 Port provisioner to ZKConnection 2021-05-29 00:26:15 -04:00
2c0bafc313 Port benchmark to ZKConnection 2021-05-29 00:24:53 -04:00
1963f2c336 Convert OVA helper to ZKConnection 2021-05-29 00:22:06 -04:00
9cd121ef9f Convert remaining VM functions 2021-05-29 00:16:26 -04:00
ea63a58b21 Port two more functions to new decorator 2021-05-28 23:38:53 -04:00
0eceec0341 Disable SQLAlchemy modification tracking 2021-05-28 23:36:36 -04:00
c6bececb55 Revamp config parsing and imports
Brings sanity to the passing of the config variable around the various
submodules for use in the ZKConnection decorator.
2021-05-28 23:33:36 -04:00
4554a0d6af Add line break to lint output 2021-05-28 00:20:03 -04:00
f82da03a62 Add first wrappers and exceptions 2021-05-28 00:19:39 -04:00
fef230ad98 Implement class-based version of zkhandler 2021-05-27 22:50:00 -04:00
3128c8fa70 Correct flawed conditional in some commands 2021-05-25 09:59:20 -04:00
0c75a127b2 Bump version to 0.9.18 2021-05-23 17:23:10 -04:00
f46c2e7f6a Implement VM rename functionality
Closes #125
2021-05-23 17:21:19 -04:00
9de14c46fb Bump version to 0.9.17 2021-05-19 17:06:29 -04:00
1b8b101b64 Fix bugs in log follow command 2021-05-19 16:22:48 -04:00
fe15bdb854 Bump version to 0.9.16 2021-05-10 01:13:21 -04:00
b851a6209c Catch all other exceptions in subprocess run
Found a rare glitch where the subprocess pipes would not engage, causing
a daemon crash. Catch these exceptions with a retcode of 255 instead of
bailing out.

Closes #124
2021-05-10 01:07:25 -04:00
5ceb57e540 Handle emptying corrupted console log files
Libvirt will sometimes write junk out to console log files, which breaks
the log parser deque with a UnicodeDecodeError.

If this happens, clear the log and re-open the deque again for newer
updates.

Closes #123
2021-05-10 01:03:04 -04:00
62c84664fc Handle restart confirmation for VMs more nicely
For the "vm modify", revamp the way confirmations are presented. Do the
edits/load, show changes, verify XML, then prompt to write and the
restart. The previous order didn't make much sense.

For any of these `--restart` triggered VM modifications, also alter how
the confirmation works. If the user declines the restart, do not abort;
instead, just set restart=False and continue with the modification.
2021-04-13 10:35:26 -04:00
66f1ac35ab Skip an empty local cluster in cluster list 2021-04-13 10:01:49 -04:00
529f99841d Improve formatting of some help messages 2021-04-12 15:55:59 -04:00
6246b8dfb3 Fix help message output on root command 2021-04-08 14:27:55 -04:00
669338c22b Bump version to 0.9.15 2021-04-08 13:37:47 -04:00
629cf62385 Add confirmation flag to disruptive VM operations
Also add some additional output when --restart is not selected.

Closes #118
2021-04-08 13:33:10 -04:00
dfa3432601 Add unsafe envvar/flag option
Allows another way (beyond --yes) to avoid confirming "unsafe"
operations. While there is probably nearly zero use case for this (at
least for any sane admin), it is provided to allow maximum flexibility.
2021-04-08 12:48:38 -04:00
62213fab99 Add description field to CLI clusters
Allow specifying a textual description of the cluster in the client for
ease of management.
2021-04-08 12:28:23 -04:00
37 changed files with 2747 additions and 2459 deletions

View File

@ -5,7 +5,7 @@ pushd $( git rev-parse --show-toplevel ) &>/dev/null
ex=0
# Linting
echo -n "Linting... "
echo "Linting..."
./lint
if [[ $? -ne 0 ]]; then
echo "Aborting commit due to linting errors."

View File

@ -20,6 +20,40 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r
## Changelog
#### v0.9.19
* [CLI] Corrects some flawed conditionals
* [API] Disables SQLAlchemy modification tracking functionality (not used by us)
* [Daemons] Implements new zkhandler module for improved reliability and reusability
* [Daemons] Refactors some code to use new zkhandler module
* [API, CLI] Adds support for "none" migration selector (uses cluster default instead)
* [Daemons] Moves some configuration keys to new /config tree
* [Node Daemon] Increases initial lock timeout for VM migrations to avoid out-of-sync potential
* [Provisioner] Support storing and using textual cluster network labels ("upstream", "storage", "cluster") in templates
* [API] Avoid duplicating existing node states
#### v0.9.18
* Adds VM rename functionality to API and CLI client
#### v0.9.17
* [CLI] Fixes bugs in log follow output
#### v0.9.16
* Improves some CLI help messages
* Skips empty local cluster in CLI
* Adjusts how confirmations happen during VM modify restarts
* Fixes bug around corrupted VM log files
* Fixes bug around subprocess pipe exceptions
#### v0.9.15
* [CLI] Adds additional verification (--yes) to several VM management commands
* [CLI] Adds a method to override --yes/confirmation requirements via envvar (PVC_UNSAFE)
* [CLI] Adds description fields to PVC clusters in CLI
#### v0.9.14
* Fixes bugs around cloned volume provisioning

View File

@ -0,0 +1,28 @@
"""PVC version 0.9.18
Revision ID: bae4d5a77c74
Revises: 3efe890e1d87
Create Date: 2021-06-02 15:41:40.061806
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'bae4d5a77c74'
down_revision = '3efe890e1d87'
branch_labels = None
depends_on = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.execute('ALTER TABLE network ALTER COLUMN vni TYPE TEXT')
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.execute('ALTER TABLE network ALTER COLUMN vni TYPE INTEGER USING vni::integer')
# ### end Alembic commands ###
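Assuming PVC applies these with the standard Flask-Migrate workflow (the pvcapid-manage.py wrapper is referenced in the commits above), the upgrade would be run with something like `./pvcapid-manage.py db upgrade`. Note that the downgrade's `USING vni::integer` cast will fail outright if any textual labels such as 'upstream' have already been stored, so those rows would need to be removed before downgrading.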

View File

@ -20,3 +20,5 @@
###############################################################################
import pvcapid.Daemon # noqa: F401
pvcapid.Daemon.entrypoint()

View File

@ -19,37 +19,119 @@
#
###############################################################################
import pvcapid.flaskapi as pvc_api
import os
import yaml
from distutils.util import strtobool as dustrtobool
# Daemon version
version = '0.9.19'
# API version
API_VERSION = 1.0
##########################################################
# Helper Functions
##########################################################
def strtobool(stringv):
if stringv is None:
return False
if isinstance(stringv, bool):
return bool(stringv)
try:
return bool(dustrtobool(stringv))
except Exception:
return False
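The point of this wrapper, versus calling distutils' strtobool directly, is that it never raises: None, existing booleans, and unrecognized strings all degrade to a plain boolean (e.g. strtobool('garbage') returns False rather than throwing ValueError).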
##########################################################
# Configuration Parsing
##########################################################
# Parse the configuration file
try:
pvcapid_config_file = os.environ['PVC_CONFIG_FILE']
except Exception:
print('Error: The "PVC_CONFIG_FILE" environment variable must be set before starting pvcapid.')
exit(1)
print('Loading configuration from file "{}"'.format(pvcapid_config_file))
# Read in the config
try:
with open(pvcapid_config_file, 'r') as cfgfile:
o_config = yaml.load(cfgfile, Loader=yaml.BaseLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
exit(1)
try:
# Create the config object
config = {
'debug': strtobool(o_config['pvc']['debug']),
'coordinators': o_config['pvc']['coordinators'],
'listen_address': o_config['pvc']['api']['listen_address'],
'listen_port': int(o_config['pvc']['api']['listen_port']),
'auth_enabled': strtobool(o_config['pvc']['api']['authentication']['enabled']),
'auth_secret_key': o_config['pvc']['api']['authentication']['secret_key'],
'auth_tokens': o_config['pvc']['api']['authentication']['tokens'],
'ssl_enabled': strtobool(o_config['pvc']['api']['ssl']['enabled']),
'ssl_key_file': o_config['pvc']['api']['ssl']['key_file'],
'ssl_cert_file': o_config['pvc']['api']['ssl']['cert_file'],
'database_host': o_config['pvc']['provisioner']['database']['host'],
'database_port': int(o_config['pvc']['provisioner']['database']['port']),
'database_name': o_config['pvc']['provisioner']['database']['name'],
'database_user': o_config['pvc']['provisioner']['database']['user'],
'database_password': o_config['pvc']['provisioner']['database']['pass'],
'queue_host': o_config['pvc']['provisioner']['queue']['host'],
'queue_port': o_config['pvc']['provisioner']['queue']['port'],
'queue_path': o_config['pvc']['provisioner']['queue']['path'],
'storage_hosts': o_config['pvc']['provisioner']['ceph_cluster']['storage_hosts'],
'storage_domain': o_config['pvc']['provisioner']['ceph_cluster']['storage_domain'],
'ceph_monitor_port': o_config['pvc']['provisioner']['ceph_cluster']['ceph_monitor_port'],
'ceph_storage_secret_uuid': o_config['pvc']['provisioner']['ceph_cluster']['ceph_storage_secret_uuid']
}
# Use coordinators as storage hosts if not explicitly specified
if not config['storage_hosts']:
config['storage_hosts'] = config['coordinators']
except Exception as e:
print('ERROR: Failed to load configuration: {}'.format(e))
exit(1)
##########################################################
# Entrypoint
##########################################################
# Version string for startup output
version = '0.9.14'
def entrypoint():
import pvcapid.flaskapi as pvc_api # noqa: E402
if pvc_api.config['ssl_enabled']:
context = (pvc_api.config['ssl_cert_file'], pvc_api.config['ssl_key_file'])
else:
context = None
if config['ssl_enabled']:
context = (config['ssl_cert_file'], config['ssl_key_file'])
else:
context = None
# Print our startup messages
print('')
print('|--------------------------------------------------|')
print('| ######## ## ## ###### |')
print('| ## ## ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ######## ## ## ## |')
print('| ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ## ### ###### |')
print('|--------------------------------------------------|')
print('| Parallel Virtual Cluster API daemon v{0: <11} |'.format(version))
print('| API version: v{0: <34} |'.format(pvc_api.API_VERSION))
print('| Listen: {0: <40} |'.format('{}:{}'.format(pvc_api.config['listen_address'], pvc_api.config['listen_port'])))
print('| SSL: {0: <43} |'.format(str(pvc_api.config['ssl_enabled'])))
print('| Authentication: {0: <32} |'.format(str(pvc_api.config['auth_enabled'])))
print('|--------------------------------------------------|')
print('')
# Print our startup messages
print('')
print('|--------------------------------------------------|')
print('| ######## ## ## ###### |')
print('| ## ## ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ######## ## ## ## |')
print('| ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ## ### ###### |')
print('|--------------------------------------------------|')
print('| Parallel Virtual Cluster API daemon v{0: <11} |'.format(version))
print('| API version: v{0: <34} |'.format(API_VERSION))
print('| Listen: {0: <40} |'.format('{}:{}'.format(config['listen_address'], config['listen_port'])))
print('| SSL: {0: <43} |'.format(str(config['ssl_enabled'])))
print('| Authentication: {0: <32} |'.format(str(config['auth_enabled'])))
print('|--------------------------------------------------|')
print('')
pvc_api.app.run(pvc_api.config['listen_address'], pvc_api.config['listen_port'], threaded=True, ssl_context=context)
pvc_api.app.run(config['listen_address'], config['listen_port'], threaded=True, ssl_context=context)
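For orientation, here is a minimal pvcapid.yaml sketch matching the keys the configuration block above parses. Values are illustrative placeholders, not documented defaults; since yaml.BaseLoader returns every scalar as a string, booleans pass through the strtobool wrapper:

    # Illustrative sketch only; layout follows the parsing code above.
    pvc:
      debug: 'false'
      coordinators:
        - hv1.example.tld
        - hv2.example.tld
        - hv3.example.tld
      api:
        listen_address: 0.0.0.0
        listen_port: 7370
        authentication:
          enabled: 'true'
          secret_key: changeme-secret
          tokens:
            - description: admin
              token: changeme-token
        ssl:
          enabled: 'false'
          key_file: ''
          cert_file: ''
      provisioner:
        database:
          host: localhost
          port: 5432
          name: pvcapi
          user: pvcapi
          pass: changeme-password
        queue:
          host: localhost
          port: 6379
          path: /0
        ceph_cluster:
          storage_hosts: []  # empty: falls back to the coordinators above
          storage_domain: storage.example.tld
          ceph_monitor_port: 6789
          ceph_storage_secret_uuid: ''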

View File

@ -22,24 +22,13 @@
import psycopg2
import psycopg2.extras
from distutils.util import strtobool as dustrtobool
from pvcapid.Daemon import config
from daemon_lib.zkhandler import ZKConnection
import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph
config = None # Set in this namespace by flaskapi
def strtobool(stringv):
if stringv is None:
return False
if isinstance(stringv, bool):
return bool(stringv)
try:
return bool(dustrtobool(stringv))
except Exception:
return False
#
# Exceptions (used by Celery tasks)
@ -48,7 +37,7 @@ class BenchmarkError(Exception):
"""
An exception that results from the Benchmark job.
"""
def __init__(self, message, cur_time=None, db_conn=None, db_cur=None, zk_conn=None):
def __init__(self, message, cur_time=None, db_conn=None, db_cur=None, zkhandler=None):
self.message = message
if cur_time is not None:
# Clean up our dangling result
@ -58,7 +47,7 @@ class BenchmarkError(Exception):
db_conn.commit()
# Close the database connections cleanly
close_database(db_conn, db_cur)
pvc_common.stopZKConnection(zk_conn)
zkhandler.disconnect()
def __str__(self):
return str(self.message)
@ -114,7 +103,8 @@ def list_benchmarks(job=None):
return {'message': 'No benchmark found.'}, 404
def run_benchmark(self, pool):
@ZKConnection(config)
def run_benchmark(self, zkhandler, pool):
# Runtime imports
import time
import json
@ -133,12 +123,6 @@ def run_benchmark(self, pool):
print('FATAL - failed to connect to Postgres')
raise Exception
try:
zk_conn = pvc_common.startZKConnection(config['coordinators'])
except Exception:
print('FATAL - failed to connect to Zookeeper')
raise Exception
print("Storing running status for job '{}' in database".format(cur_time))
try:
query = "INSERT INTO storage_benchmarks (job, result) VALUES (%s, %s);"
@ -146,7 +130,7 @@ def run_benchmark(self, pool):
db_cur.execute(query, args)
db_conn.commit()
except Exception as e:
raise BenchmarkError("Failed to store running status: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
raise BenchmarkError("Failed to store running status: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
# Phase 1 - volume preparation
self.update_state(state='RUNNING', meta={'current': 1, 'total': 3, 'status': 'Creating benchmark volume'})
@ -155,9 +139,9 @@ def run_benchmark(self, pool):
volume = 'pvcbenchmark'
# Create the RBD volume
retcode, retmsg = pvc_ceph.add_volume(zk_conn, pool, volume, "8G")
retcode, retmsg = pvc_ceph.add_volume(zkhandler, pool, volume, "8G")
if not retcode:
raise BenchmarkError('Failed to create volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
raise BenchmarkError('Failed to create volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
else:
print(retmsg)
@ -244,7 +228,7 @@ def run_benchmark(self, pool):
retcode, stdout, stderr = pvc_common.run_os_command(fio_cmd)
if retcode:
raise BenchmarkError("Failed to run fio test: {}".format(stderr), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
raise BenchmarkError("Failed to run fio test: {}".format(stderr), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
# Parse the terse results to avoid storing tons of junk
# Reference: https://fio.readthedocs.io/en/latest/fio_doc.html#terse-output
@ -445,9 +429,9 @@ def run_benchmark(self, pool):
time.sleep(1)
# Remove the RBD volume
retcode, retmsg = pvc_ceph.remove_volume(zk_conn, pool, volume)
retcode, retmsg = pvc_ceph.remove_volume(zkhandler, pool, volume)
if not retcode:
raise BenchmarkError('Failed to remove volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
raise BenchmarkError('Failed to remove volume "{}": {}'.format(volume, retmsg), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
else:
print(retmsg)
@ -458,8 +442,7 @@ def run_benchmark(self, pool):
db_cur.execute(query, args)
db_conn.commit()
except Exception as e:
raise BenchmarkError("Failed to store test results: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zk_conn=zk_conn)
raise BenchmarkError("Failed to store test results: {}".format(e), cur_time=cur_time, db_conn=db_conn, db_cur=db_cur, zkhandler=zkhandler)
close_database(db_conn, db_cur)
pvc_common.stopZKConnection(zk_conn)
return {'status': "Storage benchmark '{}' completed successfully.", 'current': 3, 'total': 3}

View File

@ -19,15 +19,14 @@
#
###############################################################################
import yaml
import os
import flask
from distutils.util import strtobool as dustrtobool
from functools import wraps
from flask_restful import Resource, Api, reqparse, abort
from celery import Celery
from pvcapid.Daemon import config, strtobool, API_VERSION
import pvcapid.helper as api_helper
import pvcapid.provisioner as api_provisioner
import pvcapid.benchmark as api_benchmark
@ -35,84 +34,12 @@ import pvcapid.ova as api_ova
from flask_sqlalchemy import SQLAlchemy
API_VERSION = 1.0
def strtobool(stringv):
if stringv is None:
return False
if isinstance(stringv, bool):
return bool(stringv)
try:
return bool(dustrtobool(stringv))
except Exception:
return False
# Parse the configuration file
try:
pvcapid_config_file = os.environ['PVC_CONFIG_FILE']
except Exception:
print('Error: The "PVC_CONFIG_FILE" environment variable must be set before starting pvcapid.')
exit(1)
print('Loading configuration from file "{}"'.format(pvcapid_config_file))
# Read in the config
try:
with open(pvcapid_config_file, 'r') as cfgfile:
o_config = yaml.load(cfgfile, Loader=yaml.BaseLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
exit(1)
try:
# Create the config object
config = {
'debug': strtobool(o_config['pvc']['debug']),
'coordinators': o_config['pvc']['coordinators'],
'listen_address': o_config['pvc']['api']['listen_address'],
'listen_port': int(o_config['pvc']['api']['listen_port']),
'auth_enabled': strtobool(o_config['pvc']['api']['authentication']['enabled']),
'auth_secret_key': o_config['pvc']['api']['authentication']['secret_key'],
'auth_tokens': o_config['pvc']['api']['authentication']['tokens'],
'ssl_enabled': strtobool(o_config['pvc']['api']['ssl']['enabled']),
'ssl_key_file': o_config['pvc']['api']['ssl']['key_file'],
'ssl_cert_file': o_config['pvc']['api']['ssl']['cert_file'],
'database_host': o_config['pvc']['provisioner']['database']['host'],
'database_port': int(o_config['pvc']['provisioner']['database']['port']),
'database_name': o_config['pvc']['provisioner']['database']['name'],
'database_user': o_config['pvc']['provisioner']['database']['user'],
'database_password': o_config['pvc']['provisioner']['database']['pass'],
'queue_host': o_config['pvc']['provisioner']['queue']['host'],
'queue_port': o_config['pvc']['provisioner']['queue']['port'],
'queue_path': o_config['pvc']['provisioner']['queue']['path'],
'storage_hosts': o_config['pvc']['provisioner']['ceph_cluster']['storage_hosts'],
'storage_domain': o_config['pvc']['provisioner']['ceph_cluster']['storage_domain'],
'ceph_monitor_port': o_config['pvc']['provisioner']['ceph_cluster']['ceph_monitor_port'],
'ceph_storage_secret_uuid': o_config['pvc']['provisioner']['ceph_cluster']['ceph_storage_secret_uuid']
}
# Use coordinators as storage hosts if not explicitly specified
if not config['storage_hosts']:
config['storage_hosts'] = config['coordinators']
# Set the config object in the api_helper namespace
api_helper.config = config
# Set the config object in the api_provisioner namespace
api_provisioner.config = config
# Set the config object in the api_benchmark namespace
api_benchmark.config = config
# Set the config object in the api_ova namespace
api_ova.config = config
except Exception as e:
print('ERROR: Failed to load configuration: {}'.format(e))
exit(1)
# Create Flask app and set config values
app = flask.Flask(__name__)
app.config['CELERY_BROKER_URL'] = 'redis://{}:{}{}'.format(config['queue_host'], config['queue_port'], config['queue_path'])
app.config['CELERY_RESULT_BACKEND'] = 'redis://{}:{}{}'.format(config['queue_host'], config['queue_port'], config['queue_path'])
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql://{}:{}@{}:{}/{}'.format(config['database_user'], config['database_password'], config['database_host'], config['database_port'], config['database_name'])
if config['debug']:
@ -333,17 +260,27 @@ api.add_resource(API_Logout, '/logout')
# /initialize
class API_Initialize(Resource):
@RequestParser([
{'name': 'yes-i-really-mean-it', 'required': True, 'helptext': "Initialization is destructive; please confirm with the argument 'yes-i-really-mean-it'."}
{'name': 'overwrite', 'required': False},
{'name': 'yes-i-really-mean-it', 'required': True, 'helptext': "Initialization is destructive; please confirm with the argument 'yes-i-really-mean-it'."},
])
@Authenticator
def post(self, reqargs):
"""
Initialize a new PVC cluster
Note: Normally used only once during cluster bootstrap; checks for the existence of the "/primary_node" key before proceeding and returns 400 if found
If the 'overwrite' option is not True, the cluster will return 400 if the `/config/primary_node` key is found. If 'overwrite' is True, the existing cluster
data will be erased and new, empty data written in its place.
All node daemons should be stopped before running this command, and the API daemon started manually to avoid undefined behavior.
---
tags:
- root
parameters:
- in: query
name: overwrite
type: bool
required: false
description: A flag to enable or disable (default) overwriting existing data
- in: query
name: yes-i-really-mean-it
type: string
@ -362,7 +299,10 @@ class API_Initialize(Resource):
400:
description: Bad request
"""
if api_helper.initialize_cluster():
overwrite_flag = False
if reqargs.get('overwrite', False):
overwrite_flag = True
if api_helper.initialize_cluster(overwrite=overwrite_flag):
return {"message": "Successfully initialized a new PVC cluster"}, 200
else:
return {"message": "PVC cluster already initialized"}, 400
@ -1162,7 +1102,7 @@ class API_VM_Root(Resource):
@RequestParser([
{'name': 'limit'},
{'name': 'node'},
{'name': 'selector', 'choices': ('mem', 'vcpus', 'load', 'vms'), 'helptext': "A valid selector must be specified"},
{'name': 'selector', 'choices': ('mem', 'vcpus', 'load', 'vms', 'none'), 'helptext': "A valid selector must be specified"},
{'name': 'autostart'},
{'name': 'migration_method', 'choices': ('live', 'shutdown', 'none'), 'helptext': "A valid migration_method must be specified"},
{'name': 'xml', 'required': True, 'helptext': "A Libvirt XML document must be specified"},
@ -1232,7 +1172,7 @@ class API_VM_Root(Resource):
reqargs.get('xml'),
reqargs.get('node', None),
reqargs.get('limit', None),
reqargs.get('selector', 'mem'),
reqargs.get('selector', 'none'),
bool(strtobool(reqargs.get('autostart', 'false'))),
reqargs.get('migration_method', 'none')
)
@ -1266,7 +1206,7 @@ class API_VM_Element(Resource):
@RequestParser([
{'name': 'limit'},
{'name': 'node'},
{'name': 'selector', 'choices': ('mem', 'vcpus', 'load', 'vms'), 'helptext': "A valid selector must be specified"},
{'name': 'selector', 'choices': ('mem', 'vcpus', 'load', 'vms', 'none'), 'helptext': "A valid selector must be specified"},
{'name': 'autostart'},
{'name': 'migration_method', 'choices': ('live', 'shutdown', 'none'), 'helptext': "A valid migration_method must be specified"},
{'name': 'xml', 'required': True, 'helptext': "A Libvirt XML document must be specified"},
@ -1307,6 +1247,7 @@ class API_VM_Element(Resource):
- vcpus
- load
- vms
- none (cluster default)
- in: query
name: autostart
type: boolean
@ -1338,7 +1279,7 @@ class API_VM_Element(Resource):
reqargs.get('xml'),
reqargs.get('node', None),
reqargs.get('limit', None),
reqargs.get('selector', 'mem'),
reqargs.get('selector', 'none'),
bool(strtobool(reqargs.get('autostart', 'false'))),
reqargs.get('migration_method', 'none')
)
@ -1467,7 +1408,7 @@ class API_VM_Metadata(Resource):
@RequestParser([
{'name': 'limit'},
{'name': 'selector', 'choices': ('mem', 'vcpus', 'load', 'vms'), 'helptext': "A valid selector must be specified"},
{'name': 'selector', 'choices': ('mem', 'vcpus', 'load', 'vms', 'none'), 'helptext': "A valid selector must be specified"},
{'name': 'autostart'},
{'name': 'profile'},
{'name': 'migration_method', 'choices': ('live', 'shutdown', 'none'), 'helptext': "A valid migration_method must be specified"},
@ -1804,6 +1745,45 @@ class API_VM_Console(Resource):
api.add_resource(API_VM_Console, '/vm/<vm>/console')
# /vm/<vm>/rename
class API_VM_Rename(Resource):
@RequestParser([
{'name': 'new_name'}
])
@Authenticator
def post(self, vm, reqargs):
"""
Rename VM {vm}, and all connected disk volumes which include this name, to {new_name}
---
tags:
- vm
parameters:
- in: query
name: new_name
type: string
required: true
description: The new name of the VM
responses:
200:
description: OK
schema:
type: object
id: Message
400:
description: Bad request
schema:
type: object
id: Message
"""
return api_helper.vm_rename(
vm,
reqargs.get('new_name', None)
)
api.add_resource(API_VM_Rename, '/vm/<vm>/rename')
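A matching hypothetical call against the new rename endpoint, with the same placeholder conventions as the /initialize example above:

    import requests

    # Renames the VM and any "<vm>_<disk>" volumes along with it.
    response = requests.post(
        'http://pvc.local:7370/api/v1/vm/old-vm/rename',
        params={'new_name': 'new-vm'},
        headers={'X-Api-Key': 'changeme-token'},  # placeholder token
    )
    print(response.json().get('message'))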
##########################################################
# Client API - Network
##########################################################

File diff suppressed because it is too large

View File

@ -77,7 +77,7 @@ class DBNetworkElement(db.Model):
id = db.Column(db.Integer, primary_key=True)
network_template = db.Column(db.Integer, db.ForeignKey("network_template.id"), nullable=False)
vni = db.Column(db.Integer, nullable=False)
vni = db.Column(db.Text, nullable=False)
def __init__(self, network_template, vni):
self.network_template = network_template

View File

@ -30,13 +30,15 @@ import lxml.etree
from werkzeug.formparser import parse_form_data
from pvcapid.Daemon import config
from daemon_lib.zkhandler import ZKConnection
import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph
import pvcapid.provisioner as provisioner
config = None # Set in this namespace by flaskapi
#
# Common functions
@ -110,7 +112,8 @@ def list_ova(limit, is_fuzzy=True):
return {'message': 'No OVAs found.'}, 404
def delete_ova(name):
@ZKConnection(config)
def delete_ova(zkhandler, name):
ova_data, retcode = list_ova(name, is_fuzzy=False)
if retcode != 200:
retmsg = {'message': 'The OVA "{}" does not exist.'.format(name)}
@ -127,9 +130,8 @@ def delete_ova(name):
volumes = cur.fetchall()
# Remove each volume for this OVA
zk_conn = pvc_common.startZKConnection(config['coordinators'])
for volume in volumes:
pvc_ceph.remove_volume(zk_conn, volume.get('pool'), volume.get('volume_name'))
pvc_ceph.remove_volume(zkhandler, volume.get('pool'), volume.get('volume_name'))
# Delete the volume entries from the database
query = "DELETE FROM ova_volume WHERE ova = %s;"
@ -160,7 +162,8 @@ def delete_ova(name):
return retmsg, retcode
def upload_ova(pool, name, ova_size):
@ZKConnection(config)
def upload_ova(zkhandler, pool, name, ova_size):
ova_archive = None
# Cleanup function
@ -168,21 +171,17 @@ def upload_ova(pool, name, ova_size):
# Close the OVA archive
if ova_archive:
ova_archive.close()
zk_conn = pvc_common.startZKConnection(config['coordinators'])
# Unmap the OVA temporary blockdev
retflag, retdata = pvc_ceph.unmap_volume(zk_conn, pool, "ova_{}".format(name))
retflag, retdata = pvc_ceph.unmap_volume(zkhandler, pool, "ova_{}".format(name))
# Remove the OVA temporary blockdev
retflag, retdata = pvc_ceph.remove_volume(zk_conn, pool, "ova_{}".format(name))
pvc_common.stopZKConnection(zk_conn)
retflag, retdata = pvc_ceph.remove_volume(zkhandler, pool, "ova_{}".format(name))
# Normalize the OVA size to bytes
ova_size_bytes = pvc_ceph.format_bytes_fromhuman(ova_size)
ova_size = '{}B'.format(ova_size_bytes)
# Verify that the cluster has enough space to store the OVA volumes (2x OVA size, temporarily, 1x permanently)
zk_conn = pvc_common.startZKConnection(config['coordinators'])
pool_information = pvc_ceph.getPoolInformation(zk_conn, pool)
pvc_common.stopZKConnection(zk_conn)
pool_information = pvc_ceph.getPoolInformation(zkhandler, pool)
pool_free_space_bytes = int(pool_information['stats']['free_bytes'])
if ova_size_bytes * 2 >= pool_free_space_bytes:
output = {
@ -196,9 +195,7 @@ def upload_ova(pool, name, ova_size):
return output, retcode
# Create a temporary OVA blockdev
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_ceph.add_volume(zk_conn, pool, "ova_{}".format(name), ova_size)
pvc_common.stopZKConnection(zk_conn)
retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, "ova_{}".format(name), ova_size)
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
@ -208,9 +205,7 @@ def upload_ova(pool, name, ova_size):
return output, retcode
# Map the temporary OVA blockdev
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_ceph.map_volume(zk_conn, pool, "ova_{}".format(name))
pvc_common.stopZKConnection(zk_conn)
retflag, retdata = pvc_ceph.map_volume(zkhandler, pool, "ova_{}".format(name))
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
@ -276,15 +271,11 @@ def upload_ova(pool, name, ova_size):
dev_size = '{}B'.format(pvc_ceph.format_bytes_fromhuman(dev_size_raw))
def cleanup_img_maps():
zk_conn = pvc_common.startZKConnection(config['coordinators'])
# Unmap the temporary blockdev
retflag, retdata = pvc_ceph.unmap_volume(zk_conn, pool, volume)
pvc_common.stopZKConnection(zk_conn)
retflag, retdata = pvc_ceph.unmap_volume(zkhandler, pool, volume)
# Create the blockdev
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_ceph.add_volume(zk_conn, pool, volume, dev_size)
pvc_common.stopZKConnection(zk_conn)
retflag, retdata = pvc_ceph.add_volume(zkhandler, pool, volume, dev_size)
if not retflag:
output = {
'message': retdata.replace('\"', '\'')
@ -295,9 +286,7 @@ def upload_ova(pool, name, ova_size):
return output, retcode
# Map the blockdev
zk_conn = pvc_common.startZKConnection(config['coordinators'])
retflag, retdata = pvc_ceph.map_volume(zk_conn, pool, volume)
pvc_common.stopZKConnection(zk_conn)
retflag, retdata = pvc_ceph.map_volume(zkhandler, pool, volume)
if not retflag:
output = {
'message': retdata.replace('\"', '\'')

View File

@ -24,7 +24,9 @@ import psycopg2
import psycopg2.extras
import re
from distutils.util import strtobool as dustrtobool
from pvcapid.Daemon import config, strtobool
from daemon_lib.zkhandler import ZKHandler
import daemon_lib.common as pvc_common
import daemon_lib.node as pvc_node
@ -36,19 +38,6 @@ import pvcapid.libvirt_schema as libvirt_schema
from pvcapid.ova import list_ova
config = None # Set in this namespace by flaskapi
def strtobool(stringv):
if stringv is None:
return False
if isinstance(stringv, bool):
return bool(stringv)
try:
return bool(dustrtobool(stringv))
except Exception:
return False
#
# Exceptions (used by Celery tasks)
@ -230,6 +219,9 @@ def create_template_system(name, vcpu_count, vram_mb, serial=False, vnc=False, v
retcode = 400
return retmsg, retcode
if node_selector == 'none':
node_selector = None
query = "INSERT INTO system_template (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, ova) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
args = (name, vcpu_count, vram_mb, serial, vnc, vnc_bind, node_limit, node_selector, node_autostart, migration_method, ova)
@ -276,7 +268,7 @@ def create_template_network_element(name, vni):
networks = []
found_vni = False
for network in networks:
if int(network['vni']) == int(vni):
if network['vni'] == vni:
found_vni = True
if found_vni:
retmsg = {'message': 'The VNI "{}" in network template "{}" already exists.'.format(vni, name)}
@ -425,6 +417,9 @@ def modify_template_system(name, vcpu_count=None, vram_mb=None, serial=None, vnc
fields.append({'field': 'node_limit', 'data': node_limit})
if node_selector is not None:
if node_selector == 'none':
node_selector = None
fields.append({'field': 'node_selector', 'data': node_selector})
if node_autostart is not None:
@ -1070,6 +1065,8 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
import datetime
import random
temp_dir = None
time.sleep(2)
print("Starting provisioning of VM '{}' with profile '{}'".format(vm_name, vm_profile))
@ -1078,14 +1075,13 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
try:
db_conn, db_cur = open_database(config)
except Exception:
print('FATAL - failed to connect to Postgres')
raise Exception
raise ClusterError('Failed to connect to Postgres')
try:
zk_conn = pvc_common.startZKConnection(config['coordinators'])
zkhandler = ZKHandler(config)
zkhandler.connect()
except Exception:
print('FATAL - failed to connect to Zookeeper')
raise Exception
raise ClusterError('Failed to connect to Zookeeper')
# Phase 1 - setup
# * Get the profile elements
@ -1187,11 +1183,11 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
time.sleep(1)
# Verify that a VM with this name does not already exist
if pvc_vm.searchClusterByName(zk_conn, vm_name):
if pvc_vm.searchClusterByName(zkhandler, vm_name):
raise ClusterError("A VM with the name '{}' already exists in the cluster.".format(vm_name))
# Verify that at least one host has enough free RAM to run the VM
_discard, nodes = pvc_node.get_list(zk_conn, None)
_discard, nodes = pvc_node.get_list(zkhandler, None)
target_node = None
last_free = 0
for node in nodes:
@ -1212,10 +1208,10 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
print('Selecting target node "{}" with "{}" MB free RAM'.format(target_node, last_free))
# Verify that all configured networks are present on the cluster
cluster_networks, _discard = pvc_network.getClusterNetworkList(zk_conn)
cluster_networks, _discard = pvc_network.getClusterNetworkList(zkhandler)
for network in vm_data['networks']:
vni = str(network['vni'])
if vni not in cluster_networks:
if vni not in cluster_networks and vni not in ['upstream', 'cluster', 'storage']:
raise ClusterError('The network VNI "{}" is not present on the cluster.'.format(vni))
print("All configured networks for VM are valid")
@ -1224,7 +1220,7 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
pools = dict()
for volume in vm_data['volumes']:
if volume.get('source_volume') is not None:
volume_data = pvc_ceph.getVolumeInformation(zk_conn, volume['pool'], volume['source_volume'])
volume_data = pvc_ceph.getVolumeInformation(zkhandler, volume['pool'], volume['source_volume'])
if not volume_data:
raise ClusterError('The source volume {}/{} could not be found.'.format(volume['pool'], volume['source_volume']))
if not volume['pool'] in pools:
@ -1239,7 +1235,7 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
for pool in pools:
try:
pool_information = pvc_ceph.getPoolInformation(zk_conn, pool)
pool_information = pvc_ceph.getPoolInformation(zkhandler, pool)
if not pool_information:
raise
except Exception:
@ -1331,7 +1327,10 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
network_id = 0
for network in vm_data['networks']:
vni = network['vni']
eth_bridge = "vmbr{}".format(vni)
if vni in ['upstream', 'cluster', 'storage']:
eth_bridge = "br{}".format(vni)
else:
eth_bridge = "vmbr{}".format(vni)
vm_id_hex = '{:x}'.format(int(vm_id % 16))
net_id_hex = '{:x}'.format(int(network_id % 16))
@ -1437,7 +1436,7 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
node_selector = vm_data['system_details']['node_selector']
node_autostart = vm_data['system_details']['node_autostart']
migration_method = vm_data['system_details']['migration_method']
retcode, retmsg = pvc_vm.define_vm(zk_conn, vm_schema.strip(), target_node, node_limit, node_selector, node_autostart, migration_method, vm_profile, initial_state='provision')
retcode, retmsg = pvc_vm.define_vm(zkhandler, vm_schema.strip(), target_node, node_limit, node_selector, node_autostart, migration_method, vm_profile, initial_state='provision')
print(retmsg)
else:
print("Skipping VM definition")
@ -1449,12 +1448,12 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
for volume in vm_data['volumes']:
if volume.get('source_volume') is not None:
success, message = pvc_ceph.clone_volume(zk_conn, volume['pool'], volume['source_volume'], "{}_{}".format(vm_name, volume['disk_id']))
success, message = pvc_ceph.clone_volume(zkhandler, volume['pool'], volume['source_volume'], "{}_{}".format(vm_name, volume['disk_id']))
print(message)
if not success:
raise ProvisioningError('Failed to clone volume "{}" to "{}".'.format(volume['source_volume'], volume['disk_id']))
else:
success, message = pvc_ceph.add_volume(zk_conn, volume['pool'], "{}_{}".format(vm_name, volume['disk_id']), "{}G".format(volume['disk_size_gb']))
success, message = pvc_ceph.add_volume(zkhandler, volume['pool'], "{}_{}".format(vm_name, volume['disk_id']), "{}G".format(volume['disk_size_gb']))
print(message)
if not success:
raise ProvisioningError('Failed to create volume "{}".'.format(volume['disk_id']))
@ -1478,11 +1477,11 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
print('Converting {} source volume {} to raw format on {}'.format(volume['volume_format'], src_volume, dst_volume))
# Map the target RBD device
retcode, retmsg = pvc_ceph.map_volume(zk_conn, volume['pool'], dst_volume_name)
retcode, retmsg = pvc_ceph.map_volume(zkhandler, volume['pool'], dst_volume_name)
if not retcode:
raise ProvisioningError('Failed to map destination volume "{}": {}'.format(dst_volume_name, retmsg))
# Map the source RBD device
retcode, retmsg = pvc_ceph.map_volume(zk_conn, volume['pool'], src_volume_name)
retcode, retmsg = pvc_ceph.map_volume(zkhandler, volume['pool'], src_volume_name)
if not retcode:
raise ProvisioningError('Failed to map source volume "{}": {}'.format(src_volume_name, retmsg))
# Convert from source to target
@ -1497,11 +1496,11 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
raise ProvisioningError('Failed to convert {} volume "{}" to raw volume "{}": {}'.format(volume['volume_format'], src_volume, dst_volume, stderr))
# Unmap the source RBD device (don't bother later)
retcode, retmsg = pvc_ceph.unmap_volume(zk_conn, volume['pool'], src_volume_name)
retcode, retmsg = pvc_ceph.unmap_volume(zkhandler, volume['pool'], src_volume_name)
if not retcode:
raise ProvisioningError('Failed to unmap source volume "{}": {}'.format(src_volume_name, retmsg))
# Unmap the target RBD device (don't bother later)
retcode, retmsg = pvc_ceph.unmap_volume(zk_conn, volume['pool'], dst_volume_name)
retcode, retmsg = pvc_ceph.unmap_volume(zkhandler, volume['pool'], dst_volume_name)
if not retcode:
raise ProvisioningError('Failed to unmap destination volume "{}": {}'.format(dst_volume_name, retmsg))
else:
@ -1521,7 +1520,7 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
filesystem_args = ' '.join(filesystem_args_list)
# Map the RBD device
retcode, retmsg = pvc_ceph.map_volume(zk_conn, volume['pool'], dst_volume_name)
retcode, retmsg = pvc_ceph.map_volume(zkhandler, volume['pool'], dst_volume_name)
if not retcode:
raise ProvisioningError('Failed to map volume "{}": {}'.format(dst_volume, retmsg))
@ -1659,8 +1658,10 @@ def create_vm(self, vm_name, vm_profile, define_vm=True, start_vm=True, script_r
if start_vm:
self.update_state(state='RUNNING', meta={'current': 10, 'total': 10, 'status': 'Starting VM'})
time.sleep(1)
retcode, retmsg = pvc_vm.start_vm(zk_conn, vm_name)
retcode, retmsg = pvc_vm.start_vm(zkhandler, vm_name)
print(retmsg)
pvc_common.stopZKConnection(zk_conn)
zkhandler.disconnect()
del zkhandler
return {'status': 'VM "{}" with profile "{}" has been provisioned and started successfully'.format(vm_name, vm_profile), 'current': 10, 'total': 10}

View File

@ -25,16 +25,17 @@ import cli_lib.ansiprint as ansiprint
from cli_lib.common import call_api
def initialize(config):
def initialize(config, overwrite=False):
"""
Initialize the PVC cluster
API endpoint: GET /api/v1/initialize
API arguments: yes-i-really-mean-it
API arguments: overwrite, yes-i-really-mean-it
API schema: {json_data_object}
"""
params = {
'yes-i-really-mean-it': 'yes'
'yes-i-really-mean-it': 'yes',
'overwrite': overwrite
}
response = call_api(config, 'post', '/initialize', params=params)

View File

@ -130,6 +130,27 @@ def vm_modify(config, vm, xml, restart):
return retstatus, response.json().get('message', '')
def vm_rename(config, vm, new_name):
"""
Rename VM to new name
API endpoint: POST /vm/{vm}/rename
API arguments: new_name={new_name}
API schema: {"message":"{data}"}
"""
params = {
'new_name': new_name
}
response = call_api(config, 'post', '/vm/{vm}/rename'.format(vm=vm), params=params)
if response.status_code == 200:
retstatus = True
else:
retstatus = False
return retstatus, response.json().get('message', '')
def vm_metadata(config, vm, node_limit, node_selector, node_autostart, migration_method, provisioner_profile):
"""
Modify PVC metadata of a VM
@ -1002,8 +1023,9 @@ def follow_console_log(config, vm, lines=10):
API arguments: lines={lines}
API schema: {"name":"{vmname}","data":"{console_log}"}
"""
# We always grab 500 to match the follow call, but only _show_ `lines` of them
params = {
'lines': lines
'lines': 500
}
response = call_api(config, 'get', '/vm/{vm}/console'.format(vm=vm), params=params)
@ -1012,7 +1034,7 @@ def follow_console_log(config, vm, lines=10):
# Shrink the log buffer to length lines
console_log = response.json()['data']
shrunk_log = console_log.split('\n')[-lines:]
shrunk_log = console_log.split('\n')[-int(lines):]
loglines = '\n'.join(shrunk_log)
# Print the initial data and begin following

View File

@ -46,7 +46,7 @@ myhostname = socket.gethostname().split('.')[0]
zk_host = ''
default_store_data = {
'cfgfile': '/etc/pvc/pvcapid.yaml' # pvc/api/listen_address, pvc/api/listen_port
'cfgfile': '/etc/pvc/pvcapid.yaml'
}
@ -67,7 +67,7 @@ def read_from_yaml(cfgfile):
api_key = api_config['pvc']['api']['authentication']['tokens'][0]['token']
else:
api_key = 'N/A'
return host, port, scheme, api_key
return cfgfile, host, port, scheme, api_key
def get_config(store_data, cluster=None):
@ -84,7 +84,7 @@ def get_config(store_data, cluster=None):
# This is a reference to an API configuration; grab the details from its listen address
cfgfile = cluster_details.get('cfgfile')
if os.path.isfile(cfgfile):
host, port, scheme, api_key = read_from_yaml(cfgfile)
description, host, port, scheme, api_key = read_from_yaml(cfgfile)
else:
return {'badcfg': True}
# Handle an all-wildcard address
@ -92,6 +92,7 @@ def get_config(store_data, cluster=None):
host = '127.0.0.1'
else:
# This is a static configuration, get the raw details
description = cluster_details['description']
host = cluster_details['host']
port = cluster_details['port']
scheme = cluster_details['scheme']
@ -100,6 +101,7 @@ def get_config(store_data, cluster=None):
config = dict()
config['debug'] = False
config['cluster'] = cluster
config['description'] = description
config['api_host'] = '{}:{}'.format(host, port)
config['api_scheme'] = scheme
config['api_key'] = api_key
@ -175,6 +177,10 @@ def cli_cluster():
# pvc cluster add
###############################################################################
@click.command(name='add', short_help='Add a new cluster to the client.')
@click.option(
'-d', '--description', 'description', required=False, default="N/A",
help='A text description of the cluster.'
)
@click.option(
'-a', '--address', 'address', required=True,
help='The IP address or hostname of the cluster API client.'
@ -194,7 +200,7 @@ def cli_cluster():
@click.argument(
'name'
)
def cluster_add(address, port, ssl, name, api_key):
def cluster_add(description, address, port, ssl, name, api_key):
"""
Add a new PVC cluster NAME, via its API connection details, to the configuration of the local CLI client. Replaces any existing cluster with this name.
"""
@ -207,6 +213,7 @@ def cluster_add(address, port, ssl, name, api_key):
existing_config = get_store(store_path)
# Append our new entry to the end
existing_config[name] = {
'description': description,
'host': address,
'port': port,
'scheme': scheme,
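With the new option, registering a cluster might look like `pvc cluster add -d "Production cluster" -a 10.0.0.10 prod` (address and cluster name here are placeholders); omitting -d simply stores the "N/A" default defined above.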
@ -252,10 +259,11 @@ def cluster_list():
clusters = get_store(store_path)
# Find the lengths of each column
name_length = 5
description_length = 12
address_length = 10
port_length = 5
scheme_length = 7
api_key_length = 8
api_key_length = 32
for cluster in clusters:
cluster_details = clusters[cluster]
@ -263,10 +271,11 @@ def cluster_list():
# This is a reference to an API configuration; grab the details from its listen address
cfgfile = cluster_details.get('cfgfile')
if os.path.isfile(cfgfile):
address, port, scheme, api_key = read_from_yaml(cfgfile)
description, address, port, scheme, api_key = read_from_yaml(cfgfile)
else:
address, port, scheme, api_key = 'N/A', 'N/A', 'N/A', 'N/A'
description, address, port, scheme, api_key = 'N/A', 'N/A', 'N/A', 'N/A', 'N/A'
else:
description = cluster_details.get('description', '')
address = cluster_details.get('host', 'N/A')
port = cluster_details.get('port', 'N/A')
scheme = cluster_details.get('scheme', 'N/A')
@ -278,6 +287,9 @@ def cluster_list():
if _name_length > name_length:
name_length = _name_length
_address_length = len(address) + 1
_description_length = len(description) + 1
if _description_length > description_length:
description_length = _description_length
if _address_length > address_length:
address_length = _address_length
_port_length = len(str(port)) + 1
@ -294,11 +306,13 @@ def cluster_list():
click.echo("Available clusters:")
click.echo()
click.echo(
'{bold}{name: <{name_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
'{bold}{name: <{name_length}} {description: <{description_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
bold=ansiprint.bold(),
end_bold=ansiprint.end(),
name="Name",
name_length=name_length,
description="Description",
description_length=description_length,
address="Address",
address_length=address_length,
port="Port",
@ -315,14 +329,12 @@ def cluster_list():
if cluster_details.get('cfgfile', None):
# This is a reference to an API configuration; grab the details from its listen address
if os.path.isfile(cfgfile):
address, port, scheme, api_key = read_from_yaml(cfgfile)
description, address, port, scheme, api_key = read_from_yaml(cfgfile)
else:
address = 'N/A'
port = 'N/A'
scheme = 'N/A'
api_key = 'N/A'
continue
else:
address = cluster_details.get('host', 'N/A')
description = cluster_details.get('description', 'N/A')
port = cluster_details.get('port', 'N/A')
scheme = cluster_details.get('scheme', 'N/A')
api_key = cluster_details.get('api_key', 'N/A')
@ -330,11 +342,13 @@ def cluster_list():
api_key = 'N/A'
click.echo(
'{bold}{name: <{name_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
'{bold}{name: <{name_length}} {description: <{description_length}} {address: <{address_length}} {port: <{port_length}} {scheme: <{scheme_length}} {api_key: <{api_key_length}}{end_bold}'.format(
bold='',
end_bold='',
name=cluster,
name_length=name_length,
description=description,
description_length=description_length,
address=address,
address_length=address_length,
port=port,
@ -604,8 +618,8 @@ def cli_vm():
)
@click.option(
'-s', '--selector', 'node_selector', default='mem', show_default=True,
type=click.Choice(['mem', 'load', 'vcpus', 'vms']),
help='Method to determine optimal target node during autoselect; saved with VM.'
type=click.Choice(['mem', 'load', 'vcpus', 'vms', 'none']),
help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.'
)
@click.option(
'-a/-A', '--autostart/--no-autostart', 'node_autostart', is_flag=True, default=False,
@ -650,8 +664,8 @@ def vm_define(vmconfig, target_node, node_limit, node_selector, node_autostart,
)
@click.option(
'-s', '--selector', 'node_selector', default=None, show_default=False,
type=click.Choice(['mem', 'load', 'vcpus', 'vms']),
help='Method to determine optimal target node during autoselect.'
type=click.Choice(['mem', 'load', 'vcpus', 'vms', 'none']),
help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.'
)
@click.option(
'-a/-A', '--autostart/--no-autostart', 'node_autostart', is_flag=True, default=None,
@ -694,13 +708,18 @@ def vm_meta(domain, node_limit, node_selector, node_autostart, migration_method,
'-r', '--restart', 'restart', is_flag=True,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@click.argument(
'domain'
)
@click.argument(
'cfgfile', type=click.File(), default=None, required=False
)
def vm_modify(domain, cfgfile, editor, restart):
def vm_modify(domain, cfgfile, editor, restart, confirm_flag):
"""
Modify existing virtual machine DOMAIN, either in-editor or with replacement CONFIG. DOMAIN may be a UUID or name.
"""
@ -709,7 +728,7 @@ def vm_modify(domain, cfgfile, editor, restart):
cleanup(False, 'Either an XML config file or the "--editor" option must be specified.')
retcode, vm_information = pvc_vm.vm_info(config, domain)
if not retcode and not vm_information.get('name', None):
if not retcode or not vm_information.get('name', None):
cleanup(False, 'ERROR: Could not find VM "{}"!'.format(domain))
dom_name = vm_information.get('name')
@ -727,38 +746,28 @@ def vm_modify(domain, cfgfile, editor, restart):
else:
new_vm_cfgfile = new_vm_cfgfile.strip()
# Show a diff and confirm
click.echo('Pending modifications:')
click.echo('')
diff = list(difflib.unified_diff(current_vm_cfgfile.split('\n'), new_vm_cfgfile.split('\n'), fromfile='current', tofile='modified', fromfiledate='', tofiledate='', n=3, lineterm=''))
for line in diff:
if re.match(r'^\+', line) is not None:
click.echo(colorama.Fore.GREEN + line + colorama.Fore.RESET)
elif re.match(r'^\-', line) is not None:
click.echo(colorama.Fore.RED + line + colorama.Fore.RESET)
elif re.match(r'^\^', line) is not None:
click.echo(colorama.Fore.BLUE + line + colorama.Fore.RESET)
else:
click.echo(line)
click.echo('')
click.confirm('Write modifications to cluster?', abort=True)
if restart:
click.echo('Writing modified configuration of VM "{}" and restarting.'.format(dom_name))
else:
click.echo('Writing modified configuration of VM "{}".'.format(dom_name))
# We're operating in replace mode
else:
# Open the XML file
new_vm_cfgfile = cfgfile.read()
cfgfile.close()
if restart:
click.echo('Replacing configuration of VM "{}" with file "{}" and restarting.'.format(dom_name, cfgfile.name))
click.echo('Replacing configuration of VM "{}" with file "{}".'.format(dom_name, cfgfile.name))
# Show a diff and confirm
click.echo('Pending modifications:')
click.echo('')
diff = list(difflib.unified_diff(current_vm_cfgfile.split('\n'), new_vm_cfgfile.split('\n'), fromfile='current', tofile='modified', fromfiledate='', tofiledate='', n=3, lineterm=''))
for line in diff:
if re.match(r'^\+', line) is not None:
click.echo(colorama.Fore.GREEN + line + colorama.Fore.RESET)
elif re.match(r'^\-', line) is not None:
click.echo(colorama.Fore.RED + line + colorama.Fore.RESET)
elif re.match(r'^\^', line) is not None:
click.echo(colorama.Fore.BLUE + line + colorama.Fore.RESET)
else:
click.echo('Replacing configuration of VM "{}" with file "{}".'.format(dom_name, cfgfile.name))
click.echo(line)
click.echo('')
# Verify our XML is sensible
try:
@ -767,7 +776,47 @@ def vm_modify(domain, cfgfile, editor, restart):
except Exception as e:
cleanup(False, 'Error: XML is malformed or invalid: {}'.format(e))
click.confirm('Write modifications to cluster?', abort=True)
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_modify(config, domain, new_cfg, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
###############################################################################
# pvc vm rename
###############################################################################
@click.command(name='rename', short_help='Rename a virtual machine.')
@click.argument(
'domain'
)
@click.argument(
'new_name'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the rename'
)
@cluster_req
def vm_rename(domain, new_name, confirm_flag):
"""
Rename virtual machine DOMAIN, and all its connected disk volumes, to NEW_NAME. DOMAIN may be a UUID or name.
"""
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Rename VM {} to {}'.format(domain, new_name), prompt_suffix='? ', abort=True)
except Exception:
exit(0)
retcode, retmsg = pvc_vm.vm_rename(config, domain, new_name)
cleanup(retcode, retmsg)
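As a usage sketch (the VM names here are illustrative), the new rename command can be driven non-interactively with the confirmation flag:

    pvc vm rename old-vm new-vm --yes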
@ -788,7 +837,7 @@ def vm_undefine(domain, confirm_flag):
"""
Stop virtual machine DOMAIN and remove it from the database, preserving disks. DOMAIN may be a UUID or name.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Undefine VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
@ -815,7 +864,7 @@ def vm_remove(domain, confirm_flag):
"""
Stop virtual machine DOMAIN and remove it, along with all disks. DOMAIN may be a UUID or name.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Undefine VM {} and remove all disks'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
@ -853,11 +902,21 @@ def vm_start(domain):
'-w', '--wait', 'wait', is_flag=True, default=False,
help='Wait for restart to complete before returning.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_restart(domain, wait):
def vm_restart(domain, wait, confirm_flag):
"""
Restart running virtual machine DOMAIN. DOMAIN may be a UUID or name.
"""
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
exit(0)
retcode, retmsg = pvc_vm.vm_state(config, domain, 'restart', wait=wait)
cleanup(retcode, retmsg)
@ -874,11 +933,21 @@ def vm_restart(domain, wait):
'-w', '--wait', 'wait', is_flag=True, default=False,
help='Wait for shutdown to complete before returning.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the shutdown'
)
@cluster_req
def vm_shutdown(domain, wait):
def vm_shutdown(domain, wait, confirm_flag):
"""
Gracefully shut down virtual machine DOMAIN. DOMAIN may be a UUID or name.
"""
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Shut down VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
exit(0)
retcode, retmsg = pvc_vm.vm_state(config, domain, 'shutdown', wait=wait)
cleanup(retcode, retmsg)
@ -891,11 +960,21 @@ def vm_shutdown(domain, wait):
@click.argument(
'domain'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the stop'
)
@cluster_req
def vm_stop(domain):
def vm_stop(domain, confirm_flag):
"""
Forcibly halt (destroy) running virtual machine DOMAIN. DOMAIN may be a UUID or name.
"""
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Forcibly stop VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
exit(0)
retcode, retmsg = pvc_vm.vm_state(config, domain, 'stop')
cleanup(retcode, retmsg)
@ -1078,26 +1157,38 @@ def vm_vcpu_get(domain, raw):
'-r', '--restart', 'restart', is_flag=True, default=False,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_vcpu_set(domain, vcpus, topology, restart):
def vm_vcpu_set(domain, vcpus, topology, restart, confirm_flag):
"""
Set the vCPU count of the virtual machine DOMAIN to VCPUS.
By default, the topology of the vCPUs is 1 socket, VCPUS cores per socket, 1 thread per core.
"""
if topology is not None:
try:
sockets, cores, threads = [int(x) for x in topology.split(',')]
if sockets * cores * threads != vcpus:
raise
except Exception:
cleanup(False, "The topology specified is not valid.")
cleanup(False, "The specified topology is not valid.")
topology = (sockets, cores, threads)
else:
topology = (1, vcpus, 1)
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_vcpus_set(config, domain, vcpus, topology, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
@ -1149,13 +1240,25 @@ def vm_memory_get(domain, raw):
'-r', '--restart', 'restart', is_flag=True, default=False,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_memory_set(domain, memory, restart):
def vm_memory_set(domain, memory, restart, confirm_flag):
"""
Set the provisioned memory of the virtual machine DOMAIN to MEMORY; MEMORY must be an integer in MB.
"""
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_memory_set(config, domain, memory, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
@ -1222,13 +1325,25 @@ def vm_network_get(domain, raw):
'-r', '--restart', 'restart', is_flag=True, default=False,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_network_add(domain, vni, macaddr, model, restart):
def vm_network_add(domain, vni, macaddr, model, restart, confirm_flag):
"""
Add the network VNI to the virtual machine DOMAIN. Networks are always added to the end of the current list of networks in the virtual machine.
"""
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_networks_add(config, domain, vni, macaddr, model, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
@ -1246,13 +1361,25 @@ def vm_network_add(domain, vni, macaddr, model, restart):
'-r', '--restart', 'restart', is_flag=True, default=False,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_network_remove(domain, vni, restart):
def vm_network_remove(domain, vni, restart, confirm_flag):
"""
Remove the network VNI from the virtual machine DOMAIN.
"""
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_networks_remove(config, domain, vni, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
@ -1325,15 +1452,27 @@ def vm_volume_get(domain, raw):
'-r', '--restart', 'restart', is_flag=True, default=False,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart):
def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart, confirm_flag):
"""
Add the volume VOLUME to the virtual machine DOMAIN.
VOLUME may be either an absolute file path (for type 'file') or an RBD volume in the form "pool/volume" (for type 'rbd'). RBD volumes are verified against the cluster before adding and must exist.
"""
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_volumes_add(config, domain, volume, disk_id, bus, disk_type, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
@ -1351,13 +1490,25 @@ def vm_volume_add(domain, volume, disk_id, bus, disk_type, restart):
'-r', '--restart', 'restart', is_flag=True, default=False,
help='Immediately restart VM to apply new config.'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the restart'
)
@cluster_req
def vm_volume_remove(domain, vni, restart):
def vm_volume_remove(domain, vni, restart, confirm_flag):
"""
Remove the volume VNI from the virtual machine DOMAIN.
"""
if restart and not confirm_flag and not config['unsafe']:
try:
click.confirm('Restart VM {}'.format(domain), prompt_suffix='? ', abort=True)
except Exception:
restart = False
retcode, retmsg = pvc_vm.vm_volumes_remove(config, domain, vni, restart)
if retcode and not restart:
retmsg = retmsg + " Changes will be applied on next VM start/restart."
cleanup(retcode, retmsg)
@ -1435,7 +1586,7 @@ def vm_dump(domain):
"""
retcode, vm_information = pvc_vm.vm_info(config, domain)
if not retcode and not vm_information.get('name', None):
if not retcode or not vm_information.get('name', None):
cleanup(False, 'ERROR: Could not find VM "{}"!'.format(domain))
# Grab the current config
@ -1642,6 +1793,7 @@ def net_modify(vni, description, domain, name_servers, ip6_network, ip6_gateway,
Modify details of virtual network VNI. All fields optional; only specified fields will be updated.
Example:
pvc network modify 1001 --gateway 10.1.1.1 --dhcp
"""
@ -1669,7 +1821,7 @@ def net_remove(net, confirm_flag):
WARNING: PVC does not verify whether clients are still present in this network. Before removing, ensure
that all client VMs have been removed from the network or undefined behaviour may occur.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove network {}'.format(net), prompt_suffix='? ', abort=True)
except Exception:
@ -1778,7 +1930,7 @@ def net_dhcp_remove(net, macaddr, confirm_flag):
"""
Remove a DHCP lease for MACADDR from virtual network NET; NET must be a VNI.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove DHCP lease for {} in network {}'.format(macaddr, net), prompt_suffix='? ', abort=True)
except Exception:
@ -1897,7 +2049,7 @@ def net_acl_remove(net, rule, confirm_flag):
"""
Remove an NFT firewall rule RULE from network NET; RULE must be a description; NET must be a VNI.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove ACL {} in network {}'.format(rule, net), prompt_suffix='? ', abort=True)
except Exception:
@ -2096,7 +2248,7 @@ def ceph_osd_add(node, device, weight, confirm_flag):
"""
Add a new Ceph OSD on node NODE with block device DEVICE.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Destroy all data and create a new OSD on {}:{}'.format(node, device), prompt_suffix='? ', abort=True)
except Exception:
@ -2125,7 +2277,7 @@ def ceph_osd_remove(osdid, confirm_flag):
DANGER: This will completely remove the OSD from the cluster. OSDs will rebalance which may negatively affect performance or available space.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove OSD {}'.format(osdid), prompt_suffix='? ', abort=True)
except Exception:
@ -2256,14 +2408,16 @@ def ceph_pool():
default='copies=3,mincopies=2', show_default=True, required=False,
help="""
The replication configuration, specifying both a "copies" and "mincopies" value, separated by a
comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas and should not exceed the total number of nodes; the "mincopies" value specifies the minimum number of available copies to allow writes. For additional details please see the Cluster Architecture documentation.
comma, e.g. "copies=3,mincopies=2". The "copies" value specifies the total number of replicas
and should not exceed the total number of nodes; the "mincopies" value specifies the minimum
number of available copies to allow writes. For additional details please see the Cluster
Architecture documentation.
"""
)
@cluster_req
def ceph_pool_add(name, pgs, replcfg):
"""
Add a new Ceph RBD pool with name NAME and PGS placement groups.
"""
retcode, retmsg = pvc_ceph.ceph_pool_add(config, name, pgs, replcfg)
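As an aside, the replication string documented above follows a simple comma-separated key=value format; a minimal parsing sketch (the helper name is hypothetical, not part of PVC):

def parse_replcfg(replcfg):
    # Hypothetical helper: split "copies=3,mincopies=2" into integers.
    settings = dict(item.split('=') for item in replcfg.split(','))
    copies = int(settings['copies'])
    mincopies = int(settings['mincopies'])
    if mincopies > copies:
        raise ValueError('mincopies may not exceed copies')
    return copies, mincopies

# parse_replcfg('copies=3,mincopies=2') -> (3, 2)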
@ -2289,7 +2443,7 @@ def ceph_pool_remove(name, confirm_flag):
DANGER: This will completely remove the pool and all volumes contained in it from the cluster.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove RBD pool {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -2410,7 +2564,7 @@ def ceph_volume_remove(pool, name, confirm_flag):
DANGER: This will completely remove the volume and all data contained in it.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove volume {}/{}'.format(pool, name), prompt_suffix='? ', abort=True)
except Exception:
@ -2594,7 +2748,7 @@ def ceph_volume_snapshot_remove(pool, volume, name, confirm_flag):
DANGER: This will completely remove the snapshot.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove snapshot {} for volume {}/{}'.format(name, pool, volume), prompt_suffix='? ', abort=True)
except Exception:
@ -2741,9 +2895,9 @@ def provisioner_template_system_list(limit):
)
@click.option(
'--node-selector', 'node_selector',
type=click.Choice(['mem', 'vcpus', 'vms', 'load'], case_sensitive=False),
default=None, # Use cluster default
help='Use this selector to determine the optimal node during migrations.'
type=click.Choice(['mem', 'vcpus', 'vms', 'load', 'none'], case_sensitive=False),
default='none',
help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.'
)
@click.option(
'--node-autostart', 'node_autostart',
@ -2819,8 +2973,8 @@ def provisioner_template_system_add(name, vcpus, vram, serial, vnc, vnc_bind, no
)
@click.option(
'--node-selector', 'node_selector',
type=click.Choice(['mem', 'vcpus', 'vms', 'load'], case_sensitive=False),
help='Use this selector to determine the optimal node during migrations.'
type=click.Choice(['mem', 'vcpus', 'vms', 'load', 'none'], case_sensitive=False),
help='Method to determine optimal target node during autoselect; "none" will use the default for the cluster.'
)
@click.option(
'--node-autostart', 'node_autostart',
@ -2870,7 +3024,7 @@ def provisioner_template_system_remove(name, confirm_flag):
"""
Remove system template NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove system template {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -2977,7 +3131,7 @@ def provisioner_template_network_remove(name, confirm_flag):
"""
Remove network template NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove network template {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -3041,7 +3195,7 @@ def provisioner_template_network_vni_remove(name, vni, confirm_flag):
"""
Remove network VNI from network template NAME.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove VNI {} from network template {}'.format(vni, name), prompt_suffix='? ', abort=True)
except Exception:
@ -3116,7 +3270,7 @@ def provisioner_template_storage_remove(name, confirm_flag):
"""
Remove storage template NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove storage template {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -3235,7 +3389,7 @@ def provisioner_template_storage_disk_remove(name, disk, confirm_flag):
DISK must be a Linux-style disk identifier such as "sda" or "vdb".
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove disk {} from storage template {}'.format(disk, name), prompt_suffix='? ', abort=True)
except Exception:
@ -3425,7 +3579,7 @@ def provisioner_userdata_remove(name, confirm_flag):
"""
Remove userdata document NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove userdata document {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -3604,7 +3758,7 @@ def provisioner_script_remove(name, confirm_flag):
"""
Remove script NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove provisioning script {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -3700,7 +3854,7 @@ def provisioner_ova_remove(name, confirm_flag):
"""
Remove OVA image NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove OVA image {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -3885,7 +4039,7 @@ def provisioner_profile_remove(name, confirm_flag):
"""
Remove profile NAME from the PVC cluster provisioner.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove profile {}'.format(name), prompt_suffix='? ', abort=True)
except Exception:
@ -4065,9 +4219,13 @@ def status_cluster(oformat):
Show basic information and health for the active PVC cluster.
Output formats:
plain: Full text, full colour output for human-readability.
short: Health-only, full colour output for human-readability.
json: Compact JSON representation for machine parsing.
json-pretty: Pretty-printed JSON representation for machine parsing or human-readability.
"""
@ -4131,7 +4289,7 @@ def task_restore(filename, confirm_flag):
Restore the JSON backup data from a file to the cluster.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Replace all existing cluster data from coordinators with backup file "{}"'.format(filename.name), prompt_suffix='? ', abort=True)
except Exception:
@ -4146,18 +4304,29 @@ def task_restore(filename, confirm_flag):
# pvc task init
###############################################################################
@click.command(name='init', short_help='Initialize a new cluster.')
@click.option(
'-o', '--overwrite', 'overwrite_flag',
is_flag=True, default=False,
help='Remove and overwrite any existing data'
)
@click.option(
'-y', '--yes', 'confirm_flag',
is_flag=True, default=False,
help='Confirm the initialization'
)
@cluster_req
def task_init(confirm_flag):
def task_init(confirm_flag, overwrite_flag):
"""
Perform initialization of a new PVC cluster.
If the '-o'/'--overwrite' option is specified, all existing data in the cluster will be deleted
before new, empty data is written.
It is not advisable to do this against a running cluster - all node daemons should be stopped
first and the API daemon started manually before running this command.
"""
if not confirm_flag:
if not confirm_flag and not config['unsafe']:
try:
click.confirm('Remove all existing cluster data from coordinators and initialize a new cluster', prompt_suffix='? ', abort=True)
except Exception:
@ -4166,7 +4335,7 @@ def task_init(confirm_flag):
# Easter-egg
click.echo("Some music while we're Layin' Pipe? https://youtu.be/sw8S_Kv89IU")
retcode, retmsg = pvc_cluster.initialize(config)
retcode, retmsg = pvc_cluster.initialize(config, overwrite_flag)
cleanup(retcode, retmsg)
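As a usage sketch, a fully non-interactive re-initialization (dangerous; see the warning in the docstring above) would combine both new flags:

    pvc task init --overwrite --yes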
@ -4186,7 +4355,11 @@ def task_init(confirm_flag):
'-q', '--quiet', '_quiet', envvar='PVC_QUIET', is_flag=True, default=False,
help='Suppress cluster connection information.'
)
def cli(_cluster, _debug, _quiet):
@click.option(
'-u', '--unsafe', '_unsafe', envvar='PVC_UNSAFE', is_flag=True, default=False,
help='Allow unsafe operations without confirmation/"--yes" argument.'
)
def cli(_cluster, _debug, _quiet, _unsafe):
"""
Parallel Virtual Cluster CLI management tool
@ -4194,6 +4367,12 @@ def cli(_cluster, _debug, _quiet):
"PVC_CLUSTER": Set the cluster to access instead of using --cluster/-c
"PVC_DEBUG": Enable additional debugging details instead of using --debug/-v
"PVC_QUIET": Suppress stderr connection output from client instead of using --quiet/-q
"PVC_UNSAFE": Suppress confirmation requirements instead of using --unsafe/-u or --yes/-y; USE WITH EXTREME CARE
If no PVC_CLUSTER/--cluster is specified, attempts first to load the "local" cluster, checking
for an API configuration in "/etc/pvc/pvcapid.yaml". If this is also not found, abort.
"""
@ -4203,6 +4382,7 @@ def cli(_cluster, _debug, _quiet):
config = get_config(store_data, _cluster)
if not config.get('badcfg', None):
config['debug'] = _debug
config['unsafe'] = _unsafe
if not _quiet:
if config['api_scheme'] == 'https' and not config['verify_ssl']:
@ -4256,6 +4436,7 @@ vm_volume.add_command(vm_volume_remove)
cli_vm.add_command(vm_define)
cli_vm.add_command(vm_meta)
cli_vm.add_command(vm_modify)
cli_vm.add_command(vm_rename)
cli_vm.add_command(vm_undefine)
cli_vm.add_command(vm_remove)
cli_vm.add_command(vm_dump)

View File

@ -26,7 +26,6 @@ import time
import math
import daemon_lib.vm as vm
import daemon_lib.zkhandler as zkhandler
import daemon_lib.common as common
@ -35,42 +34,30 @@ import daemon_lib.common as common
#
# Verify OSD is valid in cluster
def verifyOSD(zk_conn, osd_id):
if zkhandler.exists(zk_conn, '/ceph/osds/{}'.format(osd_id)):
return True
else:
return False
def verifyOSD(zkhandler, osd_id):
return zkhandler.exists('/ceph/osds/{}'.format(osd_id))
# Verify Pool is valid in cluster
def verifyPool(zk_conn, name):
if zkhandler.exists(zk_conn, '/ceph/pools/{}'.format(name)):
return True
else:
return False
def verifyPool(zkhandler, name):
return zkhandler.exists('/ceph/pools/{}'.format(name))
# Verify Volume is valid in cluster
def verifyVolume(zk_conn, pool, name):
if zkhandler.exists(zk_conn, '/ceph/volumes/{}/{}'.format(pool, name)):
return True
else:
return False
def verifyVolume(zkhandler, pool, name):
return zkhandler.exists('/ceph/volumes/{}/{}'.format(pool, name))
# Verify Snapshot is valid in cluster
def verifySnapshot(zk_conn, pool, volume, name):
if zkhandler.exists(zk_conn, '/ceph/snapshots/{}/{}/{}'.format(pool, volume, name)):
return True
else:
return False
def verifySnapshot(zkhandler, pool, volume, name):
return zkhandler.exists('/ceph/snapshots/{}/{}/{}'.format(pool, volume, name))
# Verify OSD path is valid in cluster
def verifyOSDBlock(zk_conn, node, device):
for osd in zkhandler.listchildren(zk_conn, '/ceph/osds'):
osd_node = zkhandler.readdata(zk_conn, '/ceph/osds/{}/node'.format(osd))
osd_device = zkhandler.readdata(zk_conn, '/ceph/osds/{}/device'.format(osd))
def verifyOSDBlock(zkhandler, node, device):
for osd in zkhandler.children('/ceph/osds'):
osd_node = zkhandler.read('/ceph/osds/{}/node'.format(osd))
osd_device = zkhandler.read('/ceph/osds/{}/device'.format(osd))
if node == osd_node and device == osd_device:
return osd
return None
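These conversions replace the module-level zkhandler functions (readdata, writedata, listchildren, deletekey, renamekey) with methods on a ZKHandler object (read, write, children, delete, rename, exists). A rough sketch of the wrapper shape these calls assume, wrapping a connected kazoo client; the real ZKHandler implementation may differ in detail:

class ZKHandlerSketch(object):
    # Illustrative only: wraps a connected kazoo.client.KazooClient.
    def __init__(self, zk_conn):
        self.zk_conn = zk_conn

    def exists(self, key):
        return self.zk_conn.exists(key) is not None

    def read(self, key):
        return self.zk_conn.get(key)[0].decode('utf8')

    def children(self, key):
        return self.zk_conn.get_children(key)

    def write(self, kvpairs):
        # Apply all (key, value) pairs in a single transaction.
        transaction = self.zk_conn.transaction()
        for key, value in kvpairs:
            transaction.set_data(key, str(value).encode('utf8'))
        transaction.commit()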
@ -156,9 +143,9 @@ def format_pct_tohuman(datapct):
#
# Status functions
#
def get_status(zk_conn):
primary_node = zkhandler.readdata(zk_conn, '/primary_node')
ceph_status = zkhandler.readdata(zk_conn, '/ceph').rstrip()
def get_status(zkhandler):
primary_node = zkhandler.read('/config/primary_node')
ceph_status = zkhandler.read('/ceph').rstrip()
# Create a data structure for the information
status_data = {
@ -169,9 +156,9 @@ def get_status(zk_conn):
return True, status_data
def get_util(zk_conn):
primary_node = zkhandler.readdata(zk_conn, '/primary_node')
ceph_df = zkhandler.readdata(zk_conn, '/ceph/util').rstrip()
def get_util(zkhandler):
primary_node = zkhandler.read('/config/primary_node')
ceph_df = zkhandler.read('/ceph/util').rstrip()
# Create a data structure for the information
status_data = {
@ -185,15 +172,14 @@ def get_util(zk_conn):
#
# OSD functions
#
def getClusterOSDList(zk_conn):
def getClusterOSDList(zkhandler):
# Get a list of OSD IDs by listing the children of /ceph/osds
osd_list = zkhandler.listchildren(zk_conn, '/ceph/osds')
return osd_list
return zkhandler.children('/ceph/osds')
def getOSDInformation(zk_conn, osd_id):
def getOSDInformation(zkhandler, osd_id):
# Parse the stats data
osd_stats_raw = zkhandler.readdata(zk_conn, '/ceph/osds/{}/stats'.format(osd_id))
osd_stats_raw = zkhandler.read('/ceph/osds/{}/stats'.format(osd_id))
osd_stats = dict(json.loads(osd_stats_raw))
osd_information = {
@ -205,26 +191,26 @@ def getOSDInformation(zk_conn, osd_id):
# OSD addition and removal uses the /cmd/ceph pipe
# These actions must occur on the specific node they reference
def add_osd(zk_conn, node, device, weight):
def add_osd(zkhandler, node, device, weight):
# Verify the target node exists
if not common.verifyNode(zk_conn, node):
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Verify target block device isn't in use
block_osd = verifyOSDBlock(zk_conn, node, device)
block_osd = verifyOSDBlock(zkhandler, node, device)
if block_osd:
return False, 'ERROR: Block device "{}" on node "{}" is used by OSD "{}"'.format(device, node, block_osd)
# Tell the cluster to create a new OSD for the host
add_osd_string = 'osd_add {},{},{}'.format(node, device, weight)
zkhandler.writedata(zk_conn, {'/cmd/ceph': add_osd_string})
zkhandler.write([('/cmd/ceph', add_osd_string)])
# Wait 1/2 second for the cluster to get the message and start working
time.sleep(0.5)
# Acquire a read lock, so we get the return exclusively
lock = zkhandler.readlock(zk_conn, '/cmd/ceph')
lock = zkhandler.readlock('/cmd/ceph')
with lock:
try:
result = zkhandler.readdata(zk_conn, '/cmd/ceph').split()[0]
result = zkhandler.read('/cmd/ceph').split()[0]
if result == 'success-osd_add':
message = 'Created new OSD with block device "{}" on node "{}".'.format(device, node)
success = True
@ -236,28 +222,28 @@ def add_osd(zk_conn, node, device, weight):
success = False
# Acquire a write lock to ensure things go smoothly
lock = zkhandler.writelock(zk_conn, '/cmd/ceph')
lock = zkhandler.writelock('/cmd/ceph')
with lock:
time.sleep(0.5)
zkhandler.writedata(zk_conn, {'/cmd/ceph': ''})
zkhandler.write([('/cmd/ceph', '')])
return success, message
def remove_osd(zk_conn, osd_id):
if not verifyOSD(zk_conn, osd_id):
def remove_osd(zkhandler, osd_id):
if not verifyOSD(zkhandler, osd_id):
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)
# Tell the cluster to remove an OSD
remove_osd_string = 'osd_remove {}'.format(osd_id)
zkhandler.writedata(zk_conn, {'/cmd/ceph': remove_osd_string})
zkhandler.write([('/cmd/ceph', remove_osd_string)])
# Wait 1/2 second for the cluster to get the message and start working
time.sleep(0.5)
# Acquire a read lock, so we get the return exclusively
lock = zkhandler.readlock(zk_conn, '/cmd/ceph')
lock = zkhandler.readlock('/cmd/ceph')
with lock:
try:
result = zkhandler.readdata(zk_conn, '/cmd/ceph').split()[0]
result = zkhandler.read('/cmd/ceph').split()[0]
if result == 'success-osd_remove':
message = 'Removed OSD "{}" from the cluster.'.format(osd_id)
success = True
@ -269,16 +255,16 @@ def remove_osd(zk_conn, osd_id):
message = 'ERROR: Command ignored by node.'
# Acquire a write lock to ensure things go smoothly
lock = zkhandler.writelock(zk_conn, '/cmd/ceph')
lock = zkhandler.writelock('/cmd/ceph')
with lock:
time.sleep(0.5)
zkhandler.writedata(zk_conn, {'/cmd/ceph': ''})
zkhandler.write([('/cmd/ceph', '')])
return success, message
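Both OSD commands above share the same request/acknowledge protocol over the /cmd/ceph key: post the command string, wait briefly, take a read lock to read the node's reply, then take a write lock and clear the pipe. A condensed sketch of that client side (the helper and its success-marker argument are hypothetical):

def run_ceph_command_sketch(zkhandler, command_string, success_marker):
    # 1. Post the command for the responsible node daemon to pick up.
    zkhandler.write([('/cmd/ceph', command_string)])
    time.sleep(0.5)
    # 2. Acquire a read lock so we see the node's reply exclusively.
    with zkhandler.readlock('/cmd/ceph'):
        result = zkhandler.read('/cmd/ceph').split()[0]
        success = (result == success_marker)
    # 3. Acquire a write lock and clear the pipe for the next command.
    with zkhandler.writelock('/cmd/ceph'):
        time.sleep(0.5)
        zkhandler.write([('/cmd/ceph', '')])
    return success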
def in_osd(zk_conn, osd_id):
if not verifyOSD(zk_conn, osd_id):
def in_osd(zkhandler, osd_id):
if not verifyOSD(zkhandler, osd_id):
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)
retcode, stdout, stderr = common.run_os_command('ceph osd in {}'.format(osd_id))
@ -288,8 +274,8 @@ def in_osd(zk_conn, osd_id):
return True, 'Set OSD {} online.'.format(osd_id)
def out_osd(zk_conn, osd_id):
if not verifyOSD(zk_conn, osd_id):
def out_osd(zkhandler, osd_id):
if not verifyOSD(zkhandler, osd_id):
return False, 'ERROR: No OSD with ID "{}" is present in the cluster.'.format(osd_id)
retcode, stdout, stderr = common.run_os_command('ceph osd out {}'.format(osd_id))
@ -299,7 +285,7 @@ def out_osd(zk_conn, osd_id):
return True, 'Set OSD {} offline.'.format(osd_id)
def set_osd(zk_conn, option):
def set_osd(zkhandler, option):
retcode, stdout, stderr = common.run_os_command('ceph osd set {}'.format(option))
if retcode:
return False, 'ERROR: Failed to set property "{}": {}'.format(option, stderr)
@ -307,7 +293,7 @@ def set_osd(zk_conn, option):
return True, 'Set OSD property "{}".'.format(option)
def unset_osd(zk_conn, option):
def unset_osd(zkhandler, option):
retcode, stdout, stderr = common.run_os_command('ceph osd unset {}'.format(option))
if retcode:
return False, 'ERROR: Failed to unset property "{}": {}'.format(option, stderr)
@ -315,9 +301,9 @@ def unset_osd(zk_conn, option):
return True, 'Unset OSD property "{}".'.format(option)
def get_list_osd(zk_conn, limit, is_fuzzy=True):
def get_list_osd(zkhandler, limit, is_fuzzy=True):
osd_list = []
full_osd_list = zkhandler.listchildren(zk_conn, '/ceph/osds')
full_osd_list = zkhandler.children('/ceph/osds')
if is_fuzzy and limit:
# Implicitly assume fuzzy limits
@ -330,11 +316,11 @@ def get_list_osd(zk_conn, limit, is_fuzzy=True):
if limit:
try:
if re.match(limit, osd):
osd_list.append(getOSDInformation(zk_conn, osd))
osd_list.append(getOSDInformation(zkhandler, osd))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
osd_list.append(getOSDInformation(zk_conn, osd))
osd_list.append(getOSDInformation(zkhandler, osd))
return True, sorted(osd_list, key=lambda x: int(x['id']))
@ -342,11 +328,11 @@ def get_list_osd(zk_conn, limit, is_fuzzy=True):
#
# Pool functions
#
def getPoolInformation(zk_conn, pool):
def getPoolInformation(zkhandler, pool):
# Parse the stats data
pool_stats_raw = zkhandler.readdata(zk_conn, '/ceph/pools/{}/stats'.format(pool))
pool_stats_raw = zkhandler.read('/ceph/pools/{}/stats'.format(pool))
pool_stats = dict(json.loads(pool_stats_raw))
volume_count = len(getCephVolumes(zk_conn, pool))
volume_count = len(getCephVolumes(zkhandler, pool))
pool_information = {
'name': pool,
@ -356,7 +342,7 @@ def getPoolInformation(zk_conn, pool):
return pool_information
def add_pool(zk_conn, name, pgs, replcfg):
def add_pool(zkhandler, name, pgs, replcfg):
# Prepare the copies/mincopies variables
try:
copies, mincopies = replcfg.split(',')
@ -388,24 +374,24 @@ def add_pool(zk_conn, name, pgs, replcfg):
return False, 'ERROR: Failed to enable RBD application on pool "{}" : {}'.format(name, stderr)
# 4. Add the new pool to Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/pools/{}'.format(name): '',
'/ceph/pools/{}/pgs'.format(name): pgs,
'/ceph/pools/{}/stats'.format(name): '{}',
'/ceph/volumes/{}'.format(name): '',
'/ceph/snapshots/{}'.format(name): '',
})
zkhandler.write([
('/ceph/pools/{}'.format(name), ''),
('/ceph/pools/{}/pgs'.format(name), pgs),
('/ceph/pools/{}/stats'.format(name), '{}'),
('/ceph/volumes/{}'.format(name), ''),
('/ceph/snapshots/{}'.format(name), ''),
])
return True, 'Created RBD pool "{}" with {} PGs'.format(name, pgs)
def remove_pool(zk_conn, name):
if not verifyPool(zk_conn, name):
def remove_pool(zkhandler, name):
if not verifyPool(zkhandler, name):
return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(name)
# 1. Remove pool volumes
for volume in zkhandler.listchildren(zk_conn, '/ceph/volumes/{}'.format(name)):
remove_volume(zk_conn, name, volume)
for volume in zkhandler.children('/ceph/volumes/{}'.format(name)):
remove_volume(zkhandler, name, volume)
# 2. Remove the pool
retcode, stdout, stderr = common.run_os_command('ceph osd pool rm {pool} {pool} --yes-i-really-really-mean-it'.format(pool=name))
@ -413,16 +399,16 @@ def remove_pool(zk_conn, name):
return False, 'ERROR: Failed to remove pool "{}": {}'.format(name, stderr)
# 3. Delete pool from Zookeeper
zkhandler.deletekey(zk_conn, '/ceph/pools/{}'.format(name))
zkhandler.deletekey(zk_conn, '/ceph/volumes/{}'.format(name))
zkhandler.deletekey(zk_conn, '/ceph/snapshots/{}'.format(name))
zkhandler.delete('/ceph/pools/{}'.format(name))
zkhandler.delete('/ceph/volumes/{}'.format(name))
zkhandler.delete('/ceph/snapshots/{}'.format(name))
return True, 'Removed RBD pool "{}" and all volumes.'.format(name)
def get_list_pool(zk_conn, limit, is_fuzzy=True):
def get_list_pool(zkhandler, limit, is_fuzzy=True):
pool_list = []
full_pool_list = zkhandler.listchildren(zk_conn, '/ceph/pools')
full_pool_list = zkhandler.children('/ceph/pools')
if limit:
if not is_fuzzy:
@ -432,11 +418,11 @@ def get_list_pool(zk_conn, limit, is_fuzzy=True):
if limit:
try:
if re.match(limit, pool):
pool_list.append(getPoolInformation(zk_conn, pool))
pool_list.append(getPoolInformation(zkhandler, pool))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
pool_list.append(getPoolInformation(zk_conn, pool))
pool_list.append(getPoolInformation(zkhandler, pool))
return True, sorted(pool_list, key=lambda x: int(x['stats']['id']))
@ -444,23 +430,23 @@ def get_list_pool(zk_conn, limit, is_fuzzy=True):
#
# Volume functions
#
def getCephVolumes(zk_conn, pool):
def getCephVolumes(zkhandler, pool):
volume_list = list()
if not pool:
pool_list = zkhandler.listchildren(zk_conn, '/ceph/pools')
pool_list = zkhandler.children('/ceph/pools')
else:
pool_list = [pool]
for pool_name in pool_list:
for volume_name in zkhandler.listchildren(zk_conn, '/ceph/volumes/{}'.format(pool_name)):
for volume_name in zkhandler.children('/ceph/volumes/{}'.format(pool_name)):
volume_list.append('{}/{}'.format(pool_name, volume_name))
return volume_list
def getVolumeInformation(zk_conn, pool, volume):
def getVolumeInformation(zkhandler, pool, volume):
# Parse the stats data
volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats_raw = zkhandler.read('/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats = dict(json.loads(volume_stats_raw))
# Format the size to something nicer
volume_stats['size'] = format_bytes_tohuman(volume_stats['size'])
@ -473,9 +459,9 @@ def getVolumeInformation(zk_conn, pool, volume):
return volume_information
def add_volume(zk_conn, pool, name, size):
def add_volume(zkhandler, pool, name, size):
# 1. Verify the size of the volume
pool_information = getPoolInformation(zk_conn, pool)
pool_information = getPoolInformation(zkhandler, pool)
size_bytes = format_bytes_fromhuman(size)
if size_bytes >= int(pool_information['stats']['free_bytes']):
return False, 'ERROR: Requested volume size is greater than the available free space in the pool'
@ -494,17 +480,17 @@ def add_volume(zk_conn, pool, name, size):
volstats = stdout
# 3. Add the new volume to Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}'.format(pool, name): '',
'/ceph/volumes/{}/{}/stats'.format(pool, name): volstats,
'/ceph/snapshots/{}/{}'.format(pool, name): '',
})
zkhandler.write([
('/ceph/volumes/{}/{}'.format(pool, name), ''),
('/ceph/volumes/{}/{}/stats'.format(pool, name), volstats),
('/ceph/snapshots/{}/{}'.format(pool, name), ''),
])
return True, 'Created RBD volume "{}/{}" ({}).'.format(pool, name, size)
def clone_volume(zk_conn, pool, name_src, name_new):
if not verifyVolume(zk_conn, pool, name_src):
def clone_volume(zkhandler, pool, name_src, name_new):
if not verifyVolume(zkhandler, pool, name_src):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(name_src, pool)
# 1. Clone the volume
@ -517,17 +503,17 @@ def clone_volume(zk_conn, pool, name_src, name_new):
volstats = stdout
# 3. Add the new volume to Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}'.format(pool, name_new): '',
'/ceph/volumes/{}/{}/stats'.format(pool, name_new): volstats,
'/ceph/snapshots/{}/{}'.format(pool, name_new): '',
})
zkhandler.write([
('/ceph/volumes/{}/{}'.format(pool, name_new), ''),
('/ceph/volumes/{}/{}/stats'.format(pool, name_new), volstats),
('/ceph/snapshots/{}/{}'.format(pool, name_new), ''),
])
return True, 'Cloned RBD volume "{}" to "{}" in pool "{}"'.format(name_src, name_new, pool)
def resize_volume(zk_conn, pool, name, size):
if not verifyVolume(zk_conn, pool, name):
def resize_volume(zkhandler, pool, name, size):
if not verifyVolume(zkhandler, pool, name):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(name, pool)
# 1. Resize the volume
@ -538,7 +524,7 @@ def resize_volume(zk_conn, pool, name, size):
# 2a. Determine the node running this VM if applicable
active_node = None
volume_vm_name = name.split('_')[0]
retcode, vm_info = vm.get_info(zk_conn, volume_vm_name)
retcode, vm_info = vm.get_info(zkhandler, volume_vm_name)
if retcode:
for disk in vm_info['disks']:
# This block device is present in this VM so we can continue
@ -564,17 +550,17 @@ def resize_volume(zk_conn, pool, name, size):
volstats = stdout
# 3. Add the new volume to Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}'.format(pool, name): '',
'/ceph/volumes/{}/{}/stats'.format(pool, name): volstats,
'/ceph/snapshots/{}/{}'.format(pool, name): '',
})
zkhandler.write([
('/ceph/volumes/{}/{}'.format(pool, name), ''),
('/ceph/volumes/{}/{}/stats'.format(pool, name), volstats),
('/ceph/snapshots/{}/{}'.format(pool, name), ''),
])
return True, 'Resized RBD volume "{}" to size "{}" in pool "{}".'.format(name, size, pool)
def rename_volume(zk_conn, pool, name, new_name):
if not verifyVolume(zk_conn, pool, name):
def rename_volume(zkhandler, pool, name, new_name):
if not verifyVolume(zkhandler, pool, name):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(name, pool)
# 1. Rename the volume
@ -583,30 +569,30 @@ def rename_volume(zk_conn, pool, name, new_name):
return False, 'ERROR: Failed to rename volume "{}" to "{}" in pool "{}": {}'.format(name, new_name, pool, stderr)
# 2. Rename the volume in Zookeeper
zkhandler.renamekey(zk_conn, {
'/ceph/volumes/{}/{}'.format(pool, name): '/ceph/volumes/{}/{}'.format(pool, new_name),
'/ceph/snapshots/{}/{}'.format(pool, name): '/ceph/snapshots/{}/{}'.format(pool, new_name)
})
zkhandler.rename([
('/ceph/volumes/{}/{}'.format(pool, name), '/ceph/volumes/{}/{}'.format(pool, new_name)),
('/ceph/snapshots/{}/{}'.format(pool, name), '/ceph/snapshots/{}/{}'.format(pool, new_name))
])
# 3. Get volume stats
retcode, stdout, stderr = common.run_os_command('rbd info --format json {}/{}'.format(pool, new_name))
volstats = stdout
# 4. Update the volume stats in Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}/stats'.format(pool, new_name): volstats,
})
zkhandler.write([
('/ceph/volumes/{}/{}/stats'.format(pool, new_name), volstats)
])
return True, 'Renamed RBD volume "{}" to "{}" in pool "{}".'.format(name, new_name, pool)
def remove_volume(zk_conn, pool, name):
if not verifyVolume(zk_conn, pool, name):
def remove_volume(zkhandler, pool, name):
if not verifyVolume(zkhandler, pool, name):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(name, pool)
# 1. Remove volume snapshots
for snapshot in zkhandler.listchildren(zk_conn, '/ceph/snapshots/{}/{}'.format(pool, name)):
remove_snapshot(zk_conn, pool, name, snapshot)
for snapshot in zkhandler.children('/ceph/snapshots/{}/{}'.format(pool, name)):
remove_snapshot(zkhandler, pool, name, snapshot)
# 2. Remove the volume
retcode, stdout, stderr = common.run_os_command('rbd rm {}/{}'.format(pool, name))
@ -614,14 +600,14 @@ def remove_volume(zk_conn, pool, name):
return False, 'ERROR: Failed to remove RBD volume "{}" in pool "{}": {}'.format(name, pool, stderr)
# 3. Delete volume from Zookeeper
zkhandler.deletekey(zk_conn, '/ceph/volumes/{}/{}'.format(pool, name))
zkhandler.deletekey(zk_conn, '/ceph/snapshots/{}/{}'.format(pool, name))
zkhandler.delete('/ceph/volumes/{}/{}'.format(pool, name))
zkhandler.delete('/ceph/snapshots/{}/{}'.format(pool, name))
return True, 'Removed RBD volume "{}" in pool "{}".'.format(name, pool)
def map_volume(zk_conn, pool, name):
if not verifyVolume(zk_conn, pool, name):
def map_volume(zkhandler, pool, name):
if not verifyVolume(zkhandler, pool, name):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(name, pool)
# 1. Map the volume onto the local system
@ -639,8 +625,8 @@ def map_volume(zk_conn, pool, name):
return True, mapped_volume
def unmap_volume(zk_conn, pool, name):
if not verifyVolume(zk_conn, pool, name):
def unmap_volume(zkhandler, pool, name):
if not verifyVolume(zkhandler, pool, name):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(name, pool)
mapped_volume = '/dev/rbd/{}/{}'.format(pool, name)
@ -657,12 +643,12 @@ def unmap_volume(zk_conn, pool, name):
return True, 'Unmapped RBD volume at "{}".'.format(mapped_volume)
def get_list_volume(zk_conn, pool, limit, is_fuzzy=True):
def get_list_volume(zkhandler, pool, limit, is_fuzzy=True):
volume_list = []
if pool and not verifyPool(zk_conn, pool):
if pool and not verifyPool(zkhandler, pool):
return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(pool)
full_volume_list = getCephVolumes(zk_conn, pool)
full_volume_list = getCephVolumes(zkhandler, pool)
if limit:
if not is_fuzzy:
@ -679,11 +665,11 @@ def get_list_volume(zk_conn, pool, limit, is_fuzzy=True):
if limit:
try:
if re.match(limit, volume_name):
volume_list.append(getVolumeInformation(zk_conn, pool_name, volume_name))
volume_list.append(getVolumeInformation(zkhandler, pool_name, volume_name))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
volume_list.append(getVolumeInformation(zk_conn, pool_name, volume_name))
volume_list.append(getVolumeInformation(zkhandler, pool_name, volume_name))
return True, sorted(volume_list, key=lambda x: str(x['name']))
@ -691,11 +677,11 @@ def get_list_volume(zk_conn, pool, limit, is_fuzzy=True):
#
# Snapshot functions
#
def getCephSnapshots(zk_conn, pool, volume):
def getCephSnapshots(zkhandler, pool, volume):
snapshot_list = list()
volume_list = list()
volume_list = getCephVolumes(zk_conn, pool)
volume_list = getCephVolumes(zkhandler, pool)
if volume:
for volume_entry in volume_list:
volume_pool, volume_name = volume_entry.split('/')
@ -703,14 +689,14 @@ def getCephSnapshots(zk_conn, pool, volume):
volume_list = ['{}/{}'.format(volume_pool, volume_name)]
for volume_entry in volume_list:
for snapshot_name in zkhandler.listchildren(zk_conn, '/ceph/snapshots/{}'.format(volume_entry)):
for snapshot_name in zkhandler.children('/ceph/snapshots/{}'.format(volume_entry)):
snapshot_list.append('{}@{}'.format(volume_entry, snapshot_name))
return snapshot_list
def add_snapshot(zk_conn, pool, volume, name):
if not verifyVolume(zk_conn, pool, volume):
def add_snapshot(zkhandler, pool, volume, name):
if not verifyVolume(zkhandler, pool, volume):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(volume, pool)
# 1. Create the snapshot
@ -719,28 +705,28 @@ def add_snapshot(zk_conn, pool, volume, name):
return False, 'ERROR: Failed to create RBD snapshot "{}" of volume "{}" in pool "{}": {}'.format(name, volume, pool, stderr)
# 2. Add the snapshot to Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/snapshots/{}/{}/{}'.format(pool, volume, name): '',
'/ceph/snapshots/{}/{}/{}/stats'.format(pool, volume, name): '{}'
})
zkhandler.write([
('/ceph/snapshots/{}/{}/{}'.format(pool, volume, name), ''),
('/ceph/snapshots/{}/{}/{}/stats'.format(pool, volume, name), '{}')
])
# 3. Update the count of snapshots on this volume
volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats_raw = zkhandler.read('/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats = dict(json.loads(volume_stats_raw))
# Increment the snapshot count on this volume
volume_stats['snapshot_count'] = volume_stats['snapshot_count'] + 1
volume_stats_raw = json.dumps(volume_stats)
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}/stats'.format(pool, volume): volume_stats_raw
})
zkhandler.write([
('/ceph/volumes/{}/{}/stats'.format(pool, volume), volume_stats_raw)
])
return True, 'Created RBD snapshot "{}" of volume "{}" in pool "{}".'.format(name, volume, pool)
def rename_snapshot(zk_conn, pool, volume, name, new_name):
if not verifyVolume(zk_conn, pool, volume):
def rename_snapshot(zkhandler, pool, volume, name, new_name):
if not verifyVolume(zkhandler, pool, volume):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(volume, pool)
if not verifySnapshot(zk_conn, pool, volume, name):
if not verifySnapshot(zkhandler, pool, volume, name):
return False, 'ERROR: No snapshot with name "{}" is present for volume "{}" in pool "{}".'.format(name, volume, pool)
# 1. Rename the snapshot
@ -749,17 +735,17 @@ def rename_snapshot(zk_conn, pool, volume, name, new_name):
return False, 'ERROR: Failed to rename RBD snapshot "{}" to "{}" for volume "{}" in pool "{}": {}'.format(name, new_name, volume, pool, stderr)
# 2. Rename the snapshot in ZK
zkhandler.renamekey(zk_conn, {
'/ceph/snapshots/{}/{}/{}'.format(pool, volume, name): '/ceph/snapshots/{}/{}/{}'.format(pool, volume, new_name)
})
zkhandler.rename([
('/ceph/snapshots/{}/{}/{}'.format(pool, volume, name), '/ceph/snapshots/{}/{}/{}'.format(pool, volume, new_name))
])
return True, 'Renamed RBD snapshot "{}" to "{}" for volume "{}" in pool "{}".'.format(name, new_name, volume, pool)
def remove_snapshot(zk_conn, pool, volume, name):
if not verifyVolume(zk_conn, pool, volume):
def remove_snapshot(zkhandler, pool, volume, name):
if not verifyVolume(zkhandler, pool, volume):
return False, 'ERROR: No volume with name "{}" is present in pool "{}".'.format(volume, pool)
if not verifySnapshot(zk_conn, pool, volume, name):
if not verifySnapshot(zkhandler, pool, volume, name):
return False, 'ERROR: No snapshot with name "{}" is present for volume "{}" in pool "{}".'.format(name, volume, pool)
# 1. Remove the snapshot
@ -768,30 +754,30 @@ def remove_snapshot(zk_conn, pool, volume, name):
return False, 'Failed to remove RBD snapshot "{}" of volume "{}" in pool "{}": {}'.format(name, volume, pool, stderr)
# 2. Delete snapshot from Zookeeper
zkhandler.deletekey(zk_conn, '/ceph/snapshots/{}/{}/{}'.format(pool, volume, name))
zkhandler.delete('/ceph/snapshots/{}/{}/{}'.format(pool, volume, name))
# 3. Update the count of snapshots on this volume
volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats_raw = zkhandler.read('/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats = dict(json.loads(volume_stats_raw))
# Decrement the snapshot count on this volume
volume_stats['snapshot_count'] = volume_stats['snapshot_count'] - 1
volume_stats_raw = json.dumps(volume_stats)
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}/stats'.format(pool, volume): volume_stats_raw
})
zkhandler.write([
('/ceph/volumes/{}/{}/stats'.format(pool, volume), volume_stats_raw)
])
return True, 'Removed RBD snapshot "{}" of volume "{}" in pool "{}".'.format(name, volume, pool)
def get_list_snapshot(zk_conn, pool, volume, limit, is_fuzzy=True):
def get_list_snapshot(zkhandler, pool, volume, limit, is_fuzzy=True):
snapshot_list = []
if pool and not verifyPool(zk_conn, pool):
if pool and not verifyPool(zkhandler, pool):
return False, 'ERROR: No pool with name "{}" is present in the cluster.'.format(pool)
if volume and not verifyPool(zk_conn, volume):
if volume and not verifyPool(zkhandler, volume):
return False, 'ERROR: No volume with name "{}" is present in the cluster.'.format(volume)
full_snapshot_list = getCephSnapshots(zk_conn, pool, volume)
full_snapshot_list = getCephSnapshots(zkhandler, pool, volume)
if is_fuzzy and limit:
# Implicitly assume fuzzy limits

View File

@ -21,7 +21,6 @@
import re
import daemon_lib.zkhandler as zkhandler
import daemon_lib.common as common
import daemon_lib.vm as pvc_vm
import daemon_lib.node as pvc_node
@ -29,43 +28,48 @@ import daemon_lib.network as pvc_network
import daemon_lib.ceph as pvc_ceph
def set_maintenance(zk_conn, maint_state):
try:
def set_maintenance(zkhandler, maint_state):
current_maint_state = zkhandler.read('/config/maintenance')
if maint_state == current_maint_state:
if maint_state == 'true':
zkhandler.writedata(zk_conn, {'/maintenance': 'true'})
return True, 'Successfully set cluster in maintenance mode'
return True, 'Cluster is already in maintenance mode'
else:
zkhandler.writedata(zk_conn, {'/maintenance': 'false'})
return True, 'Successfully set cluster in normal mode'
except Exception:
return False, 'Failed to set cluster maintenance state'
return True, 'Cluster is already in normal mode'
if maint_state == 'true':
zkhandler.write([
('/config/maintenance', 'true')
])
return True, 'Successfully set cluster in maintenance mode'
else:
zkhandler.write([
('/config/maintenance', 'false')
])
return True, 'Successfully set cluster in normal mode'
def getClusterInformation(zk_conn):
def getClusterInformation(zkhandler):
# Get cluster maintenance state
try:
maint_state = zkhandler.readdata(zk_conn, '/maintenance')
except Exception:
maint_state = 'false'
maint_state = zkhandler.read('/config/maintenance')
# List of messages to display to the clients
cluster_health_msg = []
storage_health_msg = []
# Get node information object list
retcode, node_list = pvc_node.get_list(zk_conn, None)
retcode, node_list = pvc_node.get_list(zkhandler, None)
# Get vm information object list
retcode, vm_list = pvc_vm.get_list(zk_conn, None, None, None)
retcode, vm_list = pvc_vm.get_list(zkhandler, None, None, None)
# Get network information object list
retcode, network_list = pvc_network.get_list(zk_conn, None, None)
retcode, network_list = pvc_network.get_list(zkhandler, None, None)
# Get storage information object list
retcode, ceph_osd_list = pvc_ceph.get_list_osd(zk_conn, None)
retcode, ceph_pool_list = pvc_ceph.get_list_pool(zk_conn, None)
retcode, ceph_volume_list = pvc_ceph.get_list_volume(zk_conn, None, None)
retcode, ceph_snapshot_list = pvc_ceph.get_list_snapshot(zk_conn, None, None, None)
retcode, ceph_osd_list = pvc_ceph.get_list_osd(zkhandler, None)
retcode, ceph_pool_list = pvc_ceph.get_list_pool(zkhandler, None)
retcode, ceph_volume_list = pvc_ceph.get_list_volume(zkhandler, None, None)
retcode, ceph_snapshot_list = pvc_ceph.get_list_snapshot(zkhandler, None, None, None)
# Determine, for each subsection, the total count
node_count = len(node_list)
@ -164,7 +168,7 @@ def getClusterInformation(zk_conn):
cluster_health = 'Optimal'
# Find out our storage health from Ceph
ceph_status = zkhandler.readdata(zk_conn, '/ceph').split('\n')
ceph_status = zkhandler.read('/ceph').split('\n')
ceph_health = ceph_status[2].split()[-1]
# Parse the status output to get the health indicators
@ -234,8 +238,8 @@ def getClusterInformation(zk_conn):
'health_msg': cluster_health_msg,
'storage_health': storage_health,
'storage_health_msg': storage_health_msg,
'primary_node': common.getPrimaryNode(zk_conn),
'upstream_ip': zkhandler.readdata(zk_conn, '/upstream_ip'),
'primary_node': common.getPrimaryNode(zkhandler),
'upstream_ip': zkhandler.read('/config/upstream_ip'),
'nodes': formatted_node_states,
'vms': formatted_vm_states,
'networks': network_count,
@ -248,9 +252,9 @@ def getClusterInformation(zk_conn):
return cluster_information
def get_info(zk_conn):
def get_info(zkhandler):
# This is a thin wrapper function for naming purposes
cluster_information = getClusterInformation(zk_conn)
cluster_information = getClusterInformation(zkhandler)
if cluster_information:
return True, cluster_information
else:

View File

@ -22,48 +22,99 @@
import time
import uuid
import lxml
import shlex
import subprocess
import kazoo.client
import signal
from json import loads
from re import match as re_match
from distutils.util import strtobool
from threading import Thread
from shlex import split as shlex_split
import daemon_lib.zkhandler as zkhandler
###############################################################################
# Supplemental functions
###############################################################################
#
# Run a local OS daemon in the background
#
class OSDaemon(object):
def __init__(self, command_string, environment, logfile):
command = shlex_split(command_string)
# Set stdout to be a logfile if set
if logfile:
stdout = open(logfile, 'a')
else:
stdout = subprocess.PIPE
# Invoke the process
self.proc = subprocess.Popen(
command,
env=environment,
stdout=stdout,
stderr=stdout,
)
# Signal the process
def signal(self, sent_signal):
signal_map = {
'hup': signal.SIGHUP,
'int': signal.SIGINT,
'term': signal.SIGTERM,
'kill': signal.SIGKILL
}
self.proc.send_signal(signal_map[sent_signal])
def run_os_daemon(command_string, environment=None, logfile=None):
daemon = OSDaemon(command_string, environment, logfile)
return daemon
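A usage sketch for the new helper (the daemon path and logfile are illustrative):

# Start a long-running process, append its output to a logfile,
# and stop it gracefully later.
daemon = run_os_daemon('/usr/bin/example-daemon --foreground',
                       environment=None,
                       logfile='/var/log/example-daemon.log')
# ... some time later ...
daemon.signal('term')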
#
# Run a local OS command via shell
#
def run_os_command(command_string, background=False, environment=None, timeout=None, shell=False):
command = shlex.split(command_string)
try:
command_output = subprocess.run(
command,
shell=shell,
env=environment,
timeout=timeout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
retcode = command_output.returncode
except subprocess.TimeoutExpired:
retcode = 128
def run_os_command(command_string, background=False, environment=None, timeout=None):
command = shlex_split(command_string)
if background:
def runcmd():
try:
subprocess.run(
command,
env=environment,
timeout=timeout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
except subprocess.TimeoutExpired:
pass
thread = Thread(target=runcmd, args=())
thread.start()
return 0, None, None
else:
try:
command_output = subprocess.run(
command,
env=environment,
timeout=timeout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
retcode = command_output.returncode
except subprocess.TimeoutExpired:
retcode = 128
except Exception:
retcode = 255
try:
stdout = command_output.stdout.decode('ascii')
except Exception:
stdout = ''
try:
stderr = command_output.stderr.decode('ascii')
except Exception:
stderr = ''
return retcode, stdout, stderr
try:
stdout = command_output.stdout.decode('ascii')
except Exception:
stdout = ''
try:
stderr = command_output.stderr.decode('ascii')
except Exception:
stderr = ''
return retcode, stdout, stderr
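For reference, the reworked function keeps the original (retcode, stdout, stderr) contract in the foreground case, while the new background mode returns immediately; a brief usage sketch:

# Foreground: blocks until completion (or the timeout), returns output.
retcode, stdout, stderr = run_os_command('echo hello', timeout=5)

# Background: runs in a separate thread and returns (0, None, None)
# immediately; the command's output is discarded.
run_os_command('sleep 30', background=True)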
#
@ -77,34 +128,12 @@ def validateUUID(dom_uuid):
return False
#
# Connect and disconnect from Zookeeper
#
def startZKConnection(zk_host):
zk_conn = kazoo.client.KazooClient(hosts=zk_host)
try:
zk_conn.start()
except kazoo.handlers.threading.KazooTimeoutError:
print('Timed out connecting to Zookeeper at "{}".'.format(zk_host))
exit(1)
except Exception as e:
print('Failed to connect to Zookeeper at "{}": {}'.format(zk_host, e))
exit(1)
return zk_conn
def stopZKConnection(zk_conn):
zk_conn.stop()
zk_conn.close()
return 0
#
# Parse a Domain XML object
#
def getDomainXML(zk_conn, dom_uuid):
def getDomainXML(zkhandler, dom_uuid):
try:
xml = zkhandler.readdata(zk_conn, '/domains/{}/xml'.format(dom_uuid))
xml = zkhandler.read('/domains/{}/xml'.format(dom_uuid))
except Exception:
return None
@ -214,8 +243,8 @@ def getDomainDisks(parsed_xml, stats_data):
#
# Get a list of disk devices
#
def getDomainDiskList(zk_conn, dom_uuid):
domain_information = getInformationFromXML(zk_conn, dom_uuid)
def getDomainDiskList(zkhandler, dom_uuid):
domain_information = getInformationFromXML(zkhandler, dom_uuid)
disk_list = []
for disk in domain_information['disks']:
disk_list.append(disk['name'])
@ -226,30 +255,30 @@ def getDomainDiskList(zk_conn, dom_uuid):
#
# Get domain information from XML
#
def getInformationFromXML(zk_conn, uuid):
def getInformationFromXML(zkhandler, uuid):
"""
Gather information about a VM from the Libvirt XML configuration in the Zookeeper database
and return a dict() containing it.
"""
domain_state = zkhandler.readdata(zk_conn, '/domains/{}/state'.format(uuid))
domain_node = zkhandler.readdata(zk_conn, '/domains/{}/node'.format(uuid))
domain_lastnode = zkhandler.readdata(zk_conn, '/domains/{}/lastnode'.format(uuid))
domain_failedreason = zkhandler.readdata(zk_conn, '/domains/{}/failedreason'.format(uuid))
domain_state = zkhandler.read('/domains/{}/state'.format(uuid))
domain_node = zkhandler.read('/domains/{}/node'.format(uuid))
domain_lastnode = zkhandler.read('/domains/{}/lastnode'.format(uuid))
domain_failedreason = zkhandler.read('/domains/{}/failedreason'.format(uuid))
try:
domain_node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(uuid))
domain_node_limit = zkhandler.read('/domains/{}/node_limit'.format(uuid))
except Exception:
domain_node_limit = None
try:
domain_node_selector = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(uuid))
domain_node_selector = zkhandler.read('/domains/{}/node_selector'.format(uuid))
except Exception:
domain_node_selector = None
try:
domain_node_autostart = zkhandler.readdata(zk_conn, '/domains/{}/node_autostart'.format(uuid))
domain_node_autostart = zkhandler.read('/domains/{}/node_autostart'.format(uuid))
except Exception:
domain_node_autostart = None
try:
domain_migration_method = zkhandler.readdata(zk_conn, '/domains/{}/migration_method'.format(uuid))
domain_migration_method = zkhandler.read('/domains/{}/migration_method'.format(uuid))
except Exception:
domain_migration_method = None
@ -262,21 +291,21 @@ def getInformationFromXML(zk_conn, uuid):
domain_node_autostart = None
try:
domain_profile = zkhandler.readdata(zk_conn, '/domains/{}/profile'.format(uuid))
domain_profile = zkhandler.read('/domains/{}/profile'.format(uuid))
except Exception:
domain_profile = None
try:
domain_vnc = zkhandler.readdata(zk_conn, '/domains/{}/vnc'.format(uuid))
domain_vnc = zkhandler.read('/domains/{}/vnc'.format(uuid))
domain_vnc_listen, domain_vnc_port = domain_vnc.split(':')
except Exception:
domain_vnc_listen = 'None'
domain_vnc_port = 'None'
parsed_xml = getDomainXML(zk_conn, uuid)
parsed_xml = getDomainXML(zkhandler, uuid)
try:
stats_data = loads(zkhandler.readdata(zk_conn, '/domains/{}/stats'.format(uuid)))
stats_data = loads(zkhandler.read('/domains/{}/stats'.format(uuid)))
except Exception:
stats_data = {}
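The repeated try/except-around-read pattern above could be factored into a small helper; a hedged sketch, not part of the actual diff:
# Hypothetical convenience wrapper; mirrors the repeated pattern above.
def read_or_none(zkhandler, path):
    try:
        return zkhandler.read(path)
    except Exception:
        return None

# e.g. domain_profile = read_or_none(zkhandler, '/domains/{}/profile'.format(uuid))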
@ -409,21 +438,18 @@ def getDomainControllers(parsed_xml):
#
# Verify node is valid in cluster
#
def verifyNode(zk_conn, node):
if zkhandler.exists(zk_conn, '/nodes/{}'.format(node)):
return True
else:
return False
def verifyNode(zkhandler, node):
return zkhandler.exists('/nodes/{}'.format(node))
#
# Get the primary coordinator node
#
def getPrimaryNode(zk_conn):
def getPrimaryNode(zkhandler):
failcount = 0
while True:
try:
primary_node = zkhandler.readdata(zk_conn, '/primary_node')
primary_node = zkhandler.read('/config/primary_node')
except Exception:
primary_node = 'none'
@ -444,10 +470,10 @@ def getPrimaryNode(zk_conn):
#
# Find a migration target
#
def findTargetNode(zk_conn, dom_uuid):
def findTargetNode(zkhandler, dom_uuid):
# Determine VM node limits; set config value if read fails
try:
node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(dom_uuid)).split(',')
node_limit = zkhandler.read('/domains/{}/node_limit'.format(dom_uuid)).split(',')
if not any(node_limit):
node_limit = None
except Exception:
@ -455,39 +481,42 @@ def findTargetNode(zk_conn, dom_uuid):
# Determine VM search field or use default; set config value if read fails
try:
search_field = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(dom_uuid))
search_field = zkhandler.read('/domains/{}/node_selector'.format(dom_uuid))
except Exception:
search_field = 'mem'
search_field = None
# If our search field is invalid, use the default
if search_field is None or search_field == 'None':
search_field = zkhandler.read('/config/migration_target_selector')
# Execute the search
if search_field == 'mem':
return findTargetNodeMem(zk_conn, node_limit, dom_uuid)
return findTargetNodeMem(zkhandler, node_limit, dom_uuid)
if search_field == 'load':
return findTargetNodeLoad(zk_conn, node_limit, dom_uuid)
return findTargetNodeLoad(zkhandler, node_limit, dom_uuid)
if search_field == 'vcpus':
return findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid)
return findTargetNodeVCPUs(zkhandler, node_limit, dom_uuid)
if search_field == 'vms':
return findTargetNodeVMs(zk_conn, node_limit, dom_uuid)
return findTargetNodeVMs(zkhandler, node_limit, dom_uuid)
# Nothing was found
return None
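The selector dispatch above is a chain of if statements; an equivalent table-driven sketch (illustrative only, not part of the diff) would be:
# Hypothetical alternative dispatch; the finder functions are defined below.
TARGET_FINDERS = {
    'mem': findTargetNodeMem,
    'load': findTargetNodeLoad,
    'vcpus': findTargetNodeVCPUs,
    'vms': findTargetNodeVMs,
}

def find_target_node_alt(zkhandler, node_limit, dom_uuid, search_field):
    finder = TARGET_FINDERS.get(search_field)
    return finder(zkhandler, node_limit, dom_uuid) if finder else None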
#
# Get the list of valid target nodes
def getNodes(zk_conn, node_limit, dom_uuid):
#
def getNodes(zkhandler, node_limit, dom_uuid):
valid_node_list = []
full_node_list = zkhandler.listchildren(zk_conn, '/nodes')
try:
current_node = zkhandler.readdata(zk_conn, '/domains/{}/node'.format(dom_uuid))
except kazoo.exceptions.NoNodeError:
current_node = None
full_node_list = zkhandler.children('/nodes')
current_node = zkhandler.read('/domains/{}/node'.format(dom_uuid))
for node in full_node_list:
if node_limit and node not in node_limit:
continue
daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node))
daemon_state = zkhandler.read('/nodes/{}/daemonstate'.format(node))
domain_state = zkhandler.read('/nodes/{}/domainstate'.format(node))
if node == current_node:
continue
@ -500,16 +529,18 @@ def getNodes(zk_conn, node_limit, dom_uuid):
return valid_node_list
#
# via free memory (relative to allocated memory)
def findTargetNodeMem(zk_conn, node_limit, dom_uuid):
#
def findTargetNodeMem(zkhandler, node_limit, dom_uuid):
most_provfree = 0
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
memprov = int(zkhandler.readdata(zk_conn, '/nodes/{}/memprov'.format(node)))
memused = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node)))
memfree = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(node)))
memprov = int(zkhandler.read('/nodes/{}/memprov'.format(node)))
memused = int(zkhandler.read('/nodes/{}/memused'.format(node)))
memfree = int(zkhandler.read('/nodes/{}/memfree'.format(node)))
memtotal = memused + memfree
provfree = memtotal - memprov
@ -520,14 +551,16 @@ def findTargetNodeMem(zk_conn, node_limit, dom_uuid):
return target_node
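A worked example of the provisioned-free metric computed above, with illustrative values in MB:
# Illustrative numbers only.
memused, memfree, memprov = 8192, 24576, 20480
memtotal = memused + memfree   # 32768
provfree = memtotal - memprov  # 12288; the node with the largest provfree is chosen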
#
# via load average
def findTargetNodeLoad(zk_conn, node_limit, dom_uuid):
#
def findTargetNodeLoad(zkhandler, node_limit, dom_uuid):
least_load = 9999.0
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
load = float(zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node)))
load = float(zkhandler.read('/nodes/{}/cpuload'.format(node)))
if load < least_load:
least_load = load
@ -536,14 +569,16 @@ def findTargetNodeLoad(zk_conn, node_limit, dom_uuid):
return target_node
#
# via total vCPUs
def findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid):
#
def findTargetNodeVCPUs(zkhandler, node_limit, dom_uuid):
least_vcpus = 9999
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
vcpus = int(zkhandler.readdata(zk_conn, '/nodes/{}/vcpualloc'.format(node)))
vcpus = int(zkhandler.read('/nodes/{}/vcpualloc'.format(node)))
if vcpus < least_vcpus:
least_vcpus = vcpus
@ -552,14 +587,16 @@ def findTargetNodeVCPUs(zk_conn, node_limit, dom_uuid):
return target_node
#
# via total VMs
def findTargetNodeVMs(zk_conn, node_limit, dom_uuid):
#
def findTargetNodeVMs(zkhandler, node_limit, dom_uuid):
least_vms = 9999
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
node_list = getNodes(zkhandler, node_limit, dom_uuid)
for node in node_list:
vms = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node)))
vms = int(zkhandler.read('/nodes/{}/domainscount'.format(node)))
if vms < least_vms:
least_vms = vms
@ -568,7 +605,9 @@ def findTargetNodeVMs(zk_conn, node_limit, dom_uuid):
return target_node
# Connect to the primary host and run a command
#
# Connect to the primary node and run a command
#
def runRemoteCommand(node, command, become=False):
import paramiko
import hashlib
@ -598,3 +637,47 @@ def runRemoteCommand(node, command, become=False):
ssh_client.connect(node)
stdin, stdout, stderr = ssh_client.exec_command(command)
return stdout.read().decode('ascii').rstrip(), stderr.read().decode('ascii').rstrip()
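A minimal invocation sketch for the remote-command helper; the node name and command are placeholders:
# Illustrative call; 'pvchv1' and the command are placeholders.
stdout, stderr = runRemoteCommand('pvchv1', 'systemctl is-active pvcnoded', become=True)
print(stdout or stderr)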
#
# Reload the firewall rules of the system
#
def reload_firewall_rules(rules_file, logger=None):
if logger is not None:
logger.out('Reloading firewall configuration', state='o')
retcode, stdout, stderr = run_os_command('/usr/sbin/nft -f {}'.format(rules_file))
if retcode != 0 and logger is not None:
logger.out('Failed to reload configuration: {}'.format(stderr), state='e')
#
# Create an IP address
#
def createIPAddress(ipaddr, cidrnetmask, dev):
run_os_command(
'ip address add {}/{} dev {}'.format(
ipaddr,
cidrnetmask,
dev
)
)
run_os_command(
'arping -P -U -W 0.02 -c 2 -i {dev} -S {ip} {ip}'.format(
dev=dev,
ip=ipaddr
)
)
#
# Remove an IP address
#
def removeIPAddress(ipaddr, cidrnetmask, dev):
run_os_command(
'ip address delete {}/{} dev {}'.format(
ipaddr,
cidrnetmask,
dev
)
)
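The arping after the address add broadcasts gratuitous ARP so neighbours update their caches promptly. A usage sketch of the paired helpers, with illustrative address, prefix, and device:
# Illustrative values; vmbr100 is a placeholder bridge device.
createIPAddress('10.100.0.10', 24, 'vmbr100')
removeIPAddress('10.100.0.10', 24, 'vmbr100')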


@ -72,7 +72,7 @@ class Logger(object):
if self.config['file_logging']:
self.logfile = self.config['log_directory'] + '/pvc.log'
# We open the logfile for the duration of our session, but have a hup function
self.writer = open(self.logfile, 'a', buffering=0)
self.writer = open(self.logfile, 'a', buffering=1)
self.last_colour = ''
self.last_prompt = ''


@ -21,28 +21,24 @@
import re
from kazoo.exceptions import NoNodeError
import daemon_lib.zkhandler as zkhandler
#
# Cluster search functions
#
def getClusterNetworkList(zk_conn):
def getClusterNetworkList(zkhandler):
# Get a list of VNIs by listing the children of /networks
vni_list = zkhandler.listchildren(zk_conn, '/networks')
vni_list = zkhandler.children('/networks')
description_list = []
# For each VNI, get the corresponding description from the data
for vni in vni_list:
description_list.append(zkhandler.readdata(zk_conn, '/networks/{}'.format(vni)))
description_list.append(zkhandler.read('/networks/{}'.format(vni)))
return vni_list, description_list
def searchClusterByVNI(zk_conn, vni):
def searchClusterByVNI(zkhandler, vni):
try:
# Get the lists
vni_list, description_list = getClusterNetworkList(zk_conn)
vni_list, description_list = getClusterNetworkList(zkhandler)
# We're looking for UUID, so find that element ID
index = vni_list.index(vni)
# Get the name_list element at that index
@ -54,10 +50,10 @@ def searchClusterByVNI(zk_conn, vni):
return description
def searchClusterByDescription(zk_conn, description):
def searchClusterByDescription(zkhandler, description):
try:
# Get the lists
vni_list, description_list = getClusterNetworkList(zk_conn)
vni_list, description_list = getClusterNetworkList(zkhandler)
# We're looking for name, so find that element ID
index = description_list.index(description)
# Get the uuid_list element at that index
@ -69,43 +65,43 @@ def searchClusterByDescription(zk_conn, description):
return vni
def getNetworkVNI(zk_conn, network):
def getNetworkVNI(zkhandler, network):
# Validate and obtain alternate passed value
if network.isdigit():
net_description = searchClusterByVNI(zk_conn, network)
net_vni = searchClusterByDescription(zk_conn, net_description)
net_description = searchClusterByVNI(zkhandler, network)
net_vni = searchClusterByDescription(zkhandler, net_description)
else:
net_vni = searchClusterByDescription(zk_conn, network)
net_description = searchClusterByVNI(zk_conn, net_vni)
net_vni = searchClusterByDescription(zkhandler, network)
net_description = searchClusterByVNI(zkhandler, net_vni)
return net_vni
def getNetworkDescription(zk_conn, network):
def getNetworkDescription(zkhandler, network):
# Validate and obtain alternate passed value
if network.isdigit():
net_description = searchClusterByVNI(zk_conn, network)
net_vni = searchClusterByDescription(zk_conn, net_description)
net_description = searchClusterByVNI(zkhandler, network)
net_vni = searchClusterByDescription(zkhandler, net_description)
else:
net_vni = searchClusterByDescription(zk_conn, network)
net_description = searchClusterByVNI(zk_conn, net_vni)
net_vni = searchClusterByDescription(zkhandler, network)
net_description = searchClusterByVNI(zkhandler, net_vni)
return net_description
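Either identifier form resolves through the two search functions above; a hedged sketch with placeholder values:
# Illustrative: a network with VNI 100 described as 'upstream-net'.
vni_a = getNetworkVNI(zkhandler, '100')           # numeric form
vni_b = getNetworkVNI(zkhandler, 'upstream-net')  # description form
assert vni_a == vni_b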
def getNetworkDHCPLeases(zk_conn, vni):
def getNetworkDHCPLeases(zkhandler, vni):
# Get a list of DHCP leases by listing the children of /networks/<vni>/dhcp4_leases
dhcp4_leases = zkhandler.listchildren(zk_conn, '/networks/{}/dhcp4_leases'.format(vni))
dhcp4_leases = zkhandler.children('/networks/{}/dhcp4_leases'.format(vni))
return sorted(dhcp4_leases)
def getNetworkDHCPReservations(zk_conn, vni):
def getNetworkDHCPReservations(zkhandler, vni):
# Get a list of DHCP reservations by listing the children of /networks/<vni>/dhcp4_reservations
dhcp4_reservations = zkhandler.listchildren(zk_conn, '/networks/{}/dhcp4_reservations'.format(vni))
dhcp4_reservations = zkhandler.children('/networks/{}/dhcp4_reservations'.format(vni))
return sorted(dhcp4_reservations)
def getNetworkACLs(zk_conn, vni, _direction):
def getNetworkACLs(zkhandler, vni, _direction):
# Get the (sorted) list of active ACLs
if _direction == 'both':
directions = ['in', 'out']
@ -114,32 +110,32 @@ def getNetworkACLs(zk_conn, vni, _direction):
full_acl_list = []
for direction in directions:
unordered_acl_list = zkhandler.listchildren(zk_conn, '/networks/{}/firewall_rules/{}'.format(vni, direction))
unordered_acl_list = zkhandler.children('/networks/{}/firewall_rules/{}'.format(vni, direction))
ordered_acls = dict()
for acl in unordered_acl_list:
order = zkhandler.readdata(zk_conn, '/networks/{}/firewall_rules/{}/{}/order'.format(vni, direction, acl))
order = zkhandler.read('/networks/{}/firewall_rules/{}/{}/order'.format(vni, direction, acl))
ordered_acls[order] = acl
for order in sorted(ordered_acls.keys()):
rule = zkhandler.readdata(zk_conn, '/networks/{}/firewall_rules/{}/{}/rule'.format(vni, direction, acl))
rule = zkhandler.read('/networks/{}/firewall_rules/{}/{}/rule'.format(vni, direction, ordered_acls[order]))
full_acl_list.append({'direction': direction, 'order': int(order), 'description': ordered_acls[order], 'rule': rule})
return full_acl_list
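One subtlety: the order values read back from Zookeeper are strings, so a plain sorted() compares lexicographically and would place '10' before '2'. A minimal sketch of a numeric sort, assuming the stored orders are always integer strings:
# Illustrative data; keys are string order values as stored in ZK.
ordered_acls = {'2': 'allow-dns', '10': 'deny-all'}
for order in sorted(ordered_acls.keys(), key=int):
    print(order, ordered_acls[order])  # prints 2 first, then 10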
def getNetworkInformation(zk_conn, vni):
description = zkhandler.readdata(zk_conn, '/networks/{}'.format(vni))
nettype = zkhandler.readdata(zk_conn, '/networks/{}/nettype'.format(vni))
domain = zkhandler.readdata(zk_conn, '/networks/{}/domain'.format(vni))
name_servers = zkhandler.readdata(zk_conn, '/networks/{}/name_servers'.format(vni))
ip6_network = zkhandler.readdata(zk_conn, '/networks/{}/ip6_network'.format(vni))
ip6_gateway = zkhandler.readdata(zk_conn, '/networks/{}/ip6_gateway'.format(vni))
dhcp6_flag = zkhandler.readdata(zk_conn, '/networks/{}/dhcp6_flag'.format(vni))
ip4_network = zkhandler.readdata(zk_conn, '/networks/{}/ip4_network'.format(vni))
ip4_gateway = zkhandler.readdata(zk_conn, '/networks/{}/ip4_gateway'.format(vni))
dhcp4_flag = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_flag'.format(vni))
dhcp4_start = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_start'.format(vni))
dhcp4_end = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_end'.format(vni))
def getNetworkInformation(zkhandler, vni):
description = zkhandler.read('/networks/{}'.format(vni))
nettype = zkhandler.read('/networks/{}/nettype'.format(vni))
domain = zkhandler.read('/networks/{}/domain'.format(vni))
name_servers = zkhandler.read('/networks/{}/name_servers'.format(vni))
ip6_network = zkhandler.read('/networks/{}/ip6_network'.format(vni))
ip6_gateway = zkhandler.read('/networks/{}/ip6_gateway'.format(vni))
dhcp6_flag = zkhandler.read('/networks/{}/dhcp6_flag'.format(vni))
ip4_network = zkhandler.read('/networks/{}/ip4_network'.format(vni))
ip4_gateway = zkhandler.read('/networks/{}/ip4_gateway'.format(vni))
dhcp4_flag = zkhandler.read('/networks/{}/dhcp4_flag'.format(vni))
dhcp4_start = zkhandler.read('/networks/{}/dhcp4_start'.format(vni))
dhcp4_end = zkhandler.read('/networks/{}/dhcp4_end'.format(vni))
# Construct a data structure to represent the data
network_information = {
@ -164,19 +160,19 @@ def getNetworkInformation(zk_conn, vni):
return network_information
def getDHCPLeaseInformation(zk_conn, vni, mac_address):
def getDHCPLeaseInformation(zkhandler, vni, mac_address):
# Check whether this is a dynamic or static lease
try:
zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_leases/{}'.format(vni, mac_address))
if zkhandler.exists('/networks/{}/dhcp4_leases/{}'.format(vni, mac_address)):
type_key = 'dhcp4_leases'
except NoNodeError:
zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_reservations/{}'.format(vni, mac_address))
elif zkhandler.exists('/networks/{}/dhcp4_reservations/{}'.format(vni, mac_address)):
type_key = 'dhcp4_reservations'
else:
return {}
hostname = zkhandler.readdata(zk_conn, '/networks/{}/{}/{}/hostname'.format(vni, type_key, mac_address))
ip4_address = zkhandler.readdata(zk_conn, '/networks/{}/{}/{}/ipaddr'.format(vni, type_key, mac_address))
hostname = zkhandler.read('/networks/{}/{}/{}/hostname'.format(vni, type_key, mac_address))
ip4_address = zkhandler.read('/networks/{}/{}/{}/ipaddr'.format(vni, type_key, mac_address))
if type_key == 'dhcp4_leases':
timestamp = zkhandler.readdata(zk_conn, '/networks/{}/{}/{}/expiry'.format(vni, type_key, mac_address))
timestamp = zkhandler.read('/networks/{}/{}/{}/expiry'.format(vni, type_key, mac_address))
else:
timestamp = 'static'
@ -190,9 +186,9 @@ def getDHCPLeaseInformation(zk_conn, vni, mac_address):
return lease_information
def getACLInformation(zk_conn, vni, direction, description):
order = zkhandler.readdata(zk_conn, '/networks/{}/firewall_rules/{}/{}/order'.format(vni, direction, description))
rule = zkhandler.readdata(zk_conn, '/networks/{}/firewall_rules/{}/{}/rule'.format(vni, direction, description))
def getACLInformation(zkhandler, vni, direction, description):
order = zkhandler.read('/networks/{}/firewall_rules/{}/{}/order'.format(vni, direction, description))
rule = zkhandler.read('/networks/{}/firewall_rules/{}/{}/rule'.format(vni, direction, description))
# Construct a data structure to represent the data
acl_information = {
@ -235,7 +231,7 @@ def isValidIP(ipaddr):
#
# Direct functions
#
def add_network(zk_conn, vni, description, nettype,
def add_network(zkhandler, vni, description, nettype,
domain, name_servers, ip4_network, ip4_gateway, ip6_network, ip6_gateway,
dhcp4_flag, dhcp4_start, dhcp4_end):
# Ensure start and end DHCP ranges are set if the flag is set
@ -243,10 +239,10 @@ def add_network(zk_conn, vni, description, nettype,
return False, 'ERROR: DHCPv4 start and end addresses are required for a DHCPv4-enabled network.'
# Check if a network with this VNI or description already exists
if zkhandler.exists(zk_conn, '/networks/{}'.format(vni)):
if zkhandler.exists('/networks/{}'.format(vni)):
return False, 'ERROR: A network with VNI "{}" already exists!'.format(vni)
for network in zkhandler.listchildren(zk_conn, '/networks'):
network_description = zkhandler.readdata(zk_conn, '/networks/{}'.format(network))
for network in zkhandler.children('/networks'):
network_description = zkhandler.read('/networks/{}'.format(network))
if network_description == description:
return False, 'ERROR: A network with description "{}" already exists!'.format(description)
@ -263,87 +259,87 @@ def add_network(zk_conn, vni, description, nettype,
domain = '{}.local'.format(description)
# Add the new network to Zookeeper
zkhandler.writedata(zk_conn, {
'/networks/{}'.format(vni): description,
'/networks/{}/nettype'.format(vni): nettype,
'/networks/{}/domain'.format(vni): domain,
'/networks/{}/name_servers'.format(vni): name_servers,
'/networks/{}/ip6_network'.format(vni): ip6_network,
'/networks/{}/ip6_gateway'.format(vni): ip6_gateway,
'/networks/{}/dhcp6_flag'.format(vni): dhcp6_flag,
'/networks/{}/ip4_network'.format(vni): ip4_network,
'/networks/{}/ip4_gateway'.format(vni): ip4_gateway,
'/networks/{}/dhcp4_flag'.format(vni): dhcp4_flag,
'/networks/{}/dhcp4_start'.format(vni): dhcp4_start,
'/networks/{}/dhcp4_end'.format(vni): dhcp4_end,
'/networks/{}/dhcp4_leases'.format(vni): '',
'/networks/{}/dhcp4_reservations'.format(vni): '',
'/networks/{}/firewall_rules'.format(vni): '',
'/networks/{}/firewall_rules/in'.format(vni): '',
'/networks/{}/firewall_rules/out'.format(vni): ''
})
zkhandler.write([
('/networks/{}'.format(vni), description),
('/networks/{}/nettype'.format(vni), nettype),
('/networks/{}/domain'.format(vni), domain),
('/networks/{}/name_servers'.format(vni), name_servers),
('/networks/{}/ip6_network'.format(vni), ip6_network),
('/networks/{}/ip6_gateway'.format(vni), ip6_gateway),
('/networks/{}/dhcp6_flag'.format(vni), dhcp6_flag),
('/networks/{}/ip4_network'.format(vni), ip4_network),
('/networks/{}/ip4_gateway'.format(vni), ip4_gateway),
('/networks/{}/dhcp4_flag'.format(vni), dhcp4_flag),
('/networks/{}/dhcp4_start'.format(vni), dhcp4_start),
('/networks/{}/dhcp4_end'.format(vni), dhcp4_end),
('/networks/{}/dhcp4_leases'.format(vni), ''),
('/networks/{}/dhcp4_reservations'.format(vni), ''),
('/networks/{}/firewall_rules'.format(vni), ''),
('/networks/{}/firewall_rules/in'.format(vni), ''),
('/networks/{}/firewall_rules/out'.format(vni), '')
])
return True, 'Network "{}" added successfully!'.format(description)
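A hedged invocation sketch matching the parameter order of add_network() above; every value is illustrative:
# Illustrative values throughout.
ok, msg = add_network(zkhandler, '100', 'upstream-net', 'managed',
                      'net.local', '10.100.0.1', '10.100.0.0/24', '10.100.0.1',
                      '', '', False, None, None)
print(msg)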
def modify_network(zk_conn, vni, description=None, domain=None, name_servers=None,
def modify_network(zkhandler, vni, description=None, domain=None, name_servers=None,
ip4_network=None, ip4_gateway=None, ip6_network=None, ip6_gateway=None,
dhcp4_flag=None, dhcp4_start=None, dhcp4_end=None):
# Add the modified parameters to Zookeeper
zk_data = dict()
update_data = list()
if description is not None:
zk_data.update({'/networks/{}'.format(vni): description})
update_data.append(('/networks/{}'.format(vni), description))
if domain is not None:
zk_data.update({'/networks/{}/domain'.format(vni): domain})
update_data.append(('/networks/{}/domain'.format(vni), domain))
if name_servers is not None:
zk_data.update({'/networks/{}/name_servers'.format(vni): name_servers})
update_data.append(('/networks/{}/name_servers'.format(vni), name_servers))
if ip4_network is not None:
zk_data.update({'/networks/{}/ip4_network'.format(vni): ip4_network})
update_data.append(('/networks/{}/ip4_network'.format(vni), ip4_network))
if ip4_gateway is not None:
zk_data.update({'/networks/{}/ip4_gateway'.format(vni): ip4_gateway})
update_data.append(('/networks/{}/ip4_gateway'.format(vni), ip4_gateway))
if ip6_network is not None:
zk_data.update({'/networks/{}/ip6_network'.format(vni): ip6_network})
update_data.append(('/networks/{}/ip6_network'.format(vni), ip6_network))
if ip6_network:
zk_data.update({'/networks/{}/dhcp6_flag'.format(vni): 'True'})
update_data.append(('/networks/{}/dhcp6_flag'.format(vni), 'True'))
else:
zk_data.update({'/networks/{}/dhcp6_flag'.format(vni): 'False'})
update_data.append(('/networks/{}/dhcp6_flag'.format(vni), 'False'))
if ip6_gateway is not None:
zk_data.update({'/networks/{}/ip6_gateway'.format(vni): ip6_gateway})
update_data.append(('/networks/{}/ip6_gateway'.format(vni), ip6_gateway))
else:
# If we're changing the network, but don't also specify the gateway,
# generate a new one automatically
if ip6_network:
ip6_netpart, ip6_maskpart = ip6_network.split('/')
ip6_gateway = '{}1'.format(ip6_netpart)
zk_data.update({'/networks/{}/ip6_gateway'.format(vni): ip6_gateway})
update_data.append(('/networks/{}/ip6_gateway'.format(vni), ip6_gateway))
if dhcp4_flag is not None:
zk_data.update({'/networks/{}/dhcp4_flag'.format(vni): dhcp4_flag})
update_data.append(('/networks/{}/dhcp4_flag'.format(vni), dhcp4_flag))
if dhcp4_start is not None:
zk_data.update({'/networks/{}/dhcp4_start'.format(vni): dhcp4_start})
update_data.append(('/networks/{}/dhcp4_start'.format(vni), dhcp4_start))
if dhcp4_end is not None:
zk_data.update({'/networks/{}/dhcp4_end'.format(vni): dhcp4_end})
update_data.append(('/networks/{}/dhcp4_end'.format(vni), dhcp4_end))
zkhandler.writedata(zk_conn, zk_data)
zkhandler.write(update_data)
return True, 'Network "{}" modified successfully!'.format(vni)
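The new handler batches updates as a list of (path, value) tuples rather than a dict; a standalone sketch with illustrative paths:
# Illustrative paths and values.
zkhandler.write([
    ('/networks/100', 'renamed-net'),
    ('/networks/100/domain', 'renamed.local'),
])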
def remove_network(zk_conn, network):
def remove_network(zkhandler, network):
# Validate and obtain alternate passed value
vni = getNetworkVNI(zk_conn, network)
description = getNetworkDescription(zk_conn, network)
vni = getNetworkVNI(zkhandler, network)
description = getNetworkDescription(zkhandler, network)
if not vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
# Delete the configuration
zkhandler.deletekey(zk_conn, '/networks/{}'.format(vni))
zkhandler.delete('/networks/{}'.format(vni))
return True, 'Network "{}" removed successfully!'.format(description)
def add_dhcp_reservation(zk_conn, network, ipaddress, macaddress, hostname):
def add_dhcp_reservation(zkhandler, network, ipaddress, macaddress, hostname):
# Validate and obtain standard passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
@ -356,45 +352,45 @@ def add_dhcp_reservation(zk_conn, network, ipaddress, macaddress, hostname):
if not isValidIP(ipaddress):
return False, 'ERROR: IP address "{}" is not valid!'.format(ipaddress)
if zkhandler.exists(zk_conn, '/networks/{}/dhcp4_reservations/{}'.format(net_vni, macaddress)):
if zkhandler.exists('/networks/{}/dhcp4_reservations/{}'.format(net_vni, macaddress)):
return False, 'ERROR: A reservation with MAC "{}" already exists!'.format(macaddress)
# Add the new static lease to ZK
try:
zkhandler.writedata(zk_conn, {
'/networks/{}/dhcp4_reservations/{}'.format(net_vni, macaddress): 'static',
'/networks/{}/dhcp4_reservations/{}/hostname'.format(net_vni, macaddress): hostname,
'/networks/{}/dhcp4_reservations/{}/ipaddr'.format(net_vni, macaddress): ipaddress
})
zkhandler.write([
('/networks/{}/dhcp4_reservations/{}'.format(net_vni, macaddress), 'static'),
('/networks/{}/dhcp4_reservations/{}/hostname'.format(net_vni, macaddress), hostname),
('/networks/{}/dhcp4_reservations/{}/ipaddr'.format(net_vni, macaddress), ipaddress)
])
except Exception as e:
return False, 'ERROR: Failed to write to Zookeeper! Exception: "{}".'.format(e)
return True, 'DHCP reservation "{}" added successfully!'.format(macaddress)
def remove_dhcp_reservation(zk_conn, network, reservation):
def remove_dhcp_reservation(zkhandler, network, reservation):
# Validate and obtain standard passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
match_description = ''
# Check if the reservation matches a static reservation description, a mac, or an IP address currently in the database
dhcp4_reservations_list = getNetworkDHCPReservations(zk_conn, net_vni)
dhcp4_reservations_list = getNetworkDHCPReservations(zkhandler, net_vni)
for macaddr in dhcp4_reservations_list:
hostname = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_reservations/{}/hostname'.format(net_vni, macaddr))
ipaddress = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_reservations/{}/ipaddr'.format(net_vni, macaddr))
hostname = zkhandler.read('/networks/{}/dhcp4_reservations/{}/hostname'.format(net_vni, macaddr))
ipaddress = zkhandler.read('/networks/{}/dhcp4_reservations/{}/ipaddr'.format(net_vni, macaddr))
if reservation == macaddr or reservation == hostname or reservation == ipaddress:
match_description = macaddr
lease_type_zk = 'reservations'
lease_type_human = 'static reservation'
# Check if the reservation matches a dynamic reservation description, a mac, or an IP address currently in the database
dhcp4_leases_list = getNetworkDHCPLeases(zk_conn, net_vni)
dhcp4_leases_list = getNetworkDHCPLeases(zkhandler, net_vni)
for macaddr in dhcp4_leases_list:
hostname = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_leases/{}/hostname'.format(net_vni, macaddr))
ipaddress = zkhandler.readdata(zk_conn, '/networks/{}/dhcp4_leases/{}/ipaddr'.format(net_vni, macaddr))
hostname = zkhandler.read('/networks/{}/dhcp4_leases/{}/hostname'.format(net_vni, macaddr))
ipaddress = zkhandler.read('/networks/{}/dhcp4_leases/{}/ipaddr'.format(net_vni, macaddr))
if reservation == macaddr or reservation == hostname or reservation == ipaddress:
match_description = macaddr
lease_type_zk = 'leases'
@ -404,23 +400,20 @@ def remove_dhcp_reservation(zk_conn, network, reservation):
return False, 'ERROR: No DHCP reservation or lease exists matching "{}"!'.format(reservation)
# Remove the entry from zookeeper
try:
zkhandler.deletekey(zk_conn, '/networks/{}/dhcp4_{}/{}'.format(net_vni, lease_type_zk, match_description))
except Exception:
return False, 'ERROR: Failed to write to Zookeeper!'
zkhandler.delete('/networks/{}/dhcp4_{}/{}'.format(net_vni, lease_type_zk, match_description))
return True, 'DHCP {} "{}" removed successfully!'.format(lease_type_human, match_description)
def add_acl(zk_conn, network, direction, description, rule, order):
def add_acl(zkhandler, network, direction, description, rule, order):
# Validate and obtain standard passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
# Check if the ACL matches a description currently in the database
match_description = ''
full_acl_list = getNetworkACLs(zk_conn, net_vni, 'both')
full_acl_list = getNetworkACLs(zkhandler, net_vni, 'both')
for acl in full_acl_list:
if acl['description'] == description:
match_description = acl['description']
@ -435,7 +428,7 @@ def add_acl(zk_conn, network, direction, description, rule, order):
direction = "out"
# Handle reordering
full_acl_list = getNetworkACLs(zk_conn, net_vni, direction)
full_acl_list = getNetworkACLs(zkhandler, net_vni, direction)
acl_list_length = len(full_acl_list)
# Set order to len
if not order or int(order) > acl_list_length:
@ -448,44 +441,37 @@ def add_acl(zk_conn, network, direction, description, rule, order):
full_acl_list.insert(order, {'direction': direction, 'description': description, 'rule': rule})
# Update the existing ordering
updated_orders = dict()
for idx, acl in enumerate(full_acl_list):
if acl['description'] == description:
continue
updated_orders[
'/networks/{}/firewall_rules/{}/{}/order'.format(net_vni, direction, acl['description'])
] = idx
if updated_orders:
try:
zkhandler.writedata(zk_conn, updated_orders)
except Exception as e:
return False, 'ERROR: Failed to write to Zookeeper! Exception: "{}".'.format(e)
if idx == acl['order']:
continue
else:
zkhandler.write([
('/networks/{}/firewall_rules/{}/{}/order'.format(net_vni, direction, acl['description']), idx)
])
# Add the new rule
try:
zkhandler.writedata(zk_conn, {
'/networks/{}/firewall_rules/{}/{}'.format(net_vni, direction, description): '',
'/networks/{}/firewall_rules/{}/{}/order'.format(net_vni, direction, description): order,
'/networks/{}/firewall_rules/{}/{}/rule'.format(net_vni, direction, description): rule
})
except Exception as e:
return False, 'ERROR: Failed to write to Zookeeper! Exception: "{}".'.format(e)
zkhandler.write([
('/networks/{}/firewall_rules/{}/{}'.format(net_vni, direction, description), ''),
('/networks/{}/firewall_rules/{}/{}/order'.format(net_vni, direction, description), order),
('/networks/{}/firewall_rules/{}/{}/rule'.format(net_vni, direction, description), rule)
])
return True, 'Firewall rule "{}" added successfully!'.format(description)
def remove_acl(zk_conn, network, description):
def remove_acl(zkhandler, network, description):
# Validate and obtain standard passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
match_description = ''
# Check if the ACL matches a description currently in the database
acl_list = getNetworkACLs(zk_conn, net_vni, 'both')
acl_list = getNetworkACLs(zkhandler, net_vni, 'both')
for acl in acl_list:
if acl['description'] == description:
match_description = acl['description']
@ -496,76 +482,75 @@ def remove_acl(zk_conn, network, description):
# Remove the entry from zookeeper
try:
zkhandler.deletekey(zk_conn, '/networks/{}/firewall_rules/{}/{}'.format(net_vni, match_direction, match_description))
zkhandler.delete('/networks/{}/firewall_rules/{}/{}'.format(net_vni, match_direction, match_description))
except Exception as e:
return False, 'ERROR: Failed to write to Zookeeper! Exception: "{}".'.format(e)
# Update the existing ordering
updated_acl_list = getNetworkACLs(zk_conn, net_vni, match_direction)
updated_orders = dict()
updated_acl_list = getNetworkACLs(zkhandler, net_vni, match_direction)
for idx, acl in enumerate(updated_acl_list):
updated_orders[
'/networks/{}/firewall_rules/{}/{}/order'.format(net_vni, match_direction, acl['description'])
] = idx
if acl['description'] == description:
continue
if updated_orders:
try:
zkhandler.writedata(zk_conn, updated_orders)
except Exception as e:
return False, 'ERROR: Failed to write to Zookeeper! Exception: "{}".'.format(e)
if idx == acl['order']:
continue
else:
zkhandler.write([
('/networks/{}/firewall_rules/{}/{}/order'.format(net_vni, match_direction, acl['description']), idx)
])
return True, 'Firewall rule "{}" removed successfully!'.format(match_description)
def get_info(zk_conn, network):
def get_info(zkhandler, network):
# Validate and obtain alternate passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
network_information = getNetworkInformation(zk_conn, network)
network_information = getNetworkInformation(zkhandler, network)
if not network_information:
return False, 'ERROR: Could not get information about network "{}"'.format(network)
return True, network_information
def get_list(zk_conn, limit, is_fuzzy=True):
def get_list(zkhandler, limit, is_fuzzy=True):
net_list = []
full_net_list = zkhandler.listchildren(zk_conn, '/networks')
full_net_list = zkhandler.children('/networks')
for net in full_net_list:
description = zkhandler.readdata(zk_conn, '/networks/{}'.format(net))
description = zkhandler.read('/networks/{}'.format(net))
if limit:
try:
if not is_fuzzy:
limit = '^' + limit + '$'
if re.match(limit, net):
net_list.append(getNetworkInformation(zk_conn, net))
net_list.append(getNetworkInformation(zkhandler, net))
elif re.match(limit, description):
net_list.append(getNetworkInformation(zk_conn, net))
net_list.append(getNetworkInformation(zkhandler, net))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
net_list.append(getNetworkInformation(zk_conn, net))
net_list.append(getNetworkInformation(zkhandler, net))
return True, net_list
def get_list_dhcp(zk_conn, network, limit, only_static=False, is_fuzzy=True):
def get_list_dhcp(zkhandler, network, limit, only_static=False, is_fuzzy=True):
# Validate and obtain alternate passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
dhcp_list = []
if only_static:
full_dhcp_list = getNetworkDHCPReservations(zk_conn, net_vni)
full_dhcp_list = getNetworkDHCPReservations(zkhandler, net_vni)
else:
full_dhcp_list = getNetworkDHCPReservations(zk_conn, net_vni)
full_dhcp_list += getNetworkDHCPLeases(zk_conn, net_vni)
full_dhcp_list = getNetworkDHCPReservations(zkhandler, net_vni)
full_dhcp_list += getNetworkDHCPLeases(zkhandler, net_vni)
if limit:
try:
@ -591,14 +576,14 @@ def get_list_dhcp(zk_conn, network, limit, only_static=False, is_fuzzy=True):
valid_lease = True
if valid_lease:
dhcp_list.append(getDHCPLeaseInformation(zk_conn, net_vni, lease))
dhcp_list.append(getDHCPLeaseInformation(zkhandler, net_vni, lease))
return True, dhcp_list
def get_list_acl(zk_conn, network, limit, direction, is_fuzzy=True):
def get_list_acl(zkhandler, network, limit, direction, is_fuzzy=True):
# Validate and obtain alternate passed value
net_vni = getNetworkVNI(zk_conn, network)
net_vni = getNetworkVNI(zkhandler, network)
if not net_vni:
return False, 'ERROR: Could not find network "{}" in the cluster!'.format(network)
@ -611,7 +596,7 @@ def get_list_acl(zk_conn, network, limit, direction, is_fuzzy=True):
direction = "out"
acl_list = []
full_acl_list = getNetworkACLs(zk_conn, net_vni, direction)
full_acl_list = getNetworkACLs(zkhandler, net_vni, direction)
if limit:
try:


@ -22,31 +22,30 @@
import time
import re
import daemon_lib.zkhandler as zkhandler
import daemon_lib.common as common
def getNodeInformation(zk_conn, node_name):
def getNodeInformation(zkhandler, node_name):
"""
Gather information about a node from the Zookeeper database and return a dict() containing it.
"""
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
node_coordinator_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node_name))
node_domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node_name))
node_static_data = zkhandler.readdata(zk_conn, '/nodes/{}/staticdata'.format(node_name)).split()
node_daemon_state = zkhandler.read('/nodes/{}/daemonstate'.format(node_name))
node_coordinator_state = zkhandler.read('/nodes/{}/routerstate'.format(node_name))
node_domain_state = zkhandler.read('/nodes/{}/domainstate'.format(node_name))
node_static_data = zkhandler.read('/nodes/{}/staticdata'.format(node_name)).split()
node_cpu_count = int(node_static_data[0])
node_kernel = node_static_data[1]
node_os = node_static_data[2]
node_arch = node_static_data[3]
node_vcpu_allocated = int(zkhandler.readdata(zk_conn, 'nodes/{}/vcpualloc'.format(node_name)))
node_mem_total = int(zkhandler.readdata(zk_conn, '/nodes/{}/memtotal'.format(node_name)))
node_mem_allocated = int(zkhandler.readdata(zk_conn, '/nodes/{}/memalloc'.format(node_name)))
node_mem_provisioned = int(zkhandler.readdata(zk_conn, '/nodes/{}/memprov'.format(node_name)))
node_mem_used = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node_name)))
node_mem_free = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(node_name)))
node_load = float(zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node_name)))
node_domains_count = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node_name)))
node_running_domains = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split()
node_vcpu_allocated = int(zkhandler.read('/nodes/{}/vcpualloc'.format(node_name)))
node_mem_total = int(zkhandler.read('/nodes/{}/memtotal'.format(node_name)))
node_mem_allocated = int(zkhandler.read('/nodes/{}/memalloc'.format(node_name)))
node_mem_provisioned = int(zkhandler.read('/nodes/{}/memprov'.format(node_name)))
node_mem_used = int(zkhandler.read('/nodes/{}/memused'.format(node_name)))
node_mem_free = int(zkhandler.read('/nodes/{}/memfree'.format(node_name)))
node_load = float(zkhandler.read('/nodes/{}/cpuload'.format(node_name)))
node_domains_count = int(zkhandler.read('/nodes/{}/domainscount'.format(node_name)))
node_running_domains = zkhandler.read('/nodes/{}/runningdomains'.format(node_name)).split()
# Construct a data structure to represent the data
node_information = {
@ -79,118 +78,124 @@ def getNodeInformation(zk_conn, node_name):
#
# Direct Functions
#
def secondary_node(zk_conn, node):
def secondary_node(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Ensure node is a coordinator
daemon_mode = zkhandler.readdata(zk_conn, '/nodes/{}/daemonmode'.format(node))
daemon_mode = zkhandler.read('/nodes/{}/daemonmode'.format(node))
if daemon_mode == 'hypervisor':
return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)
# Ensure node is in run daemonstate
daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
daemon_state = zkhandler.read('/nodes/{}/daemonstate'.format(node))
if daemon_state != 'run':
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node))
if current_state == 'primary':
retmsg = 'Setting node {} in secondary router mode.'.format(node)
zkhandler.writedata(zk_conn, {
'/primary_node': 'none'
})
else:
return False, 'Node "{}" is already in secondary router mode.'.format(node)
return True, retmsg
def primary_node(zk_conn, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Ensure node is a coordinator
daemon_mode = zkhandler.readdata(zk_conn, '/nodes/{}/daemonmode'.format(node))
if daemon_mode == 'hypervisor':
return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)
# Ensure node is in run daemonstate
daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
if daemon_state != 'run':
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.readdata(zk_conn, '/nodes/{}/routerstate'.format(node))
current_state = zkhandler.read('/nodes/{}/routerstate'.format(node))
if current_state == 'secondary':
retmsg = 'Setting node {} in primary router mode.'.format(node)
zkhandler.writedata(zk_conn, {
'/primary_node': node
})
else:
return False, 'Node "{}" is already in primary router mode.'.format(node)
return True, 'Node "{}" is already in secondary router mode.'.format(node)
retmsg = 'Setting node {} in secondary router mode.'.format(node)
zkhandler.write([
('/config/primary_node', 'none')
])
return True, retmsg
def flush_node(zk_conn, node, wait=False):
def primary_node(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Ensure node is a coordinator
daemon_mode = zkhandler.read('/nodes/{}/daemonmode'.format(node))
if daemon_mode == 'hypervisor':
return False, 'ERROR: Cannot change router mode on non-coordinator node "{}"'.format(node)
# Ensure node is in run daemonstate
daemon_state = zkhandler.read('/nodes/{}/daemonstate'.format(node))
if daemon_state != 'run':
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.read('/nodes/{}/routerstate'.format(node))
if current_state == 'primary':
return True, 'Node "{}" is already in primary router mode.'.format(node)
retmsg = 'Setting node {} in primary router mode.'.format(node)
zkhandler.write([
('/config/primary_node', node)
])
return True, retmsg
def flush_node(zkhandler, node, wait=False):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
if zkhandler.read('/nodes/{}/domainstate'.format(node)) == 'flushed':
return True, 'Hypervisor {} is already flushed.'.format(node)
retmsg = 'Flushing hypervisor {} of running VMs.'.format(node)
# Add the new domain to Zookeeper
zkhandler.writedata(zk_conn, {
'/nodes/{}/domainstate'.format(node): 'flush'
})
zkhandler.write([
('/nodes/{}/domainstate'.format(node), 'flush')
])
if wait:
while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'flush':
while zkhandler.read('/nodes/{}/domainstate'.format(node)) == 'flush':
time.sleep(1)
retmsg = 'Flushed hypervisor {} of running VMs.'.format(node)
return True, retmsg
def ready_node(zk_conn, node, wait=False):
def ready_node(zkhandler, node, wait=False):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
if zkhandler.read('/nodes/{}/domainstate'.format(node)) == 'ready':
return True, 'Hypervisor {} is already ready.'.format(node)
retmsg = 'Restoring hypervisor {} to active service.'.format(node)
# Add the new domain to Zookeeper
zkhandler.writedata(zk_conn, {
'/nodes/{}/domainstate'.format(node): 'unflush'
})
zkhandler.write([
('/nodes/{}/domainstate'.format(node), 'unflush')
])
if wait:
while zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node)) == 'unflush':
while zkhandler.read('/nodes/{}/domainstate'.format(node)) == 'unflush':
time.sleep(1)
retmsg = 'Restored hypervisor {} to active service.'.format(node)
return True, retmsg
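With wait=True, both calls above poll the domainstate key once per second until the daemon reports completion; an illustrative round trip:
# Illustrative node name.
ok, msg = flush_node(zkhandler, 'pvchv1', wait=True)
print(msg)  # 'Flushed hypervisor pvchv1 of running VMs.'
ok, msg = ready_node(zkhandler, 'pvchv1', wait=True)
print(msg)  # 'Restored hypervisor pvchv1 to active service.'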
def get_info(zk_conn, node):
def get_info(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zk_conn, node):
if not common.verifyNode(zkhandler, node):
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(node)
# Get information about node in a pretty format
node_information = getNodeInformation(zk_conn, node)
node_information = getNodeInformation(zkhandler, node)
if not node_information:
return False, 'ERROR: Could not get information about node "{}".'.format(node)
return True, node_information
def get_list(zk_conn, limit, daemon_state=None, coordinator_state=None, domain_state=None, is_fuzzy=True):
def get_list(zkhandler, limit, daemon_state=None, coordinator_state=None, domain_state=None, is_fuzzy=True):
node_list = []
full_node_list = zkhandler.listchildren(zk_conn, '/nodes')
full_node_list = zkhandler.children('/nodes')
for node in full_node_list:
if limit:
@ -199,11 +204,11 @@ def get_list(zk_conn, limit, daemon_state=None, coordinator_state=None, domain_s
limit = '^' + limit + '$'
if re.match(limit, node):
node_list.append(getNodeInformation(zk_conn, node))
node_list.append(getNodeInformation(zkhandler, node))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
node_list.append(getNodeInformation(zk_conn, node))
node_list.append(getNodeInformation(zkhandler, node))
if daemon_state or coordinator_state or domain_state:
limited_node_list = []


@ -22,8 +22,8 @@
import time
import re
import lxml.objectify
import lxml.etree
import daemon_lib.zkhandler as zkhandler
import daemon_lib.common as common
import daemon_lib.ceph as ceph
@ -32,20 +32,20 @@ import daemon_lib.ceph as ceph
#
# Cluster search functions
#
def getClusterDomainList(zk_conn):
def getClusterDomainList(zkhandler):
# Get a list of UUIDs by listing the children of /domains
uuid_list = zkhandler.listchildren(zk_conn, '/domains')
uuid_list = zkhandler.children('/domains')
name_list = []
# For each UUID, get the corresponding name from the data
for uuid in uuid_list:
name_list.append(zkhandler.readdata(zk_conn, '/domains/%s' % uuid))
name_list.append(zkhandler.read('/domains/{}'.format(uuid)))
return uuid_list, name_list
def searchClusterByUUID(zk_conn, uuid):
def searchClusterByUUID(zkhandler, uuid):
try:
# Get the lists
uuid_list, name_list = getClusterDomainList(zk_conn)
uuid_list, name_list = getClusterDomainList(zkhandler)
# We're looking for UUID, so find that element ID
index = uuid_list.index(uuid)
# Get the name_list element at that index
@ -57,10 +57,10 @@ def searchClusterByUUID(zk_conn, uuid):
return name
def searchClusterByName(zk_conn, name):
def searchClusterByName(zkhandler, name):
try:
# Get the lists
uuid_list, name_list = getClusterDomainList(zk_conn)
uuid_list, name_list = getClusterDomainList(zkhandler)
# We're looking for name, so find that element ID
index = name_list.index(name)
# Get the uuid_list element at that index
@ -72,67 +72,83 @@ def searchClusterByName(zk_conn, name):
return uuid
def getDomainUUID(zk_conn, domain):
def getDomainUUID(zkhandler, domain):
# Validate that VM exists in cluster
if common.validateUUID(domain):
dom_name = searchClusterByUUID(zk_conn, domain)
dom_uuid = searchClusterByName(zk_conn, dom_name)
dom_name = searchClusterByUUID(zkhandler, domain)
dom_uuid = searchClusterByName(zkhandler, dom_name)
else:
dom_uuid = searchClusterByName(zk_conn, domain)
dom_name = searchClusterByUUID(zk_conn, dom_uuid)
dom_uuid = searchClusterByName(zkhandler, domain)
dom_name = searchClusterByUUID(zkhandler, dom_uuid)
return dom_uuid
def getDomainName(zk_conn, domain):
def getDomainName(zkhandler, domain):
# Validate that VM exists in cluster
if common.validateUUID(domain):
dom_name = searchClusterByUUID(zk_conn, domain)
dom_uuid = searchClusterByName(zk_conn, dom_name)
dom_name = searchClusterByUUID(zkhandler, domain)
dom_uuid = searchClusterByName(zkhandler, dom_name)
else:
dom_uuid = searchClusterByName(zk_conn, domain)
dom_name = searchClusterByUUID(zk_conn, dom_uuid)
dom_uuid = searchClusterByName(zkhandler, domain)
dom_name = searchClusterByUUID(zkhandler, dom_uuid)
return dom_name
#
# Helper functions
#
def change_state(zkhandler, dom_uuid, new_state):
lock = zkhandler.exclusivelock('/domains/{}/state'.format(dom_uuid))
with lock:
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), new_state)
])
# Wait for 1/2 second to allow state to flow to all nodes
time.sleep(0.5)
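The helper serializes all state writes behind an exclusive lock so concurrent callers cannot interleave; a hedged usage sketch:
# Illustrative: request a clean restart of a VM by UUID.
change_state(zkhandler, dom_uuid, 'restart')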
#
# Direct functions
#
def is_migrated(zk_conn, domain):
def is_migrated(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zk_conn, domain)
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
last_node = zkhandler.readdata(zk_conn, '/domains/{}/lastnode'.format(dom_uuid))
last_node = zkhandler.read('/domains/{}/lastnode'.format(dom_uuid))
if last_node:
return True
else:
return False
def flush_locks(zk_conn, domain):
def flush_locks(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zk_conn, domain)
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Verify that the VM is in a stopped state; freeing locks is not safe otherwise
state = zkhandler.readdata(zk_conn, '/domains/{}/state'.format(dom_uuid))
state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if state != 'stop':
return False, 'ERROR: VM "{}" is not in stopped state; flushing RBD locks on a running VM is dangerous.'.format(domain)
# Tell the cluster to flush the RBD locks for this VM
flush_locks_string = 'flush_locks {}'.format(dom_uuid)
zkhandler.writedata(zk_conn, {'/cmd/domains': flush_locks_string})
zkhandler.write([
('/cmd/domains', flush_locks_string)
])
# Wait 1/2 second for the cluster to get the message and start working
time.sleep(0.5)
# Acquire a read lock, so we get the return exclusively
lock = zkhandler.readlock(zk_conn, '/cmd/domains')
lock = zkhandler.readlock('/cmd/domains')
with lock:
try:
result = zkhandler.readdata(zk_conn, '/cmd/domains').split()[0]
result = zkhandler.read('/cmd/domains').split()[0]
if result == 'success-flush_locks':
message = 'Flushed locks on VM "{}"'.format(domain)
success = True
@ -144,15 +160,17 @@ def flush_locks(zk_conn, domain):
success = False
# Acquire a write lock to ensure things go smoothly
lock = zkhandler.writelock(zk_conn, '/cmd/domains')
lock = zkhandler.writelock('/cmd/domains')
with lock:
time.sleep(0.5)
zkhandler.writedata(zk_conn, {'/cmd/domains': ''})
zkhandler.write([
('/cmd/domains', '')
])
return success, message
def define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node_autostart, migration_method=None, profile=None, initial_state='stop'):
def define_vm(zkhandler, config_data, target_node, node_limit, node_selector, node_autostart, migration_method=None, profile=None, initial_state='stop'):
# Parse the XML data
try:
parsed_xml = lxml.objectify.fromstring(config_data)
@ -162,14 +180,14 @@ def define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node
dom_name = parsed_xml.name.text
# Ensure that the UUID and name are unique
if searchClusterByUUID(zk_conn, dom_uuid) or searchClusterByName(zk_conn, dom_name):
if searchClusterByUUID(zkhandler, dom_uuid) or searchClusterByName(zkhandler, dom_name):
return False, 'ERROR: Specified VM "{}" or UUID "{}" matches an existing VM on the cluster'.format(dom_name, dom_uuid)
if not target_node:
target_node = common.findTargetNode(zk_conn, dom_uuid)
target_node = common.findTargetNode(zkhandler, dom_uuid)
else:
# Verify node is valid
valid_node = common.verifyNode(zk_conn, target_node)
valid_node = common.verifyNode(zkhandler, target_node)
if not valid_node:
return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)
@ -193,64 +211,64 @@ def define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node
formatted_rbd_list = ''
# Add the new domain to Zookeeper
zkhandler.writedata(zk_conn, {
'/domains/{}'.format(dom_uuid): dom_name,
'/domains/{}/state'.format(dom_uuid): initial_state,
'/domains/{}/node'.format(dom_uuid): target_node,
'/domains/{}/lastnode'.format(dom_uuid): '',
'/domains/{}/node_limit'.format(dom_uuid): formatted_node_limit,
'/domains/{}/node_selector'.format(dom_uuid): node_selector,
'/domains/{}/node_autostart'.format(dom_uuid): node_autostart,
'/domains/{}/migration_method'.format(dom_uuid): migration_method,
'/domains/{}/failedreason'.format(dom_uuid): '',
'/domains/{}/consolelog'.format(dom_uuid): '',
'/domains/{}/rbdlist'.format(dom_uuid): formatted_rbd_list,
'/domains/{}/profile'.format(dom_uuid): profile,
'/domains/{}/vnc'.format(dom_uuid): '',
'/domains/{}/xml'.format(dom_uuid): config_data
})
zkhandler.write([
('/domains/{}'.format(dom_uuid), dom_name),
('/domains/{}/state'.format(dom_uuid), initial_state),
('/domains/{}/node'.format(dom_uuid), target_node),
('/domains/{}/lastnode'.format(dom_uuid), ''),
('/domains/{}/node_limit'.format(dom_uuid), formatted_node_limit),
('/domains/{}/node_selector'.format(dom_uuid), node_selector),
('/domains/{}/node_autostart'.format(dom_uuid), node_autostart),
('/domains/{}/migration_method'.format(dom_uuid), migration_method),
('/domains/{}/failedreason'.format(dom_uuid), ''),
('/domains/{}/consolelog'.format(dom_uuid), ''),
('/domains/{}/rbdlist'.format(dom_uuid), formatted_rbd_list),
('/domains/{}/profile'.format(dom_uuid), profile),
('/domains/{}/vnc'.format(dom_uuid), ''),
('/domains/{}/xml'.format(dom_uuid), config_data)
])
return True, 'Added new VM with Name "{}" and UUID "{}" to database.'.format(dom_name, dom_uuid)
def modify_vm_metadata(zk_conn, domain, node_limit, node_selector, node_autostart, provisioner_profile, migration_method):
dom_uuid = getDomainUUID(zk_conn, domain)
def modify_vm_metadata(zkhandler, domain, node_limit, node_selector, node_autostart, provisioner_profile, migration_method):
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
if node_limit is not None:
zkhandler.writedata(zk_conn, {
'/domains/{}/node_limit'.format(dom_uuid): node_limit
})
zkhandler.write([
('/domains/{}/node_limit'.format(dom_uuid), node_limit)
])
if node_selector is not None:
zkhandler.writedata(zk_conn, {
'/domains/{}/node_selector'.format(dom_uuid): node_selector
})
zkhandler.write([
('/domains/{}/node_selector'.format(dom_uuid), node_selector)
])
if node_autostart is not None:
zkhandler.writedata(zk_conn, {
'/domains/{}/node_autostart'.format(dom_uuid): node_autostart
})
zkhandler.write([
('/domains/{}/node_autostart'.format(dom_uuid), node_autostart)
])
if provisioner_profile is not None:
zkhandler.writedata(zk_conn, {
'/domains/{}/profile'.format(dom_uuid): provisioner_profile
})
zkhandler.write([
('/domains/{}/profile'.format(dom_uuid), provisioner_profile)
])
if migration_method is not None:
zkhandler.writedata(zk_conn, {
'/domains/{}/migration_method'.format(dom_uuid): migration_method
})
zkhandler.write([
('/domains/{}/migration_method'.format(dom_uuid), migration_method)
])
return True, 'Successfully modified PVC metadata of VM "{}".'.format(domain)
def modify_vm(zk_conn, domain, restart, new_vm_config):
dom_uuid = getDomainUUID(zk_conn, domain)
def modify_vm(zkhandler, domain, restart, new_vm_config):
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
dom_name = getDomainName(zk_conn, domain)
dom_name = getDomainName(zkhandler, domain)
# Parse and validate the XML
try:
@ -272,213 +290,231 @@ def modify_vm(zk_conn, domain, restart, new_vm_config):
formatted_rbd_list = ''
# Add the modified config to Zookeeper
zk_data = {
'/domains/{}'.format(dom_uuid): dom_name,
'/domains/{}/rbdlist'.format(dom_uuid): formatted_rbd_list,
'/domains/{}/xml'.format(dom_uuid): new_vm_config
}
zkhandler.writedata(zk_conn, zk_data)
zkhandler.write([
('/domains/{}'.format(dom_uuid), dom_name),
('/domains/{}/rbdlist'.format(dom_uuid), formatted_rbd_list),
('/domains/{}/xml'.format(dom_uuid), new_vm_config)
])
if restart:
lock = zkhandler.exclusivelock(zk_conn, '/domains/{}/state'.format(dom_uuid))
lock.acquire()
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'restart'})
lock.release()
change_state(zkhandler, dom_uuid, 'restart')
return True, 'Successfully modified configuration of VM "{}".'.format(domain)
def dump_vm(zk_conn, domain):
dom_uuid = getDomainUUID(zk_conn, domain)
def dump_vm(zkhandler, domain):
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Grab the domain XML and dump it to stdout
vm_xml = zkhandler.readdata(zk_conn, '/domains/{}/xml'.format(dom_uuid))
vm_xml = zkhandler.read('/domains/{}/xml'.format(dom_uuid))
return True, vm_xml
def rename_vm(zkhandler, domain, new_domain):
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Verify that the VM is in a stopped state; renaming is not supported otherwise
state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if state != 'stop':
return False, 'ERROR: VM "{}" is not in stopped state; VMs cannot be renamed while running.'.format(domain)
    # Parse and validate the XML
vm_config = common.getDomainXML(zkhandler, dom_uuid)
# Obtain the RBD disk list using the common functions
ddisks = common.getDomainDisks(vm_config, {})
pool_list = []
rbd_list = []
for disk in ddisks:
if disk['type'] == 'rbd':
pool_list.append(disk['name'].split('/')[0])
rbd_list.append(disk['name'].split('/')[1])
# Rename each volume in turn
for idx, rbd in enumerate(rbd_list):
rbd_new = re.sub(r"{}".format(domain), new_domain, rbd)
# Skip renaming if nothing changed
if rbd_new == rbd:
continue
ceph.rename_volume(zkhandler, pool_list[idx], rbd, rbd_new)
# Replace the name in the config
vm_config_new = lxml.etree.tostring(vm_config, encoding='ascii', method='xml').decode().replace(domain, new_domain)
# Get VM information
_b, dom_info = get_info(zkhandler, dom_uuid)
# Undefine the old VM
undefine_vm(zkhandler, dom_uuid)
# Define the new VM
define_vm(zkhandler, vm_config_new, dom_info['node'], dom_info['node_limit'], dom_info['node_selector'], dom_info['node_autostart'], migration_method=dom_info['migration_method'], profile=dom_info['profile'], initial_state='stop')
# If the VM is migrated, store that
if dom_info['migrated'] != 'no':
zkhandler.write([
('/domains/{}/lastnode'.format(dom_uuid), dom_info['last_node'])
])
return True, 'Successfully renamed VM "{}" to "{}".'.format(domain, new_domain)
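The volume rename above is a plain regex substitution of the old domain name within each volume name, skipping volumes that do not contain it. A quick illustration with hypothetical volume names:

import re

domain, new_domain = 'oldvm', 'newvm'
for rbd in ['oldvm_root', 'oldvm_data', 'shared_scratch']:
    rbd_new = re.sub(domain, new_domain, rbd)
    print(rbd, '->', rbd_new)
# oldvm_root -> newvm_root
# oldvm_data -> newvm_data
# shared_scratch -> shared_scratch (unchanged, so the rename is skipped)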
def undefine_vm(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Shut down the VM
current_vm_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_vm_state != 'stop':
# Set the domain into stop mode
change_state(zkhandler, dom_uuid, 'stop')
# Gracefully terminate the class instances
change_state(zkhandler, dom_uuid, 'delete')
# Delete the configurations
zkhandler.delete('/domains/{}'.format(dom_uuid))
return True, 'Undefined VM "{}" from the cluster.'.format(domain)
def remove_vm(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
disk_list = common.getDomainDiskList(zkhandler, dom_uuid)
# Shut down the VM
current_vm_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_vm_state != 'stop':
# Set the domain into stop mode
change_state(zkhandler, dom_uuid, 'stop')
# Gracefully terminate the class instances
change_state(zkhandler, dom_uuid, 'delete')
# Delete the configurations
zkhandler.delete('/domains/{}'.format(dom_uuid))
# Wait for 1 second to allow state to flow to all nodes
time.sleep(1)
# Remove disks
for disk in disk_list:
# vmpool/vmname_volume
try:
disk_pool, disk_name = disk.split('/')
retcode, message = ceph.remove_volume(zkhandler, disk_pool, disk_name)
except ValueError:
continue
return True, 'Removed VM "{}" and disks from the cluster.'.format(domain)
def start_vm(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Set the VM to start
change_state(zkhandler, dom_uuid, 'start')
return True, 'Starting VM "{}".'.format(domain)
def restart_vm(zkhandler, domain, wait=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get state and verify we're OK to proceed
current_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_state != 'start':
return False, 'ERROR: VM "{}" is not in "start" state!'.format(domain)
retmsg = 'Restarting VM "{}".'.format(domain)
# Set the VM to restart
change_state(zkhandler, dom_uuid, 'restart')
if wait:
while zkhandler.read('/domains/{}/state'.format(dom_uuid)) == 'restart':
time.sleep(1)
retmsg = 'Restarted VM "{}"'.format(domain)
return True, retmsg
def shutdown_vm(zkhandler, domain, wait=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get state and verify we're OK to proceed
current_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_state != 'start':
return False, 'ERROR: VM "{}" is not in "start" state!'.format(domain)
retmsg = 'Shutting down VM "{}"'.format(domain)
# Set the VM to shutdown
change_state(zkhandler, dom_uuid, 'shutdown')
if wait:
while zkhandler.read('/domains/{}/state'.format(dom_uuid)) == 'shutdown':
time.sleep(1)
retmsg = 'Shut down VM "{}"'.format(domain)
return True, retmsg
def stop_vm(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Set the VM to stop
change_state(zkhandler, dom_uuid, 'stop')
return True, 'Forcibly stopping VM "{}".'.format(domain)
def disable_vm(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get state and verify we're OK to proceed
current_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_state != 'stop':
return False, 'ERROR: VM "{}" must be stopped before disabling!'.format(domain)
# Set the VM to disable
change_state(zkhandler, dom_uuid, 'disable')
    return True, 'Marked VM "{}" as disabled.'.format(domain)
def move_vm(zkhandler, domain, target_node, wait=False, force_live=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get state and verify we're OK to proceed
current_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_state != 'start':
# If the current state isn't start, preserve it; we're not doing live migration
target_state = current_state
@ -488,26 +524,28 @@ def move_vm(zk_conn, domain, target_node, wait=False, force_live=False):
else:
target_state = 'migrate'
current_node = zkhandler.read('/domains/{}/node'.format(dom_uuid))
if not target_node:
target_node = common.findTargetNode(zkhandler, dom_uuid)
else:
# Verify node is valid
valid_node = common.verifyNode(zkhandler, target_node)
if not valid_node:
return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)
# Check if node is within the limit
node_limit = zkhandler.read('/domains/{}/node_limit'.format(dom_uuid))
if node_limit and target_node not in node_limit.split(','):
return False, 'ERROR: Specified node "{}" is not in the allowed list of nodes for VM "{}".'.format(target_node, domain)
# Verify if node is current node
if target_node == current_node:
last_node = zkhandler.read('/domains/{}/lastnode'.format(dom_uuid))
if last_node:
zkhandler.write([
('/domains/{}/lastnode'.format(dom_uuid), '')
])
return True, 'Making temporary migration permanent for VM "{}".'.format(domain)
return False, 'ERROR: VM "{}" is already running on node "{}".'.format(domain, current_node)
@ -517,31 +555,33 @@ def move_vm(zk_conn, domain, target_node, wait=False, force_live=False):
retmsg = 'Permanently migrating VM "{}" to node "{}".'.format(domain, target_node)
lock = zkhandler.exclusivelock('/domains/{}/state'.format(dom_uuid))
with lock:
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), target_state),
('/domains/{}/node'.format(dom_uuid), target_node),
('/domains/{}/lastnode'.format(dom_uuid), '')
])
# Wait for 1/2 second for migration to start
time.sleep(0.5)
if wait:
while zkhandler.read('/domains/{}/state'.format(dom_uuid)) == target_state:
time.sleep(1)
retmsg = 'Permanently migrated VM "{}" to node "{}"'.format(domain, target_node)
return True, retmsg
def migrate_vm(zkhandler, domain, target_node, force_migrate, wait=False, force_live=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get state and verify we're OK to proceed
current_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_state != 'start':
# If the current state isn't start, preserve it; we're not doing live migration
target_state = current_state
@ -551,22 +591,22 @@ def migrate_vm(zk_conn, domain, target_node, force_migrate, wait=False, force_li
else:
target_state = 'migrate'
current_node = zkhandler.read('/domains/{}/node'.format(dom_uuid))
last_node = zkhandler.read('/domains/{}/lastnode'.format(dom_uuid))
if last_node and not force_migrate:
return False, 'ERROR: VM "{}" has been previously migrated.'.format(domain)
if not target_node:
target_node = common.findTargetNode(zkhandler, dom_uuid)
else:
# Verify node is valid
valid_node = common.verifyNode(zkhandler, target_node)
if not valid_node:
return False, 'ERROR: Specified node "{}" is invalid.'.format(target_node)
# Check if node is within the limit
node_limit = zkhandler.read('/domains/{}/node_limit'.format(dom_uuid))
if node_limit and target_node not in node_limit.split(','):
return False, 'ERROR: Specified node "{}" is not in the allowed list of nodes for VM "{}".'.format(target_node, domain)
@ -583,31 +623,33 @@ def migrate_vm(zk_conn, domain, target_node, force_migrate, wait=False, force_li
retmsg = 'Migrating VM "{}" to node "{}".'.format(domain, target_node)
lock = zkhandler.exclusivelock('/domains/{}/state'.format(dom_uuid))
with lock:
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), target_state),
('/domains/{}/node'.format(dom_uuid), target_node),
('/domains/{}/lastnode'.format(dom_uuid), current_node)
])
# Wait for 1/2 second for migration to start
time.sleep(0.5)
if wait:
while zkhandler.read('/domains/{}/state'.format(dom_uuid)) == target_state:
time.sleep(1)
retmsg = 'Migrated VM "{}" to node "{}"'.format(domain, target_node)
return True, retmsg
def unmigrate_vm(zkhandler, domain, wait=False, force_live=False):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get state and verify we're OK to proceed
current_state = zkhandler.read('/domains/{}/state'.format(dom_uuid))
if current_state != 'start':
# If the current state isn't start, preserve it; we're not doing live migration
target_state = current_state
@ -617,38 +659,40 @@ def unmigrate_vm(zk_conn, domain, wait=False, force_live=False):
else:
target_state = 'migrate'
target_node = zkhandler.read('/domains/{}/lastnode'.format(dom_uuid))
if target_node == '':
return False, 'ERROR: VM "{}" has not been previously migrated.'.format(domain)
retmsg = 'Unmigrating VM "{}" back to node "{}".'.format(domain, target_node)
lock = zkhandler.exclusivelock('/domains/{}/state'.format(dom_uuid))
with lock:
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), target_state),
('/domains/{}/node'.format(dom_uuid), target_node),
('/domains/{}/lastnode'.format(dom_uuid), '')
])
# Wait for 1/2 second for migration to start
time.sleep(0.5)
if wait:
while zkhandler.read('/domains/{}/state'.format(dom_uuid)) == target_state:
time.sleep(1)
retmsg = 'Unmigrated VM "{}" back to node "{}"'.format(domain, target_node)
return True, retmsg
def get_console_log(zkhandler, domain, lines=1000):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: Could not find VM "{}" in the cluster!'.format(domain)
# Get the data from ZK
console_log = zkhandler.read('/domains/{}/consolelog'.format(dom_uuid))
# Shrink the log buffer to length lines
shrunk_log = console_log.split('\n')[-lines:]
@ -657,24 +701,24 @@ def get_console_log(zk_conn, domain, lines=1000):
return True, loglines
def get_info(zkhandler, domain):
# Validate that VM exists in cluster
dom_uuid = getDomainUUID(zkhandler, domain)
if not dom_uuid:
return False, 'ERROR: No VM named "{}" is present in the cluster.'.format(domain)
# Gather information from XML config and print it
domain_information = common.getInformationFromXML(zkhandler, dom_uuid)
if not domain_information:
return False, 'ERROR: Could not get information about VM "{}".'.format(domain)
return True, domain_information
def get_list(zkhandler, node, state, limit, is_fuzzy=True):
if node:
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, 'Specified node "{}" is invalid.'.format(node)
if state:
@ -682,7 +726,7 @@ def get_list(zk_conn, node, state, limit, is_fuzzy=True):
if state not in valid_states:
return False, 'VM state "{}" is not valid.'.format(state)
full_vm_list = zkhandler.children('/domains')
vm_list = []
# Set our limit to a sensible regex
@ -701,33 +745,33 @@ def get_list(zk_conn, node, state, limit, is_fuzzy=True):
vm_state = {}
for vm in full_vm_list:
# Check we don't match the limit
name = zkhandler.read('/domains/{}'.format(vm))
vm_node[vm] = zkhandler.read('/domains/{}/node'.format(vm))
vm_state[vm] = zkhandler.read('/domains/{}/state'.format(vm))
# Handle limiting
if limit:
try:
if re.match(limit, vm):
if not node and not state:
vm_list.append(common.getInformationFromXML(zkhandler, vm))
else:
if vm_node[vm] == node or vm_state[vm] == state:
vm_list.append(common.getInformationFromXML(zkhandler, vm))
if re.match(limit, name):
if not node and not state:
vm_list.append(common.getInformationFromXML(zkhandler, vm))
else:
if vm_node[vm] == node or vm_state[vm] == state:
vm_list.append(common.getInformationFromXML(zkhandler, vm))
except Exception as e:
return False, 'Regex Error: {}'.format(e)
else:
# Check node to avoid unneeded ZK calls
if not node and not state:
vm_list.append(common.getInformationFromXML(zkhandler, vm))
else:
if vm_node[vm] == node or vm_state[vm] == state:
vm_list.append(common.getInformationFromXML(zkhandler, vm))
return True, vm_list

View File

@ -21,164 +21,333 @@
import time
import uuid
from functools import wraps
from kazoo.client import KazooClient, KazooState
#
# Function decorators
#
class ZKConnection(object):
"""
Decorates a function with a Zookeeper connection before and after the main call.
The decorated function must accept the `zkhandler` argument as its first argument, and
then use this to access the connection.
"""
def __init__(self, config):
self.config = config
def __call__(self, function):
if not callable(function):
return
@wraps(function)
def connection(*args, **kwargs):
zkhandler = ZKHandler(self.config)
zkhandler.connect()
ret = function(zkhandler, *args, **kwargs)
zkhandler.disconnect()
del zkhandler
return ret
return connection
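For illustration, a hypothetical function wrapped with the decorator; the function name and the config contents here are stand-ins, not part of this changeset:

config = {'coordinators': 'coord1:2181,coord2:2181,coord3:2181'}  # placeholder addresses

@ZKConnection(config)
def read_key(zkhandler, key):
    # zkhandler is injected by the decorator; callers pass only `key`
    return zkhandler.read(key)

# Each call connects, runs the function, and disconnects:
# read_key('/config/primary_node')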
#
# Exceptions
#
class ZKConnectionException(Exception):
"""
    An exception raised when connecting to the cluster
"""
def __init__(self, zkhandler, error=None):
if error is not None:
self.message = "Failed to connect to Zookeeper at {}: {}".format(zkhandler.coordinators(), error)
else:
self.message = "Failed to connect to Zookeeper at {}".format(zkhandler.coordinators())
zkhandler.disconnect()
def __str__(self):
return str(self.message)
#
# Handler class
#
class ZKHandler(object):
def __init__(self, config, logger=None):
"""
Initialize an instance of the ZKHandler class with config
A zk_conn object will be created but not started
"""
self.encoding = 'utf8'
self.coordinators = config['coordinators']
self.logger = logger
self.zk_conn = KazooClient(hosts=self.coordinators)
#
# Class meta-functions
#
def coordinators(self):
return str(self.coordinators)
def log(self, message, state=''):
if self.logger is not None:
self.logger.out(message, state)
else:
print(message)
#
# State/connection management
#
def listener(self, state):
if state == KazooState.CONNECTED:
self.log('Connection to Zookeeper started', state='o')
else:
self.log('Connection to Zookeeper lost', state='w')
while True:
time.sleep(0.5)
_zk_conn = KazooClient(hosts=self.coordinators)
try:
_zk_conn.start()
except Exception:
del _zk_conn
continue
self.zk_conn = _zk_conn
self.zk_conn.add_listener(self.listener)
break
def connect(self, persistent=False):
"""
Start the zk_conn object and connect to the cluster
"""
try:
self.zk_conn.start()
if persistent:
self.zk_conn.add_listener(self.listener)
except Exception as e:
raise ZKConnectionException(self, e)
def disconnect(self):
"""
Stop and close the zk_conn object and disconnect from the cluster
The class instance may be reused later (avoids persistent connections)
"""
self.zk_conn.stop()
self.zk_conn.close()
#
# Key Actions
#
def exists(self, key):
"""
Check if a key exists
"""
stat = self.zk_conn.exists(key)
if stat:
return True
else:
return False
def read(self, key):
"""
Read data from a key
"""
return self.zk_conn.get(key)[0].decode(self.encoding)
def write(self, kvpairs):
"""
Create or update one or more keys' data
"""
if type(kvpairs) is not list:
self.log("ZKHandler error: Key-value sequence is not a list", state='e')
return False
transaction = self.zk_conn.transaction()
        for kvpair in kvpairs:
if type(kvpair) is not tuple:
self.log("ZKHandler error: Key-value pair '{}' is not a tuple".format(kvpair), state='e')
return False
key = kvpair[0]
value = kvpair[1]
if not self.exists(key):
# Creating a new key
transaction.create(key, str(value).encode(self.encoding))
else:
# Updating an existing key
data = self.zk_conn.get(key)
version = data[1].version
# Validate the expected version after the execution
new_version = version + 1
# Update the data
transaction.set_data(key, str(value).encode(self.encoding))
# Check the data
try:
transaction.check(key, new_version)
except TypeError:
self.log("ZKHandler error: Key '{}' does not match expected version".format(key), state='e')
return False
try:
transaction.commit()
return True
except Exception as e:
self.log("ZKHandler error: Failed to commit transaction: {}".format(e), state='e')
return False
def delete(self, keys, recursive=True):
"""
Delete a key or list of keys (defaults to recursive)
"""
if type(keys) is not list:
keys = [keys]
for key in keys:
if self.exists(key):
try:
self.zk_conn.delete(key, recursive=recursive)
except Exception as e:
self.log("ZKHandler error: Failed to delete key {}: {}".format(key, e), state='e')
return False
return True
def children(self, key):
"""
Lists all children of a key
"""
return self.zk_conn.get_children(key)
def rename(self, kkpairs):
"""
Rename one or more keys to a new value
"""
if type(kkpairs) is not list:
self.log("ZKHandler error: Key-key sequence is not a list", state='e')
return False
transaction = self.zk_conn.transaction()
        def rename_element(transaction, source_key, destination_key):
data = self.zk_conn.get(source_key)[0]
transaction.create(destination_key, data)
if self.children(source_key):
for child_key in self.children(source_key):
child_source_key = "{}/{}".format(source_key, child_key)
child_destination_key = "{}/{}".format(destination_key, child_key)
rename_element(transaction, child_source_key, child_destination_key)
transaction.delete(source_key, recursive=True)
        for kkpair in kkpairs:
if type(kkpair) is not tuple:
self.log("ZKHandler error: Key-key pair '{}' is not a tuple".format(kkpair), state='e')
return False
source_key = kkpair[0]
destination_key = kkpair[1]
if not self.exists(source_key):
                self.log("ZKHandler error: Source key '{}' does not exist".format(source_key), state='e')
return False
if self.exists(destination_key):
                self.log("ZKHandler error: Destination key '{}' already exists".format(destination_key), state='e')
return False
rename_element(transaction, source_key, destination_key)
        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log("ZKHandler error: Failed to commit transaction: {}".format(e), state='e')
            return False
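Both write() and rename() take lists of tuples, so multi-key updates land in a single transaction. A usage sketch, with the keys and variables as placeholders:

zkhandler.write([
    ('/domains/{}/state'.format(dom_uuid), 'start'),
    ('/domains/{}/node'.format(dom_uuid), target_node)
])
zkhandler.rename([
    ('/domains/{}'.format(old_uuid), '/domains/{}'.format(new_uuid))
])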
#
# Lock actions
#
def readlock(self, key):
"""
Acquires a read lock on a key
"""
count = 1
lock = None
while True:
try:
lock_id = str(uuid.uuid1())
lock = self.zk_conn.ReadLock(key, lock_id)
break
except Exception as e:
if count > 5:
self.log("ZKHandler warning: Failed to acquire read lock after 5 tries: {}".format(e), state='e')
break
else:
time.sleep(0.5)
count += 1
continue
return lock
def writelock(self, key):
"""
Acquires a write lock on a key
"""
count = 1
lock = None
while True:
try:
lock_id = str(uuid.uuid1())
lock = self.zk_conn.WriteLock(key, lock_id)
break
except Exception as e:
if count > 5:
self.log("ZKHandler warning: Failed to acquire write lock after 5 tries: {}".format(e), state='e')
break
else:
time.sleep(0.5)
count += 1
continue
return lock
def exclusivelock(self, key):
"""
Acquires an exclusive lock on a key
"""
count = 1
lock = None
while True:
try:
lock_id = str(uuid.uuid1())
lock = self.zk_conn.Lock(key, lock_id)
break
except Exception as e:
if count > 5:
self.log("ZKHandler warning: Failed to acquire exclusive lock after 5 tries: {}".format(e), state='e')
break
else:
time.sleep(0.5)
count += 1
continue
return lock
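Putting the pieces together, a minimal end-to-end sketch of the new handler; the coordinator addresses and the UUID are placeholders:

zkhandler = ZKHandler({'coordinators': 'coord1:2181,coord2:2181,coord3:2181'})
zkhandler.connect()

dom_uuid = 'some-vm-uuid'  # placeholder
state = zkhandler.read('/domains/{}/state'.format(dom_uuid))

# The locks are kazoo recipe objects, so they support the context-manager
# protocol the daemon code uses ("with lock:")
lock = zkhandler.exclusivelock('/domains/{}/state'.format(dom_uuid))
with lock:
    zkhandler.write([
        ('/domains/{}/state'.format(dom_uuid), 'start')
    ])

zkhandler.disconnect()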

debian/changelog
View File

@ -1,3 +1,47 @@
pvc (0.9.19-0) unstable; urgency=high
* [CLI] Corrects some flawed conditionals
* [API] Disables SQLAlchemy modification tracking functionality (not used by us)
* [Daemons] Implements new zkhandler module for improved reliability and reusability
* [Daemons] Refactors some code to use new zkhandler module
* [API, CLI] Adds support for "none" migration selector (uses cluster default instead)
* [Daemons] Moves some configuration keys to new /config tree
* [Node Daemon] Increases initial lock timeout for VM migrations to avoid out-of-sync potential
  * [Provisioner] Supports storing and using textual cluster network labels ("upstream", "storage", "cluster") in templates
  * [API] Avoids duplicating existing node states
-- Joshua M. Boniface <joshua@boniface.me> Sun, 06 Jun 2021 01:47:41 -0400
pvc (0.9.18-0) unstable; urgency=high
* Adds VM rename functionality to API and CLI client
-- Joshua M. Boniface <joshua@boniface.me> Sun, 23 May 2021 17:23:10 -0400
pvc (0.9.17-0) unstable; urgency=high
* [CLI] Fixes bugs in log follow output
-- Joshua M. Boniface <joshua@boniface.me> Wed, 19 May 2021 17:06:29 -0400
pvc (0.9.16-0) unstable; urgency=high
* Improves some CLI help messages
* Skips empty local cluster in CLI
* Adjusts how confirmations happen during VM modify restarts
* Fixes bug around corrupted VM log files
* Fixes bug around subprocess pipe exceptions
-- Joshua M. Boniface <joshua@boniface.me> Mon, 10 May 2021 01:13:21 -0400
pvc (0.9.15-0) unstable; urgency=high
* [CLI] Adds additional verification (--yes) to several VM management commands
* [CLI] Adds a method to override --yes/confirmation requirements via envvar (PVC_UNSAFE)
* [CLI] Adds description fields to PVC clusters in CLI
-- Joshua M. Boniface <joshua@boniface.me> Thu, 08 Apr 2021 13:37:47 -0400
pvc (0.9.14-0) unstable; urgency=high
* Fixes bugs around cloned volume provisioning

View File

@ -18,6 +18,40 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r
## Changelog
#### v0.9.19
* [CLI] Corrects some flawed conditionals
* [API] Disables SQLAlchemy modification tracking functionality (not used by us)
* [Daemons] Implements new zkhandler module for improved reliability and reusability
* [Daemons] Refactors some code to use new zkhandler module
* [API, CLI] Adds support for "none" migration selector (uses cluster default instead)
* [Daemons] Moves some configuration keys to new /config tree
* [Node Daemon] Increases initial lock timeout for VM migrations to avoid out-of-sync potential
* [Provisioner] Supports storing and using textual cluster network labels ("upstream", "storage", "cluster") in templates
* [API] Avoids duplicating existing node states
#### v0.9.18
* Adds VM rename functionality to API and CLI client
#### v0.9.17
* [CLI] Fixes bugs in log follow output
#### v0.9.16
* Improves some CLI help messages
* Skips empty local cluster in CLI
* Adjusts how confirmations happen during VM modify restarts
* Fixes bug around corrupted VM log files
* Fixes bug around subprocess pipe exceptions
#### v0.9.15
* [CLI] Adds additional verification (--yes) to several VM management commands
* [CLI] Adds a method to override --yes/confirmation requirements via envvar (PVC_UNSAFE)
* [CLI] Adds description fields to PVC clusters in CLI
#### v0.9.14
* Fixes bugs around cloned volume provisioning

View File

@ -6035,6 +6035,38 @@
]
}
},
"/api/v1/vm/{vm}/rename": {
"post": {
"description": "",
"parameters": [
{
"description": "The new name of the VM",
"in": "query",
"name": "new_name",
"required": true,
"type": "string"
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"$ref": "#/definitions/Message"
}
},
"400": {
"description": "Bad request",
"schema": {
"$ref": "#/definitions/Message"
}
}
},
"summary": "Rename VM {vm}, and all connected disk volumes which include this name, to {new_name}",
"tags": [
"vm"
]
}
},
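As a usage sketch against this new endpoint; the cluster address and port, the VM names, and any authentication headers are placeholders or omitted here:

import requests

r = requests.post(
    'http://pvc.local:7370/api/v1/vm/oldvm/rename',
    params={'new_name': 'newvm'}
)
print(r.status_code, r.json())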
"/api/v1/vm/{vm}/state": {
"get": {
"description": "",

node-daemon/daemon_lib Symbolic link
View File

@ -0,0 +1 @@
../daemon-common

View File

@ -23,20 +23,19 @@ import time
import json
import psutil
import daemon_lib.common as common
class CephOSDInstance(object):
def __init__(self, zkhandler, this_node, osd_id):
self.zkhandler = zkhandler
self.this_node = this_node
self.osd_id = osd_id
self.node = None
self.size = None
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch('/ceph/osds/{}/node'.format(self.osd_id))
def watch_osd_node(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -51,7 +50,7 @@ class CephOSDInstance(object):
if data and data != self.node:
self.node = data
@self.zkhandler.zk_conn.DataWatch('/ceph/osds/{}/stats'.format(self.osd_id))
def watch_osd_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -67,7 +66,7 @@ class CephOSDInstance(object):
self.stats = json.loads(data)
def add_osd(zkhandler, logger, node, device, weight):
# We are ready to create a new OSD on this node
logger.out('Creating new OSD disk on block device {}'.format(device), state='i')
try:
@ -174,12 +173,12 @@ def add_osd(zk_conn, logger, node, device, weight):
# 7. Add the new OSD to the list
logger.out('Adding new OSD disk with ID {} to Zookeeper'.format(osd_id), state='i')
zkhandler.write([
('/ceph/osds/{}'.format(osd_id), ''),
('/ceph/osds/{}/node'.format(osd_id), node),
('/ceph/osds/{}/device'.format(osd_id), device),
('/ceph/osds/{}/stats'.format(osd_id), '{}')
])
# Log it
logger.out('Created new OSD disk with ID {}'.format(osd_id), state='o')
@ -190,7 +189,7 @@ def add_osd(zk_conn, logger, node, device, weight):
return False
def remove_osd(zkhandler, logger, osd_id, osd_obj):
logger.out('Removing OSD disk {}'.format(osd_id), state='i')
try:
# 1. Verify the OSD is present
@ -273,7 +272,7 @@ def remove_osd(zk_conn, logger, osd_id, osd_obj):
# 7. Delete OSD from ZK
logger.out('Deleting OSD disk with ID {} from Zookeeper'.format(osd_id), state='i')
zkhandler.delete('/ceph/osds/{}'.format(osd_id), recursive=True)
# Log it
logger.out('Removed OSD disk with ID {}'.format(osd_id), state='o')
@ -285,14 +284,14 @@ def remove_osd(zk_conn, logger, osd_id, osd_obj):
class CephPoolInstance(object):
def __init__(self, zkhandler, this_node, name):
self.zkhandler = zkhandler
self.this_node = this_node
self.name = name
self.pgs = ''
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch('/ceph/pools/{}/pgs'.format(self.name))
def watch_pool_node(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -307,7 +306,7 @@ class CephPoolInstance(object):
if data and data != self.pgs:
self.pgs = data
@self.zkhandler.zk_conn.DataWatch('/ceph/pools/{}/stats'.format(self.name))
def watch_pool_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -324,14 +323,14 @@ class CephPoolInstance(object):
class CephVolumeInstance(object):
def __init__(self, zkhandler, this_node, pool, name):
self.zkhandler = zkhandler
self.this_node = this_node
self.pool = pool
self.name = name
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch('/ceph/volumes/{}/{}/stats'.format(self.pool, self.name))
def watch_volume_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -348,15 +347,15 @@ class CephVolumeInstance(object):
class CephSnapshotInstance(object):
def __init__(self, zkhandler, this_node, pool, volume, name):
self.zkhandler = zkhandler
self.this_node = this_node
self.pool = pool
self.volume = volume
self.name = name
self.stats = dict()
@self.zkhandler.zk_conn.DataWatch('/ceph/snapshots/{}/{}/{}/stats'.format(self.pool, self.volume, self.name))
def watch_snapshot_stats(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -374,7 +373,7 @@ class CephSnapshotInstance(object):
# Primary command function
# This command pipe is only used for OSD adds and removes
def run_command(zkhandler, logger, this_node, data, d_osd):
# Get the command and args
command, args = data.split()
@ -383,18 +382,22 @@ def run_command(zk_conn, logger, this_node, data, d_osd):
node, device, weight = args.split(',')
if node == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock('/cmd/ceph')
with zk_lock:
# Add the OSD
result = add_osd(zkhandler, logger, node, device, weight)
# Command succeeded
if result:
# Update the command queue
zkhandler.write([
('/cmd/ceph', 'success-{}'.format(data))
])
# Command failed
else:
# Update the command queue
zkhandler.write([
('/cmd/ceph', 'failure-{}'.format(data))
])
                # Wait 1 second before we free the lock, to ensure the client hits the lock
time.sleep(1)
@ -405,17 +408,21 @@ def run_command(zk_conn, logger, this_node, data, d_osd):
# Verify osd_id is in the list
if d_osd[osd_id] and d_osd[osd_id].node == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock('/cmd/ceph')
with zk_lock:
# Remove the OSD
result = remove_osd(zkhandler, logger, osd_id, d_osd[osd_id])
# Command succeeded
if result:
# Update the command queue
zkhandler.write([
('/cmd/ceph', 'success-{}'.format(data))
])
# Command failed
else:
# Update the command queue
zkhandler.write([
('/cmd/ceph', 'failure-{}'.format(data))
])
                # Wait 1 second before we free the lock, to ensure the client hits the lock
time.sleep(1)
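The Ceph command pipeline is driven by writing a single string to the /cmd/ceph key, which a DataWatch in the node daemon hands to run_command(). Based on the parsing above, an OSD add would presumably be queued like this (the exact command name is assumed, since the dispatch conditionals are elided from this hunk):

zkhandler.write([
    ('/cmd/ceph', 'osd_add node1,/dev/sdb,1.0')
])
# run_command() splits this into ('osd_add', 'node1,/dev/sdb,1.0'); the matching
# node then takes the /cmd/ceph write lock, runs add_osd(), and writes back a
# success- or failure-prefixed copy of the command so the caller can poll the result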

View File

@ -26,13 +26,12 @@ import psycopg2
from threading import Thread, Event
import daemon_lib.common as common
class DNSAggregatorInstance(object):
# Initialization function
def __init__(self, config, logger):
self.config = config
self.logger = logger
self.dns_networks = dict()

View File

@ -40,10 +40,11 @@ from queue import Queue
from xml.etree import ElementTree
from rados import Rados
from daemon_lib.zkhandler import ZKHandler
import pvcnoded.fencing as fencing
import daemon_lib.log as log
import daemon_lib.common as common
import pvcnoded.VMInstance as VMInstance
import pvcnoded.NodeInstance as NodeInstance
@ -53,7 +54,7 @@ import pvcnoded.CephInstance as CephInstance
import pvcnoded.MetadataAPIInstance as MetadataAPIInstance
# Version string for startup output
version = '0.9.19'
###############################################################################
# PVCD - node daemon startup program
@ -517,51 +518,50 @@ time.sleep(5)
# PHASE 4 - Attempt to connect to the coordinators and start zookeeper client
###############################################################################
# Create an instance of the handler
zkhandler = ZKHandler(config, logger=logger)
try:
logger.out('Connecting to Zookeeper cluster nodes {}'.format(config['coordinators']), state='i')
# Start connection
zkhandler.connect(persistent=True)
except Exception as e:
logger.out('ERROR: Failed to connect to Zookeeper cluster: {}'.format(e), state='e')
exit(1)
# Create the /config key if it does not exist
try:
zkhandler.read('/config')
except Exception:
zkhandler.write([
('/config', ''),
('/config/primary_node', 'none'),
('/config/upstream_ip', 'none'),
('/config/maintenance', 'False'),
])
# MIGRATION - populate the keys from their old values
try:
primary_node = zkhandler.read('/primary_node')
zkhandler.write([
('/config/primary_node', primary_node)
])
except Exception:
pass
try:
upstream_ip = zkhandler.read('/upstream_ip')
zkhandler.write([
('/config/upstream_ip', upstream_ip)
])
except Exception:
pass
try:
maintenance = zkhandler.read('/maintenance')
zkhandler.write([
('/config/maintenance', maintenance)
])
except Exception:
pass
###############################################################################
# PHASE 5 - Gracefully handle termination
@ -570,12 +570,14 @@ zk_conn.add_listener(zk_listener)
# Cleanup function
def cleanup():
global zkhandler, update_timer, d_domain
logger.out('Terminating pvcnoded and cleaning up', state='s')
# Set shutdown state in Zookeeper
zkhandler.write([
('/nodes/{}/daemonstate'.format(myhostname), 'shutdown')
])
# Waiting for any flushes to complete
logger.out('Waiting for any active flushes', state='s')
@ -596,9 +598,9 @@ def cleanup():
# Force into secondary coordinator state if needed
try:
if this_node.router_state == 'primary':
zkhandler.write([
('/config/primary_node', 'none')
])
logger.out('Waiting for primary migration', state='s')
while this_node.router_state != 'secondary':
time.sleep(0.5)
@ -617,15 +619,17 @@ def cleanup():
node_keepalive()
# Set stop state in Zookeeper
zkhandler.write([
('/nodes/{}/daemonstate'.format(myhostname), 'stop')
])
# Forcibly terminate dnsmasq because it gets stuck sometimes
common.run_os_command('killall dnsmasq')
# Close the Zookeeper connection
try:
zkhandler.disconnect()
del zkhandler
except Exception:
pass
@ -655,54 +659,54 @@ signal.signal(signal.SIGHUP, hup)
###############################################################################
# Check if our node exists in Zookeeper, and create it if not
if config['daemon_mode'] == 'coordinator':
init_routerstate = 'secondary'
else:
init_routerstate = 'client'
if zkhandler.exists('/nodes/{}'.format(myhostname)):
logger.out("Node is " + fmt_green + "present" + fmt_end + " in Zookeeper", state='i')
# Update static data just in case it's changed
zkhandler.write([
('/nodes/{}/daemonmode'.format(myhostname), config['daemon_mode']),
('/nodes/{}/daemonstate'.format(myhostname), 'init'),
('/nodes/{}/routerstate'.format(myhostname), init_routerstate),
('/nodes/{}/staticdata'.format(myhostname), ' '.join(staticdata)),
# Keepalives and fencing information (always load and set from config on boot)
('/nodes/{}/ipmihostname'.format(myhostname), config['ipmi_hostname']),
('/nodes/{}/ipmiusername'.format(myhostname), config['ipmi_username']),
('/nodes/{}/ipmipassword'.format(myhostname), config['ipmi_password'])
])
else:
logger.out("Node is " + fmt_red + "absent" + fmt_end + " in Zookeeper; adding new node", state='i')
keepalive_time = int(time.time())
zkhandler.write([
('/nodes/{}'.format(myhostname), config['daemon_mode']),
# Basic state information
('/nodes/{}/daemonmode'.format(myhostname), config['daemon_mode']),
('/nodes/{}/daemonstate'.format(myhostname), 'init'),
('/nodes/{}/routerstate'.format(myhostname), init_routerstate),
('/nodes/{}/domainstate'.format(myhostname), 'flushed'),
('/nodes/{}/staticdata'.format(myhostname), ' '.join(staticdata)),
('/nodes/{}/memtotal'.format(myhostname), '0'),
('/nodes/{}/memfree'.format(myhostname), '0'),
('/nodes/{}/memused'.format(myhostname), '0'),
('/nodes/{}/memalloc'.format(myhostname), '0'),
('/nodes/{}/memprov'.format(myhostname), '0'),
('/nodes/{}/vcpualloc'.format(myhostname), '0'),
('/nodes/{}/cpuload'.format(myhostname), '0.0'),
('/nodes/{}/networkscount'.format(myhostname), '0'),
('/nodes/{}/domainscount'.format(myhostname), '0'),
('/nodes/{}/runningdomains'.format(myhostname), ''),
# Keepalives and fencing information
('/nodes/{}/keepalive'.format(myhostname), str(keepalive_time)),
('/nodes/{}/ipmihostname'.format(myhostname), config['ipmi_hostname']),
('/nodes/{}/ipmiusername'.format(myhostname), config['ipmi_username']),
('/nodes/{}/ipmipassword'.format(myhostname), config['ipmi_password'])
])
# Check that the primary key exists, and create it with us as master if not
try:
current_primary = zkhandler.read('/config/primary_node')
except kazoo.exceptions.NoNodeError:
current_primary = 'none'
@ -711,7 +715,9 @@ if current_primary and current_primary != 'none':
else:
if config['daemon_mode'] == 'coordinator':
logger.out('No primary node found; creating with us as primary.', state='i')
zkhandler.write([
('/config/primary_node', myhostname)
])
###############################################################################
# PHASE 7a - Ensure IPMI is reachable and working
@ -776,7 +782,7 @@ if enable_networking:
nftables_base_filename = '{}/base.nft'.format(config['nft_dynamic_directory'])
with open(nftables_base_filename, 'w') as nfbasefile:
nfbasefile.write(nftables_base_rules)
common.reload_firewall_rules(nftables_base_filename, logger=logger)
###############################################################################
# PHASE 7d - Ensure DNSMASQ is not running
@ -806,8 +812,8 @@ volume_list = dict() # Dict of Lists
if enable_networking:
# Create an instance of the DNS Aggregator and Metadata API if we're a coordinator
if config['daemon_mode'] == 'coordinator':
dns_aggregator = DNSAggregatorInstance.DNSAggregatorInstance(config, logger)
metadata_api = MetadataAPIInstance.MetadataAPIInstance(zkhandler, config, logger)
else:
dns_aggregator = None
metadata_api = None
@ -817,14 +823,14 @@ else:
# Node objects
@zkhandler.zk_conn.ChildrenWatch('/nodes')
def update_nodes(new_node_list):
global node_list, d_node
# Add any missing nodes to the list
for node in new_node_list:
if node not in node_list:
d_node[node] = NodeInstance.NodeInstance(node, myhostname, zkhandler, config, logger, d_node, d_network, d_domain, dns_aggregator, metadata_api)
# Remove any deleted nodes from the list
for node in node_list:
@ -846,7 +852,7 @@ this_node = d_node[myhostname]
# Maintenance mode
@zkhandler.zk_conn.DataWatch('/config/maintenance')
def set_maintenance(_maintenance, stat, event=''):
global maintenance
try:
@ -856,7 +862,7 @@ def set_maintenance(_maintenance, stat, event=''):
# Primary node
@zkhandler.zk_conn.DataWatch('/config/primary_node')
def update_primary(new_primary, stat, event=''):
try:
new_primary = new_primary.decode('ascii')
@ -871,7 +877,7 @@ def update_primary(new_primary, stat, event=''):
if this_node.daemon_state == 'run' and this_node.router_state not in ['primary', 'takeover', 'relinquish']:
logger.out('Contending for primary coordinator state', state='i')
# Acquire an exclusive lock on the primary_node key
primary_lock = zkhandler.exclusivelock('/config/primary_node')
try:
# This lock times out after 0.4s, which is 0.1s less than the pre-takeover
# timeout below, thus ensuring that a primary takeover will not deadlock
@ -879,23 +885,31 @@ def update_primary(new_primary, stat, event=''):
primary_lock.acquire(timeout=0.4)
# Ensure when we get the lock that the versions are still consistent and that
# another node hasn't already acquired primary state
if key_version == zkhandler.zk_conn.get('/config/primary_node')[1].version:
zkhandler.write([
('/config/primary_node', myhostname)
])
# Cleanly release the lock
primary_lock.release()
# We timed out acquiring a lock, which means we failed contention, so just pass
except Exception:
pass
elif new_primary == myhostname:
if this_node.router_state == 'secondary':
time.sleep(0.5)
zkhandler.write([
('/nodes/{}/routerstate'.format(myhostname), 'takeover')
])
else:
if this_node.router_state == 'primary':
time.sleep(0.5)
zkhandler.write([
('/nodes/{}/routerstate'.format(myhostname), 'relinquish')
])
else:
zkhandler.write([
('/nodes/{}/routerstate'.format(myhostname), 'client')
])
for node in d_node:
d_node[node].primary_node = new_primary
@ -903,14 +917,14 @@ def update_primary(new_primary, stat, event=''):
if enable_networking:
# Network objects
@zkhandler.zk_conn.ChildrenWatch('/networks')
def update_networks(new_network_list):
global network_list, d_network
# Add any missing networks to the list
for network in new_network_list:
if network not in network_list:
d_network[network] = VXNetworkInstance.VXNetworkInstance(network, zkhandler, config, logger, this_node, dns_aggregator)
if config['daemon_mode'] == 'coordinator' and d_network[network].nettype == 'managed':
try:
dns_aggregator.add_network(d_network[network])
@ -946,20 +960,20 @@ if enable_networking:
if enable_hypervisor:
# VM command pipeline key
@zk_conn.DataWatch('/cmd/domains')
@zkhandler.zk_conn.DataWatch('/cmd/domains')
def cmd_domains(data, stat, event=''):
if data:
VMInstance.run_command(zk_conn, logger, this_node, data.decode('ascii'))
VMInstance.run_command(zkhandler, logger, this_node, data.decode('ascii'))
# VM domain objects
@zk_conn.ChildrenWatch('/domains')
@zkhandler.zk_conn.ChildrenWatch('/domains')
def update_domains(new_domain_list):
global domain_list, d_domain
# Add any missing domains to the list
for domain in new_domain_list:
if domain not in domain_list:
d_domain[domain] = VMInstance.VMInstance(domain, zk_conn, config, logger, this_node)
d_domain[domain] = VMInstance.VMInstance(domain, zkhandler, config, logger, this_node)
# Remove any deleted domains from the list
for domain in domain_list:
@ -977,20 +991,20 @@ if enable_hypervisor:
if enable_storage:
# Ceph command pipeline key
@zk_conn.DataWatch('/cmd/ceph')
@zkhandler.zk_conn.DataWatch('/cmd/ceph')
def cmd_ceph(data, stat, event=''):
if data:
CephInstance.run_command(zk_conn, logger, this_node, data.decode('ascii'), d_osd)
CephInstance.run_command(zkhandler, logger, this_node, data.decode('ascii'), d_osd)
# OSD objects
@zk_conn.ChildrenWatch('/ceph/osds')
@zkhandler.zk_conn.ChildrenWatch('/ceph/osds')
def update_osds(new_osd_list):
global osd_list, d_osd
# Add any missing OSDs to the list
for osd in new_osd_list:
if osd not in osd_list:
d_osd[osd] = CephInstance.CephOSDInstance(zk_conn, this_node, osd)
d_osd[osd] = CephInstance.CephOSDInstance(zkhandler, this_node, osd)
# Remove any deleted OSDs from the list
for osd in osd_list:
@ -1003,14 +1017,14 @@ if enable_storage:
logger.out('{}OSD list:{} {}'.format(fmt_blue, fmt_end, ' '.join(osd_list)), state='i')
# Pool objects
@zk_conn.ChildrenWatch('/ceph/pools')
@zkhandler.zk_conn.ChildrenWatch('/ceph/pools')
def update_pools(new_pool_list):
global pool_list, d_pool
# Add any missing Pools to the list
for pool in new_pool_list:
if pool not in pool_list:
d_pool[pool] = CephInstance.CephPoolInstance(zk_conn, this_node, pool)
d_pool[pool] = CephInstance.CephPoolInstance(zkhandler, this_node, pool)
d_volume[pool] = dict()
volume_list[pool] = []
@ -1026,14 +1040,14 @@ if enable_storage:
# Volume objects in each pool
for pool in pool_list:
@zk_conn.ChildrenWatch('/ceph/volumes/{}'.format(pool))
@zkhandler.zk_conn.ChildrenWatch('/ceph/volumes/{}'.format(pool))
def update_volumes(new_volume_list):
global volume_list, d_volume
# Add any missing Volumes to the list
for volume in new_volume_list:
if volume not in volume_list[pool]:
d_volume[pool][volume] = CephInstance.CephVolumeInstance(zk_conn, this_node, pool, volume)
d_volume[pool][volume] = CephInstance.CephVolumeInstance(zkhandler, this_node, pool, volume)
# Remove any deleted Volumes from the list
for volume in volume_list[pool]:
@ -1092,9 +1106,9 @@ def collect_ceph_stats(queue):
command = {"prefix": "status", "format": "pretty"}
ceph_status = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
try:
zkhandler.writedata(zk_conn, {
'/ceph': str(ceph_status)
})
zkhandler.write([
('/ceph', str(ceph_status))
])
except Exception as e:
logger.out('Failed to set Ceph status data: {}'.format(e), state='e')
return
@ -1106,9 +1120,9 @@ def collect_ceph_stats(queue):
command = {"prefix": "df", "format": "pretty"}
ceph_df = ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1].decode('ascii')
try:
zkhandler.writedata(zk_conn, {
'/ceph/util': str(ceph_df)
})
zkhandler.write([
('/ceph/util', str(ceph_df))
])
except Exception as e:
logger.out('Failed to set Ceph utilization data: {}'.format(e), state='e')
return
@ -1171,9 +1185,9 @@ def collect_ceph_stats(queue):
}
# Write the pool data to Zookeeper
zkhandler.writedata(zk_conn, {
'/ceph/pools/{}/stats'.format(pool['name']): str(json.dumps(pool_df))
})
zkhandler.write([
('/ceph/pools/{}/stats'.format(pool['name']), str(json.dumps(pool_df)))
])
except Exception as e:
# One or more of the status commands timed out, just continue
logger.out('Failed to format and send pool data: {}'.format(e), state='w')
@ -1307,9 +1321,9 @@ def collect_ceph_stats(queue):
for osd in osd_list:
try:
stats = json.dumps(osd_stats[osd])
zkhandler.writedata(zk_conn, {
'/ceph/osds/{}/stats'.format(osd): str(stats)
})
zkhandler.write([
('/ceph/osds/{}/stats'.format(osd), str(stats))
])
except KeyError as e:
# One or more of the status commands timed out, just continue
logger.out('Failed to upload OSD stats from dictionary: {}'.format(e), state='w')
@ -1374,7 +1388,9 @@ def collect_vm_stats(queue):
except Exception:
# Toggle a state "change"
logger.out("Resetting state to {} for VM {}".format(instance.getstate(), instance.domname), state='i', prefix='vm-thread')
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(domain): instance.getstate()})
zkhandler.write([
('/domains/{}/state'.format(domain), instance.getstate())
])
elif instance.getnode() == this_node.name:
memprov += instance.getmemory()
@ -1464,9 +1480,9 @@ def collect_vm_stats(queue):
logger.out("Writing statistics for VM {} to Zookeeper".format(domain_name), state='d', prefix='vm-thread')
try:
zkhandler.writedata(zk_conn, {
"/domains/{}/stats".format(domain_uuid): str(json.dumps(domain_stats))
})
zkhandler.write([
("/domains/{}/stats".format(domain_uuid), str(json.dumps(domain_stats)))
])
except Exception as e:
if debug:
logger.out("{}".format(e), state='d', prefix='vm-thread')
@ -1488,22 +1504,37 @@ def node_keepalive():
if debug:
logger.out("Keepalive starting", state='d', prefix='main-thread')
# Set the migration selector in Zookeeper for clients to read
if config['enable_hypervisor']:
if this_node.router_state == 'primary':
try:
if zkhandler.read('/config/migration_target_selector') != config['migration_target_selector']:
raise
except Exception:
zkhandler.write([
('/config/migration_target_selector', config['migration_target_selector'])
])
# Set the upstream IP in Zookeeper for clients to read
if config['enable_networking']:
if this_node.router_state == 'primary':
try:
if zkhandler.readdata(zk_conn, '/upstream_ip') != config['upstream_floating_ip']:
if zkhandler.read('/config/upstream_ip') != config['upstream_floating_ip']:
raise
except Exception:
zkhandler.writedata(zk_conn, {'/upstream_ip': config['upstream_floating_ip']})
zkhandler.write([
('/config/upstream_ip', config['upstream_floating_ip'])
])
# Get past state and update if needed
if debug:
logger.out("Get past state and update if needed", state='d', prefix='main-thread')
past_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(this_node.name))
past_state = zkhandler.read('/nodes/{}/daemonstate'.format(this_node.name))
if past_state != 'run':
this_node.daemon_state = 'run'
zkhandler.writedata(zk_conn, {'/nodes/{}/daemonstate'.format(this_node.name): 'run'})
zkhandler.write([
('/nodes/{}/daemonstate'.format(this_node.name), 'run')
])
else:
this_node.daemon_state = 'run'
@ -1511,8 +1542,10 @@ def node_keepalive():
if debug:
logger.out("Ensure the primary key is properly set", state='d', prefix='main-thread')
if this_node.router_state == 'primary':
if zkhandler.readdata(zk_conn, '/primary_node') != this_node.name:
zkhandler.writedata(zk_conn, {'/primary_node': this_node.name})
if zkhandler.read('/config/primary_node') != this_node.name:
zkhandler.write([
('/config/primary_node', this_node.name)
])
# Run VM statistics collection in separate thread for parallelization
if enable_hypervisor:
@ -1572,18 +1605,18 @@ def node_keepalive():
if debug:
logger.out("Set our information in zookeeper", state='d', prefix='main-thread')
try:
zkhandler.writedata(zk_conn, {
'/nodes/{}/memtotal'.format(this_node.name): str(this_node.memtotal),
'/nodes/{}/memused'.format(this_node.name): str(this_node.memused),
'/nodes/{}/memfree'.format(this_node.name): str(this_node.memfree),
'/nodes/{}/memalloc'.format(this_node.name): str(this_node.memalloc),
'/nodes/{}/memprov'.format(this_node.name): str(this_node.memprov),
'/nodes/{}/vcpualloc'.format(this_node.name): str(this_node.vcpualloc),
'/nodes/{}/cpuload'.format(this_node.name): str(this_node.cpuload),
'/nodes/{}/domainscount'.format(this_node.name): str(this_node.domains_count),
'/nodes/{}/runningdomains'.format(this_node.name): ' '.join(this_node.domain_list),
'/nodes/{}/keepalive'.format(this_node.name): str(keepalive_time)
})
zkhandler.write([
('/nodes/{}/memtotal'.format(this_node.name), str(this_node.memtotal)),
('/nodes/{}/memused'.format(this_node.name), str(this_node.memused)),
('/nodes/{}/memfree'.format(this_node.name), str(this_node.memfree)),
('/nodes/{}/memalloc'.format(this_node.name), str(this_node.memalloc)),
('/nodes/{}/memprov'.format(this_node.name), str(this_node.memprov)),
('/nodes/{}/vcpualloc'.format(this_node.name), str(this_node.vcpualloc)),
('/nodes/{}/cpuload'.format(this_node.name), str(this_node.cpuload)),
('/nodes/{}/domainscount'.format(this_node.name), str(this_node.domains_count)),
('/nodes/{}/runningdomains'.format(this_node.name), ' '.join(this_node.domain_list)),
('/nodes/{}/keepalive'.format(this_node.name), str(keepalive_time))
])
except Exception:
logger.out('Failed to set keepalive data', state='e')
return
@ -1652,8 +1685,8 @@ def node_keepalive():
if config['daemon_mode'] == 'coordinator':
for node_name in d_node:
try:
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
node_keepalive = int(zkhandler.readdata(zk_conn, '/nodes/{}/keepalive'.format(node_name)))
node_daemon_state = zkhandler.read('/nodes/{}/daemonstate'.format(node_name))
node_keepalive = int(zkhandler.read('/nodes/{}/keepalive'.format(node_name)))
except Exception:
node_daemon_state = 'unknown'
node_keepalive = 0
@ -1664,15 +1697,17 @@ def node_keepalive():
node_deadtime = int(time.time()) - (int(config['keepalive_interval']) * int(config['fence_intervals']))
if node_keepalive < node_deadtime and node_daemon_state == 'run':
logger.out('Node {} seems dead - starting monitor for fencing'.format(node_name), state='w')
zk_lock = zkhandler.writelock(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
zk_lock = zkhandler.writelock('/nodes/{}/daemonstate'.format(node_name))
with zk_lock:
# Ensures that, if we lost the lock race and come out of waiting,
# we won't try to trigger our own fence thread.
if zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name)) != 'dead':
fence_thread = Thread(target=fencing.fenceNode, args=(node_name, zk_conn, config, logger), kwargs={})
if zkhandler.read('/nodes/{}/daemonstate'.format(node_name)) != 'dead':
fence_thread = Thread(target=fencing.fenceNode, args=(node_name, zkhandler, config, logger), kwargs={})
fence_thread.start()
# Write the updated data after we start the fence thread
zkhandler.writedata(zk_conn, {'/nodes/{}/daemonstate'.format(node_name): 'dead'})
zkhandler.write([
('/nodes/{}/daemonstate'.format(node_name), 'dead')
])
if debug:
logger.out("Keepalive finished", state='d', prefix='main-thread')

View File

@ -36,8 +36,8 @@ class MetadataAPIInstance(object):
mdapi = flask.Flask(__name__)
# Initialization function
def __init__(self, zk_conn, config, logger):
self.zk_conn = zk_conn
def __init__(self, zkhandler, config, logger):
self.zkhandler = zkhandler
self.config = config
self.logger = logger
self.thread = None
@ -158,15 +158,15 @@ class MetadataAPIInstance(object):
# VM details function
def get_vm_details(self, source_address):
# Start connection to Zookeeper
_discard, networks = pvc_network.get_list(self.zk_conn, None)
_discard, networks = pvc_network.get_list(self.zkhandler, None)
# Figure out which server this is via the DHCP address
host_information = dict()
networks_managed = (x for x in networks if x.get('type') == 'managed')
for network in networks_managed:
network_leases = pvc_network.getNetworkDHCPLeases(self.zk_conn, network.get('vni'))
network_leases = pvc_network.getNetworkDHCPLeases(self.zkhandler, network.get('vni'))
for network_lease in network_leases:
information = pvc_network.getDHCPLeaseInformation(self.zk_conn, network.get('vni'), network_lease)
information = pvc_network.getDHCPLeaseInformation(self.zkhandler, network.get('vni'), network_lease)
try:
if information.get('ip4_address', None) == source_address:
host_information = information
@ -177,7 +177,7 @@ class MetadataAPIInstance(object):
client_macaddr = host_information.get('mac_address', None)
# Find the VM with that MAC address - we can't assume that the hostname is actually right
_discard, vm_list = pvc_vm.get_list(self.zk_conn, None, None, None)
_discard, vm_list = pvc_vm.get_list(self.zkhandler, None, None, None)
vm_details = dict()
for vm in vm_list:
try:

View File

@ -23,23 +23,22 @@ import time
from threading import Thread
import pvcnoded.zkhandler as zkhandler
import pvcnoded.common as common
import daemon_lib.common as common
class NodeInstance(object):
# Initialization function
def __init__(self, name, this_node, zk_conn, config, logger, d_node, d_network, d_domain, dns_aggregator, metadata_api):
def __init__(self, name, this_node, zkhandler, config, logger, d_node, d_network, d_domain, dns_aggregator, metadata_api):
# Passed-in variables on creation
self.name = name
self.this_node = this_node
self.zk_conn = zk_conn
self.zkhandler = zkhandler
self.config = config
self.logger = logger
# Which node is primary
self.primary_node = None
# States
self.daemon_mode = zkhandler.readdata(self.zk_conn, '/nodes/{}/daemonmode'.format(self.name))
self.daemon_mode = self.zkhandler.read('/nodes/{}/daemonmode'.format(self.name))
self.daemon_state = 'stop'
self.router_state = 'client'
self.domain_state = 'ready'
@ -91,7 +90,7 @@ class NodeInstance(object):
self.flush_stopper = False
# Zookeeper handlers for changed states
@self.zk_conn.DataWatch('/nodes/{}/daemonstate'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/daemonstate'.format(self.name))
def watch_node_daemonstate(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -106,7 +105,7 @@ class NodeInstance(object):
if data != self.daemon_state:
self.daemon_state = data
@self.zk_conn.DataWatch('/nodes/{}/routerstate'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/routerstate'.format(self.name))
def watch_node_routerstate(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -135,9 +134,11 @@ class NodeInstance(object):
transition_thread.start()
else:
# We did nothing, so just become secondary state
zkhandler.writedata(self.zk_conn, {'/nodes/{}/routerstate'.format(self.name): 'secondary'})
self.zkhandler.write([
('/nodes/{}/routerstate'.format(self.name), 'secondary')
])
@self.zk_conn.DataWatch('/nodes/{}/domainstate'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/domainstate'.format(self.name))
def watch_node_domainstate(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -170,7 +171,7 @@ class NodeInstance(object):
self.flush_thread = Thread(target=self.unflush, args=(), kwargs={})
self.flush_thread.start()
@self.zk_conn.DataWatch('/nodes/{}/memfree'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/memfree'.format(self.name))
def watch_node_memfree(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -185,7 +186,7 @@ class NodeInstance(object):
if data != self.memfree:
self.memfree = data
@self.zk_conn.DataWatch('/nodes/{}/memused'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/memused'.format(self.name))
def watch_node_memused(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -200,7 +201,7 @@ class NodeInstance(object):
if data != self.memused:
self.memused = data
@self.zk_conn.DataWatch('/nodes/{}/memalloc'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/memalloc'.format(self.name))
def watch_node_memalloc(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -215,7 +216,7 @@ class NodeInstance(object):
if data != self.memalloc:
self.memalloc = data
@self.zk_conn.DataWatch('/nodes/{}/vcpualloc'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/vcpualloc'.format(self.name))
def watch_node_vcpualloc(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -230,7 +231,7 @@ class NodeInstance(object):
if data != self.vcpualloc:
self.vcpualloc = data
@self.zk_conn.DataWatch('/nodes/{}/runningdomains'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/runningdomains'.format(self.name))
def watch_node_runningdomains(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -245,7 +246,7 @@ class NodeInstance(object):
if data != self.domain_list:
self.domain_list = data
@self.zk_conn.DataWatch('/nodes/{}/domainscount'.format(self.name))
@self.zkhandler.zk_conn.DataWatch('/nodes/{}/domainscount'.format(self.name))
def watch_node_domainscount(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -323,26 +324,30 @@ class NodeInstance(object):
Acquire primary coordinator status from a peer node
"""
# Lock the primary node until transition is complete
primary_lock = zkhandler.exclusivelock(self.zk_conn, '/primary_node')
primary_lock = self.zkhandler.exclusivelock('/config/primary_node')
primary_lock.acquire()
# Ensure our lock key is populated
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
# Synchronize nodes A (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase A', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase A', state='o')
time.sleep(1)  # Time for reader to acquire the lock
self.logger.out('Releasing write lock for synchronization phase A', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase A', state='o')
time.sleep(0.1)  # Time for new writer to acquire the lock
# Synchronize nodes B (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase B', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase B', state='o')
@ -351,7 +356,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase B', state='o')
# Synchronize nodes C (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase C', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase C', state='o')
@ -367,12 +372,14 @@ class NodeInstance(object):
)
common.createIPAddress(self.upstream_floatingipaddr, self.upstream_cidrnetmask, 'brupstream')
self.logger.out('Releasing write lock for synchronization phase C', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase C', state='o')
# Synchronize nodes D (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase D', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase D', state='o')
@ -397,12 +404,14 @@ class NodeInstance(object):
)
common.createIPAddress(self.storage_floatingipaddr, self.storage_cidrnetmask, 'brstorage')
self.logger.out('Releasing write lock for synchronization phase D', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase D', state='o')
# Synchronize nodes E (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase E', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase E', state='o')
@ -418,12 +427,14 @@ class NodeInstance(object):
)
common.createIPAddress('169.254.169.254', '32', 'lo')
self.logger.out('Releasing write lock for synchronization phase E', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase E', state='o')
# Synchronize nodes F (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase F', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase F', state='o')
@ -432,12 +443,14 @@ class NodeInstance(object):
for network in self.d_network:
self.d_network[network].createGateways()
self.logger.out('Releasing write lock for synchronization phase F', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase F', state='o')
# Synchronize nodes G (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase G', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase G', state='o')
@ -504,14 +517,18 @@ class NodeInstance(object):
else:
self.logger.out('Not starting DNS aggregator due to Patroni failures', state='e')
self.logger.out('Releasing write lock for synchronization phase G', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase G', state='o')
# Wait 2 seconds for everything to stabilize before we declare all-done
time.sleep(2)
primary_lock.release()
zkhandler.writedata(self.zk_conn, {'/nodes/{}/routerstate'.format(self.name): 'primary'})
self.zkhandler.write([
('/nodes/{}/routerstate'.format(self.name), 'primary')
])
self.logger.out('Node {} transitioned to primary state'.format(self.name), state='o')
def become_secondary(self):
@ -521,7 +538,7 @@ class NodeInstance(object):
time.sleep(0.2) # Initial delay for the first writer to grab the lock
# Synchronize nodes A (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase A', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase A', state='o')
@ -530,7 +547,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase A', state='o')
# Synchronize nodes B (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.writelock('/locks/primary_node')
self.logger.out('Acquiring write lock for synchronization phase B', state='i')
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase B', state='o')
@ -541,7 +558,9 @@ class NodeInstance(object):
for network in self.d_network:
self.d_network[network].stopDHCPServer()
self.logger.out('Releasing write lock for synchronization phase B', state='i')
zkhandler.writedata(self.zk_conn, {'/locks/primary_node': ''})
self.zkhandler.write([
('/locks/primary_node', '')
])
lock.release()
self.logger.out('Released write lock for synchronization phase B', state='o')
# 3. Stop client API
@ -553,7 +572,7 @@ class NodeInstance(object):
time.sleep(0.1)  # Time for new writer to acquire the lock
# Synchronize nodes C (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase C', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase C', state='o')
@ -572,7 +591,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase C', state='o')
# Synchronize nodes D (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase D', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase D', state='o')
@ -600,7 +619,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase D', state='o')
# Synchronize nodes E (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase E', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase E', state='o')
@ -619,7 +638,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase E', state='o')
# Synchronize nodes F (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase F', state='i')
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase F', state='o')
@ -631,7 +650,7 @@ class NodeInstance(object):
self.logger.out('Released read lock for synchronization phase F', state='o')
# Synchronize nodes G (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/primary_node')
lock = self.zkhandler.readlock('/locks/primary_node')
self.logger.out('Acquiring read lock for synchronization phase G', state='i')
try:
lock.acquire(timeout=60) # Don't wait forever and completely block us
@ -644,7 +663,9 @@ class NodeInstance(object):
# Wait 2 seconds for everything to stabilize before we declare all-done
time.sleep(2)
zkhandler.writedata(self.zk_conn, {'/nodes/{}/routerstate'.format(self.name): 'secondary'})
self.zkhandler.write([
('/nodes/{}/routerstate'.format(self.name), 'secondary')
])
self.logger.out('Node {} transitioned to secondary state'.format(self.name), state='o')
# Flush all VMs on the host
@ -664,38 +685,42 @@ class NodeInstance(object):
self.logger.out('Selecting target to migrate VM "{}"'.format(dom_uuid), state='i')
# Don't replace the previous node if the VM is already migrated
if zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid)):
current_node = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid))
if self.zkhandler.read('/domains/{}/lastnode'.format(dom_uuid)):
current_node = self.zkhandler.read('/domains/{}/lastnode'.format(dom_uuid))
else:
current_node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid))
current_node = self.zkhandler.read('/domains/{}/node'.format(dom_uuid))
target_node = common.findTargetNode(self.zk_conn, self.config, self.logger, dom_uuid)
target_node = common.findTargetNode(self.zkhandler, dom_uuid)
if target_node == current_node:
target_node = None
if target_node is None:
self.logger.out('Failed to find migration target for VM "{}"; shutting down and setting autostart flag'.format(dom_uuid), state='e')
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(dom_uuid): 'shutdown'})
zkhandler.writedata(self.zk_conn, {'/domains/{}/node_autostart'.format(dom_uuid): 'True'})
self.zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'shutdown'),
('/domains/{}/node_autostart'.format(dom_uuid), 'True')
])
else:
self.logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
zkhandler.writedata(self.zk_conn, {
'/domains/{}/state'.format(dom_uuid): 'migrate',
'/domains/{}/node'.format(dom_uuid): target_node,
'/domains/{}/lastnode'.format(dom_uuid): current_node
})
self.zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'migrate'),
('/domains/{}/node'.format(dom_uuid), target_node),
('/domains/{}/lastnode'.format(dom_uuid), current_node)
])
# Wait for the VM to migrate so the next VM's free RAM count is accurate (they migrate serially anyway)
ticks = 0
while zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(dom_uuid)) in ['migrate', 'unmigrate', 'shutdown']:
while self.zkhandler.read('/domains/{}/state'.format(dom_uuid)) in ['migrate', 'unmigrate', 'shutdown']:
ticks += 1
if ticks > 600:
# Abort if we've waited 120 seconds; the VM is in a bad state, so just continue
break
time.sleep(0.2)
zkhandler.writedata(self.zk_conn, {'/nodes/{}/runningdomains'.format(self.name): ''})
zkhandler.writedata(self.zk_conn, {'/nodes/{}/domainstate'.format(self.name): 'flushed'})
self.zkhandler.write([
('/nodes/{}/runningdomains'.format(self.name), ''),
('/nodes/{}/domainstate'.format(self.name), 'flushed')
])
self.flush_thread = None
self.flush_stopper = False
return
@ -712,20 +737,20 @@ class NodeInstance(object):
return
# Handle autostarts
autostart = zkhandler.readdata(self.zk_conn, '/domains/{}/node_autostart'.format(dom_uuid))
node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(dom_uuid))
autostart = self.zkhandler.read('/domains/{}/node_autostart'.format(dom_uuid))
node = self.zkhandler.read('/domains/{}/node'.format(dom_uuid))
if autostart == 'True' and node == self.name:
self.logger.out('Starting autostart VM "{}"'.format(dom_uuid), state='i')
zkhandler.writedata(self.zk_conn, {
'/domains/{}/state'.format(dom_uuid): 'start',
'/domains/{}/node'.format(dom_uuid): self.name,
'/domains/{}/lastnode'.format(dom_uuid): '',
'/domains/{}/node_autostart'.format(dom_uuid): 'False'
})
self.zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'start'),
('/domains/{}/node'.format(dom_uuid), self.name),
('/domains/{}/lastnode'.format(dom_uuid), ''),
('/domains/{}/node_autostart'.format(dom_uuid), 'False')
])
continue
try:
last_node = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(dom_uuid))
last_node = self.zkhandler.read('/domains/{}/lastnode'.format(dom_uuid))
except Exception:
continue
@ -733,17 +758,19 @@ class NodeInstance(object):
continue
self.logger.out('Setting unmigration for VM "{}"'.format(dom_uuid), state='i')
zkhandler.writedata(self.zk_conn, {
'/domains/{}/state'.format(dom_uuid): 'migrate',
'/domains/{}/node'.format(dom_uuid): self.name,
'/domains/{}/lastnode'.format(dom_uuid): ''
})
self.zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'migrate'),
('/domains/{}/node'.format(dom_uuid), self.name),
('/domains/{}/lastnode'.format(dom_uuid), '')
])
# Wait for the VM to migrate back
while zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(dom_uuid)) in ['migrate', 'unmigrate', 'shutdown']:
while self.zkhandler.read('/domains/{}/state'.format(dom_uuid)) in ['migrate', 'unmigrate', 'shutdown']:
time.sleep(0.1)
zkhandler.writedata(self.zk_conn, {'/nodes/{}/domainstate'.format(self.name): 'ready'})
self.zkhandler.write([
('/nodes/{}/domainstate'.format(self.name), 'ready')
])
self.flush_thread = None
self.flush_stopper = False
return
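The become_primary/become_secondary pair above coordinates through phases A-G on /locks/primary_node: whichever side does the work in a phase holds the write lock while its peer waits on a read lock, and releasing the write lock hands execution to the peer. A minimal sketch of one such handoff, assuming kazoo's shared read/write lock recipes (paths and sleeps are illustrative):

import time
from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()
LOCK_PATH = '/locks/primary_node'

def writer_phase(do_work):
    lock = zk.WriteLock(LOCK_PATH)
    lock.acquire()   # readers now block until we finish
    do_work()
    time.sleep(1)    # give the reader time to queue on the lock
    lock.release()   # handoff: the waiting reader proceeds

def reader_phase(do_work):
    lock = zk.ReadLock(LOCK_PATH)
    lock.acquire()   # returns once the writer's phase completes
    do_work()
    lock.release()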

View File

@ -25,15 +25,13 @@ import time
from threading import Thread, Event
from collections import deque
import pvcnoded.zkhandler as zkhandler
class VMConsoleWatcherInstance(object):
# Initialization function
def __init__(self, domuuid, domname, zk_conn, config, logger, this_node):
def __init__(self, domuuid, domname, zkhandler, config, logger, this_node):
self.domuuid = domuuid
self.domname = domname
self.zk_conn = zk_conn
self.zkhandler = zkhandler
self.config = config
self.logfile = '{}/{}.log'.format(config['console_log_directory'], self.domname)
self.console_log_lines = config['console_log_lines']
@ -44,7 +42,14 @@ class VMConsoleWatcherInstance(object):
open(self.logfile, 'a').close()
os.chmod(self.logfile, 0o600)
self.logdeque = deque(open(self.logfile), self.console_log_lines)
try:
self.logdeque = deque(open(self.logfile), self.console_log_lines)
except UnicodeDecodeError:
# There is corruption in the log file; overwrite it
self.logger.out('Failed to decode console log file; clearing existing file', state='w', prefix='Domain {}'.format(self.domuuid))
with open(self.logfile, 'w') as lfh:
lfh.write('\n')
self.logdeque = deque(open(self.logfile), self.console_log_lines)
self.stamp = None
self.cached_stamp = None
@ -86,7 +91,9 @@ class VMConsoleWatcherInstance(object):
self.fetch_lines()
# Update Zookeeper with the new loglines if they changed
if self.loglines != self.last_loglines:
zkhandler.writedata(self.zk_conn, {'/domains/{}/consolelog'.format(self.domuuid): self.loglines})
self.zkhandler.write([
('/domains/{}/consolelog'.format(self.domuuid), self.loglines)
])
self.last_loglines = self.loglines
def fetch_lines(self):
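The try/except added above makes the initial tail read resilient to undecodable bytes in the console log. The same pattern as a standalone helper, with illustrative names:

from collections import deque

def tail_console_log(path, nlines):
    try:
        with open(path) as fh:
            return deque(fh, nlines)
    except UnicodeDecodeError:
        # Corrupt console output: truncate the file and start fresh
        with open(path, 'w') as fh:
            fh.write('\n')
        return deque(['\n'], nlines)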

View File

@ -28,18 +28,17 @@ from threading import Thread
from xml.etree import ElementTree
import pvcnoded.zkhandler as zkhandler
import pvcnoded.common as common
import daemon_lib.common as common
import pvcnoded.VMConsoleWatcherInstance as VMConsoleWatcherInstance
import daemon_lib.common as daemon_common
def flush_locks(zk_conn, logger, dom_uuid, this_node=None):
def flush_locks(zkhandler, logger, dom_uuid, this_node=None):
logger.out('Flushing RBD locks for VM "{}"'.format(dom_uuid), state='i')
# Get the list of RBD images
rbd_list = zkhandler.readdata(zk_conn, '/domains/{}/rbdlist'.format(dom_uuid)).split(',')
rbd_list = zkhandler.read('/domains/{}/rbdlist'.format(dom_uuid)).split(',')
for rbd in rbd_list:
# Check if a lock exists
lock_list_retcode, lock_list_stdout, lock_list_stderr = common.run_os_command('rbd lock list --format json {}'.format(rbd))
@ -57,17 +56,21 @@ def flush_locks(zk_conn, logger, dom_uuid, this_node=None):
if lock_list:
# Loop through the locks
for lock in lock_list:
if this_node is not None and zkhandler.readdata(zk_conn, '/domains/{}/state'.format(dom_uuid)) != 'stop' and lock['address'].split(':')[0] != this_node.storage_ipaddr:
if this_node is not None and zkhandler.read('/domains/{}/state'.format(dom_uuid)) != 'stop' and lock['address'].split(':')[0] != this_node.storage_ipaddr:
logger.out('RBD lock does not belong to this host (lock owner: {}): freeing this lock would be unsafe, aborting'.format(lock['address'].split(':')[0]), state='e')
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'fail'})
zkhandler.writedata(zk_conn, {'/domains/{}/failedreason'.format(dom_uuid): 'Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually'.format(lock['id'], lock['address'], rbd)})
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'fail'),
('/domains/{}/failedreason'.format(dom_uuid), 'Could not safely free RBD lock {} ({}) on volume {}; stop VM and flush locks manually'.format(lock['id'], lock['address'], rbd))
])
break
# Free the lock
lock_remove_retcode, lock_remove_stdout, lock_remove_stderr = common.run_os_command('rbd lock remove {} "{}" "{}"'.format(rbd, lock['id'], lock['locker']))
if lock_remove_retcode != 0:
logger.out('Failed to free RBD lock "{}" on volume "{}": {}'.format(lock['id'], rbd, lock_remove_stderr), state='e')
zkhandler.writedata(zk_conn, {'/domains/{}/state'.format(dom_uuid): 'fail'})
zkhandler.writedata(zk_conn, {'/domains/{}/failedreason'.format(dom_uuid): 'Could not free RBD lock {} ({}) on volume {}: {}'.format(lock['id'], lock['address'], rbd, lock_remove_stderr)})
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'fail'),
('/domains/{}/failedreason'.format(dom_uuid), 'Could not free RBD lock {} ({}) on volume {}: {}'.format(lock['id'], lock['address'], rbd, lock_remove_stderr))
])
break
logger.out('Freed RBD lock "{}" on volume "{}"'.format(lock['id'], rbd), state='o')
@ -75,7 +78,7 @@ def flush_locks(zk_conn, logger, dom_uuid, this_node=None):
# Primary command function
def run_command(zk_conn, logger, this_node, data):
def run_command(zkhandler, logger, this_node, data):
# Get the command and args
command, args = data.split()
@ -86,45 +89,49 @@ def run_command(zk_conn, logger, this_node, data):
# Verify that the VM is set to run on this node
if this_node.d_domain[dom_uuid].getnode() == this_node.name:
# Lock the command queue
zk_lock = zkhandler.writelock(zk_conn, '/cmd/domains')
zk_lock = zkhandler.writelock('/cmd/domains')
with zk_lock:
# Flush the lock
result = flush_locks(zk_conn, logger, dom_uuid, this_node)
result = flush_locks(zkhandler, logger, dom_uuid, this_node)
# Command succeeded
if result:
# Update the command queue
zkhandler.writedata(zk_conn, {'/cmd/domains': 'success-{}'.format(data)})
zkhandler.write([
('/cmd/domains', 'success-{}'.format(data))
])
# Command failed
else:
# Update the command queue
zkhandler.writedata(zk_conn, {'/cmd/domains': 'failure-{}'.format(data)})
zkhandler.write([
('/cmd/domains', 'failure-{}'.format(data))
])
# Wait 1 second before we free the lock, to ensure the client hits the lock
time.sleep(1)
class VMInstance(object):
# Initialization function
def __init__(self, domuuid, zk_conn, config, logger, this_node):
def __init__(self, domuuid, zkhandler, config, logger, this_node):
# Passed-in variables on creation
self.domuuid = domuuid
self.zk_conn = zk_conn
self.zkhandler = zkhandler
self.config = config
self.logger = logger
self.this_node = this_node
# Get data from zookeeper
self.domname = zkhandler.readdata(zk_conn, '/domains/{}'.format(domuuid))
self.state = zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
self.node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
self.lastnode = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(self.domuuid))
self.last_currentnode = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
self.last_lastnode = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(self.domuuid))
self.domname = self.zkhandler.read('/domains/{}'.format(domuuid))
self.state = self.zkhandler.read('/domains/{}/state'.format(self.domuuid))
self.node = self.zkhandler.read('/domains/{}/node'.format(self.domuuid))
self.lastnode = self.zkhandler.read('/domains/{}/lastnode'.format(self.domuuid))
self.last_currentnode = self.zkhandler.read('/domains/{}/node'.format(self.domuuid))
self.last_lastnode = self.zkhandler.read('/domains/{}/lastnode'.format(self.domuuid))
try:
self.pinpolicy = zkhandler.readdata(self.zk_conn, '/domains/{}/pinpolicy'.format(self.domuuid))
self.pinpolicy = self.zkhandler.read('/domains/{}/pinpolicy'.format(self.domuuid))
except Exception:
self.pinpolicy = "none"
try:
self.migration_method = zkhandler.readdata(self.zk_conn, '/domains/{}/migration_method'.format(self.domuuid))
self.migration_method = self.zkhandler.read('/domains/{}/migration_method'.format(self.domuuid))
except Exception:
self.migration_method = 'none'
@ -140,10 +147,10 @@ class VMInstance(object):
self.dom = self.lookupByUUID(self.domuuid)
# Log watcher instance
self.console_log_instance = VMConsoleWatcherInstance.VMConsoleWatcherInstance(self.domuuid, self.domname, self.zk_conn, self.config, self.logger, self.this_node)
self.console_log_instance = VMConsoleWatcherInstance.VMConsoleWatcherInstance(self.domuuid, self.domname, self.zkhandler, self.config, self.logger, self.this_node)
# Watch for changes to the state field in Zookeeper
@self.zk_conn.DataWatch('/domains/{}/state'.format(self.domuuid))
@self.zkhandler.zk_conn.DataWatch('/domains/{}/state'.format(self.domuuid))
def watch_state(data, stat, event=""):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@ -173,7 +180,7 @@ class VMInstance(object):
if self.dom is not None:
memory = int(self.dom.info()[2] / 1024)
else:
domain_information = daemon_common.getInformationFromXML(self.zk_conn, self.domuuid)
domain_information = daemon_common.getInformationFromXML(self.zkhandler, self.domuuid)
memory = int(domain_information['memory'])
except Exception:
memory = 0
@ -195,7 +202,9 @@ class VMInstance(object):
# Add the domain to the domain_list array
self.this_node.domain_list.append(self.domuuid)
# Push the change up to Zookeeper
zkhandler.writedata(self.zk_conn, {'/nodes/{}/runningdomains'.format(self.this_node.name): ' '.join(self.this_node.domain_list)})
self.zkhandler.write([
('/nodes/{}/runningdomains'.format(self.this_node.name), ' '.join(self.this_node.domain_list))
])
except Exception as e:
self.logger.out('Error adding domain to list: {}'.format(e), state='e')
@ -205,7 +214,9 @@ class VMInstance(object):
# Remove the domain from the domain_list array
self.this_node.domain_list.remove(self.domuuid)
# Push the change up to Zookeeper
zkhandler.writedata(self.zk_conn, {'/nodes/{}/runningdomains'.format(self.this_node.name): ' '.join(self.this_node.domain_list)})
self.zkhandler.write([
('/nodes/{}/runningdomains'.format(self.this_node.name), ' '.join(self.this_node.domain_list))
])
except Exception as e:
self.logger.out('Error removing domain from list: {}'.format(e), state='e')
@ -218,11 +229,17 @@ class VMInstance(object):
self.logger.out('Updating VNC data', state='i', prefix='Domain {}'.format(self.domuuid))
port = graphics.get('port', '')
listen = graphics.get('listen', '')
zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): '{}:{}'.format(listen, port)})
self.zkhandler.write([
('/domains/{}/vnc'.format(self.domuuid), '{}:{}'.format(listen, port))
])
else:
zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): ''})
self.zkhandler.write([
('/domains/{}/vnc'.format(self.domuuid), '')
])
else:
zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): ''})
self.zkhandler.write([
('/domains/{}/vnc'.format(self.domuuid), '')
])
# Start up the VM
def start_vm(self):
@ -251,8 +268,8 @@ class VMInstance(object):
if self.getdom() is None or self.getdom().state()[0] != libvirt.VIR_DOMAIN_RUNNING:
# Flush locks
self.logger.out('Flushing RBD locks', state='i', prefix='Domain {}'.format(self.domuuid))
flush_locks(self.zk_conn, self.logger, self.domuuid, self.this_node)
if zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid)) == 'fail':
flush_locks(self.zkhandler, self.logger, self.domuuid, self.this_node)
if self.zkhandler.read('/domains/{}/state'.format(self.domuuid)) == 'fail':
lv_conn.close()
self.dom = None
self.instart = False
@ -261,21 +278,27 @@ class VMInstance(object):
if curstate == libvirt.VIR_DOMAIN_RUNNING:
# If it is running just update the model
self.addDomainToList()
zkhandler.writedata(self.zk_conn, {'/domains/{}/failedreason'.format(self.domuuid): ''})
self.zkhandler.write([
('/domains/{}/failedreason'.format(self.domuuid), '')
])
else:
# Or try to create it
try:
# Grab the domain information from Zookeeper
xmlconfig = zkhandler.readdata(self.zk_conn, '/domains/{}/xml'.format(self.domuuid))
xmlconfig = self.zkhandler.read('/domains/{}/xml'.format(self.domuuid))
dom = lv_conn.createXML(xmlconfig, 0)
self.addDomainToList()
self.logger.out('Successfully started VM', state='o', prefix='Domain {}'.format(self.domuuid))
self.dom = dom
zkhandler.writedata(self.zk_conn, {'/domains/{}/failedreason'.format(self.domuuid): ''})
self.zkhandler.write([
('/domains/{}/failedreason'.format(self.domuuid), '')
])
except libvirt.libvirtError as e:
self.logger.out('Failed to create VM', state='e', prefix='Domain {}'.format(self.domuuid))
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'fail'})
zkhandler.writedata(self.zk_conn, {'/domains/{}/failedreason'.format(self.domuuid): str(e)})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'fail'),
('/domains/{}/failedreason'.format(self.domuuid), str(e))
])
lv_conn.close()
self.dom = None
self.instart = False
@ -303,7 +326,9 @@ class VMInstance(object):
self.start_vm()
self.addDomainToList()
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'start'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start')
])
lv_conn.close()
self.inrestart = False
@ -334,7 +359,9 @@ class VMInstance(object):
self.removeDomainFromList()
if self.inrestart is False:
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'stop'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'stop')
])
self.logger.out('Successfully stopped VM', state='o', prefix='Domain {}'.format(self.domuuid))
self.dom = None
@ -355,7 +382,7 @@ class VMInstance(object):
time.sleep(1)
# Abort shutdown if the state changes to start
current_state = zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
current_state = self.zkhandler.read('/domains/{}/state'.format(self.domuuid))
if current_state not in ['shutdown', 'restart']:
self.logger.out('Aborting VM shutdown due to state change', state='i', prefix='Domain {}'.format(self.domuuid))
is_aborted = True
@ -368,7 +395,9 @@ class VMInstance(object):
if lvdomstate != libvirt.VIR_DOMAIN_RUNNING:
self.removeDomainFromList()
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'stop'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'stop')
])
self.logger.out('Successfully shutdown VM', state='o', prefix='Domain {}'.format(self.domuuid))
self.dom = None
# Stop the log watcher
@ -377,7 +406,9 @@ class VMInstance(object):
if tick >= self.config['vm_shutdown_timeout']:
self.logger.out('Shutdown timeout ({}s) expired, forcing off'.format(self.config['vm_shutdown_timeout']), state='e', prefix='Domain {}'.format(self.domuuid))
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'stop'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'stop')
])
break
self.inshutdown = False
@ -388,7 +419,9 @@ class VMInstance(object):
if self.inrestart:
# Wait to prevent race conditions
time.sleep(1)
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'start'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start')
])
# Migrate the VM to a target host
def migrate_vm(self, force_live=False, force_shutdown=False):
@ -405,24 +438,24 @@ class VMInstance(object):
self.logger.out('Migrating VM to node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid))
# Used for sanity checking later
target_node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
target_node = self.zkhandler.read('/domains/{}/node'.format(self.domuuid))
aborted = False
def abort_migrate(reason):
zkhandler.writedata(self.zk_conn, {
'/domains/{}/state'.format(self.domuuid): 'start',
'/domains/{}/node'.format(self.domuuid): self.this_node.name,
'/domains/{}/lastnode'.format(self.domuuid): self.last_lastnode
})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start'),
('/domains/{}/node'.format(self.domuuid), self.this_node.name),
('/domains/{}/lastnode'.format(self.domuuid), self.last_lastnode)
])
migrate_lock_node.release()
migrate_lock_state.release()
self.inmigrate = False
self.logger.out('Aborted migration: {}'.format(reason), state='i', prefix='Domain {}'.format(self.domuuid))
# Acquire exclusive lock on the domain node key
migrate_lock_node = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
migrate_lock_state = zkhandler.exclusivelock(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
migrate_lock_node = self.zkhandler.exclusivelock('/domains/{}/node'.format(self.domuuid))
migrate_lock_state = self.zkhandler.exclusivelock('/domains/{}/state'.format(self.domuuid))
migrate_lock_node.acquire()
migrate_lock_state.acquire()
@ -434,14 +467,14 @@ class VMInstance(object):
return
# Synchronize nodes A (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.readlock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
if zkhandler.readdata(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid)) == '':
if self.zkhandler.read('/locks/domain_migrate/{}'.format(self.domuuid)) == '':
self.logger.out('Waiting for peer', state='i', prefix='Domain {}'.format(self.domuuid))
ticks = 0
while zkhandler.readdata(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid)) == '':
while self.zkhandler.read('/locks/domain_migrate/{}'.format(self.domuuid)) == '':
time.sleep(0.1)
ticks += 1
if ticks > 300:
@ -457,11 +490,11 @@ class VMInstance(object):
return
# Synchronize nodes B (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.writelock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time fir reader to acquire the lock
time.sleep(0.5) # Time for reader to acquire the lock
def migrate_live():
self.logger.out('Setting up live migration', state='i', prefix='Domain {}'.format(self.domuuid))
@ -498,8 +531,10 @@ class VMInstance(object):
def migrate_shutdown():
self.logger.out('Shutting down VM for offline migration', state='i', prefix='Domain {}'.format(self.domuuid))
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'shutdown'})
while zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid)) != 'stop':
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'shutdown')
])
while self.zkhandler.read('/domains/{}/state'.format(self.domuuid)) != 'stop':
time.sleep(0.5)
return True
@ -545,11 +580,11 @@ class VMInstance(object):
return
# Synchronize nodes C (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.writelock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time fir reader to acquire the lock
time.sleep(0.5) # Time for reader to acquire the lock
if do_migrate_shutdown:
migrate_shutdown()
@ -559,20 +594,20 @@ class VMInstance(object):
self.logger.out('Released write lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
# Synchronize nodes D (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.readlock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
self.last_currentnode = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
self.last_lastnode = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(self.domuuid))
self.last_currentnode = self.zkhandler.read('/domains/{}/node'.format(self.domuuid))
self.last_lastnode = self.zkhandler.read('/domains/{}/lastnode'.format(self.domuuid))
self.logger.out('Releasing read lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released read lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
# Wait for the receive side to complete before we declare all-done and release locks
while zkhandler.readdata(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid)) != '':
while self.zkhandler.read('/locks/domain_migrate/{}'.format(self.domuuid)) != '':
time.sleep(0.5)
migrate_lock_node.release()
migrate_lock_state.release()
@ -591,21 +626,23 @@ class VMInstance(object):
self.logger.out('Receiving VM migration from node "{}"'.format(self.node), state='i', prefix='Domain {}'.format(self.domuuid))
# Ensure our lock key is populated
zkhandler.writedata(self.zk_conn, {'/locks/domain_migrate/{}'.format(self.domuuid): self.domuuid})
self.zkhandler.write([
('/locks/domain_migrate/{}'.format(self.domuuid), self.domuuid)
])
# Synchronize nodes A (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.writelock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time fir reader to acquire the lock
time.sleep(1) # Time for reader to acquire the lock
self.logger.out('Releasing write lock for synchronization phase A', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released write lock for synchronization phase A', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.1) # Time fir new writer to acquire the lock
time.sleep(0.1) # Time for new writer to acquire the lock
# Synchronize nodes B (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.readlock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase B', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid))
@ -614,38 +651,44 @@ class VMInstance(object):
self.logger.out('Released read lock for synchronization phase B', state='o', prefix='Domain {}'.format(self.domuuid))
# Synchronize nodes C (I am reader)
lock = zkhandler.readlock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.readlock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring read lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired read lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
# Set the updated data
self.last_currentnode = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
self.last_lastnode = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(self.domuuid))
self.last_currentnode = self.zkhandler.read('/domains/{}/node'.format(self.domuuid))
self.last_lastnode = self.zkhandler.read('/domains/{}/lastnode'.format(self.domuuid))
self.logger.out('Releasing read lock for synchronization phase C', state='i', prefix='Domain {}'.format(self.domuuid))
lock.release()
self.logger.out('Released read lock for synchronization phase C', state='o', prefix='Domain {}'.format(self.domuuid))
# Synchronize nodes D (I am writer)
lock = zkhandler.writelock(self.zk_conn, '/locks/domain_migrate/{}'.format(self.domuuid))
lock = self.zkhandler.writelock('/locks/domain_migrate/{}'.format(self.domuuid))
self.logger.out('Acquiring write lock for synchronization phase D', state='i', prefix='Domain {}'.format(self.domuuid))
lock.acquire()
self.logger.out('Acquired write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
time.sleep(0.5) # Time fir reader to acquire the lock
time.sleep(0.5) # Time for reader to acquire the lock
self.state = zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
self.state = self.zkhandler.read('/domains/{}/state'.format(self.domuuid))
self.dom = self.lookupByUUID(self.domuuid)
if self.dom:
lvdomstate = self.dom.state()[0]
if lvdomstate == libvirt.VIR_DOMAIN_RUNNING:
# VM has been received and started
self.addDomainToList()
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'start'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start')
])
self.logger.out('Successfully received migrated VM', state='o', prefix='Domain {}'.format(self.domuuid))
else:
# The receive somehow failed
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'fail'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'fail'),
('/domains/{}/failed_reason'.format(self.domuuid), 'Failed to receive migration')
])
self.logger.out('Failed to receive migrated VM', state='e', prefix='Domain {}'.format(self.domuuid))
else:
if self.node == self.this_node.name:
if self.state in ['start']:
@@ -653,7 +696,9 @@ class VMInstance(object):
self.logger.out('Receive aborted via state change', state='w', prefix='Domain {}'.format(self.domuuid))
elif self.state in ['stop']:
# The send was shutdown-based
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'start'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start')
])
else:
# The send failed or was aborted
self.logger.out('Migrate aborted or failed; VM in state {}'.format(self.state), state='w', prefix='Domain {}'.format(self.domuuid))
@@ -662,7 +707,9 @@ class VMInstance(object):
lock.release()
self.logger.out('Released write lock for synchronization phase D', state='o', prefix='Domain {}'.format(self.domuuid))
zkhandler.writedata(self.zk_conn, {'/locks/domain_migrate/{}'.format(self.domuuid): ''})
self.zkhandler.write([
('/locks/domain_migrate/{}'.format(self.domuuid), '')
])
self.inreceive = False
return
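Every hunk in this file applies the same mechanical conversion: the old module-level zkhandler.writedata(zk_conn, {...}) call, which took the connection plus a dict of key/value pairs, becomes self.zkhandler.write([...]) on the new handler object, which holds its own connection and takes a list of (key, value) tuples that makes the write order explicit. A minimal sketch of the two calling conventions, with a made-up UUID and node name, assuming an already-constructed handler:

# Old module-level API: explicit connection, dict payload
zkhandler.writedata(zk_conn, {'/domains/abc123/state': 'start'})

# New object API: the handler owns the connection, tuples spell out the order
zkhandler.write([
    ('/domains/abc123/state', 'start'),
    ('/domains/abc123/lastnode', 'hv1'),  # hypothetical node name
])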
@@ -671,9 +718,9 @@ class VMInstance(object):
#
def manage_vm_state(self):
# Update the current values from zookeeper
self.state = zkhandler.readdata(self.zk_conn, '/domains/{}/state'.format(self.domuuid))
self.node = zkhandler.readdata(self.zk_conn, '/domains/{}/node'.format(self.domuuid))
self.lastnode = zkhandler.readdata(self.zk_conn, '/domains/{}/lastnode'.format(self.domuuid))
self.state = self.zkhandler.read('/domains/{}/state'.format(self.domuuid))
self.node = self.zkhandler.read('/domains/{}/node'.format(self.domuuid))
self.lastnode = self.zkhandler.read('/domains/{}/lastnode'.format(self.domuuid))
# Check the current state of the VM
try:
@@ -721,7 +768,9 @@ class VMInstance(object):
elif self.state == "migrate" or self.state == "migrate-live":
# Start the log watcher
self.console_log_instance.start()
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'start'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start')
])
# Add domain to running list
self.addDomainToList()
# VM should be restarted
@@ -744,7 +793,9 @@ class VMInstance(object):
self.receive_migrate()
# VM should be restarted (i.e. started since it isn't running)
if self.state == "restart":
zkhandler.writedata(self.zk_conn, {'/domains/{}/state'.format(self.domuuid): 'start'})
self.zkhandler.write([
('/domains/{}/state'.format(self.domuuid), 'start')
])
# VM should be shut down; ensure it's gone from this node's domain_list
elif self.state == "shutdown":
self.removeDomainFromList()
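The phase A through D log lines above trace a four-step writer/reader handshake over a single lock key, coordinating the sender and receiver of a live migration. A condensed sketch of the receiver's half, using the kazoo-style WriteLock/ReadLock that the old zkhandler.py (removed at the bottom of this diff) wraps; the lock path and sleep values mirror the diff, the rest is illustrative:

import time
from kazoo.client import KazooClient

zk_conn = KazooClient(hosts='127.0.0.1:2181')  # illustrative connection
zk_conn.start()

# Phase A (writer): hold the lock long enough for the sender to queue behind it
lock = zk_conn.WriteLock('/locks/domain_migrate/abc123')
lock.acquire()
time.sleep(1)      # time for the reader to queue (this diff bumps 0.5s to 1s)
lock.release()
time.sleep(0.1)    # time for the new writer to take over

# Phases B and C (reader): each acquire() blocks while the sender, holding the
# write lock, performs its step; phase C then re-reads node/lastnode
for phase in ('B', 'C'):
    lock = zk_conn.ReadLock('/locks/domain_migrate/abc123')
    lock.acquire()
    # ... phase-specific work ...
    lock.release()

# Phase D (writer): take the lock back, then decide success or failure from
# the libvirt domain state and write 'start' or 'fail' accordingly
lock = zk_conn.WriteLock('/locks/domain_migrate/abc123')
lock.acquire()
time.sleep(0.5)
# ... inspect dom.state(), write the result ...
lock.release()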


@@ -24,15 +24,14 @@ import time
from textwrap import dedent
import pvcnoded.zkhandler as zkhandler
import pvcnoded.common as common
import daemon_lib.common as common
class VXNetworkInstance(object):
# Initialization function
def __init__(self, vni, zk_conn, config, logger, this_node, dns_aggregator):
def __init__(self, vni, zkhandler, config, logger, this_node, dns_aggregator):
self.vni = vni
self.zk_conn = zk_conn
self.zkhandler = zkhandler
self.config = config
self.logger = logger
self.this_node = this_node
@@ -41,7 +40,7 @@ class VXNetworkInstance(object):
self.vni_mtu = config['vni_mtu']
self.bridge_dev = config['bridge_dev']
self.nettype = zkhandler.readdata(self.zk_conn, '/networks/{}/nettype'.format(self.vni))
self.nettype = self.zkhandler.read('/networks/{}/nettype'.format(self.vni))
if self.nettype == 'bridged':
self.logger.out(
'Creating new bridged network',
@@ -73,7 +72,7 @@ class VXNetworkInstance(object):
self.bridge_nic = 'vmbr{}'.format(self.vni)
# Zookeeper handlers for changed states
@self.zk_conn.DataWatch('/networks/{}'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}'.format(self.vni))
def watch_network_description(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -92,16 +91,16 @@ class VXNetworkInstance(object):
self.description = None
self.domain = None
self.name_servers = None
self.ip6_gateway = zkhandler.readdata(self.zk_conn, '/networks/{}/ip6_gateway'.format(self.vni))
self.ip6_network = zkhandler.readdata(self.zk_conn, '/networks/{}/ip6_network'.format(self.vni))
self.ip6_cidrnetmask = zkhandler.readdata(self.zk_conn, '/networks/{}/ip6_network'.format(self.vni)).split('/')[-1]
self.dhcp6_flag = (zkhandler.readdata(self.zk_conn, '/networks/{}/dhcp6_flag'.format(self.vni)) == 'True')
self.ip4_gateway = zkhandler.readdata(self.zk_conn, '/networks/{}/ip4_gateway'.format(self.vni))
self.ip4_network = zkhandler.readdata(self.zk_conn, '/networks/{}/ip4_network'.format(self.vni))
self.ip4_cidrnetmask = zkhandler.readdata(self.zk_conn, '/networks/{}/ip4_network'.format(self.vni)).split('/')[-1]
self.dhcp4_flag = (zkhandler.readdata(self.zk_conn, '/networks/{}/dhcp4_flag'.format(self.vni)) == 'True')
self.dhcp4_start = (zkhandler.readdata(self.zk_conn, '/networks/{}/dhcp4_start'.format(self.vni)) == 'True')
self.dhcp4_end = (zkhandler.readdata(self.zk_conn, '/networks/{}/dhcp4_end'.format(self.vni)) == 'True')
self.ip6_gateway = self.zkhandler.read('/networks/{}/ip6_gateway'.format(self.vni))
self.ip6_network = self.zkhandler.read('/networks/{}/ip6_network'.format(self.vni))
self.ip6_cidrnetmask = self.zkhandler.read('/networks/{}/ip6_network'.format(self.vni)).split('/')[-1]
self.dhcp6_flag = (self.zkhandler.read('/networks/{}/dhcp6_flag'.format(self.vni)) == 'True')
self.ip4_gateway = self.zkhandler.read('/networks/{}/ip4_gateway'.format(self.vni))
self.ip4_network = self.zkhandler.read('/networks/{}/ip4_network'.format(self.vni))
self.ip4_cidrnetmask = self.zkhandler.read('/networks/{}/ip4_network'.format(self.vni)).split('/')[-1]
self.dhcp4_flag = (self.zkhandler.read('/networks/{}/dhcp4_flag'.format(self.vni)) == 'True')
self.dhcp4_start = (self.zkhandler.read('/networks/{}/dhcp4_start'.format(self.vni)) == 'True')
self.dhcp4_end = (self.zkhandler.read('/networks/{}/dhcp4_end'.format(self.vni)) == 'True')
self.vxlan_nic = 'vxlan{}'.format(self.vni)
self.bridge_nic = 'vmbr{}'.format(self.vni)
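All the watcher registrations below change from @self.zk_conn.DataWatch(...) to @self.zkhandler.zk_conn.DataWatch(...): the new handler exposes its underlying kazoo client for watch registration rather than being passed around as a bare connection. Note also that every flag read above compares against the string 'True', since ZooKeeper stores bytes and the handler returns text. A standalone sketch of the decorator pattern, with a hypothetical host and VNI:

from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')  # stands in for zkhandler.zk_conn
zk.start()

@zk.DataWatch('/networks/1000/nettype')   # hypothetical network 1000
def watch_nettype(data, stat, event=None):
    # kazoo fires this on registration and on every subsequent change;
    # returning False unregisters the watch, as the DELETED branches above do
    if event is not None and event.type == 'DELETED':
        return False
    print(data.decode('utf8') if data else None)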
@@ -158,11 +157,11 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
vxlannic=self.vxlan_nic,
)
self.firewall_rules_in = zkhandler.listchildren(self.zk_conn, '/networks/{}/firewall_rules/in'.format(self.vni))
self.firewall_rules_out = zkhandler.listchildren(self.zk_conn, '/networks/{}/firewall_rules/out'.format(self.vni))
self.firewall_rules_in = self.zkhandler.children('/networks/{}/firewall_rules/in'.format(self.vni))
self.firewall_rules_out = self.zkhandler.children('/networks/{}/firewall_rules/out'.format(self.vni))
# Zookeeper handlers for changed states
@self.zk_conn.DataWatch('/networks/{}'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}'.format(self.vni))
def watch_network_description(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -176,7 +175,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/domain'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/domain'.format(self.vni))
def watch_network_domain(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -193,7 +192,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/name_servers'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/name_servers'.format(self.vni))
def watch_network_name_servers(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -210,7 +209,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/ip6_network'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/ip6_network'.format(self.vni))
def watch_network_ip6_network(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -225,7 +224,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/ip6_gateway'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/ip6_gateway'.format(self.vni))
def watch_network_gateway6(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -247,7 +246,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/dhcp6_flag'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/dhcp6_flag'.format(self.vni))
def watch_network_dhcp6_status(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -261,7 +260,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
elif self.dhcp_server_daemon and not self.dhcp4_flag and self.this_node.router_state in ['primary', 'takeover']:
self.stopDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/ip4_network'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/ip4_network'.format(self.vni))
def watch_network_ip4_network(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -276,7 +275,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/ip4_gateway'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/ip4_gateway'.format(self.vni))
def watch_network_gateway4(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -298,7 +297,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/dhcp4_flag'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/dhcp4_flag'.format(self.vni))
def watch_network_dhcp4_status(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -312,7 +311,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
elif self.dhcp_server_daemon and not self.dhcp6_flag and self.this_node.router_state in ['primary', 'takeover']:
self.stopDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/dhcp4_start'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/dhcp4_start'.format(self.vni))
def watch_network_dhcp4_start(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -325,7 +324,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.DataWatch('/networks/{}/dhcp4_end'.format(self.vni))
@self.zkhandler.zk_conn.DataWatch('/networks/{}/dhcp4_end'.format(self.vni))
def watch_network_dhcp4_end(data, stat, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -338,7 +337,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.ChildrenWatch('/networks/{}/dhcp4_reservations'.format(self.vni))
@self.zkhandler.zk_conn.ChildrenWatch('/networks/{}/dhcp4_reservations'.format(self.vni))
def watch_network_dhcp_reservations(new_reservations, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -354,7 +353,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.stopDHCPServer()
self.startDHCPServer()
@self.zk_conn.ChildrenWatch('/networks/{}/firewall_rules/in'.format(self.vni))
@self.zkhandler.zk_conn.ChildrenWatch('/networks/{}/firewall_rules/in'.format(self.vni))
def watch_network_firewall_rules_in(new_rules, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -366,7 +365,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
self.firewall_rules_in = new_rules
self.updateFirewallRules()
@self.zk_conn.ChildrenWatch('/networks/{}/firewall_rules/out'.format(self.vni))
@self.zkhandler.zk_conn.ChildrenWatch('/networks/{}/firewall_rules/out'.format(self.vni))
def watch_network_firewall_rules_out(new_rules, event=''):
if event and event.type == 'DELETED':
# The key has been deleted after existing before; terminate this watcher
@@ -389,13 +388,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
if reservation not in old_reservations_list:
# Add new reservation file
filename = '{}/{}'.format(self.dnsmasq_hostsdir, reservation)
ipaddr = zkhandler.readdata(
self.zk_conn,
'/networks/{}/dhcp4_reservations/{}/ipaddr'.format(
self.vni,
reservation
)
)
ipaddr = self.zkhandler.read('/networks/{}/dhcp4_reservations/{}/ipaddr'.format(self.vni, reservation))
entry = '{},{}'.format(reservation, ipaddr)
# Write the entry
with open(filename, 'w') as outfile:
@@ -426,10 +419,10 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
full_ordered_rules = []
for acl in self.firewall_rules_in:
order = zkhandler.readdata(self.zk_conn, '/networks/{}/firewall_rules/in/{}/order'.format(self.vni, acl))
order = self.zkhandler.read('/networks/{}/firewall_rules/in/{}/order'.format(self.vni, acl))
ordered_acls_in[order] = acl
for acl in self.firewall_rules_out:
order = zkhandler.readdata(self.zk_conn, '/networks/{}/firewall_rules/out/{}/order'.format(self.vni, acl))
order = self.zkhandler.read('/networks/{}/firewall_rules/out/{}/order'.format(self.vni, acl))
ordered_acls_out[order] = acl
for order in sorted(ordered_acls_in.keys()):
@@ -440,7 +433,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
for direction in 'in', 'out':
for acl in sorted_acl_list[direction]:
rule_prefix = "add rule inet filter vxlan{}-{} counter".format(self.vni, direction)
rule_data = zkhandler.readdata(self.zk_conn, '/networks/{}/firewall_rules/{}/{}/rule'.format(self.vni, direction, acl))
rule_data = self.zkhandler.read('/networks/{}/firewall_rules/{}/{}/rule'.format(self.vni, direction, acl))
rule = '{} {}'.format(rule_prefix, rule_data)
full_ordered_rules.append(rule)
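updateFirewallRules keys each ACL by its stored 'order' value, then walks the sorted keys to emit nftables rules with a per-direction prefix. The same idea in compact form; note the order values come back from ZooKeeper as strings, so a numeric sort needs an int() cast (the VNI and handler instance here are illustrative):

# Build {order: acl} for one direction, then render rules in sorted order
ordered = {}
for acl in firewall_rules_in:
    order = zkhandler.read('/networks/1000/firewall_rules/in/{}/order'.format(acl))
    ordered[int(order)] = acl

rules = []
for order in sorted(ordered):
    rule_data = zkhandler.read('/networks/1000/firewall_rules/in/{}/rule'.format(ordered[order]))
    rules.append('add rule inet filter vxlan1000-in counter {}'.format(rule_data))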
@@ -459,7 +452,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
# Reload firewall rules
nftables_base_filename = '{}/base.nft'.format(self.config['nft_dynamic_directory'])
common.reload_firewall_rules(self.logger, nftables_base_filename)
common.reload_firewall_rules(nftables_base_filename, logger=self.logger)
# Create bridged network configuration
def createNetworkBridged(self):
@@ -805,7 +798,7 @@ add rule inet filter forward ip6 saddr {netaddr6} counter jump {vxlannic}-out
# Reload firewall rules
nftables_base_filename = '{}/base.nft'.format(self.config['nft_dynamic_directory'])
common.reload_firewall_rules(self.logger, nftables_base_filename)
common.reload_firewall_rules(nftables_base_filename, logger=self.logger)
def removeGateways(self):
if self.nettype == 'managed':


@@ -1,300 +0,0 @@
#!/usr/bin/env python3
# common.py - PVC daemon function library, common functions
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import subprocess
import signal
from threading import Thread
from shlex import split as shlex_split
import pvcnoded.zkhandler as zkhandler
class OSDaemon(object):
def __init__(self, command_string, environment, logfile):
command = shlex_split(command_string)
# Set stdout to be a logfile if set
if logfile:
stdout = open(logfile, 'a')
else:
stdout = subprocess.PIPE
# Invoke the process
self.proc = subprocess.Popen(
command,
env=environment,
stdout=stdout,
stderr=stdout,
)
# Signal the process
def signal(self, sent_signal):
signal_map = {
'hup': signal.SIGHUP,
'int': signal.SIGINT,
'term': signal.SIGTERM,
'kill': signal.SIGKILL
}
self.proc.send_signal(signal_map[sent_signal])
def run_os_daemon(command_string, environment=None, logfile=None):
daemon = OSDaemon(command_string, environment, logfile)
return daemon
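For context on the file being deleted here (its replacement lives in daemon_lib.common, per the import changes earlier in this diff): OSDaemon wraps a long-running child process whose output is appended to a logfile, and the signal map accepts 'hup', 'int', 'term', and 'kill'. An illustrative usage, with a made-up command and paths:

# Launch a supervised child with its output appended to a logfile
daemon = run_os_daemon(
    '/usr/sbin/dnsmasq --conf-file=/tmp/example.conf',  # hypothetical command
    environment=None,
    logfile='/tmp/example.log',
)
# ... later, shut it down via the signal map
daemon.signal('term')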
# Run a oneshot command, optionally without blocking
def run_os_command(command_string, background=False, environment=None, timeout=None):
command = shlex_split(command_string)
if background:
def runcmd():
try:
subprocess.run(
command,
env=environment,
timeout=timeout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
except subprocess.TimeoutExpired:
pass
thread = Thread(target=runcmd, args=())
thread.start()
return 0, None, None
else:
try:
command_output = subprocess.run(
command,
env=environment,
timeout=timeout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
retcode = command_output.returncode
except subprocess.TimeoutExpired:
retcode = 128
try:
stdout = command_output.stdout.decode('ascii')
except Exception:
stdout = ''
try:
stderr = command_output.stderr.decode('ascii')
except Exception:
stderr = ''
return retcode, stdout, stderr
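run_os_command returns a (retcode, stdout, stderr) triple; a timeout surfaces as retcode 128 with empty output, and background mode returns (0, None, None) immediately. A quick usage sketch:

retcode, stdout, stderr = run_os_command('ip address show dev lo', timeout=5)
if retcode == 0:
    print(stdout)
elif retcode == 128:
    print('command timed out')
else:
    print('command failed: {}'.format(stderr))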
# Reload the firewall rules of the system
def reload_firewall_rules(logger, rules_file):
logger.out('Reloading firewall configuration', state='o')
retcode, stdout, stderr = run_os_command('/usr/sbin/nft -f {}'.format(rules_file))
if retcode != 0:
logger.out('Failed to reload configuration: {}'.format(stderr), state='e')
# Create IP address
def createIPAddress(ipaddr, cidrnetmask, dev):
run_os_command(
'ip address add {}/{} dev {}'.format(
ipaddr,
cidrnetmask,
dev
)
)
run_os_command(
'arping -P -U -W 0.02 -c 2 -i {dev} -S {ip} {ip}'.format(
dev=dev,
ip=ipaddr
)
)
# Remove IP address
def removeIPAddress(ipaddr, cidrnetmask, dev):
run_os_command(
'ip address delete {}/{} dev {}'.format(
ipaddr,
cidrnetmask,
dev
)
)
#
# Find a migration target
#
def findTargetNode(zk_conn, config, logger, dom_uuid):
# Determine VM node limits; set config value if read fails
try:
node_limit = zkhandler.readdata(zk_conn, '/domains/{}/node_limit'.format(dom_uuid)).split(',')
if not any(node_limit):
node_limit = ''
except Exception:
node_limit = ''
zkhandler.writedata(zk_conn, {'/domains/{}/node_limit'.format(dom_uuid): ''})
# Determine VM search field
try:
search_field = zkhandler.readdata(zk_conn, '/domains/{}/node_selector'.format(dom_uuid))
except Exception:
search_field = None
# If our search field is invalid, use and set the default (for next time)
if search_field is None or search_field == 'None':
search_field = config['migration_target_selector']
zkhandler.writedata(zk_conn, {'/domains/{}/node_selector'.format(dom_uuid): config['migration_target_selector']})
if config['debug']:
logger.out('Migrating VM {} with selector {}'.format(dom_uuid, search_field), state='d', prefix='node-flush')
# Execute the search
if search_field == 'mem':
return findTargetNodeMem(zk_conn, config, logger, node_limit, dom_uuid)
if search_field == 'load':
return findTargetNodeLoad(zk_conn, config, logger, node_limit, dom_uuid)
if search_field == 'vcpus':
return findTargetNodeVCPUs(zk_conn, config, logger, node_limit, dom_uuid)
if search_field == 'vms':
return findTargetNodeVMs(zk_conn, config, logger, node_limit, dom_uuid)
# Nothing was found
return None
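The chain of ifs above dispatches the per-VM node_selector (falling back to the cluster's migration_target_selector) to one of four scoring functions. The same dispatch as a lookup table; a sketch only, reusing the scorer names defined below:

def dispatch_target_search(zk_conn, config, logger, node_limit, dom_uuid, search_field):
    # Built at call time so the scorers can be defined later in the module
    handler = {
        'mem': findTargetNodeMem,
        'load': findTargetNodeLoad,
        'vcpus': findTargetNodeVCPUs,
        'vms': findTargetNodeVMs,
    }.get(search_field)
    if handler is None:
        return None  # unknown selector: nothing was found
    return handler(zk_conn, config, logger, node_limit, dom_uuid)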
# Get the list of valid target nodes
def getNodes(zk_conn, node_limit, dom_uuid):
valid_node_list = []
full_node_list = zkhandler.listchildren(zk_conn, '/nodes')
current_node = zkhandler.readdata(zk_conn, '/domains/{}/node'.format(dom_uuid))
for node in full_node_list:
if node_limit and node not in node_limit:
continue
daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node))
domain_state = zkhandler.readdata(zk_conn, '/nodes/{}/domainstate'.format(node))
if node == current_node:
continue
if daemon_state != 'run' or domain_state != 'ready':
continue
valid_node_list.append(node)
return valid_node_list
# via free memory (relative to allocated memory)
def findTargetNodeMem(zk_conn, config, logger, node_limit, dom_uuid):
most_provfree = 0
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
if config['debug']:
logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
for node in node_list:
memprov = int(zkhandler.readdata(zk_conn, '/nodes/{}/memprov'.format(node)))
memused = int(zkhandler.readdata(zk_conn, '/nodes/{}/memused'.format(node)))
memfree = int(zkhandler.readdata(zk_conn, '/nodes/{}/memfree'.format(node)))
memtotal = memused + memfree
provfree = memtotal - memprov
if config['debug']:
logger.out('Evaluating node {} with {} provfree'.format(node, provfree), state='d', prefix='node-flush')
if provfree > most_provfree:
most_provfree = provfree
target_node = node
if config['debug']:
logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
return target_node
# via load average
def findTargetNodeLoad(zk_conn, config, logger, node_limit, dom_uuid):
least_load = 9999.0
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
if config['debug']:
logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
for node in node_list:
load = float(zkhandler.readdata(zk_conn, '/nodes/{}/cpuload'.format(node)))
if config['debug']:
logger.out('Evaluating node {} with load {}'.format(node, load), state='d', prefix='node-flush')
if load < least_load:
least_load = load
target_node = node
if config['debug']:
logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
return target_node
# via total vCPUs
def findTargetNodeVCPUs(zk_conn, config, logger, node_limit, dom_uuid):
least_vcpus = 9999
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
if config['debug']:
logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
for node in node_list:
vcpus = int(zkhandler.readdata(zk_conn, '/nodes/{}/vcpualloc'.format(node)))
if config['debug']:
logger.out('Evaluating node {} with vcpualloc {}'.format(node, vcpus), state='d', prefix='node-flush')
if vcpus < least_vcpus:
least_vcpus = vcpus
target_node = node
if config['debug']:
logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
return target_node
# via total VMs
def findTargetNodeVMs(zk_conn, config, logger, node_limit, dom_uuid):
least_vms = 9999
target_node = None
node_list = getNodes(zk_conn, node_limit, dom_uuid)
if config['debug']:
logger.out('Found nodes: {}'.format(node_list), state='d', prefix='node-flush')
for node in node_list:
vms = int(zkhandler.readdata(zk_conn, '/nodes/{}/domainscount'.format(node)))
if config['debug']:
logger.out('Evaluating node {} with VM count {}'.format(node, vms), state='d', prefix='node-flush')
if vms < least_vms:
least_vms = vms
target_node = node
if config['debug']:
logger.out('Selected node {}'.format(target_node), state='d', prefix='node-flush')
return target_node


@@ -21,15 +21,14 @@
import time
import pvcnoded.zkhandler as zkhandler
import pvcnoded.common as common
import daemon_lib.common as common
import pvcnoded.VMInstance as VMInstance
#
# Fence thread entry function
#
def fenceNode(node_name, zk_conn, config, logger):
def fenceNode(node_name, zkhandler, config, logger):
# We allow exactly 6 saving throws (30 seconds) for the host to come back online or we kill it
failcount_limit = 6
failcount = 0
@@ -37,7 +36,7 @@ def fenceNode(node_name, zk_conn, config, logger):
# Wait 5 seconds
time.sleep(config['keepalive_interval'])
# Get the state
node_daemon_state = zkhandler.readdata(zk_conn, '/nodes/{}/daemonstate'.format(node_name))
node_daemon_state = zkhandler.read('/nodes/{}/daemonstate'.format(node_name))
# Is it still 'dead'
if node_daemon_state == 'dead':
failcount += 1
@@ -50,9 +49,9 @@ def fenceNode(node_name, zk_conn, config, logger):
logger.out('Fencing node "{}" via IPMI reboot signal'.format(node_name), state='w')
# Get IPMI information
ipmi_hostname = zkhandler.readdata(zk_conn, '/nodes/{}/ipmihostname'.format(node_name))
ipmi_username = zkhandler.readdata(zk_conn, '/nodes/{}/ipmiusername'.format(node_name))
ipmi_password = zkhandler.readdata(zk_conn, '/nodes/{}/ipmipassword'.format(node_name))
ipmi_hostname = zkhandler.read('/nodes/{}/ipmihostname'.format(node_name))
ipmi_username = zkhandler.read('/nodes/{}/ipmiusername'.format(node_name))
ipmi_password = zkhandler.read('/nodes/{}/ipmipassword'.format(node_name))
# Shoot it in the head
fence_status = rebootViaIPMI(ipmi_hostname, ipmi_username, ipmi_password, logger)
@@ -62,47 +61,53 @@ def fenceNode(node_name, zk_conn, config, logger):
# Force into secondary network state if needed
if node_name in config['coordinators']:
logger.out('Forcing secondary status for node "{}"'.format(node_name), state='i')
zkhandler.writedata(zk_conn, {'/nodes/{}/routerstate'.format(node_name): 'secondary'})
if zkhandler.readdata(zk_conn, '/primary_node') == node_name:
zkhandler.writedata(zk_conn, {'/primary_node': 'none'})
zkhandler.write([
('/nodes/{}/routerstate'.format(node_name), 'secondary')
])
if zkhandler.read('/config/primary_node') == node_name:
zkhandler.write([
('/config/primary_node', 'none')
])
# If the fence succeeded and successful_fence is migrate
if fence_status and config['successful_fence'] == 'migrate':
migrateFromFencedNode(zk_conn, node_name, config, logger)
migrateFromFencedNode(zkhandler, node_name, config, logger)
# If the fence failed and failed_fence is migrate
if not fence_status and config['failed_fence'] == 'migrate' and config['suicide_intervals'] != '0':
migrateFromFencedNode(zk_conn, node_name, config, logger)
migrateFromFencedNode(zkhandler, node_name, config, logger)
# Migrate hosts away from a fenced node
def migrateFromFencedNode(zk_conn, node_name, config, logger):
def migrateFromFencedNode(zkhandler, node_name, config, logger):
logger.out('Migrating VMs from dead node "{}" to new hosts'.format(node_name), state='i')
# Get the list of VMs
dead_node_running_domains = zkhandler.readdata(zk_conn, '/nodes/{}/runningdomains'.format(node_name)).split()
dead_node_running_domains = zkhandler.read('/nodes/{}/runningdomains'.format(node_name)).split()
# Set the node to a custom domainstate so we know what's happening
zkhandler.writedata(zk_conn, {'/nodes/{}/domainstate'.format(node_name): 'fence-flush'})
zkhandler.write([
('/nodes/{}/domainstate'.format(node_name), 'fence-flush')
])
# Migrate a VM after a flush
def fence_migrate_vm(dom_uuid):
VMInstance.flush_locks(zk_conn, logger, dom_uuid)
VMInstance.flush_locks(zkhandler, logger, dom_uuid)
target_node = common.findTargetNode(zk_conn, config, logger, dom_uuid)
target_node = common.findTargetNode(zkhandler, config, logger, dom_uuid)
if target_node is not None:
logger.out('Migrating VM "{}" to node "{}"'.format(dom_uuid, target_node), state='i')
zkhandler.writedata(zk_conn, {
'/domains/{}/state'.format(dom_uuid): 'start',
'/domains/{}/node'.format(dom_uuid): target_node,
'/domains/{}/lastnode'.format(dom_uuid): node_name
})
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'start'),
('/domains/{}/node'.format(dom_uuid), target_node),
('/domains/{}/lastnode'.format(dom_uuid), node_name)
])
else:
logger.out('No target node found for VM "{}"; VM will autostart on next unflush/ready of current node'.format(dom_uuid), state='i')
zkhandler.writedata(zk_conn, {
'/domains/{}/state'.format(dom_uuid): 'stopped',
'/domains/{}/node_autostart'.format(dom_uuid): 'True'
zkhandler.write([
('/domains/{}/state'.format(dom_uuid), 'stopped'),
('/domains/{}/node_autostart'.format(dom_uuid), 'True')
])
# Loop through the VMs
@@ -110,7 +115,9 @@ def migrateFromFencedNode(zk_conn, node_name, config, logger):
fence_migrate_vm(dom_uuid)
# Set node in flushed state for easy remigrating when it comes back
zkhandler.writedata(zk_conn, {'/nodes/{}/domainstate'.format(node_name): 'flushed'})
zkhandler.write([
('/nodes/{}/domainstate'.format(node_name), 'flushed')
])
#
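rebootViaIPMI is called above but not shown in this hunk; it shells out to the node's IPMI interface using the credentials read from ZooKeeper. A hedged sketch of that call shape using standard ipmitool flags, which are an assumption here rather than something confirmed by this diff:

def reboot_via_ipmi_sketch(ipmi_hostname, ipmi_username, ipmi_password, logger):
    # 'chassis power reset' forces a hard reboot of the fenced node
    retcode, stdout, stderr = common.run_os_command(
        '/usr/bin/ipmitool -I lanplus -H {} -U {} -P {} chassis power reset'.format(
            ipmi_hostname, ipmi_username, ipmi_password
        )
    )
    if retcode != 0:
        logger.out('IPMI reset failed: {}'.format(stderr), state='e')
    return retcode == 0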


@@ -1,189 +0,0 @@
#!/usr/bin/env python3
# zkhandler.py - Secure versioned ZooKeeper updates
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import uuid
# Child list function
def listchildren(zk_conn, key):
try:
children = zk_conn.get_children(key)
return children
except Exception:
return None
# Key deletion function
def deletekey(zk_conn, key, recursive=True):
try:
zk_conn.delete(key, recursive=recursive)
return True
except Exception:
return False
# Data read function
def readdata(zk_conn, key):
try:
data_raw = zk_conn.get(key)
data = data_raw[0].decode('utf8')
return data
except Exception:
return None
# Data write function
def writedata(zk_conn, kv):
# Commit the transaction
try:
# Start up a transaction
zk_transaction = zk_conn.transaction()
# Proceed one KV pair at a time
for key in sorted(kv):
data = kv[key]
if not data:
data = ''
# Check if this key already exists or not
if not zk_conn.exists(key):
# We're creating a new key
zk_transaction.create(key, str(data).encode('utf8'))
else:
# We're updating a key with version validation
orig_data = zk_conn.get(key)
version = orig_data[1].version
# Set what we expect the new version to be
new_version = version + 1
# Update the data
zk_transaction.set_data(key, str(data).encode('utf8'))
# Set up the check
try:
zk_transaction.check(key, new_version)
except TypeError:
print('Zookeeper key "{}" does not match expected version'.format(key))
return False
zk_transaction.commit()
return True
except Exception:
return False
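writedata above is the heart of this deleted module: an optimistic versioned write, where the transaction carries a version check so a concurrent writer causes the commit to fail rather than silently clobbering data. The essential pattern condensed with the kazoo API (and slightly reordered: the check here runs before the set, against the version seen at read time):

from kazoo.client import KazooClient

zk = KazooClient(hosts='127.0.0.1:2181')  # illustrative
zk.start()

key, data = '/example/key', 'value'  # hypothetical
if not zk.exists(key):
    zk.create(key, data.encode('utf8'), makepath=True)
else:
    version = zk.get(key)[1].version      # version observed at read time
    t = zk.transaction()
    t.check(key, version)                 # abort the commit if it changed since
    t.set_data(key, data.encode('utf8'))
    results = t.commit()                  # per-operation results list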
# Key rename function
def renamekey(zk_conn, kv):
# This one is not transactional because, inexplicably, transactions don't
# support either the recursive delete or recursive create operations that
# we need. Why? No explanation in the docs that I can find.
try:
# Proceed one KV pair at a time
for key in sorted(kv):
old_name = key
new_name = kv[key]
old_data = zk_conn.get(old_name)[0]
child_keys = list()
# Find the children of old_name recursively
def get_children(key):
children = zk_conn.get_children(key)
if not children:
child_keys.append(key)
else:
for ckey in children:
get_children('{}/{}'.format(key, ckey))
get_children(old_name)
# Get the data out of each of the child keys
child_data = dict()
for ckey in child_keys:
child_data[ckey] = zk_conn.get(ckey)[0]
# Create the new parent key
zk_conn.create(new_name, old_data, makepath=True)
# For each child key, create the key and add the data
for ckey in child_keys:
new_ckey_name = ckey.replace(old_name, new_name)
zk_conn.create(new_ckey_name, child_data[ckey], makepath=True)
# Remove recursively the old key
zk_conn.delete(old_name, recursive=True)
return True
except Exception:
return False
# Write lock function
def writelock(zk_conn, key):
count = 1
while True:
try:
lock_id = str(uuid.uuid1())
lock = zk_conn.WriteLock('{}'.format(key), lock_id)
break
except Exception:
count += 1
if count > 5:
break
else:
continue
return lock
# Read lock function
def readlock(zk_conn, key):
count = 1
while True:
try:
lock_id = str(uuid.uuid1())
lock = zk_conn.ReadLock('{}'.format(key), lock_id)
break
except Exception:
count += 1
if count > 5:
break
else:
continue
return lock
# Exclusive lock function
def exclusivelock(zk_conn, key):
count = 1
while True:
try:
lock_id = str(uuid.uuid1())
lock = zk_conn.Lock('{}'.format(key), lock_id)
break
except Exception:
count += 1
if count > 5:
break
else:
continue
return lock
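Each lock helper above retries construction up to five times; note that if every attempt raises, the final return lock would itself raise NameError, since lock was never bound. In practice these wrap kazoo's WriteLock/ReadLock/Lock recipes, which callers pair with acquire/release as in the VMInstance handshake earlier in this diff; kazoo locks also work as context managers, which makes the release implicit. A usage sketch with a hypothetical key:

lock = writelock(zk_conn, '/locks/domain_migrate/abc123')
lock.acquire()
try:
    pass  # ... critical section ...
finally:
    lock.release()

# Equivalent, with implicit release:
with readlock(zk_conn, '/locks/domain_migrate/abc123'):
    pass  # ... read-side critical section ...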