Compare commits

27 Commits

| Author | SHA1 | Date |
|---|---|---|
| | 0ccfc41398 | |
| | ab05e0f3db | |
| | 9291ce6ffc | |
| | dd87951642 | |
| | 0e4bece441 | |
| | b33c0ab0e2 | |
| | 094d25dafa | |
| | 150c61d226 | |
| | f1c0c9325d | |
| | 26b0a8b5c1 | |
| | f22f291c8b | |
| | 9100c63e99 | |
| | aba567d6c9 | |
| | 0db8fd9da6 | |
| | a44f134230 | |
| | 9fbe35fd24 | |
| | 09fdb5da26 | |
| | 80a436d7b6 | |
| | 74a28f2edd | |
| | b8aba3a498 | |
| | 8b584bc545 | |
| | a24724d9f0 | |
| | d22a5aa7f2 | |
| | 78c017d51d | |
| | 1b6613c280 | |
| | 9aeb86246a | |
| | 6abb8b2456 | |

README.md
@@ -20,6 +20,26 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r

## Changelog

#### v0.9.13

* Adds nicer startup messages for daemons
* Adds additional API field for stored_bytes to pool stats
* Fixes sorting issues with snapshot lists
* Fixes missing increment/decrement of snapshot_count on volumes
* Fixes bad calls in pool element API endpoints
* Fixes inconsistent bytes_tohuman behaviour in daemons
* Adds validation and maximum volume size on creation (must be smaller than the pool free space)

#### v0.9.12

* Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition

#### v0.9.11

* Documentation updates
* Adds VNC information to VM info
* Goes back to external Ceph commands for disk usage

#### v0.9.10

* Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down
@@ -26,10 +26,31 @@ import pvcapid.flaskapi as pvc_api

# Entrypoint
##########################################################

# Version string for startup output
version = '0.9.11'

if pvc_api.config['ssl_enabled']:
    context = (pvc_api.config['ssl_cert_file'], pvc_api.config['ssl_key_file'])
else:
    context = None

print('Starting PVC API daemon at {}:{} with SSL={}, Authentication={}'.format(pvc_api.config['listen_address'], pvc_api.config['listen_port'], pvc_api.config['ssl_enabled'], pvc_api.config['auth_enabled']))
# Print our startup messages
print('')
print('|--------------------------------------------------|')
print('| ######## ## ## ###### |')
print('| ## ## ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ######## ## ## ## |')
print('| ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ## ### ###### |')
print('|--------------------------------------------------|')
print('| Parallel Virtual Cluster API daemon v{0: <11} |'.format(version))
print('| API version: v{0: <34} |'.format(pvc_api.API_VERSION))
print('| Listen: {0: <40} |'.format('{}:{}'.format(pvc_api.config['listen_address'], pvc_api.config['listen_port'])))
print('| SSL: {0: <43} |'.format(str(pvc_api.config['ssl_enabled'])))
print('| Authentication: {0: <32} |'.format(str(pvc_api.config['auth_enabled'])))
print('|--------------------------------------------------|')
print('')

pvc_api.app.run(pvc_api.config['listen_address'], pvc_api.config['listen_port'], threaded=True, ssl_context=context)
@@ -52,16 +52,16 @@ def strtobool(stringv):

# Parse the configuration file
try:
    pvc_config_file = os.environ['PVC_CONFIG_FILE']
    pvcapid_config_file = os.environ['PVC_CONFIG_FILE']
except Exception:
    print('Error: The "PVC_CONFIG_FILE" environment variable must be set before starting pvcapid.')
    exit(1)

print('Starting PVC API daemon')
print('Loading configuration from file "{}"'.format(pvcapid_config_file))

# Read in the config
try:
    with open(pvc_config_file, 'r') as cfgfile:
    with open(pvcapid_config_file, 'r') as cfgfile:
        o_config = yaml.load(cfgfile, Loader=yaml.BaseLoader)
except Exception as e:
    print('ERROR: Failed to parse configuration file: {}'.format(e))
@@ -1023,6 +1023,15 @@ class API_VM_Root(Resource):
              console:
                type: string
                description: The serial console type of the VM
              vnc:
                type: object
                properties:
                  listen:
                    type: string
                    description: The active VNC listen address or 'None'
                  port:
                    type: string
                    description: The active VNC port or 'None'
              emulator:
                type: string
                description: The binary emulator of the VM
@@ -3328,12 +3337,15 @@ class API_Storage_Ceph_Pool_Root(Resource):
              id:
                type: integer
                description: The Ceph pool ID
              stored_bytes:
                type: integer
                description: The stored data size (in bytes, post-replicas)
              free_bytes:
                type: integer
                description: The total free space (in bytes)
                description: The total free space (in bytes, post-replicas)
              used_bytes:
                type: integer
                description: The total used space (in bytes)
                description: The total used space (in bytes, pre-replicas)
              used_percent:
                type: number
                description: The ratio of used space to free space
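To make the pre-/post-replica accounting above concrete, the three byte fields relate roughly as sketched below for a replicated pool; the numbers and the replica count of 3 are invented for illustration and are not taken from this changeset:

```
# Illustrative only: the values and the copies=3 replication factor are assumptions.
copies = 3
stored_bytes = 100 * 1024**3        # client data stored in the pool (post-replicas)
used_bytes = stored_bytes * copies  # raw space consumed across all replicas (pre-replicas)
free_bytes = 200 * 1024**3          # additional client data the pool could hold (post-replicas)
print(stored_bytes, used_bytes, free_bytes)
```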
@@ -3455,7 +3467,7 @@ class API_Storage_Ceph_Pool_Element(Resource):
                type: object
                id: Message
        """
        return api_helper, api_helper.ceph_pool_list(
        return api_helper.ceph_pool_list(
            pool,
            is_fuzzy=False
        )

@@ -3499,7 +3511,7 @@ class API_Storage_Ceph_Pool_Element(Resource):
                type: object
                id: Message
        """
        api_helper.ceph_pool_add(
        return api_helper.ceph_pool_add(
            pool,
            reqargs.get('pgs', None),
            reqargs.get('replcfg', None)
@@ -177,8 +177,8 @@ def upload_ova(pool, name, ova_size):
    pvc_common.stopZKConnection(zk_conn)

    # Normalize the OVA size to bytes
    ova_size_bytes = int(pvc_ceph.format_bytes_fromhuman(ova_size)[:-1])
    ova_size = pvc_ceph.format_bytes_fromhuman(ova_size)
    ova_size_bytes = pvc_ceph.format_bytes_fromhuman(ova_size)
    ova_size = '{}B'.format(ova_size_bytes)

    # Verify that the cluster has enough space to store the OVA volumes (2x OVA size, temporarily, 1x permanently)
    zk_conn = pvc_common.startZKConnection(config['coordinators'])

@@ -274,7 +274,7 @@ def upload_ova(pool, name, ova_size):
        vm_volume_size = disk.get('capacity')

        # Normalize the dev size to bytes
        dev_size = pvc_ceph.format_bytes_fromhuman(dev_size_raw)
        dev_size = '{}B'.format(pvc_ceph.format_bytes_fromhuman(dev_size_raw))

    def cleanup_img_maps():
        zk_conn = pvc_common.startZKConnection(config['coordinators'])

@@ -368,7 +368,7 @@ def upload_ova(pool, name, ova_size):
        vm_volume_size = disk.get('capacity')

        # The function always return XXXXB, so strip off the B and convert to an integer
        vm_volume_size_bytes = int(pvc_ceph.format_bytes_fromhuman(vm_volume_size)[:-1])
        vm_volume_size_bytes = pvc_ceph.format_bytes_fromhuman(vm_volume_size)
        vm_volume_size_gb = math.ceil(vm_volume_size_bytes / 1024 / 1024 / 1024)

        query = "INSERT INTO ova_volume (ova, pool, volume_name, volume_format, disk_id, disk_size_gb) VALUES (%s, %s, %s, %s, %s, %s);"
@@ -17,6 +17,7 @@ $EDITOR ${changelog_file}

changelog="$( cat ${changelog_file} | grep -v '^#' | sed 's/^*/ */' )"

sed -i "s,version = '${current_version}',version = '${new_version}'," node-daemon/pvcnoded/Daemon.py
sed -i "s,version = '${current_version}',version = '${new_version}'," api-daemon/pvcapid/Daemon.py

readme_tmpdir=$( mktemp -d )
cp README.md ${readme_tmpdir}/
@@ -1071,6 +1071,11 @@ def format_info(config, domain_information, long_output):
    ainformation.append('{}vCPUs:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vcpu']))
    ainformation.append('{}Topology (S/C/T):{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vcpu_topology']))

    if domain_information['vnc'].get('listen', 'None') != 'None' and domain_information['vnc'].get('port', 'None') != 'None':
        ainformation.append('')
        ainformation.append('{}VNC listen:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vnc']['listen']))
        ainformation.append('{}VNC port:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vnc']['port']))

    if long_output is True:
        # Virtualization information
        ainformation.append('')
@@ -122,7 +122,7 @@ def format_bytes_fromhuman(datahuman):
        dataunit = 'B'
        datasize = int(datahuman)
    databytes = datasize * byte_unit_matrix[dataunit]
    return '{}B'.format(databytes)
    return databytes


# Format ops sizes to/from human-readable units
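The behavioural change above is that `format_bytes_fromhuman()` now returns a plain integer byte count, and callers that need the Ceph-style string (as in the OVA hunks earlier) append the `B` suffix themselves. A minimal sketch of that round trip, assuming a conventional 1024-based unit table (the real `byte_unit_matrix` is not shown in this diff):

```
import re

# Assumption: a conventional 1024-based unit table; the daemon's actual
# byte_unit_matrix is not part of this diff.
byte_unit_matrix = {'B': 1, 'K': 1024, 'M': 1024**2, 'G': 1024**3, 'T': 1024**4}

def format_bytes_fromhuman(datahuman):
    # Split e.g. '8G' into its numeric part and unit suffix, defaulting to bytes
    match = re.match(r'^([0-9]+)([A-Z])?$', str(datahuman))
    datasize = int(match.group(1))
    dataunit = match.group(2) or 'B'
    return datasize * byte_unit_matrix[dataunit]

size_bytes = format_bytes_fromhuman('8G')  # 8589934592, an int after this change
size_for_rbd = '{}B'.format(size_bytes)    # '8589934592B', as callers now construct it
```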
@@ -475,7 +475,17 @@ def getVolumeInformation(zk_conn, pool, volume):


def add_volume(zk_conn, pool, name, size):
    # 1. Create the volume
    # 1. Verify the size of the volume
    pool_information = getPoolInformation(zk_conn, pool)
    size_bytes = format_bytes_fromhuman(size)
    if size_bytes >= int(pool_information['stats']['free_bytes']):
        return False, 'ERROR: Requested volume size is greater than the available free space in the pool'

    # Add 'B' if the volume is in bytes
    if re.match(r'^[0-9]+$', size):
        size = '{}B'.format(size)

    # 2. Create the volume
    retcode, stdout, stderr = common.run_os_command('rbd create --size {} --image-feature layering,exclusive-lock {}/{}'.format(size, pool, name))
    if retcode:
        return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(name, stderr)
@@ -545,7 +555,7 @@ def resize_volume(zk_conn, pool, name, size):
        target_lv_conn = libvirt.open(dest_lv)
        target_vm_conn = target_lv_conn.lookupByName(vm_info['name'])
        if target_vm_conn:
            target_vm_conn.blockResize(volume_id, int(format_bytes_fromhuman(size)[:-1]), libvirt.VIR_DOMAIN_BLOCK_RESIZE_BYTES)
            target_vm_conn.blockResize(volume_id, format_bytes_fromhuman(size), libvirt.VIR_DOMAIN_BLOCK_RESIZE_BYTES)
        target_lv_conn.close()
    except Exception:
        pass
@@ -715,6 +725,16 @@ def add_snapshot(zk_conn, pool, volume, name):
        '/ceph/snapshots/{}/{}/{}/stats'.format(pool, volume, name): '{}'
    })

    # 3. Update the count of snapshots on this volume
    volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
    volume_stats = dict(json.loads(volume_stats_raw))
    # Format the size to something nicer
    volume_stats['snapshot_count'] = volume_stats['snapshot_count'] + 1
    volume_stats_raw = json.dumps(volume_stats)
    zkhandler.writedata(zk_conn, {
        '/ceph/volumes/{}/{}/stats'.format(pool, volume): volume_stats_raw
    })

    return True, 'Created RBD snapshot "{}" of volume "{}" in pool "{}".'.format(name, volume, pool)
@@ -751,6 +771,16 @@ def remove_snapshot(zk_conn, pool, volume, name):
    # 2. Delete snapshot from Zookeeper
    zkhandler.deletekey(zk_conn, '/ceph/snapshots/{}/{}/{}'.format(pool, volume, name))

    # 3. Update the count of snapshots on this volume
    volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
    volume_stats = dict(json.loads(volume_stats_raw))
    # Format the size to something nicer
    volume_stats['snapshot_count'] = volume_stats['snapshot_count'] - 1
    volume_stats_raw = json.dumps(volume_stats)
    zkhandler.writedata(zk_conn, {
        '/ceph/volumes/{}/{}/stats'.format(pool, volume): volume_stats_raw
    })

    return True, 'Removed RBD snapshot "{}" of volume "{}" in pool "{}".'.format(name, volume, pool)
@@ -783,4 +813,4 @@ def get_list_snapshot(zk_conn, pool, volume, limit, is_fuzzy=True):
        else:
            snapshot_list.append({'pool': pool_name, 'volume': volume_name, 'snapshot': snapshot_name})

    return True, sorted(snapshot_list, key=lambda x: int(x['id']))
    return True, sorted(snapshot_list, key=lambda x: str(x['snapshot']))
@@ -267,6 +267,13 @@ def getInformationFromXML(zk_conn, uuid):
    except Exception:
        domain_profile = None

    try:
        domain_vnc = zkhandler.readdata(zk_conn, '/domains/{}/vnc'.format(uuid))
        domain_vnc_listen, domain_vnc_port = domain_vnc.split(':')
    except Exception:
        domain_vnc_listen = 'None'
        domain_vnc_port = 'None'

    parsed_xml = getDomainXML(zk_conn, uuid)

    try:

@@ -312,6 +319,10 @@ def getInformationFromXML(zk_conn, uuid):
        'arch': domain_arch,
        'machine': domain_machine,
        'console': domain_console,
        'vnc': {
            'listen': domain_vnc_listen,
            'port': domain_vnc_port
        },
        'emulator': domain_emulator,
        'features': domain_features,
        'disks': domain_disks,
@@ -207,6 +207,7 @@ def define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node
        '/domains/{}/consolelog'.format(dom_uuid): '',
        '/domains/{}/rbdlist'.format(dom_uuid): formatted_rbd_list,
        '/domains/{}/profile'.format(dom_uuid): profile,
        '/domains/{}/vnc'.format(dom_uuid): '',
        '/domains/{}/xml'.format(dom_uuid): config_data
    })
debian/changelog (vendored)
@@ -1,3 +1,29 @@
pvc (0.9.13-0) unstable; urgency=high

  * Adds nicer startup messages for daemons
  * Adds additional API field for stored_bytes to pool stats
  * Fixes sorting issues with snapshot lists
  * Fixes missing increment/decrement of snapshot_count on volumes
  * Fixes bad calls in pool element API endpoints
  * Fixes inconsistent bytes_tohuman behaviour in daemons
  * Adds validation and maximum volume size on creation (must be smaller than the pool free space)

 -- Joshua M. Boniface <joshua@boniface.me>  Wed, 17 Feb 2021 11:33:28 -0500

pvc (0.9.12-0) unstable; urgency=high

  * Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition

 -- Joshua M. Boniface <joshua@boniface.me>  Thu, 28 Jan 2021 16:29:58 -0500

pvc (0.9.11-0) unstable; urgency=high

  * Documentation updates
  * Adds VNC information to VM info
  * Goes back to external Ceph commands for disk usage

 -- Joshua M. Boniface <joshua@boniface.me>  Tue, 05 Jan 2021 15:58:26 -0500

pvc (0.9.10-0) unstable; urgency=high

  * Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down
@@ -41,31 +41,31 @@ PVC is build from a number of other, open source components. The main system its

Virtual machines themselves are run with the Linux KVM subsystem via the Libvirt virtual machine management library. This provides the maximum flexibility and compatibility for running various guest operating systems in multiple modes (fully-virtualized, para-virtualized, virtio-enabled, etc.).

To manage cluster state, PVC uses Zookeeper. This is an Apache project designed to provide a highly-available and always-consistent key-value database. The various daemons all connect to the distributed Zookeeper database to both obtain details about cluster state, and to manage that state. For instance the node daemon watches Zookeeper for information on what VMs to run, networks to create, etc., while the API writes information to Zookeeper in response to requests.
To manage cluster state, PVC uses Zookeeper. This is an Apache project designed to provide a highly-available and always-consistent key-value database. The various daemons all connect to the distributed Zookeeper database to both obtain details about cluster state, and to manage that state. For instance the node daemon watches Zookeeper for information on what VMs to run, networks to create, etc., while the API writes to or reads information from Zookeeper in response to requests. The Zookeeper database is the glue which holds the cluster together.

Additional relational database functionality, specifically for the DNS aggregation subsystem and the VM provisioner, is provided by the PostgreSQL database and the Patroni management tool, which provides automatic clustering and failover for PostgreSQL database instances.
Additional relational database functionality, specifically for the managed network DNS aggregation subsystem and the VM provisioner, is provided by the PostgreSQL database system and the Patroni management tool, which provides automatic clustering and failover for PostgreSQL database instances.

Node network routing for managed networks providing EBGP VXLAN and route-learning is provided by FRRouting, a descendant project of Quagga and GNU Zebra.
Node network routing for managed networks providing EBGP VXLAN and route-learning is provided by FRRouting, a descendant project of Quagga and GNU Zebra. Upstream routers can use this interface to learn routes to cluster networks as well.

The storage subsystem is provided by Ceph, a distributed object-based storage subsystem with extensive scalability, self-managing, and self-healing functionality. The Ceph RBD (Rados Block Device) subsystem is used to provide VM block devices similar to traditional LVM or ZFS zvols, but in a distributed, shared-storage manner.
The storage subsystem is provided by Ceph, a distributed object-based storage subsystem with extensive scalability, self-managing, and self-healing functionality. The Ceph RBD (RADOS Block Device) subsystem is used to provide VM block devices similar to traditional LVM or ZFS zvols, but in a distributed, shared-storage manner.

All the components are designed to be run on top of Debian GNU/Linux, specifically Debian 10.X "Buster", with the SystemD system service manager. This OS provides a stable base to run the various other subsystems while remaining truly Free Software, while SystemD provides functionality such as automatic daemon restarting and complex startup/shutdown ordering.

## Cluster Architecture

A PVC cluster is based around "nodes", which are physical servers on which the various daemons, storage, networks, and virtual machines run. Each node is self-contained and is able to perform any and all cluster functions if needed; there is no segmentation of function between different types of physical hosts. Ideally, all nodes in a cluster will be identical in specifications, but in some situations mismatched nodes are acceptable, with limitations.
A PVC cluster is based around "nodes", which are physical servers on which the various daemons, storage, networks, and virtual machines run. Each node is self-contained and is able to perform any and all cluster functions if needed and configured to do so; there is no strict segmentation of function between different "types" of physical hosts. Ideally, all nodes in a cluster will be identical in specifications, but in some situations mismatched nodes are acceptable, with limitations.

A subset of the nodes, called "coordinators", are statically configured to provide additional services for the cluster. For instance, all databases, FRRouting instances, and Ceph management daemons run only on the set of cluster coordinators. At cluster bootstrap, 1 (testing-only), 3 (small clusters), or 5 (large clusters) nodes may be chosen as the coordinators. Other nodes can then be added as "hypervisor" nodes, which then provide only block device (storage) and VM (compute) functionality by connecting to the set of coordinators. This limits the scaling problem of the databases while ensuring there is still maximum redundancy and resiliency for the core cluster services.
A subset of the nodes, called "coordinators", are statically configured to provide services for the cluster. For instance, all databases, FRRouting instances, and Ceph management daemons run only on the set of cluster coordinators. At cluster bootstrap, 1 (testing-only), 3 (small clusters), or 5 (large clusters) nodes may be chosen as the coordinators. Other nodes can then be added as "hypervisor" nodes, which then provide only block device (storage) and VM (compute) functionality by connecting to the set of coordinators. This limits the scaling problem of the databases while ensuring there is still maximum redundancy and resiliency for the core cluster services.

Additional nodes can be added to the cluster either as coordinators, or as hypervisors, by adding them to the Ansible configuration and running it against the full set of nodes. Note that the number of coordinators must always be odd, and more than 5 coordinators are normally unnecessary and can cause issues with the database; it is thus normally advisable to add any nodes beyond the initial set as hypervisors instead of coordinators. Nodes can be removed from service, but this is a manual process and should not be attempted unless absolutely required; the Ceph subsystem in particular is sensitive to changes in the coordinator nodes.
Additional nodes can be added to the cluster either as coordinators, or as hypervisors, by adding them to the Ansible configuration and running it against the full set of nodes. Note that the number of coordinators must always be odd, and more than 5 coordinators are normally unnecessary and can cause issues with the database; it is thus normally advisable to add any nodes beyond the initial set as hypervisors instead of coordinators. Nodes can be removed from service, but this is a manual process and should not be attempted unless absolutely required; the Ceph subsystem in particular is sensitive to changes in the coordinator nodes. Nodes can also be upgraded or replaced dynamically and without interrupting the cluster, allowing for seamless hardware maintenance, upgrades, and even replacement, as cluster state configuration is held cluster-wide.

During runtime, one coordinator is elected the "primary" for the cluster. This designation can shift dynamically in response to cluster events, or be manually migrated by an administrator. The coordinator takes on a number of roles for which only one host may be active at once, for instance to provide DHCP services to managed client networks or to interface with the API.

Nodes are networked together via a set of statically-configured networks. At a minimum, 2 discrete networks are required, with an optional 3rd.
Nodes are networked together via a set of statically-configured, simple layer-2 networks. At a minimum, 2 discrete networks are required, with an optional 3rd.

* The "upstream" network is the primary network for the nodes, and provides functions such as upstream Internet access, routing to and from the cluster nodes, and management via the API; it may be either a firewalled public or NAT'd RFC1918 network, but should never be exposed directly to the Internet.
* The "upstream" network is the primary network for the nodes, and provides functions such as upstream Internet access, routing to and from the cluster nodes, and management via the API; it may be either a firewalled public or NAT'd RFC1918 network, but should never be exposed directly to the Internet. It should also contain, or be able to route to, the IPMI BMC management interfaces of the node chassis'.
* The "cluster" network is an unrouted RFC1918 network which provides inter-node communication for managed client network traffic (VXLANs), cross-node routing, VM migration and failover, and database replication and access.
* The "storage" network is another unrouted RFC1918 network which provides a dedicated logical and/or physical link between the nodes for storage traffic, including VM block device storage traffic, inter-OSD replication traffic, and Ceph heartbeat traffic, thus allowing it to be completely isolated from the other networks for maximum performance. This network can be optionally colocated with the "cluster" network, by specifying the same device for both, and can be further combined by specifying the same IP for both to completely collapse the "cluster" and "storage" networks. This may be ideal to simplify management of small clusters.
* The "storage" network is another unrouted RFC1918 network which provides a dedicated logical and/or physical link between the nodes for storage traffic, including VM block device storage traffic, inter-OSD replication traffic, and Ceph heartbeat traffic, thus allowing it to be completely isolated from the other networks for maximum performance. This network can be optionally colocated with the "cluster" network, by specifying the same device for both, and can be further combined by specifying the same IP for both to completely collapse the "cluster" and "storage" networks. A collapsed cluster+storage configuration may be ideal to simplify management of small clusters, or a split configuration can be used to provide flexibility for large or demanding high-performance clusters - this choice is left to the administrator based on their needs.

Within each network is a single "floating" IP address which follows the primary coordinator, providing a single interface to the cluster. Once configured, the cluster is then able to create additional networks of two kinds, "bridged" traditional vLANs and "managed" routed VXLANs, to provide network access to VMs.
@@ -79,15 +79,15 @@ The API client is a Flask-based RESTful API and is the core interface to PVC. By

The API generally accepts all requests as HTTP form requests following standard RESTful guidelines, supporting arguments in the URI string or, with limited exceptions, in the message body. The API returns JSON response bodies to all requests consisting either of the information requested, or a `{ "message": "text" }` construct to pass informational status messages back to the client.
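As a concrete illustration of that request/response convention, a client might issue a simple GET and receive either the requested data or a `message` construct. The endpoint path, address, and header below are assumptions for the sketch (the port matches the `pvc_api_listen_port` example later in this page); consult the API reference for the real details:

```
import requests

# Assumptions: API reachable on an upstream floating IP at port 7370, an
# /api/v1 prefix, and X-Api-Key authentication; verify against the API reference.
api = 'http://192.168.100.254:7370/api/v1'
headers = {'X-Api-Key': 'mysecrettoken'}

resp = requests.get('{}/vm'.format(api), params={'limit': 'test-vm'}, headers=headers)
if resp.ok:
    print(resp.json())             # the requested information (here, VM objects)
else:
    print(resp.json()['message'])  # informational status message construct
```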
The API client manual can be found at the [API manual page](/manuals/api), and the full API documentation can be found at the [API reference page](/manuals/api-reference.html).
The API client manual can be found at the [API manual page](/manuals/api), and the full API details can be found in the [API reference specification](/manuals/api-reference.html).

### Direct Bindings

The API client uses a dedicated set of Python libraries, packaged as the `pvc-daemon-common` Debian package, to communicate with the cluster. It is thus possible to build custom Python clients that directly interface with the PVC cluster, without having to get "into the weeds" of the Zookeeper or PostgreSQL databases.
The API client uses a dedicated set of Python libraries, packaged as the `pvc-daemon-common` Debian package, to communicate with the cluster. One can thus use these libraries to build custom Python clients that directly interface with the PVC cluster, without having to get "into the weeds" of the Zookeeper or PostgreSQL databases.
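For example, a small script could reuse the same helpers that appear elsewhere in this diff (the Zookeeper connection functions and the Ceph volume functions). The import paths/aliases and the coordinator list below are assumptions, not taken from the libraries themselves; the function calls mirror the `add_volume` and `startZKConnection`/`stopZKConnection` usage shown in the hunks above:

```
# Sketch only: the import paths and coordinator addresses are assumptions; the
# function names and signatures match those shown elsewhere in this diff.
import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph

coordinators = ['hv1.example.tld:2181', 'hv2.example.tld:2181', 'hv3.example.tld:2181']

zk_conn = pvc_common.startZKConnection(coordinators)
retflag, message = pvc_ceph.add_volume(zk_conn, 'vms', 'test-volume', '8G')
print(message)
pvc_common.stopZKConnection(zk_conn)
```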
### CLI Client

The CLI client is a Python Click application, which provides a convenient CLI interface to the API client. It supports connecting to multiple clusters from a single instance, with or without authentication and over both HTTP or HTTPS, including a special "local" cluster if the client determines that an API configuration exists on the local host. Information about the configured clusters is stored in a local JSON document, and a default cluster can be set with an environment variable.
The CLI client is a Python Click application, which provides a convenient CLI interface to the API client. It supports connecting to multiple clusters from a single instance, with or without authentication and over both HTTP or HTTPS, including a special "local" cluster if the client determines that an API configuration exists on the local host. Information about the configured clusters is stored in a local JSON document, and a default cluster can be set with an environment variable. The CLI client can thus be run either on PVC nodes themselves, or on other, remote systems which can then interface with cluster(s) over the network.

The CLI client is self-documenting using the `-h`/`--help` arguments throughout, easing the administrator learning curve and providing easy access to command details. A short manual can also be found at the [CLI manual page](/manuals/cli).

@@ -97,6 +97,8 @@ The overall management, deployment, bootstrapping, and configuring of nodes is a

The Ansible configuration and architecture manual can be found at the [Ansible manual page](/manuals/ansible).

The [getting started documentation](/getting-started) provides a walkthrough of using these tools to bootstrap a new cluster.

## Frequently Asked Questions

### General Questions
@@ -116,12 +118,13 @@ PVC might be right for you if:

1. You need KVM-based VMs.
2. You want management of storage and networking (a.k.a. "batteries-included") in the same tool.
3. You want hypervisor-level redundancy, able to tolerate hypervisor downtime seamlessly, for all elements of the stack.
4. You have a requirement of at least 3 nodes' worth of compute and storage.

I built PVC for my homelab first, found a perfect use-case with my employer, and think it might be useful to you too.
If all you want is a simple home server solution, or you demand scalability beyond a few dozen compute nodes, PVC is likely not what you're looking for. Its sweet spot is specifically in the 3-9 node range, for instance in an advanced homelab, for SMBs or small ISPs with a relatively small server stack, or for MSPs looking to deploy small on-premises clusters at low cost.

#### Is 3 hypervisors really the minimum?

For a redundant cluster, yes. PVC requires a majority quorum for proper operation at various levels, and the smallest possible majority quorum is 2-of-3; thus 3 nodes is the safe minimum. That said, you can run PVC on a single node for testing/lab purposes without host-level redundancy, should you wish to do so, and it might also be possible to run 2 "main" systems with a 3rd "quorum observer" hosting only the management tools but no VMs, however this is not officially supported.
For a redundant cluster, yes. PVC requires a majority quorum for proper operation at various levels, and the smallest possible majority quorum is 2-of-3; thus 3 nodes is the smallest safe minimum. That said, you can run PVC on a single node for testing/lab purposes without host-level redundancy, should you wish to do so, and it might also be possible to run 2 "main" systems with a 3rd "quorum observer" hosting only the management tools but no VMs; however these options are not officially supported, as PVC is designed primarily for 3+ node operation.

### Feature Questions

@@ -133,6 +136,10 @@ No, not directly. PVC supports only KVM VMs. To run containers, you would need t

Not yet. Right now, PVC management is done exclusively with the CLI interface to the API. A WebUI can and likely will be built in the future, but I'm not a frontend developer and I do not consider this a personal priority. As of late 2020 the API is generally stable, so I would welcome 3rd party assistance here.

#### I want feature X, does it fit with PVC?

That depends on the specific feature. I will limit features to those that align with the overall goals of PVC, that is to say, to provide an easy-to-use hyperconverged virtualization system focused on redundancy. If a feature suits this goal it is likely to be considered; if it does not, it will not. PVC is rapidly approaching the completion of its 1.0 roadmap, which I consider feature-complete for the primary usecase, and future versions may expand in scope.

### Storage Questions

#### Can I use RAID-5/RAID-6 with PVC?
@@ -46,8 +46,6 @@ The following table provides bare-minimum, recommended, and optimal specificatio
| Total RAM (n-1) | 32GB | 96GB | 128GB |
| Total disk space | 200GB | 400GB | 800GB |

Of these totals, some amount of CPU and RAM will be used by the storage subsystem and the PVC daemons themselves, meaning that the total available for virtual machines is slightly less. Generally, each OSD data disk will consume 1 vCPU at load and 1-2GB RAM, so nodes should be sized not only according to the VM workload, but the number of storage disks per node. Additionally the coordinator databases will use additional RAM and CPU resources of up to 1-4GB per node, though there is generally little need to spec coordinators any larger than non-coordinator nodes and the VM automatic node selection process will take used RAM into account by default.

### System Disks

The system disk(s) chosen are important to consider, especially for coordinators. Ideally, an SSD, or two SSDs in RAID-1/mirroring are recommended for system disks. This helps ensure optimal performance for the system (e.g. swap space) and PVC components such as databases as well as the Ceph caches.

@@ -62,6 +60,18 @@ The general rule for available resource capacity planning can be though of as "1

For memory provisioning of VMs, PVC will warn the administrator, via a Degraded cluster state, if the "n-1" RAM quantity is exceeded by the total maximum allocation of all running VMs. This situation can be worked around with sufficient swap space on nodes to ensure there is overflow, however the warning cannot be overridden. If nodes are of mismatched sizes, the "n-1" RAM quantity is calculated by removing (one of) the largest node in the cluster and adding the remaining nodes' RAM counts together.
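A short worked example of the "n-1" calculation described above, with invented node sizes for a mismatched three-node cluster:

```
# Hypothetical per-node RAM in GB; "n-1" removes (one of) the largest node.
node_ram_gb = [96, 128, 96]
n1_capacity_gb = sum(node_ram_gb) - max(node_ram_gb)  # 96 + 96 = 192 GB available for VMs

total_vm_ram_gb = 200  # sum of the maximum allocation of all running VMs
if total_vm_ram_gb > n1_capacity_gb:
    print('Cluster reports a Degraded state: VM RAM exceeds the n-1 quantity')
```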
### System Memory Utilization

By default, several components of PVC outside of VMs will have large memory allocations, most notably Ceph OSD processes and Zookeeper database processes. These processes should be considered when selecting the RAM allocation of nodes, and adjusted in the Ansible `group_vars` if lower defaults are required.

#### Ceph OSD processes

By default, PVC will allow several GB (up to 4-6GB) of RAM allocation per OSD to maximize the available cache space and hence disk performance. This can be lowered as far as 939MB should the administrator require due to a low RAM configuration, but no further due to Ceph limitations; therefore at least 1GB of memory per storage OSD is required even in the most limited case.

#### Zookeeper processes

By default, the Java heap and stack sizes are set to 256MB and 512MB respectively, yielding a memory usage of 500+MB after several days or weeks of uptime. This can be lowered to 32M or less for lightly-used clusters should the administrator require due to a low RAM configuration.

### Operating System and Architecture

As an underlying OS, only Debian GNU/Linux 10.x "Buster" is supported by PVC. This is the operating system installed by the PVC [node installer](https://github.com/parallelvirtualcluster/pvc-installer) and expected by the PVC [Ansible configuration system](https://github.com/parallelvirtualcluster/pvc-ansible). Ubuntu or other Debian-derived distributions may work, but are not officially supported. PVC also makes use of a custom repository to provide the PVC software and an updated version of Ceph beyond what is available in the base operating system, and this is only compatible officially with Debian 10 "Buster". PVC will, in the future, upgrade to future versions of Debian based on their release schedule and testing; releases may be skipped for official support if required. As a general rule, using the current versions of the official node installer and Ansible repository is the preferred and only supported method for deploying PVC.
@@ -18,6 +18,26 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r

## Changelog

#### v0.9.13

* Adds nicer startup messages for daemons
* Adds additional API field for stored_bytes to pool stats
* Fixes sorting issues with snapshot lists
* Fixes missing increment/decrement of snapshot_count on volumes
* Fixes bad calls in pool element API endpoints
* Fixes inconsistent bytes_tohuman behaviour in daemons
* Adds validation and maximum volume size on creation (must be smaller than the pool free space)

#### v0.9.12

* Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition

#### v0.9.11

* Documentation updates
* Adds VNC information to VM info
* Goes back to external Ceph commands for disk usage

#### v0.9.10

* Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down
@@ -6,7 +6,7 @@ The PVC Ansible setup and management framework is written in Ansible. It consist

The Base role configures a node to a specific, standard base Debian system, with a number of PVC-specific tweaks. Some examples include:

* Installing the custom PVC repository at Boniface Labs.
* Installing the custom PVC repository hosted at Boniface Labs.

* Removing several unnecessary packages and installing numerous additional packages.

@@ -22,6 +22,8 @@ The Base role configures a node to a specific, standard base Debian system, with

The end result is a standardized "PVC node" system ready to have the daemons installed by the PVC role.

The Base role is optional: if an administrator so chooses, they can bypass this role and configure things manually. That said, for the proper functioning of the PVC role, the Base role should always be applied first.

## PVC role

The PVC role configures all the dependencies of PVC, including storage, networking, and databases, then installs the PVC daemon itself. Specifically, it will, in order:

@@ -30,21 +32,19 @@ The PVC role configures all the dependencies of PVC, including storage, networki

* Install, configure, and if `bootstrap=yes` is set, bootstrap a Zookeeper cluster (coordinators only).

* Install, configure, and if `bootstrap=yes` is set`, bootstrap a Patroni PostgreSQL cluster for the PowerDNS aggregator (coordinators only).
* Install, configure, and if `bootstrap=yes` is set, bootstrap a Patroni PostgreSQL cluster for the PowerDNS aggregator (coordinators only).

* Install and configure Libvirt.

* Install and configure FRRouting.

* Install and configure the main PVC daemon and API client, including initializing the PVC cluster (`pvc task init`).
* Install and configure the main PVC daemon and API client.

* If `bootstrap=yes` is set, initialize the PVC cluster (`pvc task init`).

## Completion

Once the entire playbook has run for the first time against a given host, the host will be rebooted to apply all the configured services. On startup, the system should immediately launch the PVC daemon, check in to the Zookeeper cluster, and become ready. The node will be in `flushed` state on its first boot; the administrator will need to run `pvc node unflush <node>` to set the node into active state ready to handle virtual machines.

# PVC Ansible configuration manual

This manual documents the various `group_vars` configuration options for the `pvc-ansible` framework. We assume that the administrator is generally familiar with Ansible and its operation.
Once the entire playbook has run for the first time against a given host, the host will be rebooted to apply all the configured services. On startup, the system should immediately launch the PVC daemon, check in to the Zookeeper cluster, and become ready. The node will be in `flushed` state on its first boot; the administrator will need to run `pvc node unflush <node>` to set the node into active state ready to handle virtual machines. On the first bootstrap run, the administrator will also have to configure storage block devices (OSDs), networks, etc. For full details, see [the main getting started page](/getting-started).

## General usage
@@ -62,7 +62,7 @@ Create a `group_vars/<cluster>` folder to hold the cluster configuration variabl

### Bootstrapping a cluster

Before bootstrapping a cluster, see the section on [PVC Ansible configuration variables](/manuals/ansible#pvc-ansible-configuration-variables) to configure the cluster.
Before bootstrapping a cluster, see the section on [PVC Ansible configuration variables](/manuals/ansible/#pvc-ansible-configuration-variables) to configure the cluster.

Bootstrapping a cluster can be done using the main `pvc.yml` playbook. Generally, a bootstrap run should be limited to the coordinators of the cluster to avoid potential race conditions or strange bootstrap behaviour. The special variable `bootstrap=yes` must be set to indicate that a cluster bootstrap is to be requested.

@@ -74,7 +74,13 @@ Adding new nodes to an existing cluster can be done using the main `pvc.yml` pla

### Reconfiguration and software updates

After modifying configuration settings in the `group_vars`, or to update PVC to the latest version on a release, deployment of updated cluster can be done using the main `pvc.yml` playbook. The configuration should be updated if required, then the playbook run against all hosts in the cluster with no special flags or limits.
For general, day-to-day software updates such as base system updates or upgrading to newer PVC versions, a special playbook, `oneshot/update-pvc-cluster.yml`, is provided. This playbook will gracefully update and upgrade all PVC nodes in the cluster, flush them, reboot them, and then unflush them. This operation should be completely transparent to VMs on the cluster.

For more advanced updates, such as changing configurations in the `group_vars`, the main `pvc.yml` playbook can be used to deploy the changes across all hosts. Note that this may cause downtime due to node reboots if certain configurations change, and it is not recommended to use this process frequently.

# PVC Ansible configuration manual

This manual documents the various `group_vars` configuration options for the `pvc-ansible` framework. We assume that the administrator is generally familiar with Ansible and its operation.

## PVC Ansible configuration variables
@@ -96,10 +102,14 @@ Example configuration:

```
---
cluster_group: mycluster
timezone_location: Canada/Eastern
local_domain: upstream.local

username_ipmi_host: "pvc"
passwd_ipmi_host: "MyPassword2019"

passwd_root: MySuperSecretPassword # Not actually used by the playbook, but good for reference
passwdhash_root: "$6$shadowencryptedpassword"

logrotate_keepcount: 7

@@ -118,13 +128,19 @@ admin_users:
  - "ssh-ed25519 MyKey 2019-06"

networks:
  "upstream":
  "bondU":
    device: "bondU"
    type: "bond"
    bond_mode: "802.3ad"
    bond_devices:
      - "enp1s0f0"
      - "enp1s0f1"
    mtu: 9000

  "upstream":
    device: "vlan1000"
    type: "vlan"
    raw_device: "bondU"
    mtu: 1500
    domain: "{{ local_domain }}"
    subnet: "192.168.100.0/24"

@@ -144,12 +160,24 @@ networks:
    device: "vlan1002"
    type: "vlan"
    raw_device: "bondU"
    mtu: 1500
    mtu: 9000
    domain: "pvc-storage.local"
    subnet: "10.0.1.0/24"
    floating_ip: "10.0.1.254/24"
```
#### `cluster_group`

* *required*

The name of the Ansible PVC cluster group in the `hosts` inventory.

#### `timezone_location`

* *required*

The TZ database format name of the local timezone, e.g. `America/Toronto` or `Canada/Eastern`.

#### `local_domain`

* *required*

@@ -172,6 +200,12 @@ The IPMI password, in plain text, used by PVC to communicate with the node manag

Generate using `pwgen -s 16` and adjusting length as required.

#### `passwd_root`

* *ignored*

Used only for reference, the plain-text root password for `passwdhash_root`.

#### `passwdhash_root`

* *required*
@@ -240,9 +274,13 @@ A list of SSH public key strings, in `authorized_keys` line format, for the user

* *required*

A dictionary of networks to configure on the nodes. Three networks are required by all PVC clusters, though additional networks may be configured here as well.
A dictionary of networks to configure on the nodes.

The three required networks are: `upstream`, `cluster`, `storage`.
The key will be used to "name" the interface file under `/etc/network/interfaces.d`, but otherwise the `device` is the real name of the device (e.g. `iface [device] inet ...`).

The three required networks are: `upstream`, `cluster`, `storage`. If `storage` is configured identically to `cluster`, the two networks will be collapsed into one; for details on this, please see the [documentation about the storage network](/cluster-architecture/#storage-connecting-ceph-daemons-with-each-other-and-with-osds).

Additional networks can also be specified here to automate their configuration. In the above example, a "bondU" interface is configured, which the remaining required networks use as their `raw_device`.

Within each `network` element, the following options may be specified:

@@ -250,7 +288,7 @@ Within each `network` element, the following options may be specified:

* *required*

The network device name.
The real network device name.

##### `type`
@@ -321,18 +359,33 @@ pvc_log_keepalive_cluster_details: True
pvc_log_keepalive_storage_details: True
pvc_log_console_lines: 1000

pvc_vm_shutdown_timeout: 180
pvc_keepalive_interval: 5
pvc_fence_intervals: 6
pvc_suicide_intervals: 0
pvc_fence_successful_action: migrate
pvc_fence_failed_action: None

pvc_osd_memory_limit: 4294967296
pvc_zookeeper_heap_limit: 256M
pvc_zookeeper_stack_limit: 512M

pvc_api_listen_address: "0.0.0.0"
pvc_api_listen_port: "7370"
pvc_api_enable_authentication: False
pvc_api_secret_key: ""

pvc_api_enable_authentication: False
pvc_api_tokens:
  - description: "myuser"
    token: ""

pvc_api_enable_ssl: False
pvc_api_ssl_cert_path: /etc/ssl/pvc/cert.pem
pvc_api_ssl_cert: >
  -----BEGIN CERTIFICATE-----
  MIIxxx
  -----END CERTIFICATE-----
pvc_api_ssl_key_path: /etc/ssl/pvc/key.pem
pvc_api_ssl_key: >
  -----BEGIN PRIVATE KEY-----
  MIIxxx

@@ -343,6 +396,9 @@ pvc_ceph_storage_secret_uuid: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
pvc_dns_database_name: "pvcdns"
pvc_dns_database_user: "pvcdns"
pvc_dns_database_password: "xxxxxxxx"
pvc_api_database_name: "pvcapi"
pvc_api_database_user: "pcapi"
pvc_api_database_password: "xxxxxxxx"
pvc_replication_database_user: "replicator"
pvc_replication_database_password: "xxxxxxxx"
pvc_superuser_database_user: "postgres"

@@ -393,6 +449,8 @@ pvc_nodes:
    ipmi_user: "{{ username_ipmi_host }}"
    ipmi_password: "{{ passwd_ipmi_host }}"

pvc_bridge_device: bondU

pvc_upstream_device: "{{ networks['upstream']['device'] }}"
pvc_upstream_mtu: "{{ networks['upstream']['mtu'] }}"
pvc_upstream_domain: "{{ networks['upstream']['domain'] }}"

@@ -413,19 +471,23 @@ pvc_storage_floatingip: "{{ networks['storage']['floating_ip'] }}"
#### `pvc_log_to_file`

* *required*
* *optional*

Whether to log PVC output to the file `/var/log/pvc/pvc.log`. Must be one of, unquoted: `True`, `False`.

If unset, a default value of "False" is set in the role defaults.

#### `pvc_log_to_stdout`

* *required*
* *optional*

Whether to log PVC output to stdout, i.e. `journald`. Must be one of, unquoted: `True`, `False`.

If unset, a default value of "True" is set in the role defaults.

#### `pvc_log_colours`

* *required*
* *optional*

Whether to include ANSI coloured prompts (`>>>`) for status in the log output. Must be one of, unquoted: `True`, `False`.

@@ -433,39 +495,153 @@ Requires `journalctl -o cat` or file logging in order to be visible and useful.

If set to False, the prompts will instead be text values.

If unset, a default value of "True" is set in the role defaults.

#### `pvc_log_dates`

* *required*
* *optional*

Whether to include dates in the log output. Must be one of, unquoted: `True`, `False`.

Requires `journalctl -o cat` or file logging in order to be visible and useful (and not clutter the logs with duplicate dates).

If unset, a default value of "False" is set in the role defaults.

#### `pvc_log_keepalives`

* *required*
* *optional*

Whether to log keepalive messages. Must be one of, unquoted: `True`, `False`.
Whether to log the regular keepalive messages. Must be one of, unquoted: `True`, `False`.

If unset, a default value of "True" is set in the role defaults.

#### `pvc_log_keepalive_cluster_details`

* *required*
* *optional*
* *ignored* if `pvc_log_keepalives` is `False`

Whether to log cluster and node details during keepalive messages. Must be one of, unquoted: `True`, `False`.

If unset, a default value of "True" is set in the role defaults.

#### `pvc_log_keepalive_storage_details`

* *required*
* *optional*
* *ignored* if `pvc_log_keepalives` is `False`

Whether to log storage cluster details during keepalive messages. Must be one of, unquoted: `True`, `False`.

If unset, a default value of "True" is set in the role defaults.

#### `pvc_log_console_lines`

* *required*
* *optional*

The number of output console lines to log for each VM.
The number of output console lines to log for each VM, to be used by the console log endpoints (`pvc vm log`).

If unset, a default value of "1000" is set in the role defaults.

#### `pvc_vm_shutdown_timeout`

* *optional*

The number of seconds to wait for a VM to `shutdown` before it is forced off.

A value of "0" disables this functionality.

If unset, a default value of "180" is set in the role defaults.

#### `pvc_keepalive_interval`

* *optional*

The number of seconds between node keepalives.

If unset, a default value of "5" is set in the role defaults.

**WARNING**: Changing this value is not recommended except in exceptional circumstances.

#### `pvc_fence_intervals`

* *optional*

The number of keepalive intervals to be missed before other nodes consider a node `dead` and trigger the fencing process. The total time elapsed will be `pvc_keepalive_interval * pvc_fence_intervals`.

If unset, a default value of "6" is set in the role defaults.

**NOTE**: This is not the total time until a node is fenced. A node has a further 6 (hardcoded) `pvc_keepalive_interval`s ("saving throw" attempts) to try to send a keepalive before it is actually fenced. Thus, with the default values, this works out to a total of 60 +/- 5 seconds between a node crashing, and it being fenced. An administrator of a very important cluster may want to set this lower, perhaps to 2, or even 1, leaving only the "saving throws", though this is not recommended for most clusters, due to timing overhead from various other subsystems.
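A worked version of the timing in the note above, using the role defaults plus the hardcoded "saving throw" attempts:

```
pvc_keepalive_interval = 5  # seconds (default)
pvc_fence_intervals = 6     # missed keepalives before the fencing process triggers (default)
saving_throws = 6           # hardcoded extra intervals before the fence actually fires

time_until_dead = pvc_keepalive_interval * pvc_fence_intervals                 # 30 seconds
time_until_fenced = time_until_dead + pvc_keepalive_interval * saving_throws   # 60 seconds
print('dead after ~{}s, fenced after ~{}s (+/- one interval)'.format(time_until_dead, time_until_fenced))
```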
#### `pvc_suicide_intervals`

* *optional*

The number of keepalive intervals without the ability to send a keepalive before a node considers *itself* to be dead and reboots itself.

A value of "0" disables this functionality.

If unset, a default value of "0" is set in the role defaults.

**WARNING**: This option is provided to allow additional flexibility in fencing behaviour. Normally, it is not safe to set a `pvc_fence_failed_action` of `migrate`, since if the other nodes cannot fence a node its VMs cannot be safely started on other nodes. This would also apply to nodes without IPMI-over-LAN which could not be fenced normally. This option provides an alternative way to guarantee this safety, at least in situations where the node can still reliably shut itself down (i.e. it is not hard-locked). The administrator should however take special care and thoroughly test their system before using these alternative fencing options in production, as the results could be disastrous.

#### `pvc_fence_successful_action`

* *optional*

The action the cluster should take upon a successful node fence with respect to running VMs. Must be one of, unquoted: `migrate`, `None`.

If unset, a default value of "migrate" is set in the role defaults.

An administrator can set the value "None" to disable automatic VM recovery migrations after a node fence.

#### `pvc_fence_failed_action`

* *optional*

The action the cluster should take upon a failed node fence with respect to running VMs. Must be one of, unquoted: `migrate`, `None`.

If unset, a default value of "None" is set in the role defaults.

**WARNING**: See the warning in the above `pvc_suicide_intervals` section for details on the purpose of this option. Do not set this option to "migrate" unless you have also set `pvc_suicide_intervals` to a non-"0" value and understand the caveats and risks.

#### `pvc_fence_migrate_target_selector`

* *optional*

The migration selector to use when running a `migrate` command after a node fence. Must be one of, unquoted: `mem`, `load`, `vcpu`, `vms`.

If unset, a default value of "mem" is set in the role defaults.

**NOTE**: These values map to the standard VM meta `selector` options, and determine how nodes select where to run the migrated VMs.

#### `pvc_osd_memory_limit`

* *optional*

The memory limit, in bytes, to pass to the Ceph OSD processes. Only set once, during cluster bootstrap; subsequent changes to this value must be manually made in the `files/*/ceph.conf` static configuration for the cluster in question.

If unset, a default value of "4294967296" (i.e. 4GB) is set in the role defaults.

As per Ceph documentation, the minimum value possible is "939524096" (i.e. ~1GB), and the default matches the Ceph system default. Setting a lower value is only recommended for systems with relatively low memory availability, where the default of 4GB per OSD is too large; it is recommended to increase the total system memory first before tweaking this setting to ensure optimal storage performance across all workloads.
|
||||
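As a sketch, a node with very limited RAM could be bootstrapped at the documented minimum; the value below is simply the minimum quoted above, not a tuned recommendation:

```yaml
# Hypothetical group_vars excerpt: ~1GB per OSD on a memory-constrained node
# (only applied at bootstrap; later changes go into files/*/ceph.conf).
pvc_osd_memory_limit: 939524096
```
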
#### `pvc_zookeeper_heap_limit`

* *optional*

The memory limit to pass to the Zookeeper Java process for its heap.

If unset, a default value of "256M" is set in the role defaults.

The administrator may set this to a lower value on memory-constrained systems or if the memory usage of the Zookeeper process becomes excessive.

#### `pvc_zookeeper_stack_limit`

* *optional*

The memory limit to pass to the Zookeeper Java process for its stack.

If unset, a default value of "512M" is set in the role defaults.

The administrator may set this to a lower value on memory-constrained systems or if the memory usage of the Zookeeper process becomes excessive.

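For example, a minimal sketch that tightens both Zookeeper limits on a memory-constrained coordinator; the values are illustrative assumptions only:

```yaml
# Hypothetical group_vars excerpt: shrink the Zookeeper JVM footprint.
pvc_zookeeper_heap_limit: 128M    # role default is 256M
pvc_zookeeper_stack_limit: 256M   # role default is 512M
```
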
#### `pvc_api_listen_address`

@ -519,17 +695,33 @@ Generate using `uuidgen` or `pwgen -s 32` and adjusting length as required.

Whether to enable SSL for the PVC API. Must be one of, unquoted: `True`, `False`.

#### `pvc_api_ssl_cert_path`

* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_cert` is not set.

The path to an (existing) SSL certificate on the node system for the PVC API to use.

#### `pvc_api_ssl_cert`

* *required* if `pvc_api_enable_ssl` is `True`
* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_cert_path` is not set.

The SSL certificate, in text form, for the PVC API to use.
The SSL certificate, in text form, for the PVC API to use. Will be installed to `/etc/pvc/api-cert.pem` on the node system.

#### `pvc_api_ssl_key_path`

* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_key` is not set.

The path to an (existing) SSL private key on the node system for the PVC API to use.

#### `pvc_api_ssl_key`

* *required* if `pvc_api_enable_ssl` is `True`
* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_key_path` is not set.

The SSL private key, in text form, for the PVC API to use.
The SSL private key, in text form, for the PVC API to use. Will be installed to `/etc/pvc/api-key.pem` on the node system.

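As a minimal sketch of these SSL options, assuming certificate and key files already exist on the nodes (the paths shown are illustrative, not defaults shipped by the role):

```yaml
# Hypothetical group_vars excerpt: enable SSL using pre-existing files on each node.
pvc_api_enable_ssl: True
pvc_api_ssl_cert_path: /etc/ssl/certs/pvcapi.crt
pvc_api_ssl_key_path: /etc/ssl/private/pvcapi.key
# Alternatively, paste the PEM data inline and let the role install it:
# pvc_api_ssl_cert: |
#   -----BEGIN CERTIFICATE-----
#   ...
#   -----END CERTIFICATE-----
```
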
#### `pvc_ceph_storage_secret_uuid`

@ -559,6 +751,26 @@ The password of the PVC DNS aggregator database user.

Generate using `pwgen -s 16` and adjusting length as required.

#### `pvc_api_database_name`

* *required*

The name of the PVC API database.

#### `pvc_api_database_user`

* *required*

The username of the PVC API database user.

#### `pvc_api_database_password`

* *required*

The password of the PVC API database user.

Generate using `pwgen -s 16` and adjusting length as required.

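For instance, a brief sketch of these three required values; the names and password below are placeholders to be replaced per cluster:

```yaml
# Hypothetical group_vars excerpt for the PVC API database.
pvc_api_database_name: pvcapi
pvc_api_database_user: pvcapi
pvc_api_database_password: NieB1see7shaigah   # generate with `pwgen -s 16`
```
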
#### `pvc_replication_database_user`

* *required*
@ -589,10 +801,12 @@ Generate using `pwgen -s 16` and adjusting length as required.

#### `pvc_asn`

* *required*
* *optional*

The private autonomous system number used for BGP updates to upstream routers.

A default value of "65001" is set in the role defaults if left unset.

#### `pvc_routers`

A list of upstream routers to communicate BGP routes to.

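A brief sketch of these BGP values, using a private ASN and an assumed upstream router address purely for illustration:

```yaml
# Hypothetical group_vars excerpt: announce routes to a single upstream router.
pvc_asn: "65500"
pvc_routers:
  - "10.100.0.254"
```
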
@ -681,6 +895,12 @@ The IPMI username for the node management controller. Unless a per-host override

The IPMI password for the node management controller. Unless a per-host override is required, should usually use the previously-configured global `passwordname_ipmi_host`. All notes from that entry apply.

#### `pvc_bridge_device`

* *required*

The device name of the underlying network interface to be used for "bridged"-type client networks. For each "bridged"-type network, an IEEE 802.1q vLAN and bridge will be created on top of this device to pass these networks. In most cases, using the reflexive `networks['cluster']['raw_device']` or `networks['upstream']['raw_device']` from the Base role is sufficient.

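As a sketch, either a literal interface name or a reference back to the Base role's `networks` dictionary could be used; `bondU` is an assumed device name for illustration:

```yaml
# Hypothetical group_vars excerpt for bridged client networks.
pvc_bridge_device: bondU
# or, reflexively, reuse the cluster network's underlying device:
# pvc_bridge_device: "{{ networks['cluster']['raw_device'] }}"
```
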
#### `pvc_<network>_*`

The next set of entries is hard-coded to use the values from the global `networks` list. It should not need to be changed under most circumstances. Refer to the previous sections for specific notes about each entry.

@ -621,7 +621,7 @@
    "stats": {
        "properties": {
            "free_bytes": {
                "description": "The total free space (in bytes)",
                "description": "The total free space (in bytes, post-replicas)",
                "type": "integer"
            },
            "id": {
@ -660,8 +660,12 @@
                "description": "The total read operations on the pool (pool-lifetime)",
                "type": "integer"
            },
            "stored_bytes": {
                "description": "The stored data size (in bytes, post-replicas)",
                "type": "integer"
            },
            "used_bytes": {
                "description": "The total used space (in bytes)",
                "description": "The total used space (in bytes, pre-replicas)",
                "type": "integer"
            },
            "used_percent": {
@ -1302,6 +1306,19 @@
            "description": "The topology of the assigned vCPUs in Sockets/Cores/Threads format",
            "type": "string"
        },
        "vnc": {
            "properties": {
                "listen": {
                    "description": "The active VNC listen address or 'None'",
                    "type": "string"
                },
                "port": {
                    "description": "The active VNC port or 'None'",
                    "type": "string"
                }
            },
            "type": "object"
        },
        "xml": {
            "description": "The raw Libvirt XML definition of the VM",
            "type": "string"
@ -2,8 +2,7 @@

[Unit]
Description = Parallel Virtual Cluster node daemon
After = network-online.target zookeeper.service libvirtd.service
Wants = zookeeper.service libvirtd.service
After = network-online.target
PartOf = pvc.target

[Service]
@ -54,7 +54,7 @@ import pvcnoded.CephInstance as CephInstance
import pvcnoded.MetadataAPIInstance as MetadataAPIInstance

# Version string for startup output
version = '0.9.10'
version = '0.9.13'

###############################################################################
# PVCD - node daemon startup program
@ -134,7 +134,7 @@ def readConfig(pvcnoded_config_file, myhostname):

    with open(pvcnoded_config_file, 'r') as cfgfile:
        try:
            o_config = yaml.load(cfgfile)
            o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
        except Exception as e:
            print('ERROR: Failed to parse configuration file: {}'.format(e))
            exit(1)
@ -331,16 +331,29 @@ if not os.path.exists(config['log_directory']):
logger = log.Logger(config)

# Print our startup messages
logger.out('Parallel Virtual Cluster node daemon v{}'.format(version))
logger.out('FQDN: {}'.format(myfqdn))
logger.out('Host: {}'.format(myhostname))
logger.out('ID: {}'.format(mynodeid))
logger.out('IPMI hostname: {}'.format(config['ipmi_hostname']))
logger.out('Machine details:')
logger.out(' CPUs: {}'.format(staticdata[0]))
logger.out(' Arch: {}'.format(staticdata[3]))
logger.out(' OS: {}'.format(staticdata[2]))
logger.out(' Kernel: {}'.format(staticdata[1]))
logger.out('')
logger.out('|--------------------------------------------------|')
logger.out('| ######## ## ## ###### |')
logger.out('| ## ## ## ## ## ## |')
logger.out('| ## ## ## ## ## |')
logger.out('| ######## ## ## ## |')
logger.out('| ## ## ## ## |')
logger.out('| ## ## ## ## ## |')
logger.out('| ## ### ###### |')
logger.out('|--------------------------------------------------|')
logger.out('| Parallel Virtual Cluster node daemon v{0: <10} |'.format(version))
logger.out('| FQDN: {0: <42} |'.format(myfqdn))
logger.out('| Host: {0: <42} |'.format(myhostname))
logger.out('| ID: {0: <44} |'.format(mynodeid))
logger.out('| IPMI hostname: {0: <33} |'.format(config['ipmi_hostname']))
logger.out('| Machine details: |')
logger.out('| CPUs: {0: <40} |'.format(staticdata[0]))
logger.out('| Arch: {0: <40} |'.format(staticdata[3]))
logger.out('| OS: {0: <42} |'.format(staticdata[2]))
logger.out('| Kernel: {0: <38} |'.format(staticdata[1]))
logger.out('|--------------------------------------------------|')
logger.out('')

logger.out('Starting pvcnoded on host {}'.format(myfqdn), state='s')

# Define some colours for future messages if applicable
@ -1105,9 +1118,9 @@ def collect_ceph_stats(queue):
        logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread')

    # Get pool info
    command = {"prefix": "df", "format": "json"}
    retcode, stdout, stderr = common.run_os_command('ceph df --format json', timeout=1)
    try:
        ceph_pool_df_raw = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])['pools']
        ceph_pool_df_raw = json.loads(stdout)['pools']
    except Exception as e:
        logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w')
        ceph_pool_df_raw = []
@ -1142,6 +1155,7 @@ def collect_ceph_stats(queue):
    # Assemble a useful data structure
    pool_df = {
        'id': pool['id'],
        'stored_bytes': pool['stats']['stored'],
        'free_bytes': pool['stats']['max_avail'],
        'used_bytes': pool['stats']['bytes_used'],
        'used_percent': pool['stats']['percent_used'],
@ -62,13 +62,13 @@ class VMConsoleWatcherInstance(object):
    def start(self):
        self.thread_stopper.clear()
        self.thread = Thread(target=self.run, args=(), kwargs={})
        self.logger.out('Starting VM log parser', state='i', prefix='Domain {}:'.format(self.domuuid))
        self.logger.out('Starting VM log parser', state='i', prefix='Domain {}'.format(self.domuuid))
        self.thread.start()

    # Stop execution thread
    def stop(self):
        if self.thread and self.thread.isAlive():
            self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}:'.format(self.domuuid))
            self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}'.format(self.domuuid))
            self.thread_stopper.set()
            # Do one final flush
            self.update()
@ -27,6 +27,8 @@ import json

from threading import Thread

from xml.etree import ElementTree

import pvcnoded.zkhandler as zkhandler
import pvcnoded.common as common

@ -208,6 +210,21 @@ class VMInstance(object):
        except Exception as e:
            self.logger.out('Error removing domain from list: {}'.format(e), state='e')

    # Update the VNC live data
    def update_vnc(self):
        if self.dom is not None:
            live_xml = ElementTree.fromstring(self.dom.XMLDesc(0))
            graphics = live_xml.find('./devices/graphics')
            if graphics is not None:
                self.logger.out('Updating VNC data', state='i', prefix='Domain {}'.format(self.domuuid))
                port = graphics.get('port', '')
                listen = graphics.get('listen', '')
                zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): '{}:{}'.format(listen, port)})
            else:
                zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): ''})
        else:
            zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): ''})

    # Start up the VM
    def start_vm(self):
        # Start the log watcher
@ -739,7 +756,8 @@ class VMInstance(object):
            self.removeDomainFromList()
            # Stop the log watcher
            self.console_log_instance.stop()

            # Update the VNC information
            self.update_vnc()
        else:
            # Conditional pass three - Is this VM currently running on this node
            if running == libvirt.VIR_DOMAIN_RUNNING: