Compare commits


27 Commits

Author SHA1 Message Date
0ccfc41398 Bump version to 0.9.13 2021-02-17 11:37:59 -05:00
ab05e0f3db Add B suffix back to bare bytes values 2021-02-17 11:37:36 -05:00
9291ce6ffc Correct output of fromhuman and add size compare
Ensures that the bytes_tohuman returns an integer to avoid the hacky
workaround of stripping off the B.

Adds a verification on the size of a new volume, that it is not larger
than the free space of the pool to prevent errors/excessively-large
volumes from being created.

Closes #120
2021-02-17 11:27:26 -05:00
dd87951642 Fix bad calls in pool element 2021-02-17 11:14:50 -05:00
0e4bece441 Add missing inc/dec of snapshot_count 2021-02-14 17:02:49 -05:00
b33c0ab0e2 Final final fix to snapshot ordering 2021-02-14 16:48:42 -05:00
094d25dafa Increase indent further 2021-02-14 16:43:51 -05:00
150c61d226 Actually fix sorting issue 2021-02-14 16:41:59 -05:00
f1c0c9325d Fix indentation issue with API daemon 2021-02-14 16:41:52 -05:00
26b0a8b5c1 Fix sorting bug with snapshot list 2021-02-14 16:34:43 -05:00
f22f291c8b Add additional field and info to Swagger 2021-02-09 01:49:15 -05:00
9100c63e99 Add stored_bytes to pool stats information 2021-02-09 01:46:01 -05:00
aba567d6c9 Add nice startup banners to both daemons
Add nicer easy-to-find (yay ASCII art) banners for the startup printouts
of both the node and API daemons. Also adds the safe loader to pvcnoded
to prevent hassle messages and a version string in the API daemon file.
2021-02-08 02:51:43 -05:00
0db8fd9da6 Bump version to 0.9.12 2021-01-28 16:29:58 -05:00
a44f134230 Remove systemd deps on zookeeper and libvirt
This caused a serious race condition, since the IPs managed by PVC had
not yet come up, but Zookeeper was trying to start and bind to them,
which of course failed.

Remove these dependencies entirely - the daemon itself starts these
services during initialization and they do not need to be started by
systemd first.
2021-01-28 16:25:02 -05:00
9fbe35fd24 Bump version to 0.9.11 2021-01-05 15:58:26 -05:00
09fdb5da26 Fix a bad reference 2021-01-03 23:29:43 -05:00
80a436d7b6 Fix bad links 2021-01-03 16:56:13 -05:00
74a28f2edd Improve documentation of networks 2021-01-03 16:50:18 -05:00
b8aba3a498 Use proper type for passwd_root option 2021-01-03 16:41:15 -05:00
8b584bc545 Update Ansible manual with recent changes 2021-01-03 16:38:07 -05:00
a24724d9f0 Use external ceph cmd for ceph df 2020-12-26 14:04:21 -05:00
d22a5aa7f2 Move information about memory utilization 2020-12-21 00:20:01 -05:00
78c017d51d Remove erroneous extra colon in log output 2020-12-20 16:06:35 -05:00
1b6613c280 Add live VNC information to domain output
Sets in the node daemon, returns via the API, and shows in the CLI,
information about the live VNC listen address and port for VNC-enabled
VMs.

Closes #115
2020-12-20 16:00:55 -05:00
9aeb86246a Add even further documentation tweaks 2020-12-19 03:32:25 -05:00
6abb8b2456 Clarify additional information in the cluster docs 2020-12-19 03:20:29 -05:00
19 changed files with 517 additions and 85 deletions

View File

@ -20,6 +20,26 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r
## Changelog
#### v0.9.13
* Adds nicer startup messages for daemons
* Adds additional API field for stored_bytes to pool stats
* Fixes sorting issues with snapshot lists
* Fixes missing increment/decrement of snapshot_count on volumes
* Fixes bad calls in pool element API endpoints
* Fixes inconsistent bytes_tohuman behaviour in daemons
* Adds validation and maximum volume size on creation (must be smaller than the pool free space)
#### v0.9.12
* Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition
#### v0.9.11
* Documentation updates
* Adds VNC information to VM info
* Goes back to external Ceph commands for disk usage
#### v0.9.10
* Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down

View File

@ -26,10 +26,31 @@ import pvcapid.flaskapi as pvc_api
# Entrypoint
##########################################################
# Version string for startup output
version = '0.9.11'
if pvc_api.config['ssl_enabled']:
context = (pvc_api.config['ssl_cert_file'], pvc_api.config['ssl_key_file'])
else:
context = None
print('Starting PVC API daemon at {}:{} with SSL={}, Authentication={}'.format(pvc_api.config['listen_address'], pvc_api.config['listen_port'], pvc_api.config['ssl_enabled'], pvc_api.config['auth_enabled']))
# Print our startup messages
print('')
print('|--------------------------------------------------|')
print('| ######## ## ## ###### |')
print('| ## ## ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ######## ## ## ## |')
print('| ## ## ## ## |')
print('| ## ## ## ## ## |')
print('| ## ### ###### |')
print('|--------------------------------------------------|')
print('| Parallel Virtual Cluster API daemon v{0: <11} |'.format(version))
print('| API version: v{0: <34} |'.format(pvc_api.API_VERSION))
print('| Listen: {0: <40} |'.format('{}:{}'.format(pvc_api.config['listen_address'], pvc_api.config['listen_port'])))
print('| SSL: {0: <43} |'.format(str(pvc_api.config['ssl_enabled'])))
print('| Authentication: {0: <32} |'.format(str(pvc_api.config['auth_enabled'])))
print('|--------------------------------------------------|')
print('')
pvc_api.app.run(pvc_api.config['listen_address'], pvc_api.config['listen_port'], threaded=True, ssl_context=context)
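As a note on the SSL handling above: Flask's built-in server (Werkzeug) accepts `ssl_context` either as a `(certificate, key)` tuple or as `None` for plain HTTP, which is exactly what the daemon builds before calling `app.run()`. A minimal standalone sketch of the same pattern, using the cert/key paths the Ansible role installs:

```
from flask import Flask

app = Flask(__name__)

# Werkzeug accepts ssl_context as a (certificate, key) tuple, or None for plain HTTP
ssl_enabled = False  # set True to serve over TLS with the pair below
if ssl_enabled:
    context = ('/etc/pvc/api-cert.pem', '/etc/pvc/api-key.pem')
else:
    context = None

app.run('0.0.0.0', 7370, threaded=True, ssl_context=context)
```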

View File

@ -52,16 +52,16 @@ def strtobool(stringv):
# Parse the configuration file
try:
pvc_config_file = os.environ['PVC_CONFIG_FILE']
pvcapid_config_file = os.environ['PVC_CONFIG_FILE']
except Exception:
print('Error: The "PVC_CONFIG_FILE" environment variable must be set before starting pvcapid.')
exit(1)
print('Starting PVC API daemon')
print('Loading configuration from file "{}"'.format(pvcapid_config_file))
# Read in the config
try:
with open(pvc_config_file, 'r') as cfgfile:
with open(pvcapid_config_file, 'r') as cfgfile:
o_config = yaml.load(cfgfile, Loader=yaml.BaseLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
@ -1023,6 +1023,15 @@ class API_VM_Root(Resource):
console:
type: string
description: The serial console type of the VM
vnc:
type: object
properties:
listen:
type: string
description: The active VNC listen address or 'None'
port:
type: string
description: The active VNC port or 'None'
emulator:
type: string
description: The binary emulator of the VM
@ -3328,12 +3337,15 @@ class API_Storage_Ceph_Pool_Root(Resource):
id:
type: integer
description: The Ceph pool ID
stored_bytes:
type: integer
description: The stored data size (in bytes, post-replicas)
free_bytes:
type: integer
description: The total free space (in bytes)
description: The total free space (in bytes, post-replicas)
used_bytes:
type: integer
description: The total used space (in bytes)
description: The total used space (in bytes, pre-replicas)
used_percent:
type: number
description: The ratio of used space to free space
@ -3455,7 +3467,7 @@ class API_Storage_Ceph_Pool_Element(Resource):
type: object
id: Message
"""
return api_helper, api_helper.ceph_pool_list(
return api_helper.ceph_pool_list(
pool,
is_fuzzy=False
)
@ -3499,7 +3511,7 @@ class API_Storage_Ceph_Pool_Element(Resource):
type: object
id: Message
"""
api_helper.ceph_pool_add(
return api_helper.ceph_pool_add(
pool,
reqargs.get('pgs', None),
reqargs.get('replcfg', None)
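The two `return` fixes above matter because Flask-RESTful treats a resource method's return value as the response body, or as a `(body, status)` tuple; returning the `api_helper` module alongside the result (or, in the POST handler, returning nothing) breaks the response. A hedged sketch of the intended pattern, with a stand-in helper:

```
from flask_restful import Resource

def ceph_pool_list(pool, is_fuzzy=False):
    # Stand-in for api_helper.ceph_pool_list(), which returns (body, HTTP status)
    return [{'name': pool}], 200

class PoolElement(Resource):
    def get(self, pool):
        # Return the helper's (body, status) tuple directly so Flask-RESTful
        # serializes the body and applies the status code
        return ceph_pool_list(pool, is_fuzzy=False)
```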

View File

@ -177,8 +177,8 @@ def upload_ova(pool, name, ova_size):
pvc_common.stopZKConnection(zk_conn)
# Normalize the OVA size to bytes
ova_size_bytes = int(pvc_ceph.format_bytes_fromhuman(ova_size)[:-1])
ova_size = pvc_ceph.format_bytes_fromhuman(ova_size)
ova_size_bytes = pvc_ceph.format_bytes_fromhuman(ova_size)
ova_size = '{}B'.format(ova_size_bytes)
# Verify that the cluster has enough space to store the OVA volumes (2x OVA size, temporarily, 1x permanently)
zk_conn = pvc_common.startZKConnection(config['coordinators'])
@ -274,7 +274,7 @@ def upload_ova(pool, name, ova_size):
vm_volume_size = disk.get('capacity')
# Normalize the dev size to bytes
dev_size = pvc_ceph.format_bytes_fromhuman(dev_size_raw)
dev_size = '{}B'.format(pvc_ceph.format_bytes_fromhuman(dev_size_raw))
def cleanup_img_maps():
zk_conn = pvc_common.startZKConnection(config['coordinators'])
@ -368,7 +368,7 @@ def upload_ova(pool, name, ova_size):
vm_volume_size = disk.get('capacity')
# The function always return XXXXB, so strip off the B and convert to an integer
vm_volume_size_bytes = int(pvc_ceph.format_bytes_fromhuman(vm_volume_size)[:-1])
vm_volume_size_bytes = pvc_ceph.format_bytes_fromhuman(vm_volume_size)
vm_volume_size_gb = math.ceil(vm_volume_size_bytes / 1024 / 1024 / 1024)
query = "INSERT INTO ova_volume (ova, pool, volume_name, volume_format, disk_id, disk_size_gb) VALUES (%s, %s, %s, %s, %s, %s);"

View File

@ -17,6 +17,7 @@ $EDITOR ${changelog_file}
changelog="$( cat ${changelog_file} | grep -v '^#' | sed 's/^*/ */' )"
sed -i "s,version = '${current_version}',version = '${new_version}'," node-daemon/pvcnoded/Daemon.py
sed -i "s,version = '${current_version}',version = '${new_version}'," api-daemon/pvcapid/Daemon.py
readme_tmpdir=$( mktemp -d )
cp README.md ${readme_tmpdir}/

View File

@ -1071,6 +1071,11 @@ def format_info(config, domain_information, long_output):
ainformation.append('{}vCPUs:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vcpu']))
ainformation.append('{}Topology (S/C/T):{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vcpu_topology']))
if domain_information['vnc'].get('listen', 'None') != 'None' and domain_information['vnc'].get('port', 'None') != 'None':
ainformation.append('')
ainformation.append('{}VNC listen:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vnc']['listen']))
ainformation.append('{}VNC port:{} {}'.format(ansiprint.purple(), ansiprint.end(), domain_information['vnc']['port']))
if long_output is True:
# Virtualization information
ainformation.append('')

View File

@ -122,7 +122,7 @@ def format_bytes_fromhuman(datahuman):
dataunit = 'B'
datasize = int(datahuman)
databytes = datasize * byte_unit_matrix[dataunit]
return '{}B'.format(databytes)
return databytes
# Format ops sizes to/from human-readable units
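For reference, the corrected function now always hands back an integer byte count, whatever the input suffix, so callers no longer strip a trailing `B`. A minimal sketch of the same behaviour, assuming a 1024-based unit table like `byte_unit_matrix`:

```
import re

# Assumed 1024-based unit table, standing in for PVC's byte_unit_matrix
BYTE_UNITS = {'B': 1, 'K': 1024, 'M': 1024**2, 'G': 1024**3, 'T': 1024**4}

def bytes_fromhuman(datahuman):
    match = re.match(r'^([0-9]+)([BKMGT])?B?$', str(datahuman))
    if match is None:
        raise ValueError('Invalid human-readable size: {}'.format(datahuman))
    datasize = int(match.group(1))
    dataunit = match.group(2) or 'B'
    return datasize * BYTE_UNITS[dataunit]  # always an integer, no trailing 'B'

print(bytes_fromhuman('4GB'))  # 4294967296
print(bytes_fromhuman(2048))   # 2048
```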
@ -475,7 +475,17 @@ def getVolumeInformation(zk_conn, pool, volume):
def add_volume(zk_conn, pool, name, size):
# 1. Create the volume
# 1. Verify the size of the volume
pool_information = getPoolInformation(zk_conn, pool)
size_bytes = format_bytes_fromhuman(size)
if size_bytes >= int(pool_information['stats']['free_bytes']):
return False, 'ERROR: Requested volume size is greater than the available free space in the pool'
# Add 'B' if the volume is in bytes
if re.match(r'^[0-9]+$', size):
size = '{}B'.format(size)
# 2. Create the volume
retcode, stdout, stderr = common.run_os_command('rbd create --size {} --image-feature layering,exclusive-lock {}/{}'.format(size, pool, name))
if retcode:
return False, 'ERROR: Failed to create RBD volume "{}": {}'.format(name, stderr)
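The new guard means oversized requests fail before any `rbd create` is attempted. A self-contained example of the check, using hypothetical sizes (200 GiB requested against 100 GiB free):

```
def check_volume_fits(size_bytes, pool_free_bytes):
    # Mirrors the new guard in add_volume()
    if size_bytes >= int(pool_free_bytes):
        return False, 'ERROR: Requested volume size is greater than the available free space in the pool'
    return True, ''

print(check_volume_fits(200 * 1024**3, 100 * 1024**3))
# (False, 'ERROR: Requested volume size is greater than the available free space in the pool')
```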
@ -545,7 +555,7 @@ def resize_volume(zk_conn, pool, name, size):
target_lv_conn = libvirt.open(dest_lv)
target_vm_conn = target_lv_conn.lookupByName(vm_info['name'])
if target_vm_conn:
target_vm_conn.blockResize(volume_id, int(format_bytes_fromhuman(size)[:-1]), libvirt.VIR_DOMAIN_BLOCK_RESIZE_BYTES)
target_vm_conn.blockResize(volume_id, format_bytes_fromhuman(size), libvirt.VIR_DOMAIN_BLOCK_RESIZE_BYTES)
target_lv_conn.close()
except Exception:
pass
@ -715,6 +725,16 @@ def add_snapshot(zk_conn, pool, volume, name):
'/ceph/snapshots/{}/{}/{}/stats'.format(pool, volume, name): '{}'
})
# 3. Update the count of snapshots on this volume
volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats = dict(json.loads(volume_stats_raw))
# Format the size to something nicer
volume_stats['snapshot_count'] = volume_stats['snapshot_count'] + 1
volume_stats_raw = json.dumps(volume_stats)
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}/stats'.format(pool, volume): volume_stats_raw
})
return True, 'Created RBD snapshot "{}" of volume "{}" in pool "{}".'.format(name, volume, pool)
@ -751,6 +771,16 @@ def remove_snapshot(zk_conn, pool, volume, name):
# 2. Delete snapshot from Zookeeper
zkhandler.deletekey(zk_conn, '/ceph/snapshots/{}/{}/{}'.format(pool, volume, name))
# 3. Update the count of snapshots on this volume
volume_stats_raw = zkhandler.readdata(zk_conn, '/ceph/volumes/{}/{}/stats'.format(pool, volume))
volume_stats = dict(json.loads(volume_stats_raw))
# Format the size to something nicer
volume_stats['snapshot_count'] = volume_stats['snapshot_count'] - 1
volume_stats_raw = json.dumps(volume_stats)
zkhandler.writedata(zk_conn, {
'/ceph/volumes/{}/{}/stats'.format(pool, volume): volume_stats_raw
})
return True, 'Removed RBD snapshot "{}" of volume "{}" in pool "{}".'.format(name, volume, pool)
@ -783,4 +813,4 @@ def get_list_snapshot(zk_conn, pool, volume, limit, is_fuzzy=True):
else:
snapshot_list.append({'pool': pool_name, 'volume': volume_name, 'snapshot': snapshot_name})
return True, sorted(snapshot_list, key=lambda x: int(x['id']))
return True, sorted(snapshot_list, key=lambda x: str(x['snapshot']))
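The add and remove paths above perform the same read-modify-write of the volume's JSON stats blob, differing only in the sign of the adjustment. A self-contained sketch of that pattern against an in-memory stand-in for the Zookeeper store (the real code goes through `zkhandler.readdata()` and `zkhandler.writedata()`, and the pool/volume names here are hypothetical):

```
import json

# In-memory stand-in for the Zookeeper keys managed by zkhandler
zk_store = {'/ceph/volumes/vms/disk0/stats': json.dumps({'snapshot_count': 2})}

def adjust_snapshot_count(pool, volume, delta):
    stats_key = '/ceph/volumes/{}/{}/stats'.format(pool, volume)
    volume_stats = dict(json.loads(zk_store[stats_key]))
    volume_stats['snapshot_count'] = volume_stats['snapshot_count'] + delta
    zk_store[stats_key] = json.dumps(volume_stats)

adjust_snapshot_count('vms', 'disk0', +1)   # add_snapshot() path
adjust_snapshot_count('vms', 'disk0', -1)   # remove_snapshot() path
print(zk_store['/ceph/volumes/vms/disk0/stats'])  # {"snapshot_count": 2}
```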

View File

@ -267,6 +267,13 @@ def getInformationFromXML(zk_conn, uuid):
except Exception:
domain_profile = None
try:
domain_vnc = zkhandler.readdata(zk_conn, '/domains/{}/vnc'.format(uuid))
domain_vnc_listen, domain_vnc_port = domain_vnc.split(':')
except Exception:
domain_vnc_listen = 'None'
domain_vnc_port = 'None'
parsed_xml = getDomainXML(zk_conn, uuid)
try:
@ -312,6 +319,10 @@ def getInformationFromXML(zk_conn, uuid):
'arch': domain_arch,
'machine': domain_machine,
'console': domain_console,
'vnc': {
'listen': domain_vnc_listen,
'port': domain_vnc_port
},
'emulator': domain_emulator,
'features': domain_features,
'disks': domain_disks,
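The node daemon stores the live VNC data as a single `listen:port` string under `/domains/<uuid>/vnc`, and the block above splits it back apart for the API's `vnc` object, falling back to `'None'` whenever the key is empty or absent. A small sketch of that parsing step:

```
def parse_vnc_data(domain_vnc):
    # domain_vnc is the raw value of /domains/<uuid>/vnc, e.g. '127.0.0.1:5900' or ''
    try:
        listen, port = domain_vnc.split(':')
    except Exception:
        listen, port = 'None', 'None'
    return {'listen': listen, 'port': port}

print(parse_vnc_data('127.0.0.1:5900'))  # {'listen': '127.0.0.1', 'port': '5900'}
print(parse_vnc_data(''))                # {'listen': 'None', 'port': 'None'}
```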

View File

@ -207,6 +207,7 @@ def define_vm(zk_conn, config_data, target_node, node_limit, node_selector, node
'/domains/{}/consolelog'.format(dom_uuid): '',
'/domains/{}/rbdlist'.format(dom_uuid): formatted_rbd_list,
'/domains/{}/profile'.format(dom_uuid): profile,
'/domains/{}/vnc'.format(dom_uuid): '',
'/domains/{}/xml'.format(dom_uuid): config_data
})

debian/changelog
View File

@ -1,3 +1,29 @@
pvc (0.9.13-0) unstable; urgency=high
* Adds nicer startup messages for daemons
* Adds additional API field for stored_bytes to pool stats
* Fixes sorting issues with snapshot lists
* Fixes missing increment/decrement of snapshot_count on volumes
* Fixes bad calls in pool element API endpoints
* Fixes inconsistent bytes_tohuman behaviour in daemons
* Adds validation and maximum volume size on creation (must be smaller than the pool free space)
-- Joshua M. Boniface <joshua@boniface.me> Wed, 17 Feb 2021 11:33:28 -0500
pvc (0.9.12-0) unstable; urgency=high
* Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition
-- Joshua M. Boniface <joshua@boniface.me> Thu, 28 Jan 2021 16:29:58 -0500
pvc (0.9.11-0) unstable; urgency=high
* Documentation updates
* Adds VNC information to VM info
* Goes back to external Ceph commands for disk usage
-- Joshua M. Boniface <joshua@boniface.me> Tue, 05 Jan 2021 15:58:26 -0500
pvc (0.9.10-0) unstable; urgency=high
* Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down

View File

@ -41,31 +41,31 @@ PVC is built from a number of other, open source components. The main system its
Virtual machines themselves are run with the Linux KVM subsystem via the Libvirt virtual machine management library. This provides the maximum flexibility and compatibility for running various guest operating systems in multiple modes (fully-virtualized, para-virtualized, virtio-enabled, etc.).
To manage cluster state, PVC uses Zookeeper. This is an Apache project designed to provide a highly-available and always-consistent key-value database. The various daemons all connect to the distributed Zookeeper database to both obtain details about cluster state, and to manage that state. For instance the node daemon watches Zookeeper for information on what VMs to run, networks to create, etc., while the API writes information to Zookeeper in response to requests.
To manage cluster state, PVC uses Zookeeper. This is an Apache project designed to provide a highly-available and always-consistent key-value database. The various daemons all connect to the distributed Zookeeper database to both obtain details about cluster state, and to manage that state. For instance the node daemon watches Zookeeper for information on what VMs to run, networks to create, etc., while the API writes to or reads information from Zookeeper in response to requests. The Zookeeper database is the glue which holds the cluster together.
Additional relational database functionality, specifically for the DNS aggregation subsystem and the VM provisioner, is provided by the PostgreSQL database and the Patroni management tool, which provides automatic clustering and failover for PostgreSQL database instances.
Additional relational database functionality, specifically for the managed network DNS aggregation subsystem and the VM provisioner, is provided by the PostgreSQL database system and the Patroni management tool, which provides automatic clustering and failover for PostgreSQL database instances.
Node network routing for managed networks providing EBGP VXLAN and route-learning is provided by FRRouting, a descendant project of Quagga and GNU Zebra.
Node network routing for managed networks providing EBGP VXLAN and route-learning is provided by FRRouting, a descendant project of Quagga and GNU Zebra. Upstream routers can use this interface to learn routes to cluster networks as well.
The storage subsystem is provided by Ceph, a distributed object-based storage subsystem with extensive scalability, self-managing, and self-healing functionality. The Ceph RBD (Rados Block Device) subsystem is used to provide VM block devices similar to traditional LVM or ZFS zvols, but in a distributed, shared-storage manner.
The storage subsystem is provided by Ceph, a distributed object-based storage subsystem with extensive scalability, self-managing, and self-healing functionality. The Ceph RBD (RADOS Block Device) subsystem is used to provide VM block devices similar to traditional LVM or ZFS zvols, but in a distributed, shared-storage manner.
All the components are designed to be run on top of Debian GNU/Linux, specifically Debian 10.X "Buster", with the SystemD system service manager. This OS provides a stable base to run the various other subsystems while remaining truly Free Software, while SystemD provides functionality such as automatic daemon restarting and complex startup/shutdown ordering.
## Cluster Architecture
A PVC cluster is based around "nodes", which are physical servers on which the various daemons, storage, networks, and virtual machines run. Each node is self-contained and is able to perform any and all cluster functions if needed; there is no segmentation of function between different types of physical hosts. Ideally, all nodes in a cluster will be identical in specifications, but in some situations mismatched nodes are acceptable, with limitations.
A PVC cluster is based around "nodes", which are physical servers on which the various daemons, storage, networks, and virtual machines run. Each node is self-contained and is able to perform any and all cluster functions if needed and configured to do so; there is no strict segmentation of function between different "types" of physical hosts. Ideally, all nodes in a cluster will be identical in specifications, but in some situations mismatched nodes are acceptable, with limitations.
A subset of the nodes, called "coordinators", are statically configured to provide additional services for the cluster. For instance, all databases, FRRouting instances, and Ceph management daemons run only on the set of cluster coordinators. At cluster bootstrap, 1 (testing-only), 3 (small clusters), or 5 (large clusters) nodes may be chosen as the coordinators. Other nodes can then be added as "hypervisor" nodes, which then provide only block device (storage) and VM (compute) functionality by connecting to the set of coordinators. This limits the scaling problem of the databases while ensuring there is still maximum redundancy and resiliency for the core cluster services.
A subset of the nodes, called "coordinators", are statically configured to provide services for the cluster. For instance, all databases, FRRouting instances, and Ceph management daemons run only on the set of cluster coordinators. At cluster bootstrap, 1 (testing-only), 3 (small clusters), or 5 (large clusters) nodes may be chosen as the coordinators. Other nodes can then be added as "hypervisor" nodes, which then provide only block device (storage) and VM (compute) functionality by connecting to the set of coordinators. This limits the scaling problem of the databases while ensuring there is still maximum redundancy and resiliency for the core cluster services.
Additional nodes can be added to the cluster either as coordinators, or as hypervisors, by adding them to the Ansible configuration and running it against the full set of nodes. Note that the number of coordinators must always be odd, and more than 5 coordinators are normally unnecessary and can cause issues with the database; it is thus normally advisable to add any nodes beyond the initial set as hypervisors instead of coordinators. Nodes can be removed from service, but this is a manual process and should not be attempted unless absolutely required; the Ceph subsystem in particular is sensitive to changes in the coordinator nodes.
Additional nodes can be added to the cluster either as coordinators, or as hypervisors, by adding them to the Ansible configuration and running it against the full set of nodes. Note that the number of coordinators must always be odd, and more than 5 coordinators are normally unnecessary and can cause issues with the database; it is thus normally advisable to add any nodes beyond the initial set as hypervisors instead of coordinators. Nodes can be removed from service, but this is a manual process and should not be attempted unless absolutely required; the Ceph subsystem in particular is sensitive to changes in the coordinator nodes. Nodes can also be upgraded or replaced dynamically and without interrupting the cluster, allowing for seamless hardware maintenance, upgrades, and even replacement, as cluster state configuration is held cluster-wide.
During runtime, one coordinator is elected the "primary" for the cluster. This designation can shift dynamically in response to cluster events, or be manually migrated by an administrator. The coordinator takes on a number of roles for which only one host may be active at once, for instance to provide DHCP services to managed client networks or to interface with the API.
Nodes are networked together via a set of statically-configured networks. At a minimum, 2 discrete networks are required, with an optional 3rd.
Nodes are networked together via a set of statically-configured, simple layer-2 networks. At a minimum, 2 discrete networks are required, with an optional 3rd.
* The "upstream" network is the primary network for the nodes, and provides functions such as upstream Internet access, routing to and from the cluster nodes, and management via the API; it may be either a firewalled public or NAT'd RFC1918 network, but should never be exposed directly to the Internet.
* The "upstream" network is the primary network for the nodes, and provides functions such as upstream Internet access, routing to and from the cluster nodes, and management via the API; it may be either a firewalled public or NAT'd RFC1918 network, but should never be exposed directly to the Internet. It should also contain, or be able to route to, the IPMI BMC management interfaces of the node chassis'.
* The "cluster" network is an unrouted RFC1918 network which provides inter-node communication for managed client network traffic (VXLANs), cross-node routing, VM migration and failover, and database replication and access.
* The "storage" network is another unrouted RFC1918 network which provides a dedicated logical and/or physical link between the nodes for storage traffic, including VM block device storage traffic, inter-OSD replication traffic, and Ceph heartbeat traffic, thus allowing it to be completely isolated from the other networks for maximum performance. This network can be optionally colocated with the "cluster" network, by specifying the same device for both, and can be further combined by specifying the same IP for both to completely collapse the "cluster" and "storage" networks. This may be ideal to simply management of small clusters.
* The "storage" network is another unrouted RFC1918 network which provides a dedicated logical and/or physical link between the nodes for storage traffic, including VM block device storage traffic, inter-OSD replication traffic, and Ceph heartbeat traffic, thus allowing it to be completely isolated from the other networks for maximum performance. This network can be optionally colocated with the "cluster" network, by specifying the same device for both, and can be further combined by specifying the same IP for both to completely collapse the "cluster" and "storage" networks. A collapsed cluster+storage configuration may be ideal to simplify management of small clusters, or a split configuration can be used to provide flexbility for large or demanding high-performance clusters - this choice is left to the administrator based on their needs.
Within each network is a single "floating" IP address which follows the primary coordinator, providing a single interface to the cluster. Once configured, the cluster is then able to create additional networks of two kinds, "bridged" traditional vLANs and "managed" routed VXLANs, to provide network access to VMs.
@ -79,15 +79,15 @@ The API client is a Flask-based RESTful API and is the core interface to PVC. By
The API generally accepts all requests as HTTP form requests following standard RESTful guidelines, supporting arguments in the URI string or, with limited exceptions, in the message body. The API returns JSON response bodies to all requests consisting either of the information requested, or a `{ "message": "text" }` construct to pass informational status messages back to the client.
The API client manual can be found at the [API manual page](/manuals/api), and the full API documentation can be found at the [API reference page](/manuals/api-reference.html).
The API client manual can be found at the [API manual page](/manuals/api), and the full API details can be found in the [API reference specification](/manuals/api-reference.html).
### Direct Bindings
The API client uses a dedicated set of Python libraries, packaged as the `pvc-daemon-common` Debian package, to communicate with the cluster. It is thus possible to build custom Python clients that directly interface with the PVC cluster, without having to get "into the weeds" of the Zookeeper or PostgreSQL databases.
The API client uses a dedicated set of Python libraries, packaged as the `pvc-daemon-common` Debian package, to communicate with the cluster. One can thus use these libraries to build custom Python clients that directly interface with the PVC cluster, without having to get "into the weeds" of the Zookeeper or PostgreSQL databases.
### CLI Client
The CLI client is a Python Click application, which provides a convenient CLI interface to the API client. It supports connecting to multiple clusters from a single instance, with or without authentication and over both HTTP or HTTPS, including a special "local" cluster if the client determines that an API configuration exists on the local host. Information about the configured clusters is stored in a local JSON document, and a default cluster can be set with an environment variable.
The CLI client is a Python Click application, which provides a convenient CLI interface to the API client. It supports connecting to multiple clusters from a single instance, with or without authentication and over both HTTP or HTTPS, including a special "local" cluster if the client determines that an API configuration exists on the local host. Information about the configured clusters is stored in a local JSON document, and a default cluster can be set with an environment variable. The CLI client can thus be run either on PVC nodes themselves, or on other, remote systems which can then interface with cluster(s) over the network.
The CLI client is self-documenting using the `-h`/`--help` arguments throughout, easing the administrator learning curve and providing easy access to command details. A short manual can also be found at the [CLI manual page](/manuals/cli).
@ -97,6 +97,8 @@ The overall management, deployment, bootstrapping, and configuring of nodes is a
The Ansible configuration and architecture manual can be found at the [Ansible manual page](/manuals/ansible).
The [getting started documentation](/getting-started) provides a walkthrough of using these tools to bootstrap a new cluster.
## Frequently Asked Questions
### General Questions
@ -116,12 +118,13 @@ PVC might be right for you if:
1. You need KVM-based VMs.
2. You want management of storage and networking (a.k.a. "batteries-included") in the same tool.
3. You want hypervisor-level redundancy, able to tolerate hypervisor downtime seamlessly, for all elements of the stack.
4. You have a requirement of at least 3 nodes' worth of compute and storage.
I built PVC for my homelab first, found a perfect use-case with my employer, and think it might be useful to you too.
If all you want is a simple home server solution, or you demand scalability beyond a few dozen compute nodes, PVC is likely not what you're looking for. Its sweet spot is specifically in the 3-9 node range, for instance in an advanced homelab, for SMBs or small ISPs with a relatively small server stack, or for MSPs looking to deploy small on-premises clusters at low cost.
#### Is 3 hypervisors really the minimum?
For a redundant cluster, yes. PVC requires a majority quorum for proper operation at various levels, and the smallest possible majority quorum is 2-of-3; thus 3 nodes is the safe minimum. That said, you can run PVC on a single node for testing/lab purposes without host-level redundancy, should you wish to do so, and it might also be possible to run 2 "main" systems with a 3rd "quorum observer" hosting only the management tools but no VMs, however this is not officially supported.
For a redundant cluster, yes. PVC requires a majority quorum for proper operation at various levels, and the smallest possible majority quorum is 2-of-3; thus 3 nodes is the smallest safe minimum. That said, you can run PVC on a single node for testing/lab purposes without host-level redundancy, should you wish to do so, and it might also be possible to run 2 "main" systems with a 3rd "quorum observer" hosting only the management tools but no VMs; however these options are not officially supported, as PVC is designed primarily for 3+ node operation.
### Feature Questions
@ -133,6 +136,10 @@ No, not directly. PVC supports only KVM VMs. To run containers, you would need t
Not yet. Right now, PVC management is done exclusively with the CLI interface to the API. A WebUI can and likely will be built in the future, but I'm not a frontend developer and I do not consider this a personal priority. As of late 2020 the API is generally stable, so I would welcome 3rd party assistance here.
#### I want feature X, does it fit with PVC?
That depends on the specific feature. I will limit features to those that align with the overall goals of PVC, that is to say, to provide an easy-to-use hyperconverged virtualization system focused on redundancy. If a feature suits this goal it is likely to be considered; if it does not, it will not. PVC is rapidly approaching the completion of its 1.0 roadmap, which I consider feature-complete for the primary usecase, and future versions may expand in scope.
### Storage Questions
#### Can I use RAID-5/RAID-6 with PVC?

View File

@ -46,8 +46,6 @@ The following table provides bare-minimum, recommended, and optimal specificatio
| Total RAM (n-1) | 32GB | 96GB | 128GB |
| Total disk space | 200GB | 400GB | 800GB |
Of these totals, some amount of CPU and RAM will be used by the storage subsystem and the PVC daemons themselves, meaning that the total available for virtual machines is slightly less. Generally, each OSD data disk will consume 1 vCPU at load and 1-2GB RAM, so nodes should be sized not only according to the VM workload, but the number of storage disks per node. Additionally the coordinator databases will use additional RAM and CPU resources of up to 1-4GB per node, though there is generally little need to spec coordinators any larger than non-coordinator nodes and the VM automatic node selection process will take used RAM into account by default.
### System Disks
The system disk(s) chosen are important to consider, especially for coordinators. Ideally, an SSD, or two SSDs in RAID-1/mirroring are recommended for system disks. This helps ensure optimal performance for the system (e.g. swap space) and PVC components such as databases as well as the Ceph caches.
@ -62,6 +60,18 @@ The general rule for available resource capacity planning can be thought of as "1
For memory provisioning of VMs, PVC will warn the administrator, via a Degraded cluster state, if the "n-1" RAM quantity is exceeded by the total maximum allocation of all running VMs. This situation can be worked around with sufficient swap space on nodes to ensure there is overflow, however the warning cannot be overridden. If nodes are of mismatched sizes, the "n-1" RAM quantity is calculated by removing (one of) the largest node in the cluster and adding the remaining nodes' RAM counts together.
### System Memory Utilization
By default, several components of PVC outside of VMs will have large memory allocations, most notably Ceph OSD processes and Zookeeper database processes. These processes should be considered when selecting the RAM allocation of nodes, and adjusted in the Ansible `group_vars` if lower defaults are required.
#### Ceph OSD processes
By default, PVC will allow several GB (up to 4-6GB) of RAM allocation per OSD to maximize the available cache space and hence disk performance. This can be lowered as far as 939MB should the administrator require due to a low RAM configuration, but no further due to Ceph limitations; therefore at least 1GB of memory per storage OSD is required even in the most limited case.
#### Zookeeper processes
By default, the Java heap and stack sizes are set to 256MB and 512MB respectively, yielding a memory usage of 500+MB after several days or weeks of uptime. This can be lowered to 32M or less for lightly-used clusters should the administrator require due to a low RAM configuration.
### Operating System and Architecture
As an underlying OS, only Debian GNU/Linux 10.x "Buster" is supported by PVC. This is the operating system installed by the PVC [node installer](https://github.com/parallelvirtualcluster/pvc-installer) and expected by the PVC [Ansible configuration system](https://github.com/parallelvirtualcluster/pvc-ansible). Ubuntu or other Debian-derived distributions may work, but are not officially supported. PVC also makes use of a custom repository to provide the PVC software and an updated version of Ceph beyond what is available in the base operating system, and this is only compatible officially with Debian 10 "Buster". PVC will, in the future, upgrade to future versions of Debian based on their release schedule and testing; releases may be skipped for official support if required. As a general rule, using the current versions of the official node installer and Ansible repository is the preferred and only supported method for deploying PVC.

View File

@ -18,6 +18,26 @@ To get started with PVC, please see the [About](https://parallelvirtualcluster.r
## Changelog
#### v0.9.13
* Adds nicer startup messages for daemons
* Adds additional API field for stored_bytes to pool stats
* Fixes sorting issues with snapshot lists
* Fixes missing increment/decrement of snapshot_count on volumes
* Fixes bad calls in pool element API endpoints
* Fixes inconsistent bytes_tohuman behaviour in daemons
* Adds validation and maximum volume size on creation (must be smaller than the pool free space)
#### v0.9.12
* Fixes a bug in the pvcnoded service unit file causing a Zookeeper startup race condition
#### v0.9.11
* Documentation updates
* Adds VNC information to VM info
* Goes back to external Ceph commands for disk usage
#### v0.9.10
* Moves OSD stats uploading to primary, eliminating reporting failures while hosts are down

View File

@ -6,7 +6,7 @@ The PVC Ansible setup and management framework is written in Ansible. It consist
The Base role configures a node to a specific, standard base Debian system, with a number of PVC-specific tweaks. Some examples include:
* Installing the custom PVC repository at Boniface Labs.
* Installing the custom PVC repository hosted at Boniface Labs.
* Removing several unnecessary packages and installing numerous additional packages.
@ -22,6 +22,8 @@ The Base role configures a node to a specific, standard base Debian system, with
The end result is a standardized "PVC node" system ready to have the daemons installed by the PVC role.
The Base role is optional: if an administrator so chooses, they can bypass this role and configure things manually. That said, for the proper functioning of the PVC role, the Base role should always be applied first.
## PVC role
The PVC role configures all the dependencies of PVC, including storage, networking, and databases, then installs the PVC daemon itself. Specifically, it will, in order:
@ -30,21 +32,19 @@ The PVC role configures all the dependencies of PVC, including storage, networki
* Install, configure, and if `bootstrap=yes` is set, bootstrap a Zookeeper cluster (coordinators only).
* Install, configure, and if `bootstrap=yes` is set`, bootstrap a Patroni PostgreSQL cluster for the PowerDNS aggregator (coordinators only).
* Install, configure, and if `bootstrap=yes` is set, bootstrap a Patroni PostgreSQL cluster for the PowerDNS aggregator (coordinators only).
* Install and configure Libvirt.
* Install and configure FRRouting.
* Install and configure the main PVC daemon and API client, including initializing the PVC cluster (`pvc task init`).
* Install and configure the main PVC daemon and API client.
* If `bootstrap=yes` is set, initialize the PVC cluster (`pvc task init`).
## Completion
Once the entire playbook has run for the first time against a given host, the host will be rebooted to apply all the configured services. On startup, the system should immediately launch the PVC daemon, check in to the Zookeeper cluster, and become ready. The node will be in `flushed` state on its first boot; the administrator will need to run `pvc node unflush <node>` to set the node into active state ready to handle virtual machines.
# PVC Ansible configuration manual
This manual documents the various `group_vars` configuration options for the `pvc-ansible` framework. We assume that the administrator is generally familiar with Ansible and its operation.
Once the entire playbook has run for the first time against a given host, the host will be rebooted to apply all the configured services. On startup, the system should immediately launch the PVC daemon, check in to the Zookeeper cluster, and become ready. The node will be in `flushed` state on its first boot; the administrator will need to run `pvc node unflush <node>` to set the node into active state ready to handle virtual machines. On the first bootstrap run, the administrator will also have to configure storage block devices (OSDs), networks, etc. For full details, see [the main getting started page](/getting-started).
## General usage
@ -62,7 +62,7 @@ Create a `group_vars/<cluster>` folder to hold the cluster configuration variabl
### Bootstrapping a cluster
Before bootstrapping a cluster, see the section on [PVC Ansible configuration variables](/manuals/ansible#pvc-ansible-configuration-variables) to configure the cluster.
Before bootstrapping a cluster, see the section on [PVC Ansible configuration variables](/manuals/ansible/#pvc-ansible-configuration-variables) to configure the cluster.
Bootstrapping a cluster can be done using the main `pvc.yml` playbook. Generally, a bootstrap run should be limited to the coordinators of the cluster to avoid potential race conditions or strange bootstrap behaviour. The special variable `bootstrap=yes` must be set to indicate that a cluster bootstrap is to be requested.
@ -74,7 +74,13 @@ Adding new nodes to an existing cluster can be done using the main `pvc.yml` pla
### Reconfiguration and software updates
After modifying configuration settings in the `group_vars`, or to update PVC to the latest version on a release, deployment of updated cluster can be done using the main `pvc.yml` playbook. The configuration should be updated if required, then the playbook run against all hosts in the cluster with no special flags or limits.
For general, day-to-day software updates such as base system updates or upgrading to newer PVC versions, a special playbook, `oneshot/update-pvc-cluster.yml`, is provided. This playbook will gracefully update and upgrade all PVC nodes in the cluster, flush them, reboot them, and then unflush them. This operation should be completely transparent to VMs on the cluster.
For more advanced updates, such as changing configurations in the `group_vars`, the main `pvc.yml` playbook can be used to deploy the changes across all hosts. Note that this may cause downtime due to node reboots if certain configurations change, and it is not recommended to use this process frequently.
# PVC Ansible configuration manual
This manual documents the various `group_vars` configuration options for the `pvc-ansible` framework. We assume that the administrator is generally familiar with Ansible and its operation.
## PVC Ansible configuration variables
@ -96,10 +102,14 @@ Example configuration:
```
---
cluster_group: mycluster
timezone_location: Canada/Eastern
local_domain: upstream.local
username_ipmi_host: "pvc"
passwd_ipmi_host: "MyPassword2019"
passwd_root: MySuperSecretPassword # Not actually used by the playbook, but good for reference
passwdhash_root: "$6$shadowencryptedpassword"
logrotate_keepcount: 7
@ -118,13 +128,19 @@ admin_users:
- "ssh-ed25519 MyKey 2019-06"
networks:
"upstream":
"bondU":
device: "bondU"
type: "bond"
bond_mode: "802.3ad"
bond_devices:
- "enp1s0f0"
- "enp1s0f1"
mtu: 9000
"upstream":
device: "vlan1000"
type: "vlan"
raw_device: "bondU"
mtu: 1500
domain: "{{ local_domain }}"
subnet: "192.168.100.0/24"
@ -144,12 +160,24 @@ networks:
device: "vlan1002"
type: "vlan"
raw_device: "bondU"
mtu: 1500
mtu: 9000
domain: "pvc-storage.local"
subnet: "10.0.1.0/24"
floating_ip: "10.0.1.254/24"
```
#### `cluster_group`
* *required*
The name of the Ansible PVC cluster group in the `hosts` inventory.
#### `timezone_location`
* *required*
The TZ database format name of the local timezone, e.g. `America/Toronto` or `Canada/Eastern`.
#### `local_domain`
* *required*
@ -172,6 +200,12 @@ The IPMI password, in plain text, used by PVC to communicate with the node manag
Generate using `pwgen -s 16` and adjusting length as required.
#### `passwd_root`
* *ignored*
Used only for reference, the plain-text root password for `passwdhash_root`.
#### `passwdhash_root`
* *required*
@ -240,9 +274,13 @@ A list of SSH public key strings, in `authorized_keys` line format, for the user
* *required*
A dictionary of networks to configure on the nodes. Three networks are required by all PVC clusters, though additional networks may be configured here as well.
A dictionary of networks to configure on the nodes.
The three required networks are: `upstream`, `cluster`, `storage`.
The key will be used to "name" the interface file under `/etc/network/interfaces.d`, but otherwise the `device` is the real name of the device (e.g. `iface [device] inet ...`).
The three required networks are: `upstream`, `cluster`, `storage`. If `storage` is configured identically to `cluster`, the two networks will be collapsed into one; for details on this, please see the [documentation about the storage network](/cluster-architecture/#storage-connecting-ceph-daemons-with-each-other-and-with-osds).
Additional networks can also be specified here to automate their configuration. In the above example, a "bondU" interface is configured, which the remaining required networks use as their `raw_device`.
Within each `network` element, the following options may be specified:
@ -250,7 +288,7 @@ Within each `network` element, the following options may be specified:
* *required*
The network device name.
The real network device name.
##### `type`
@ -321,18 +359,33 @@ pvc_log_keepalive_cluster_details: True
pvc_log_keepalive_storage_details: True
pvc_log_console_lines: 1000
pvc_vm_shutdown_timeout: 180
pvc_keepalive_interval: 5
pvc_fence_intervals: 6
pvc_suicide_intervals: 0
pvc_fence_successful_action: migrate
pvc_fence_failed_action: None
pvc_osd_memory_limit: 4294967296
pvc_zookeeper_heap_limit: 256M
pvc_zookeeper_stack_limit: 512M
pvc_api_listen_address: "0.0.0.0"
pvc_api_listen_port: "7370"
pvc_api_enable_authentication: False
pvc_api_secret_key: ""
pvc_api_enable_authentication: False
pvc_api_tokens:
- description: "myuser"
token: ""
pvc_api_enable_ssl: False
pvc_api_ssl_cert_path: /etc/ssl/pvc/cert.pem
pvc_api_ssl_cert: >
-----BEGIN CERTIFICATE-----
MIIxxx
-----END CERTIFICATE-----
pvc_api_ssl_key_path: /etc/ssl/pvc/key.pem
pvc_api_ssl_key: >
-----BEGIN PRIVATE KEY-----
MIIxxx
@ -343,6 +396,9 @@ pvc_ceph_storage_secret_uuid: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
pvc_dns_database_name: "pvcdns"
pvc_dns_database_user: "pvcdns"
pvc_dns_database_password: "xxxxxxxx"
pvc_api_database_name: "pvcapi"
pvc_api_database_user: "pcapi"
pvc_api_database_password: "xxxxxxxx"
pvc_replication_database_user: "replicator"
pvc_replication_database_password: "xxxxxxxx"
pvc_superuser_database_user: "postgres"
@ -393,6 +449,8 @@ pvc_nodes:
ipmi_user: "{{ username_ipmi_host }}"
ipmi_password: "{{ passwd_ipmi_host }}"
pvc_bridge_device: bondU
pvc_upstream_device: "{{ networks['upstream']['device'] }}"
pvc_upstream_mtu: "{{ networks['upstream']['mtu'] }}"
pvc_upstream_domain: "{{ networks['upstream']['domain'] }}"
@ -413,19 +471,23 @@ pvc_storage_floatingip: "{{ networks['storage']['floating_ip'] }}"
#### `pvc_log_to_file`
* *required*
* *optional*
Whether to log PVC output to the file `/var/log/pvc/pvc.log`. Must be one of, unquoted: `True`, `False`.
If unset, a default value of "False" is set in the role defaults.
#### `pvc_log_to_stdout`
* *required*
* *optional*
Whether to log PVC output to stdout, i.e. `journald`. Must be one of, unquoted: `True`, `False`.
If unset, a default value of "True" is set in the role defaults.
#### `pvc_log_colours`
* *required*
* *optional*
Whether to include ANSI coloured prompts (`>>>`) for status in the log output. Must be one of, unquoted: `True`, `False`.
@ -433,39 +495,153 @@ Requires `journalctl -o cat` or file logging in order to be visible and useful.
If set to False, the prompts will instead be text values.
If unset, a default value of "True" is set in the role defaults.
#### `pvc_log_dates`
* *required*
* *optional*
Whether to include dates in the log output. Must be one of, unquoted: `True`, `False`.
Requires `journalctl -o cat` or file logging in order to be visible and useful (and not clutter the logs with duplicate dates).
If unset, a default value of "False" is set in the role defaults.
#### `pvc_log_keepalives`
* *required*
* *optional*
Whether to log keepalive messages. Must be one of, unquoted: `True`, `False`.
Whether to log the regular keepalive messages. Must be one of, unquoted: `True`, `False`.
If unset, a default value of "True" is set in the role defaults.
#### `pvc_log_keepalive_cluster_details`
* *required*
* *optional*
* *ignored* if `pvc_log_keepalives` is `False`
Whether to log cluster and node details during keepalive messages. Must be one of, unquoted: `True`, `False`.
If unset, a default value of "True" is set in the role defaults.
#### `pvc_log_keepalive_storage_details`
* *required*
* *optional*
* *ignored* if `pvc_log_keepalives` is `False`
Whether to log storage cluster details during keepalive messages. Must be one of, unquoted: `True`, `False`.
If unset, a default value of "True" is set in the role defaults.
#### `pvc_log_console_lines`
* *required*
* *optional*
The number of output console lines to log for each VM.
The number of output console lines to log for each VM, to be used by the console log endpoints (`pvc vm log`).
If unset, a default value of "1000" is set in the role defaults.
#### `pvc_vm_shutdown_timeout`
* *optional*
The number of seconds to wait for a VM to `shutdown` before it is forced off.
A value of "0" disables this functionality.
If unset, a default value of "180" is set in the role defaults.
#### `pvc_keepalive_interval`
* *optional*
The number of seconds between node keepalives.
If unset, a default value of "5" is set in the role defaults.
**WARNING**: Changing this value is not recommended except in exceptional circumstances.
#### `pvc_fence_intervals`
* *optional*
The number of keepalive intervals to be missed before other nodes consider a node `dead` and trigger the fencing process. The total time elapsed will be `pvc_keepalive_interval * pvc_fence_intervals`.
If unset, a default value of "6" is set in the role defaults.
**NOTE**: This is not the total time until a node is fenced. A node has a further 6 (hardcoded) `pvc_keepalive_interval`s ("saving throw" attempts) to try to send a keepalive before it is actually fenced. Thus, with the default values, this works out to a total of 60 +/- 5 seconds between a node crashing and it being fenced. An administrator of a very important cluster may want to set this lower, perhaps to 2, or even 1, leaving only the "saving throws", though this is not recommended for most clusters, due to timing overhead from various other subsystems.
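A quick worked example of the timing described above, using the role defaults (a rough illustration, ignoring the +/- one-interval jitter):

```
pvc_keepalive_interval = 5  # seconds between keepalives (role default)
pvc_fence_intervals = 6     # missed keepalives before a node is considered dead
saving_throws = 6           # additional hardcoded intervals before fencing fires

time_to_dead = pvc_keepalive_interval * pvc_fence_intervals             # 30 seconds
time_to_fence = time_to_dead + pvc_keepalive_interval * saving_throws   # 60 seconds
print(time_to_dead, time_to_fence)  # 30 60
```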
#### `pvc_suicide_intervals`
* *optional*
The number of keepalive intervals without the ability to send a keepalive before a node considers *itself* to be dead and reboots itself.
A value of "0" disables this functionality.
If unset, a default value of "0" is set in the role defaults.
**WARNING**: This option is provided to allow additional flexibility in fencing behaviour. Normally, it is not safe to set a `pvc_fence_failed_action` of `migrate`, since if the other nodes cannot fence a node its VMs cannot be safely started on other nodes. This would also apply to nodes without IPMI-over-LAN which could not be fenced normally. This option provides an alternative way to guarantee this safety, at least in situations where the node can still reliably shut itself down (i.e. it is not hard-locked). The administrator should however take special care and thoroughly test their system before using these alternative fencing options in production, as the results could be disastrous.
#### `pvc_fence_successful_action`
* *optional*
The action the cluster should take upon a successful node fence with respect to running VMs. Must be one of, unquoted: `migrate`, `None`.
If unset, a default value of "migrate" is set in the role defaults.
An administrator can set the value "None" to disable automatic VM recovery migrations after a node fence.
#### `pvc_fence_failed_action`
* *optional*
The action the cluster should take upon a failed node fence with respect to running VMs. Must be one of, unquoted: `migrate`, `None`.
If unset, a default value of "None" is set in the role defaults.
**WARNING**: See the warning in the above `pvc_suicide_intervals` section for details on the purpose of this option. Do not set this option to "migrate" unless you have also set `pvc_suicide_intervals` to a non-"0" value and understand the caveats and risks.
#### `pvc_fence_migrate_target_selector`
* *optional*
The migration selector to use when running a `migrate` command after a node fence. Must be one of, unquoted: `mem`, `load`, `vcpu`, `vms`.
If unset, a default value of "mem" is set in the role defaults.
**NOTE**: These values map to the standard VM meta `selector` options, and determine how nodes select where to run the migrated VMs.
#### `pvc_osd_memory_limit`
* *optional*
The memory limit, in bytes, to pass to the Ceph OSD processes. Only set once, during cluster bootstrap; subsequent changes to this value must be manually made in the `files/*/ceph.conf` static configuration for the cluster in question.
If unset, a default value of "4294967296" (i.e. 4GB) is set in the role defaults.
As per Ceph documentation, the minimum value possible is "939524096" (i.e. ~1GB), and the default matches the Ceph system default. Setting a lower value is only recommended for systems with relatively low memory availability, where the default of 4GB per OSD is too large; it is recommended to increase the total system memory first before tweaking this setting to ensure optimal storage performance across all workloads.
#### `pvc_zookeeper_heap_limit`
* *optional*
The memory limit to pass to the Zookeeper Java process for its heap.
If unset, a default value of "256M" is set in the role defaults.
The administrator may set this to a lower value on memory-constrained systems or if the memory usage of the Zookeeper process becomes excessive.
#### `pvc_zookeeper_stack_limit`
* *optional*
The memory limit to pass to the Zookeeper Java process for its stack.
If unset, a default value of "512M" is set in the role defaults.
The administrator may set this to a lower value on memory-constrained systems or if the memory usage of the Zookeeper process becomes excessive.
#### `pvc_api_listen_address`
@ -519,17 +695,33 @@ Generate using `uuidgen` or `pwgen -s 32` and adjusting length as required.
Whether to enable SSL for the PVC API. Must be one of, unquoted: `True`, `False`.
#### `pvc_api_ssl_cert_path`
* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_cert` is not set.
The path to an (existing) SSL certificate on the node system for the PVC API to use.
#### `pvc_api_ssl_cert`
* *required* if `pvc_api_enable_ssl` is `True`
* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_cert_path` is not set.
The SSL certificate, in text form, for the PVC API to use.
The SSL certificate, in text form, for the PVC API to use. Will be installed to `/etc/pvc/api-cert.pem` on the node system.
#### `pvc_api_ssl_key_path`
* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_key` is not set.
The path to an (existing) SSL private key on the node system for the PVC API to use.
#### `pvc_api_ssl_key`
* *required* if `pvc_api_enable_ssl` is `True`
* *optional*
* *required* if `pvc_api_enable_ssl` is `True` and `pvc_api_ssl_key_path` is not set.
The SSL private key, in text form, for the PVC API to use.
The SSL private key, in text form, for the PVC API to use. Will be installed to `/etc/pvc/api-key.pem` on the node system.
#### `pvc_ceph_storage_secret_uuid`
@ -559,6 +751,26 @@ The password of the PVC DNS aggregator database user.
Generate using `pwgen -s 16` and adjusting length as required.
#### `pvc_api_database_name`
* *required*
The name of the PVC API database.
#### `pvc_api_database_user`
* *required*
The username of the PVC API database user.
#### `pvc_api_database_password`
* *required*
The password of the PVC API database user.
Generate using `pwgen -s 16` and adjusting length as required.
#### `pvc_replication_database_user`
* *required*
@ -589,10 +801,12 @@ Generate using `pwgen -s 16` and adjusting length as required.
#### `pvc_asn`
* *required*
* *optional*
The private autonomous system number used for BGP updates to upstream routers.
A default value of "65001" is set in the role defaults if left unset.
#### `pvc_routers`
A list of upstream routers to communicate BGP routes to.
@ -681,6 +895,12 @@ The IPMI username for the node management controller. Unless a per-host override
The IPMI password for the node management controller. Unless a per-host override is required, should usually use the previously-configured global `passwd_ipmi_host`. All notes from that entry apply.
#### `pvc_bridge_device`
* *required*
The device name of the underlying network interface to be used for "bridged"-type client networks. For each "bridged"-type network, an IEEE 802.1q vLAN and bridge will be created on top of this device to pass these networks. In most cases, using the reflexive `networks['cluster']['raw_device']` or `networks['upstream']['raw_device']` from the Base role is sufficient.
#### `pvc_<network>_*`
The next set of entries is hard-coded to use the values from the global `networks` list. It should not need to be changed under most circumstances. Refer to the previous sections for specific notes about each entry.

View File

@ -621,7 +621,7 @@
"stats": {
"properties": {
"free_bytes": {
"description": "The total free space (in bytes)",
"description": "The total free space (in bytes. post-replicas)",
"type": "integer"
},
"id": {
@ -660,8 +660,12 @@
"description": "The total read operations on the pool (pool-lifetime)",
"type": "integer"
},
"stored_bytes": {
"description": "The stored data size (in bytes, post-replicas)",
"type": "integer"
},
"used_bytes": {
"description": "The total used space (in bytes)",
"description": "The total used space (in bytes, pre-replicas)",
"type": "integer"
},
"used_percent": {
@ -1302,6 +1306,19 @@
"description": "The topology of the assigned vCPUs in Sockets/Cores/Threads format",
"type": "string"
},
"vnc": {
"properties": {
"listen": {
"description": "The active VNC listen address or 'None'",
"type": "string"
},
"port": {
"description": "The active VNC port or 'None'",
"type": "string"
}
},
"type": "object"
},
"xml": {
"description": "The raw Libvirt XML definition of the VM",
"type": "string"

View File

@ -2,8 +2,7 @@
[Unit]
Description = Parallel Virtual Cluster node daemon
After = network-online.target zookeeper.service libvirtd.service
Wants = zookeeper.service libvirtd.service
After = network-online.target
PartOf = pvc.target
[Service]

View File

@ -54,7 +54,7 @@ import pvcnoded.CephInstance as CephInstance
import pvcnoded.MetadataAPIInstance as MetadataAPIInstance
# Version string for startup output
version = '0.9.10'
version = '0.9.13'
###############################################################################
# PVCD - node daemon startup program
@ -134,7 +134,7 @@ def readConfig(pvcnoded_config_file, myhostname):
with open(pvcnoded_config_file, 'r') as cfgfile:
try:
o_config = yaml.load(cfgfile)
o_config = yaml.load(cfgfile, Loader=yaml.SafeLoader)
except Exception as e:
print('ERROR: Failed to parse configuration file: {}'.format(e))
exit(1)
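Passing an explicit `Loader` both silences PyYAML's bare-`load()` deprecation warning (the "hassle messages" from the commit message) and restricts parsing to safe constructs; `yaml.safe_load()` is the equivalent shorthand. A minimal illustration:

```
import yaml

document = 'pvc:\n  node: hv1\n'

config_a = yaml.load(document, Loader=yaml.SafeLoader)  # explicit loader, as pvcnoded now does
config_b = yaml.safe_load(document)                     # equivalent shorthand

print(config_a == config_b)  # True -> {'pvc': {'node': 'hv1'}}
```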
@ -331,16 +331,29 @@ if not os.path.exists(config['log_directory']):
logger = log.Logger(config)
# Print our startup messages
logger.out('Parallel Virtual Cluster node daemon v{}'.format(version))
logger.out('FQDN: {}'.format(myfqdn))
logger.out('Host: {}'.format(myhostname))
logger.out('ID: {}'.format(mynodeid))
logger.out('IPMI hostname: {}'.format(config['ipmi_hostname']))
logger.out('Machine details:')
logger.out(' CPUs: {}'.format(staticdata[0]))
logger.out(' Arch: {}'.format(staticdata[3]))
logger.out(' OS: {}'.format(staticdata[2]))
logger.out(' Kernel: {}'.format(staticdata[1]))
logger.out('')
logger.out('|--------------------------------------------------|')
logger.out('| ######## ## ## ###### |')
logger.out('| ## ## ## ## ## ## |')
logger.out('| ## ## ## ## ## |')
logger.out('| ######## ## ## ## |')
logger.out('| ## ## ## ## |')
logger.out('| ## ## ## ## ## |')
logger.out('| ## ### ###### |')
logger.out('|--------------------------------------------------|')
logger.out('| Parallel Virtual Cluster node daemon v{0: <10} |'.format(version))
logger.out('| FQDN: {0: <42} |'.format(myfqdn))
logger.out('| Host: {0: <42} |'.format(myhostname))
logger.out('| ID: {0: <44} |'.format(mynodeid))
logger.out('| IPMI hostname: {0: <33} |'.format(config['ipmi_hostname']))
logger.out('| Machine details: |')
logger.out('| CPUs: {0: <40} |'.format(staticdata[0]))
logger.out('| Arch: {0: <40} |'.format(staticdata[3]))
logger.out('| OS: {0: <42} |'.format(staticdata[2]))
logger.out('| Kernel: {0: <38} |'.format(staticdata[1]))
logger.out('|--------------------------------------------------|')
logger.out('')
logger.out('Starting pvcnoded on host {}'.format(myfqdn), state='s')
# Define some colours for future messages if applicable
@ -1105,9 +1118,9 @@ def collect_ceph_stats(queue):
logger.out("Set pool information in zookeeper (primary only)", state='d', prefix='ceph-thread')
# Get pool info
command = {"prefix": "df", "format": "json"}
retcode, stdout, stderr = common.run_os_command('ceph df --format json', timeout=1)
try:
ceph_pool_df_raw = json.loads(ceph_conn.mon_command(json.dumps(command), b'', timeout=1)[1])['pools']
ceph_pool_df_raw = json.loads(stdout)['pools']
except Exception as e:
logger.out('Failed to obtain Pool data (ceph df): {}'.format(e), state='w')
ceph_pool_df_raw = []
@ -1142,6 +1155,7 @@ def collect_ceph_stats(queue):
# Assemble a useful data structure
pool_df = {
'id': pool['id'],
'stored_bytes': pool['stats']['stored'],
'free_bytes': pool['stats']['max_avail'],
'used_bytes': pool['stats']['bytes_used'],
'used_percent': pool['stats']['percent_used'],
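With the switch back to the external CLI, the pool statistics come from the JSON emitted by `ceph df --format json` rather than a librados `mon_command()` call. A hedged sketch of the same parse without PVC's `common.run_os_command()` wrapper, assuming the per-pool `stats` keys shown above and a host with the Ceph CLI available:

```
import json
import subprocess

# Roughly what common.run_os_command('ceph df --format json', timeout=1) does
result = subprocess.run(['ceph', 'df', '--format', 'json'],
                        capture_output=True, text=True, timeout=1)

for pool in json.loads(result.stdout).get('pools', []):
    stats = pool['stats']
    print(pool['id'], stats['stored'], stats['max_avail'],
          stats['bytes_used'], stats['percent_used'])
```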

View File

@ -62,13 +62,13 @@ class VMConsoleWatcherInstance(object):
def start(self):
self.thread_stopper.clear()
self.thread = Thread(target=self.run, args=(), kwargs={})
self.logger.out('Starting VM log parser', state='i', prefix='Domain {}:'.format(self.domuuid))
self.logger.out('Starting VM log parser', state='i', prefix='Domain {}'.format(self.domuuid))
self.thread.start()
# Stop execution thread
def stop(self):
if self.thread and self.thread.isAlive():
self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}:'.format(self.domuuid))
self.logger.out('Stopping VM log parser', state='i', prefix='Domain {}'.format(self.domuuid))
self.thread_stopper.set()
# Do one final flush
self.update()

View File

@ -27,6 +27,8 @@ import json
from threading import Thread
from xml.etree import ElementTree
import pvcnoded.zkhandler as zkhandler
import pvcnoded.common as common
@ -208,6 +210,21 @@ class VMInstance(object):
except Exception as e:
self.logger.out('Error removing domain from list: {}'.format(e), state='e')
# Update the VNC live data
def update_vnc(self):
if self.dom is not None:
live_xml = ElementTree.fromstring(self.dom.XMLDesc(0))
graphics = live_xml.find('./devices/graphics')
if graphics is not None:
self.logger.out('Updating VNC data', state='i', prefix='Domain {}'.format(self.domuuid))
port = graphics.get('port', '')
listen = graphics.get('listen', '')
zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): '{}:{}'.format(listen, port)})
else:
zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): ''})
else:
zkhandler.writedata(self.zk_conn, {'/domains/{}/vnc'.format(self.domuuid): ''})
# Start up the VM
def start_vm(self):
# Start the log watcher
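For reference, `update_vnc()` above inspects the domain's live XML (which carries the auto-allocated VNC port) and lifts the `listen` and `port` attributes from the `<graphics>` element before writing the combined `listen:port` string to Zookeeper. A standalone sketch of that extraction on a sample fragment:

```
from xml.etree import ElementTree

# Sample of the relevant fragment of a running domain's live XML
live_xml = ElementTree.fromstring(
    "<domain><devices>"
    "<graphics type='vnc' port='5900' listen='127.0.0.1'/>"
    "</devices></domain>"
)

graphics = live_xml.find('./devices/graphics')
if graphics is not None:
    vnc_value = '{}:{}'.format(graphics.get('listen', ''), graphics.get('port', ''))
else:
    vnc_value = ''
print(vnc_value)  # 127.0.0.1:5900
```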
@ -739,7 +756,8 @@ class VMInstance(object):
self.removeDomainFromList()
# Stop the log watcher
self.console_log_instance.stop()
# Update the VNC information
self.update_vnc()
else:
# Conditional pass three - Is this VM currently running on this node
if running == libvirt.VIR_DOMAIN_RUNNING: