Compare commits

..

369 Commits

Author SHA1 Message Date
3a90fda109 Bump version to 0.9.63 2023-04-28 14:47:04 -04:00
78322f4de4 Improve size handling during volume add/resize 2023-04-28 12:16:16 -04:00
c1782c5004 Add full/nearfull OSD health detection 2023-04-28 11:33:39 -04:00
9114255af5 Add *.update-* obsolete configs to dpkg plugin 2023-04-10 15:39:40 -04:00
b26bb5cb65 Mention Ganeti in the docs 2023-03-19 21:23:21 -04:00
74c4ce3ec7 Increase timeout for connections to API 2023-03-14 09:19:13 -04:00
2c3a3cdf52 Use try when watching health value in NodeInstance 2023-03-07 09:53:01 -05:00
0b583bfdaf Bump IPMI timeout to 2 seconds 2023-03-07 09:25:27 -05:00
7c07fbefff Adjust keepalive health printing and ordering 2023-02-24 11:08:30 -05:00
202dc3ed59 Correct error handling if monitoring plugins fail 2023-02-24 10:19:41 -05:00
8667f4d03b Add documentation details about plugin logging 2023-02-23 22:24:07 -05:00
4c2d99f8a6 Fix bug with SMART info 2023-02-23 13:21:23 -05:00
bcff6650d0 Set timeout on IPMI command 2023-02-23 11:10:09 -05:00
a11206253d Fix ZK check location 2023-02-23 11:04:02 -05:00
7f57c6dbf7 Adjust the main location too 2023-02-23 10:32:31 -05:00
6865979e08 Show possible version minimum 2023-02-23 10:30:45 -05:00
5126bc3272 Handle old clusters in cluster detail list 2023-02-23 10:28:55 -05:00
765f0ef13d Better handle N/A health from old versions 2023-02-23 10:22:00 -05:00
fe258d9d56 Correct bad health text call for old clusters 2023-02-23 10:19:18 -05:00
93d89a2414 Fix status when connecting to old clusters 2023-02-23 10:16:29 -05:00
a49f3810d3 Set maintenance colour in cluster detail 2023-02-22 18:20:18 -05:00
45ad3b9a17 Bump version to 0.9.62 2023-02-22 18:13:45 -05:00
07623fad1a Merge branch 'revamp-health'
Add detailed health checking, status reporting, and enhancements to the
PVC system.

Closes #161 #154 #159
2023-02-22 18:12:35 -05:00
8331b7ecd8 Add cluster detail list
Adds a command to show a list of details including health and item
counts for all configured clusters in the client.
2023-02-22 18:09:11 -05:00
94d4ee5b9b Lower default connect timeout to 1s 2023-02-22 18:09:01 -05:00
e773211293 Add PVC version to cluster status output 2023-02-22 16:09:24 -05:00
32c36c866b Add additional plugins to manual 2023-02-22 15:02:08 -05:00
dc4e56db4b Add IPMI monitoring check 2023-02-22 15:02:08 -05:00
e45b3108a2 Add health delta change to message output 2023-02-22 15:02:08 -05:00
118237a53b Fix bad string value for message 2023-02-22 15:02:08 -05:00
9805681f94 Use consistent connection with other checks 2023-02-22 15:02:08 -05:00
6c9abb2abe Add Libvirtd monitoring check 2023-02-22 15:02:08 -05:00
a1122c6e71 Add Zookeeper monitoring check 2023-02-22 15:02:08 -05:00
3696f81597 Add PostgreSQL monitoring check 2023-02-22 15:02:08 -05:00
5ca0d903b6 Adjust comment message 2023-02-22 15:02:08 -05:00
6ddbde763e Correct lint error E741 2023-02-22 12:21:29 -05:00
626424b74a Adjust Munin threshold values 2023-02-22 10:42:43 -05:00
b3d99827f5 Add documentation about new health and plugins 2023-02-22 01:40:48 -05:00
c9ceb3159b Remove obsolete LINKSPEED variable 2023-02-22 01:04:25 -05:00
6525a2568b Adjust health delta of load to 50
This is a very bad situation and should be critical.
2023-02-22 01:03:12 -05:00
09a005d3d7 Adjust health delta of EDAC Uncorrected to 50
This is a very bad situation and should be critical.
2023-02-22 01:01:54 -05:00
96defebd0b Add last item to swagger doc 2023-02-22 00:25:27 -05:00
d00b8aa6cd Add plugin directory and plugin details log fields 2023-02-22 00:19:05 -05:00
e9aa545e9b Update API specification 2023-02-22 00:06:52 -05:00
fb0fcc0597 Update readme for Munin plugin 2023-02-18 00:00:04 -05:00
3009f24910 Fix typo in var and flip conditional 2023-02-17 16:18:42 -05:00
5ae836f1c5 Fix various issues with PVC Munin plugin 2023-02-17 15:41:16 -05:00
70ba364f1d Flip VM state condition to remove shutdown
Don't cause health degradation for shutdown state, and flip the list
around to make it clearer.
2023-02-16 20:32:33 -05:00
eda1b95d5f Update Munin plugin example 2023-02-16 16:06:00 -05:00
3bd93563e6 Add CheckMK monitoring example plugins 2023-02-16 16:05:47 -05:00
1f8561d59a Format cluster health like node healths
Make a cleaner construct here.
2023-02-16 12:33:36 -05:00
a2efc83953 Exclude monitoring examples from flake8 2023-02-16 12:33:18 -05:00
f2d2537e1c Add JSON output format for node info 2023-02-15 21:35:44 -05:00
1093ca6264 Disallow health less than 0 2023-02-15 16:50:24 -05:00
15ff729f83 Fix comparison in maintenance check 2023-02-15 16:47:31 -05:00
29584e5636 Add per-node health entries for 3rd party checks 2023-02-15 16:44:49 -05:00
f4e8449356 Fix bugs and formatting of health messages 2023-02-15 16:28:56 -05:00
388f6556c0 Remove extra text from packages plugin 2023-02-15 16:28:41 -05:00
ec79acf061 Fix linting of cluster.py file 2023-02-15 15:48:31 -05:00
6c7be492b8 Move Ceph health to global cluster health 2023-02-15 15:46:13 -05:00
00586074cf Modify cluster health to use new values 2023-02-15 15:45:43 -05:00
f4eef30770 Add JSON health to cluster data 2023-02-15 15:26:57 -05:00
8565cf26b3 Add disk monitoring plugin 2023-02-15 11:30:49 -05:00
0ecf219910 Run setup during plugin loads 2023-02-15 10:11:38 -05:00
0f4edc54d1 Use percentage in keepalive output 2023-02-15 01:56:02 -05:00
ca91be51e1 Improve ethtool parsing speeds 2023-02-14 15:49:58 -05:00
e29d0e89eb Add NIC monitoring plugin 2023-02-14 15:43:52 -05:00
14d29f2986 Adjust text on log message 2023-02-13 22:21:23 -05:00
bc88d764b0 Add logging flag for monitoring plugin output 2023-02-13 22:04:39 -05:00
a3c31564ca Flip condition in EDAC check 2023-02-13 21:58:56 -05:00
b07396c39a Fix bugs if plugins fail to load 2023-02-13 21:51:48 -05:00
71139fa66d Add EDAC check plugin 2023-02-13 21:43:13 -05:00
e6f9e6e0e8 Fix several bugs and optimize output 2023-02-13 16:36:15 -05:00
1ea4800212 Set node health to None when restarting 2023-02-13 15:54:46 -05:00
9c14d84bfc Add node health value and send out API 2023-02-13 15:53:39 -05:00
d8f346abdd Move Ceph cluster health reporting to plugin
Also removes several outputs from the normal keepalive that were
superfluous/static so that the main output fits on one line.
2023-02-13 13:29:40 -05:00
2ee52e44d3 Move Ceph cluster health reporting to plugin
Also removes several outputs from the normal keepalive that were
superfluous/static so that the main output fits on one line.
2023-02-13 12:13:56 -05:00
3c742a827b Initial implementation of monitoring plugin system 2023-02-13 12:06:26 -05:00
aeb238f43c Bump version to 0.9.61 2023-02-08 10:08:05 -05:00
671a907236 Allow rename in disable state 2023-01-30 11:48:43 -05:00
e945fd8590 Remove bad casting to int in string compare 2023-01-01 13:55:10 -05:00
a49510ecc8 Bump version to 0.9.60 2022-12-06 15:42:55 -05:00
6d7730ab52 Disable RBD caching by default
Results in a massive (~2x) performance boost for random block I/O inside
VMs, and thus a worthwhile default change.
2022-12-05 17:56:59 -05:00
8135426973 Fix bad ref in example scripts 2022-11-18 12:54:28 -05:00
20d436a745 Update description 2022-11-16 22:48:40 -05:00
28f6819726 Fix up remaining bugs in Rinse test script 2022-11-16 13:32:24 -05:00
35c07f0384 Ensure transient dirs are cleaned up 2022-11-16 13:01:15 -05:00
6127387be4 Ensure swap is skipped during cleanup too 2022-11-16 12:52:24 -05:00
343d66875b Skip swap volumes during mounting 2022-11-16 12:42:28 -05:00
92feeefd26 Bump version to 0.9.59 2022-11-15 15:50:15 -05:00
38d63d9837 Flip behaviour of memory selectors
It didn't make any sense to me for mem(prov) to be the default selector,
since this has too many caveats versus mem(free). Switch to using
mem(free) as the default (i.e. "mem") and make memprov the alternative.
2022-11-15 15:45:59 -05:00
095bcb2373 Bump version to 0.9.58 2022-11-07 12:27:48 -05:00
91e450f399 Remove extra lower() call where not needed 2022-11-07 12:26:50 -05:00
79eb994a5e Ensure equality of none and None for selector 2022-11-07 11:59:53 -05:00
d65f512897 Bump version to 0.9.57 2022-11-06 01:39:50 -04:00
8af7189dd0 Add module tag for daemon lib 2022-11-04 03:47:18 -04:00
ea7a4b2b85 Make benchmarker function as a module
1. Move the test_matrix, volume name, and size to module-level variables
so they can be accessed externally if this is imported.
2. Separate the volume creation and volume cleanup into functions.
3. Separate the individual benchmark runs into a function.

This should enable easier calling of the various subcomponents
externally, e.g. for external benchmark scripts.
2022-11-03 21:33:32 -04:00
59f97ebbfb Better handle invalid nets in VMs
1. Error out when trying to add a new network to a VM if the network
doesn't exist on the cluster.
2. When showing the VM list, only show invalid networks in red, not the
whole list.
2022-11-01 10:24:24 -04:00
072337f1f0 Remove VXLAN ref where it isn't correct 2022-11-01 09:40:13 -04:00
c3bc55eff8 Bump version to 0.9.56 2022-10-27 14:21:04 -04:00
6c58d52fa1 Add node autoready oneshot unit
This replicates some of the more important functionality of the defunct
pvc-flush.service unit. On presence of a trigger file (i.e.
/etc/pvc/autoready), it will trigger a "node ready" on boot. It does
nothing on shutdown as this must be handled by other mechanisms, though
a similar autoflush could be added as well.
2022-10-27 14:09:14 -04:00
666e02fbfd Ensure None filesystem is valid 2022-10-21 15:13:52 -04:00
46dde055c4 Move /dev umount to cleanup step 2022-10-21 14:47:48 -04:00
ef437c3dbf Adjust help message text 2022-10-21 14:22:15 -04:00
bd2208e8f6 Add ova as valid name in addition to default_ova 2022-10-21 14:13:40 -04:00
62d5ff11df Fix console config and domain argument 2022-10-21 14:04:17 -04:00
0019881cfa Add cloud-init configuration to debootstrap script
Prevents errors trying to find the cloud-init metadata source.
2022-10-21 14:03:34 -04:00
d46133802b Add pfsense example provisioner script 2022-10-21 13:35:48 -04:00
fcadde057e Include /proc in chroot mounts 2022-10-20 15:00:10 -04:00
2608f38d64 Properly handle missing source_volume from OVAs 2022-10-19 13:18:12 -04:00
89f05ced3f Add missing ceph import 2022-10-19 13:10:40 -04:00
729481126c Move conversion to install() step
Seems more clear to me than doing it in prepare()
2022-10-19 13:09:29 -04:00
41eccb9c7d Add missing imports 2022-10-19 13:07:34 -04:00
e550e39a5a Add output messages during OVA prepare 2022-10-19 12:58:11 -04:00
dff156b2b0 Fix bad comparison 2022-10-19 12:46:15 -04:00
1c4fb80d1f Remove reference to automatic upload of OVA script 2022-10-19 03:37:12 -04:00
ec7beb08cc Add missing flag 2022-10-19 03:34:37 -04:00
3a180193ee Fix wording of initial script paragraphs 2022-10-19 03:27:14 -04:00
e26ff8a975 Fix missing f-string marker 2022-10-15 16:26:47 -04:00
6276414702 Use own domain for docs links 2022-10-08 21:12:59 -04:00
a34d64a71b Add rinse example configuration
Provisions Rocky Linux 8 and 9 systems, and potentially older
CentOS/Fedora/Scientific Linux/SuSE systems. Depends on a custom build
of rinse (3.7.1) with Rocky 9 support.
2022-10-07 19:55:56 -04:00
71297e0179 Add host-model to CPU config in VMs 2022-10-07 09:36:22 -04:00
45c9909428 Add output message to debootstrap install 2022-10-07 02:27:20 -04:00
7268592c87 Fix braces in fstring example 2022-10-06 15:57:31 -04:00
726d0a562b Update copyright header year 2022-10-06 11:55:27 -04:00
39e1fc50ed Fix titles 2022-10-06 11:54:36 -04:00
7a3870fc44 Add OVA script support
1. Ensure that system_template and script are not nullable in the DB.
2. Ensure that the CLI and API enforce the above and clean up CLI
arguments for profile add.
3. Ensure that, before uploading OVAs, a 'default_ova' provisioning
script is present.
4. Use the 'default_ova' script for new OVA uploads.
5. Ensure that OVA details are properly added to the vm_data dict in the
provisioner vmbuilder.
2022-10-06 10:48:12 -04:00
bffab7a5a1 Reverse numbering of example scripts 2022-10-06 10:14:37 -04:00
6cbaeb5dc8 Complete OVA provisioning script 2022-10-06 10:14:04 -04:00
58ce133c8d Remove lingering OVA references 2022-10-06 00:13:36 -04:00
43feb33caa Update documentation to reflect script changes 2022-10-06 00:06:02 -04:00
3a5d8c61da Reorganize and add more comments to examples 2022-10-05 23:35:53 -04:00
1e0b502250 Fix remaining bugs in example scripts 2022-10-05 22:37:11 -04:00
fe17d28385 Ensure inner cleanup and end message response 2022-10-05 22:36:42 -04:00
8aaac33056 Fix bad variable reference 2022-10-05 17:43:23 -04:00
cc7952c232 Add additional import for config 2022-10-05 17:41:37 -04:00
16915ed507 Add better exception handling with ctxtmgrs 2022-10-05 17:35:05 -04:00
2c624ceb2c Add additional missing import in examples 2022-10-05 17:29:34 -04:00
da85480488 Improve error messages 2022-10-05 17:26:09 -04:00
47b0704555 Fix bad ref in examples 2022-10-05 17:25:56 -04:00
7c49967586 Add proper imports to examples 2022-10-05 17:22:04 -04:00
e3f96ac87e Better handle cleanups and fix chroot bug 2022-10-05 17:21:30 -04:00
4df70cf086 Implement new provisioner setup 2022-10-05 16:03:05 -04:00
f1df1cfe93 Bump version to 0.9.55 2022-10-04 13:21:40 -04:00
5942aa50fc Avoid raise/handle deadlocks
Can cause log flooding in some edge cases and isn't really needed any
longer. Use a proper conditional followed by an actual error handler.
2022-10-03 14:04:12 -04:00
096bcdfd75 Try a literal eval first
This is a breakage between the older version of Celery (Deb10) and
newer. The hard removal broke Deb10 instances.

So try that first, and on failure, assume newer Celery format.
2022-09-06 10:34:50 -04:00
239c392892 Bump version to 0.9.54 2022-08-23 11:01:05 -04:00
172d0a86e4 Use proper SSLContext and enable TLSv1
It's bad, but sometimes you need to access the API from a very old
software version. So just enable it for now and clean it up later.
2022-08-23 10:58:47 -04:00
d8e57a26c5 Fix bad variable name 2022-08-18 11:37:57 -04:00
9b499b9f48 Bump version to 0.9.53 2022-08-12 17:47:11 -04:00
881550b610 Actually fix VM sorting
Due to the executor the previous attempt did not work.
2022-08-12 17:46:29 -04:00
2a21d48128 Bump version to 0.9.52 2022-08-12 11:09:25 -04:00
8d0f26ff7a Add additional kb_ values to OSD stats
Allows for easier parsing later to get e.g. % values and more details on
the used amounts.
2022-08-11 11:06:36 -04:00
bcabd7d079 Always sort VM list
Same justification as previous commit.
2022-08-09 12:05:40 -04:00
05a316cdd6 Ensure the node list is sorted
Otherwise the node entries could come back in an arbitrary order. Since
this is an ordered list of dictionaries, that might not be expected by
the API consumers, so ensure it's always sorted.
2022-08-09 12:03:49 -04:00
4b36753f27 Add reference to bootstrap in index 2022-08-03 20:22:16 -04:00
171f6ac9ed Add missing cluster_req for vm modify 2022-08-02 10:02:26 -04:00
645b525ad7 Bump version to 0.9.51 2022-07-25 23:25:41 -04:00
ec559aec0d Remove pvc-flush service
This service caused more headaches than it was worth, so remove it.

The original goal was to cleanly flush nodes on shutdown and unflush
them on startup, but this is tightly controlled by Ansible playbooks at
this point, and this is something best left to the Administrator and
their particular situation anyways.
2022-07-25 23:21:34 -04:00
71ffd5a191 Add confirmation to disable command 2022-07-21 16:43:37 -04:00
2739c27299 Remove faulty literal_eval 2022-07-18 13:35:15 -04:00
56129a3636 Fix bad changelog entries 2022-07-06 16:57:55 -04:00
932b3c55a3 Bump version to 0.9.50 2022-07-06 16:01:14 -04:00
92e2ff7449 Fix bug with space-containing detect strings 2022-07-06 15:58:57 -04:00
d8d3feee22 Add selector help and adjust flag name
1. Add documentation on the node selector flags. In the API, reference
the daemon configuration manual which now includes details in this
section; in the CLI, provide the help in "pvc vm define" in detail and
then reference that command's help in the other commands that use this
field.

2. Ensure the naming is consistent in the CLI, using the flag name
"--node-selector" everywhere (was "--selector" for "pvc vm" commands and
"--node-selector" for "pvc provisioner" commands).
2022-06-10 02:42:06 -04:00
b1357cafdb Add memfree to selector and use proper defaults 2022-06-10 02:03:12 -04:00
f8cdcb30ba Add migration selector via free memory
Closes #152
2022-05-18 03:47:16 -04:00
51ad2058ed Bump version to 0.9.49 2022-05-06 15:49:39 -04:00
c401a1f655 Use consistent language for primary mode
I didn't call it "router" anywhere else, but the state in the list is
called "coordinator" so, call it "coordinator mode".
2022-05-06 15:40:52 -04:00
7a40c7a55b Add support for replacing/refreshing OSDs
Adds commands to both replace an OSD disk, and refresh (reimport) an
existing OSD disk on a new node. This handles the cases where an OSD
disk should be replaced (either due to upgrades or failures) or where a
node is rebuilt in-place and an existing OSD must be re-imported to it.

This should avoid the need to do a full remove/add sequence for either
case.

Also cleans up some aspects of OSD removal that are identical between
methods (e.g. using safe-to-destroy and sleeping after stopping) and
fixes a bug if an OSD does not truly exist when the daemon starts up.
2022-05-06 15:32:06 -04:00
8027a6efdc Improve handling of rounded values 2022-05-02 15:29:30 -04:00
3801fcc07b Fix bug with initial JSON for stats 2022-05-02 13:28:19 -04:00
c741900baf Refactor OSD removal to use new ZK data
With the OSD LVM information stored in Zookeeper, we can use this to
determine the actual block device to zap rather than relying on runtime
determination and guesstimation.
2022-05-02 12:52:22 -04:00
464f0e0356 Store additional OSD information in ZK
Ensures that information like the FSIDs and the OSD LVM volume are
stored in Zookeeper at creation time and updated at daemon start time
(to ensure the data is populated at least once, or if the /dev/sdX
path changes).

This will allow safer operation of OSD removals and the potential
implementation of re-activation after node replacements.
2022-05-02 12:11:39 -04:00
cea8832f90 Ensure initial OSD stats is populated
Values are all invalid but this ensures the client won't error out when
trying to show an OSD that has never checked in yet.
2022-04-29 16:50:30 -04:00
5807351405 Bump version to 0.9.48 2022-04-29 15:03:52 -04:00
d6ca74376a Fix bugs with forced removal 2022-04-29 14:03:07 -04:00
413100a147 Ensure unresponsive OSDs still display in list
It is still useful to see such dead OSDs even if they've never checked
in or have not checked in for quite some time.
2022-04-29 12:11:52 -04:00
4d698be34b Add OSD removal force option
Ensures a removal can continue even in situations where some step(s)
might fail, for instance removing an obsolete OSD from a replaced node.
2022-04-29 11:16:33 -04:00
53aed0a735 Use a singular configured cluster by default
If there is...
  1. No '--cluster' passed, and
  2. No 'local' cluster, and
  3. There is exactly one cluster configured
...then use that cluster by default in the CLI.
2022-01-13 18:36:20 -05:00
ea709f573f Bump version to 0.9.47 2021-12-28 22:03:08 -05:00
1142454934 Add pool PGs count modification
Allows an administrator to adjust the PG count of a given pool. This can
be used to increase the PGs (for example after adding more OSDs) or
decrease it (to remove OSDs, reduce CPU load, etc.).
2021-12-28 21:53:29 -05:00
bbfad340a1 Add PGs count to pool list 2021-12-28 21:12:02 -05:00
c73939e1c5 Fix issue if pool stats have not updated yet 2021-12-28 21:03:10 -05:00
25fe45dd28 Add device class tiers to Ceph pools
Allows specifying a particular device class ("tier") for a given pool,
for instance SSD-only or NVMe-only. This is implemented with Crush
rules on the Ceph side, and via an additional new key in the pool
Zookeeper schema which is defaulted to "default".
2021-12-28 20:58:15 -05:00
58d57d7037 Bump version to 0.9.46 2021-12-28 15:02:14 -05:00
00d2c67c41 Allow single-node clusters to restart and timeout
Prevents a daemon from waiting forever to terminate if it is primary,
and avoids this entirely if there is only a single node in the cluster.
2021-12-28 03:06:03 -05:00
67131de4f6 Fix bug when removing OSDs
Ensure the OSD is down as well as out or purge might fail.
2021-12-28 03:05:34 -05:00
abc23ebb18 Handle detect strings as arguments for blockdevs
Allows specifying blockdevs in the OSD and OSD-DB addition commands as
detect strings rather than actual block device paths. This provides
greater flexibility for automation with pvcbootstrapd (which originates
the concept of detect strings) and in general usage as well.
2021-12-28 02:53:02 -05:00
9f122e916f Allow bypassing confirm message for benchmarks 2021-12-23 21:00:42 -05:00
3ce4d90693 Add auditing to local syslog from PVC client
This ensures that any client command is logged by the local system.
Helps ensure Accounting for users of the CLI. Currently logs the full
command executed along with the $USER environment variable contents.
2021-12-10 16:17:33 -05:00
6ccd19e636 Standardize fuzzy matching and use fullmatch
Solves two problems:

1. How match fuzziness was used was very inconsistent; make them all the
same, i.e. "if is_fuzzy and limit, apply .* to both sides".

2. Use re.fullmatch instead of re.match to ensure exact matching of the
regex to the value. Without fuzziness, this would sometimes cause
inconsistent behavior, for instance if a limit was non-fuzzy "vm",
expecting to match the actual "vm", but also matching "vm1" too.
2021-12-06 16:35:29 -05:00
d8689e6eaa Remove "and started" from message text
This is not necessarily the case.
2021-11-29 16:42:26 -05:00
bc49b5eca2 Fix bug with cloned image sizes 2021-11-29 14:56:50 -05:00
8470dfaa29 Fix bugs with legacy benchmark format 2021-11-26 11:42:35 -05:00
f164d898c1 Bump version to 0.9.45 2021-11-25 09:34:20 -05:00
195f31501c Ensure echo always has an argument 2021-11-25 09:33:26 -05:00
a8899a1d66 Fix ordering of pvcnoded unit
We want to be after network.target and want network-online.target
2021-11-18 16:56:49 -05:00
817dffcf30 Bump version to 0.9.44 2021-11-11 16:20:38 -05:00
eda2a57a73 Add Munin plugin for Ceph utilization 2021-11-08 15:21:09 -05:00
135d28e60b Add 0.05s to connection timeout
This is recommended by the Python Requests documentation:

> It’s a good practice to set connect timeouts to slightly larger than a
  multiple of 3, which is the default TCP packet retransmission window.
2021-11-08 03:11:41 -05:00
e7d7378bae Use separate connect and data timeouts
This allows us to keep a very low connect timeout of 3 seconds, but also
ensure that long commands (e.g. --wait or VM disable) can take as long
as the API requires to complete.

Avoids having to explicitly set very long single-instance timeouts for
other functions which would block forever on an unreachable API.
2021-11-08 03:10:09 -05:00
799c3e8d5d Fix quote in sed for unstable deb build 2021-11-08 02:54:27 -05:00
d0ec24f690 Add sudo to deploy-package task 2021-11-08 02:41:10 -05:00
6e9fcd38a3 Bump version to 0.9.43 2021-11-08 02:29:17 -05:00
f51f9fc4c8 Fix sed commands after Black formatting change 2021-11-08 02:29:05 -05:00
a6dcffc737 Remove references to Ansible manual 2021-11-08 00:29:47 -05:00
364c190106 Remove Ansible and Testing manuals
The Ansible manual can't keep up with the other repo, so it should live
there instead (eventually, after significant rewrites).

The Testing page is obsoleted by the "test-cluster" script.
2021-11-08 00:25:27 -05:00
ea19af6494 Allow American spelling for compatibility 2021-11-08 00:09:59 -05:00
7069d3237c Shorten help messages slightly to fit 2021-11-08 00:07:21 -05:00
619c3f7ff5 Add forced colour support
Allows preserving colour within e.g. watch, where Click would normally
determine that it is "not a terminal". This is done via the wrapper echo
which filters via the local config.
2021-11-08 00:04:20 -05:00
8a75bb3011 Add funding configuration 2021-11-06 18:05:17 -04:00
a817c3e678 Add start delineators to command output 2021-11-06 13:35:30 -04:00
0cc3f2deab Revamp formatting and linting on commit
Remove the prepare script, and run the two stages manually. Better
handle Black reformatting by doing a check (for the errcode) then
reformat and abort commit to review.
2021-11-06 13:34:33 -04:00
21b4bbe51a Apply more granular timeout formatting
We don't need to wait forever if state changes aren't waiting or disable
(which does a shutdown before returning).
2021-11-06 13:34:03 -04:00
87ec31c023 Up timeout when setting VM state
Ensures the API won't time out immediately especially during a
wait-flagged or disable action.
2021-11-06 04:15:10 -04:00
0d857d5ab8 Use positive check rather than negative
Ensure the VM is started before doing shutdown/stop, rather than being
stopped. Prevents overwrite of existing disable state and other
weirdness.
2021-11-06 04:08:33 -04:00
006f40f195 Add disable forcing to CLI
References #148
2021-11-06 04:02:50 -04:00
5f193a6134 Perform automatic shutdown/stop on VM disable
Instead of requiring the VM to already be stopped, instead allow disable
state changes to perform a shutdown first. Also add a force option which
will do a hard stop instead of a shutdown.

References #148
2021-11-06 03:57:24 -04:00
78faa90139 Reformat recent changes with Black 2021-11-06 03:27:07 -04:00
23b1501f40 Fix linting error F541 f-string placeholders 2021-11-06 03:26:03 -04:00
66bfad3109 Fix linting errors F522/F523 unused args 2021-11-06 03:24:50 -04:00
eee5c25d6f Rename build-deb.sh to build-stable-deb.sh
Unifies the naming with the other build-unstable-deb.sh script.
2021-11-06 03:18:58 -04:00
ff4fc18a60 Remove obsolete gitlab-ci config 2021-11-06 03:18:22 -04:00
ac885b855a Ensure all helper scripts pushd/popd
Make sure all of these move to the root of the repository first, then
return to where they were afterwards, using pushd/popd. This allows them
to be executed from anywhere in the repo.
2021-11-06 03:17:47 -04:00
b9c30baf80 Unify formatting and linting
Ensures optimal formatting in addition to linting during manual deploys
and during pre-commit actions.
2021-11-06 03:10:17 -04:00
9b12cc0236 Add newline to start of lint 2021-11-06 03:04:14 -04:00
c41664d2da Reformat code with Black code formatter
Unify the code style along PEP and Black principles using the tool.
2021-11-06 03:02:43 -04:00
3779bc960e Add safe mode to Black 2021-11-06 02:59:54 -04:00
5c620262e9 Move Flake configuration into dedicated file
Avoid passing arguments in the script.
2021-11-06 02:55:37 -04:00
6b88fbd1e3 Clean up linter after Black add (pass two) 2021-11-06 02:51:14 -04:00
a50c8e6a4d Exclude Alembic migrations from Black
These files are autogenerated with their own formats, so we don't want
to override that.
2021-11-06 02:46:06 -04:00
7d6e4353f1 Clean up linter after Black add (pass one) 2021-11-06 02:44:24 -04:00
bf30b31db6 Add black formatter to project root 2021-11-06 02:44:05 -04:00
70bd601dc1 Add Basic Builder configuration
Configuration for my new CI system under Gitea.
2021-10-31 00:09:55 -04:00
2e7b9b28b3 Add some delay and additional tries to fencing 2021-10-27 16:24:17 -04:00
12eef58d42 Fix ordering to show correct message 2021-10-27 13:37:52 -04:00
f2e6892fd2 Support adding the same network to a VM again
This is a supported configuration for some edge cases and should be
allowed.
2021-10-27 13:33:27 -04:00
91fb9e1241 Reorder linting on build-and-deploy 2021-10-27 13:25:14 -04:00
d87bea4159 More gracefully handle restart + live
Instead of erroring, just use the implication that restarting a VM does
not want a live modification, and proceed from there. Update the help
text to match.
2021-10-27 13:23:39 -04:00
3a6f442856 Support removing VM interfaces by MAC
Provides a way to handle multiple interfaces in the same network
gracefully, while making the previous behaviour explicit.
2021-10-27 13:20:05 -04:00
dfca998adf Fix bad test in postinst 2021-10-19 00:27:12 -04:00
55f397a347 Fix bad location of config sets 2021-10-12 17:23:04 -04:00
dfebb2d3e5 Also validate on failures 2021-10-12 17:11:03 -04:00
e88147db4a Bump version to 0.9.42 2021-10-12 15:25:42 -04:00
b8204d89ac Go back to passing if exception
Validation already happened and the set happens again later.
2021-10-12 14:21:52 -04:00
fe73dfbdc9 Use current live value for bridge_mtu
This will ensure that upgrading without the bridge_mtu config key set
will keep things as they are.
2021-10-12 12:24:03 -04:00
8f906c1f81 Use power off in fence instead of reset
Use a power off (and then make the power on a requirement) during a node
fence. Removes some potential ambiguity in the power state, since we
will know for certain if it is off.
2021-10-12 11:04:27 -04:00
2d9fb9688d Validate network MTU after initial read 2021-10-12 10:53:17 -04:00
fb84685c2a Make cluster example images clickable 2021-10-12 03:15:04 -04:00
032ba44d9c Mention fencing only in run state 2021-10-12 03:05:01 -04:00
b7761877e7 Adjust more wording and fix typos 2021-10-12 03:00:21 -04:00
1fe07640b3 Adjust some wording 2021-10-12 02:54:16 -04:00
b8d843ebe4 Remove codeql setup
I don't use this for anything useful, so disable it since a run takes
ages.
2021-10-12 02:51:19 -04:00
95d983ddff Fix formatting of subsection 2021-10-12 02:49:40 -04:00
4c5da1b6a8 Add reference to Ansible manual 2021-10-12 02:48:47 -04:00
be6b1e02e3 Fix spelling errors 2021-10-12 02:47:31 -04:00
ec2a72ed4b Fix link to cluster architecture docs 2021-10-12 02:41:22 -04:00
b06e327add Adjust getting started docs
Update the docs with the current information on setting up a cluster,
including simplifying the Ansible configuration to use the new
create-local-repo.sh script, and simplifying some other sections.
2021-10-12 02:39:25 -04:00
d1f32d2b9c Default to removing build artifacts in b-a-d.sh 2021-10-11 16:41:00 -04:00
3f78ca1cc9 Add explicit 3 second timeout to requests 2021-10-11 16:31:18 -04:00
e866335918 Add version function support to CLI 2021-10-11 15:34:41 -04:00
221494ed1b Add new configs for Ansible 2021-10-11 14:44:18 -04:00
f13cc04b89 Bump version to 0.9.41 2021-10-09 19:39:21 -04:00
4ed537ee3b Add bridge_mtu config to docs 2021-10-09 19:28:50 -04:00
95e01f38d5 Adjust log type of object setup message 2021-10-09 19:23:12 -04:00
3122d73bf5 Avoid duplicate runs of MTU set
It wasn't the validator duplicating, but the update duplicating, so
avoid that happening properly this time.
2021-10-09 19:21:47 -04:00
7ed8ef179c Revert "Avoid duplicate runs of MTU validator"
This reverts commit 56021c443a.
2021-10-09 19:11:42 -04:00
caead02b2a Set all log messages to information state
None of these were "success" messages and thus shouldn't have been ok
state.
2021-10-09 19:09:38 -04:00
87bc5f93e6 Avoid duplicate runs of MTU validator 2021-10-09 19:07:41 -04:00
203893559e Use correct isinstance instead of type 2021-10-09 19:03:31 -04:00
2c51bb0705 Move MTU validation to function
Prevents code duplication and ensures validation runs when an MTU is
updated, not just on network creation.
2021-10-09 19:01:45 -04:00
46d3daf686 Add logger message when setting MTU 2021-10-09 18:56:18 -04:00
e9d05aa24e Ensure vx_mtu is always an int() 2021-10-09 18:52:50 -04:00
d2c18d7b46 Fix bad header length in network list 2021-10-09 18:50:32 -04:00
6ce28c43af Add MTU value checking and log messages
Ensures that if a specified MTU is more than the maximum it is set to
the maximum instead, and adds warning messages for both situations.
2021-10-09 18:48:56 -04:00
87cda72ca9 Fix invalid schema key
Addresses #144
2021-10-09 18:42:33 -04:00
8f71a6d2f6 Add MTU support to network add/modify commands
Addresses #144
2021-10-09 18:06:21 -04:00
c45f8f5bd5 Have VXNetworkInstance set MTU if unset
Makes this explicit in Zookeeper if a network is unset, post-migration
(schema version 6).

Addresses #144
2021-10-09 17:52:57 -04:00
24de0f4189 Add MTU to network creation/modification
Addresses #144
2021-10-09 17:51:32 -04:00
3690a2c1e0 Fix migration bugs and invalid vx_mtu
Addresses #144
2021-10-09 17:35:10 -04:00
50d8aa0586 Add handlers for client network MTUs
Refactors some of the code in VXNetworkInterface to handle MTUs in a
more streamlined fashion. Also fixes a bug whereby bridge client
networks were being explicitly given the cluster dev MTU which might not
be correct. Now adds support for this option explicitly in the configs,
and defaults to 1500 for safety (the standard Ethernet MTU).

Addresses #144
2021-10-09 17:02:27 -04:00
db6e65712d Make n-1 values clearer 2021-10-07 18:11:15 -04:00
cf8e16543c Correct levels in TOC 2021-10-07 18:08:28 -04:00
1a4fcdcc2d Correct spelling errors 2021-10-07 18:07:06 -04:00
9a71db0800 Add documentation sections on IPMI and fencing 2021-10-07 18:05:47 -04:00
6ee4c55071 Correct flawed conditional in verify_ipmi 2021-10-07 15:11:19 -04:00
c27359c4bf Bump version to 0.9.40 2021-10-07 14:42:04 -04:00
46078932c3 Correct bad stop_keepalive_timer call 2021-10-07 14:41:12 -04:00
c89699bc6f Remove redundant wording from header 2021-10-07 12:20:04 -04:00
1b9507e4f5 Replace headers with links in CHANGELOG.md 2021-10-07 12:17:44 -04:00
3db7ac48f4 Add missing period to changelog sentence 2021-10-07 12:10:35 -04:00
1830ec6465 Move changelog into dedicated file
The changelog was getting far too long for the README/docs index to
support, so move it into CHANGELOG.md and link to it instead.
2021-10-07 12:09:26 -04:00
bdb9db8375 Bump version to 0.9.39 2021-10-07 11:52:38 -04:00
c61d7bc313 Add linting to build-and-deploy
Ensures that bad code isn't deployed during testing.
2021-10-07 11:51:05 -04:00
c0f7ba0125 Add limit negation to VM list
When using the "state", "node", or "tag" arguments to a VM list, add
support for a "negate" flag to look for all VMs *not in* the state,
node, or tag state.
2021-10-07 11:50:52 -04:00
761032b321 Add note about fencing at remote sites 2021-10-04 19:58:08 -04:00
3566e13e79 Correct TOC in architecture page 2021-10-04 01:54:22 -04:00
6b324029cf Correct spelling errors 2021-10-04 01:51:58 -04:00
13eeabf44b Double image sizes for example clusters 2021-10-04 01:47:35 -04:00
d86768d3d0 Adjust toc_depth for RTD theme 2021-10-04 01:45:05 -04:00
a167757600 Revamp about and architecture docs
Makes these a little simpler to follow and provides some more up-to-date
information based on recent tests and developments.
2021-10-04 01:42:08 -04:00
a95d9680ac Adjust bump-version changelog heading level 2021-10-04 01:41:48 -04:00
63962f10ba Move changelog headers down one more level 2021-10-04 01:41:22 -04:00
a7a681d92a Adjust indent of index/README versions 2021-10-04 00:33:24 -04:00
da9248cfa2 Bump version to 0.9.38 2021-10-03 22:32:41 -04:00
aa035a61a7 Correct latency units and format name 2021-10-03 17:06:34 -04:00
7c8ba56561 Revamp test result display
Instead of showing CLAT percentiles, which are very hard to interpret
and understand, instead use the main latency buckets.
2021-10-03 15:49:01 -04:00
bba73980de Revamp postinst for the API daemon
Ensures that the worker is always restarted and makes the NOTE
conditional more specific.
2021-10-03 15:15:26 -04:00
32b3af697c Tweak fio tests for benchmarks
1. Remove ramp_time as this was giving very strange results.

2. Up the runtime to 75 seconds to compensate.

3. Print the fio command to the console to validate.
2021-10-03 15:06:18 -04:00
7c122ac921 Add benchmark format to list 2021-10-03 15:05:58 -04:00
0dbf139706 Adjust ETA for benchmarks 2021-10-02 04:51:01 -04:00
c909beaf6d Add format parsing for format 1 storage benchmarks 2021-10-02 04:46:44 -04:00
2da49297d2 Add version 2 benchmark list formatting 2021-10-02 02:47:17 -04:00
0ff9a6b8c4 Handle benchmark running state properly 2021-10-02 01:54:51 -04:00
28377178d2 Fix missing argument in database insert 2021-10-02 01:49:47 -04:00
e06b114c48 Update to storage benchmark format 1
1. Runs `fio` with the `--format=json` option and removes all terse
format parsing from the results.

2. Adds a 15-second ramp time to minimize wonky ramp-up results.

3. Sets group_reporting, which isn't necessary with only a single job,
but is here for consistency.
2021-10-02 01:41:08 -04:00
0058f19d88 Fix handling of array of information
With a benchmark info we only ever want test one, so pass only that to
the formatter. Simplifies the format function.
2021-10-02 01:28:39 -04:00
056cf3740d Avoid versioning benchmark lists
This wouldn't work since each individual test is versioned. Instead add
a placeholder for later once additional format(s) are defined.
2021-10-02 01:25:18 -04:00
58f174b87b Add format option to benchmark info
Allows specifying of raw json or json-pretty formats in addition to the
"pretty" formatted option.
2021-10-02 01:13:50 -04:00
37b98fd54f Add benchmark format function support
Allows choosing different list and info functions based on the benchmark
version found. Currently only implements "legacy" version 0 with more to
be added.
2021-10-02 01:07:25 -04:00
f83a345bfe Add test format versioning to storage benchmarks
Adds a test_format database column and a value in the API return for the
test format version, starting at 0 for the existing format as of 0.9.37.

References #143
2021-10-02 00:55:27 -04:00
ce06e4d81b Load benchmark results as JSON
Load the JSON at the API side instead of client side, because that's
what the API doc says it is and it just makes more sense.
2021-09-30 23:40:24 -04:00
23977b04fc Bump version to 0.9.37 2021-09-30 02:08:14 -04:00
bb1cca522f Revamp benchmark tests
1. Move to a time-based (60s) benchmark to avoid these taking an absurd
amount of time to show the same information.

2. Eliminate the 256k random benchmarks, since they don't really add
anything.

3. Add in a 4k single-queue benchmark as this might provide valuable
insight into latency.

4. Adjust the output to reflect the above changes.

While this does change the benchmarking, this should not invalidate any
existing benchmarks since most of the test suite is unchanged (especially
the most important 4M sequential and 4K random tests). It simply removes
an unused entry and adds a more helpful one. The time-based change
should not significantly affect the results either, just reduces the
total runtime for long tests and increases the runtime for quick tests to
provide a better picture.
2021-09-29 20:51:30 -04:00
9a4dce4e4c Add primary node to benchmark job name
Ensures tracking of the current primary node the job was run on, since
this may be relevant for performance reasons.
2021-09-28 09:58:22 -04:00
f6f6f07488 Add timeouts to queue gets and adjust
Ensure that all keepalive timeouts are set (prevent the queue.get()
actions from blocking forever) and set the thread timeouts to line up as
well. Everything here is thus limited to keepalive_interval seconds
(default 5s) to keep it uniform.
2021-09-27 16:10:27 -04:00
142c999ce8 Re-add success log output during migration 2021-09-27 11:50:55 -04:00
1de069298c Fix missing character in log message 2021-09-27 00:49:43 -04:00
55221b3d97 Simplify VM migration down to 3 steps
Remove two superfluous synchronization steps which are not needed here,
since the exclusive lock handles that situation anyways.

Still does not fix the weird flush->unflush lock timeout bug, but is
better worked-around now due to the cancelling of the other wait freeing
this up and continuing.
2021-09-27 00:03:20 -04:00
0d72798814 Work around synchronization lock issues
Make the block on stage C only wait for 900 seconds (15 minutes) to
prevent indefinite blocking.

The issue comes if a VM is being received, and the current unflush is
cancelled for a flush. When this happens, this lock acquisition seems to
block for no obvious reason, and no other changes seem to affect it.
This is certainly some sort of locking bug within Kazoo but I can't
diagnose it as-is. Leave a TODO to look into this again in the future.
2021-09-26 23:26:21 -04:00
3638efc77e Improve log messages during VM migration 2021-09-26 23:15:38 -04:00
c2c888d684 Use event to non-block wait and fix inf wait 2021-09-26 22:55:39 -04:00
febef2e406 Track status of VM state thread 2021-09-26 22:55:21 -04:00
2a4f38e933 Simplify locking process for VM migration
Rather than using a cumbersome and overly complex ping-pong of read and
write locks, instead move to a much simpler process using exclusive
locks.

Describing the process in ASCII or narrative is cumbersome, but the
process ping-pongs via a set of exclusive locks and wait timers, so that
the two sides are able to synchronize via blocking the exclusive lock.
The end result is a much more streamlined migration (takes about half
the time all things considered) which should be less error-prone.
2021-09-26 22:08:07 -04:00
3b805cdc34 Fix failure to connect to libvirt in keepalive
This should be caught and abort the thread rather than failing and
holding up keepalives.
2021-09-26 20:42:01 -04:00
06f0f7ed91 Fix several bugs in fence handling
1. Output from ipmitool was not being stripped, and stray newlines were
throwing off the comparisons. Fixes this.

2. Several stages were lacking meaningful messages. Adds these in so the
output is more clear about what is going on.

3. Reduce the sleep time after a fence to just 1x the
keepalive_interval, rather than 2x, because this seemed excessively
long even for slow IPMI interfaces, especially since we're checking the
power state now anyways.

4. Set the node daemon state to an explicit 'fenced' state after a
successful fence to indicate to users that the node was indeed fenced
successfully and not still 'dead'.
2021-09-26 20:07:30 -04:00
fd040ab45a Ensure pvc-flush is after network-online 2021-09-26 17:40:42 -04:00
e23e2dd9bf Fix typo in log message 2021-09-26 03:35:30 -04:00
ee4266f8ca Tweak CLI helptext around OSD actions
Adds some more detail about OSD commands and their values.
2021-09-26 01:29:23 -04:00
0f02c5eaef Fix typo in sgdisk command options 2021-09-26 00:59:05 -04:00
075abec5fe Use re.search instead of re.match
Required since we're not matching the start of the string.
2021-09-26 00:55:29 -04:00
3a1cbf8d01 Raise basic exceptions in CephInstance
Avoids "No active exception to reraise" errors on failures.
2021-09-26 00:50:10 -04:00
a438a4155a Fix OSD creation for partition paths and fix gdisk
The previous implementation did not work with /dev/nvme devices or any
/dev/disk/by-* devices due to some logical failures in the partition
naming scheme, so fix these, and be explicit about what is supported in
the PVC CLI command output.

The 'echo | gdisk' implementation of partition creation also did not
work due to limitations of subprocess.run; instead, use sgdisk which
allows these commands to be written out explicitly and is included in
the same package as gdisk.
2021-09-26 00:12:28 -04:00
65df807b09 Add support for configurable OSD DB ratios
The default of 0.05 (5%) is likely ideal in the initial implementation,
but allow this to be set explicitly for maximum flexibility in
space-constrained or performance-critical use-cases.
2021-09-24 01:06:39 -04:00
d0f3e9e285 Bump version to 0.9.36 2021-09-23 14:01:38 -04:00
adc8a5a3bc Add separate OSD DB device support
Adds in three parts:

1. Create an API endpoint to create OSD DB volume groups on a device.
Passed through to the node via the same command pipeline as
creating/removing OSDs, and creates a volume group with a fixed name
(osd-db).

2. Adds API support for specifying whether or not to use this DB volume
group when creating a new OSD via the "ext_db" flag. Naming and sizing
is fixed for simplicity and based on Ceph recommendations (5% of OSD
size). The Zookeeper schema tracks the block device to use during
removal.

3. Adds CLI support for the new and modified API endpoints, as well as
displaying the block device and DB block device in the OSD list.

While I debated supporting adding a DB device to an existing OSD, in
practice this ended up being a very complex operation involving stopping
the OSD and setting some options, so this is not supported; this can be
specified during OSD creation only.

Closes #142
2021-09-23 13:59:49 -04:00
df277edf1c Move console watcher stop try up
Could cause an exception if d_domain is not defined yet.
2021-09-22 16:02:04 -04:00
772807deb3 Bump version to 0.9.35 2021-09-13 02:20:46 -04:00
58db537093 Add memory and vCPU checks to VM define/modify
Ensures that a VM won't:

(a) Have provisioned more RAM than there is available on a given node.
Due to memory overprovisioning, this is simply a "is the VM memory count
more than the node count", and doesn't factor in free or used memory on
a node, total cluster usage, etc. So if a node has 64GB total RAM, the
VM limit is 64GB. It is up to an administrator to ensure sanity *below*
that value.

(b) Have provisioned more vCPUs than there are CPU cores on the node,
minus 2 to account for hypervisor/storage processes. Will ensure there
is no severe CPU contention caused by a single VM having more vCPUs than
there are actual execution threads available.

Closes #139
2021-09-13 01:51:21 -04:00
e71a6c90bf Add pool size check when resizing volumes
Closes #140
2021-09-12 19:54:51 -04:00
a8e9a56924 Increase build-and-deploy sleep 2021-09-12 19:50:58 -04:00
f3fb492633 Handle VM disk/network stats gathering exceptions 2021-09-12 19:41:07 -04:00
e962743e51 Add VM device hot attach/detach support
Adds a new API endpoint to support hot attach/detach of devices, and the
corresponding client-side logic to use this endpoint when doing VM
network/storage add/remove actions.

The live attach is now the default behaviour for these types of
additions and removals, and can be disabled if needed.

Closes #141
2021-09-12 19:33:00 -04:00
46f1d761f6 Adjust lint script for newer linter 2021-09-12 15:40:38 -04:00
be954c1625 Don't crash cleanup if no this_node 2021-08-29 03:52:18 -04:00
fb46f5f9e9 Change default node object state to flushed 2021-08-29 03:34:08 -04:00
694b8e85a0 Bump version to 0.9.34 2021-08-24 16:15:25 -04:00
eb321497ee Properly handle =-separated fsargs 2021-08-24 11:40:22 -04:00
5b81e59481 Convert argument elements of task status to types 2021-08-23 14:28:12 -04:00
a4c0e0befd Fix typo in output message 2021-08-23 00:39:19 -04:00
a18cef5f25 Bump version to 0.9.33 2021-08-21 03:28:48 -04:00
f6c5aa9992 Avoid failing if no provisioner tasks 2021-08-21 03:25:16 -04:00
ffa3dd5edb Ensure pycache files are removed on deb creation 2021-08-21 03:19:18 -04:00
afb0359c20 Refactor pvcnoded to reduce Daemon.py size
This branch commit refactors the pvcnoded component to better adhere to
good programming practices. The previous Daemon.py was a massive file
which contained almost 2000 lines of direct, root-level code which was
directly imported. Not only was this poor practice, but this resulted
in a nigh-unmaintainable file which was hard even for me to understand.

This refactoring splits a large section of the code from Daemon.py into
separate small modules and functions in the `util/` directory. This will
hopefully make most of the functionality easy to find and modify without
having to dig through a single large file.

Further the existing subcomponents have been moved to the `objects/`
directory which clearly separates them.

Finally, the Daemon.py code has mostly been moved into a function,
`entrypoint()`, which is then called from the `pvcnoded.py` stub.

An additional item is that most format strings have been replaced by
f-strings to make use of the Python 3.6 features in Daemon.py and the
utility files.
2021-08-21 03:14:22 -04:00
afdf254297 Bump version to 0.9.32 2021-08-19 12:37:58 -04:00
42e776fac1 Properly handle exceptions getting VM stats 2021-08-19 12:36:31 -04:00
dae67a1b7b Fix image dimensions and size 2021-08-18 19:51:55 -04:00
b86f8c1e09 Add screenshots to docs 2021-08-18 19:49:53 -04:00
42 changed files with 111 additions and 16571 deletions

View File

@@ -3,9 +3,7 @@
# * W503 (line break before binary operator): Black moves these to new lines
# * E501 (line too long): Long lines are a fact of life in comment blocks; Black handles active instances of this
# * E203 (whitespace before ':'): Black recommends this as disabled
# * F403 (import * used; unable to detect undefined names): We use a wildcard for helpers
# * F405 (possibly undefined name): We use a wildcard for helpers
ignore = W503, E501, F403, F405
ignore = W503, E501
extend-ignore = E203
# We exclude the Debian, migrations, and provisioner examples
exclude = debian,api-daemon/migrations/versions,api-daemon/provisioner/examples,node-daemon/monitoring

View File

@@ -441,7 +441,7 @@ class VMBuilderScript(VMBuilder):
# The directory we mounted things on earlier during prepare(); this could very well
# be exposed as a module-level variable if you so choose
temp_dir = "/tmp/target"
temporary_directory = "/tmp/target"
# Use these convenient aliases for later (avoiding lots of "self.vm_data" everywhere)
vm_name = self.vm_name
@@ -469,8 +469,6 @@ class VMBuilderScript(VMBuilder):
"grub-pc",
"cloud-init",
"python3-cffi-backend",
"acpid",
"acpi-support-base",
"wget",
]
@@ -484,17 +482,17 @@ class VMBuilderScript(VMBuilder):
# Perform a debootstrap installation
print(
f"Installing system with debootstrap: debootstrap --include={','.join(deb_packages)} {deb_release} {temp_dir} {deb_mirror}"
f"Installing system with debootstrap: debootstrap --include={','.join(deb_packages)} {deb_release} {temporary_directory} {deb_mirror}"
)
os.system(
f"debootstrap --include={','.join(deb_packages)} {deb_release} {temp_dir} {deb_mirror}"
f"debootstrap --include={','.join(deb_packages)} {deb_release} {temporary_directory} {deb_mirror}"
)
# Bind mount the devfs so we can grub-install later
os.system("mount --bind /dev {}/dev".format(temp_dir))
os.system("mount --bind /dev {}/dev".format(temporary_directory))
# Create an fstab entry for each volume
fstab_file = "{}/etc/fstab".format(temp_dir)
fstab_file = "{}/etc/fstab".format(temporary_directory)
# The volume ID starts at zero and increments by one for each volume in the fixed-order
# volume list. This lets us work around the insanity of Libvirt IDs not matching guest IDs,
# while still letting us have some semblance of control here without enforcing things
@@ -539,13 +537,13 @@ class VMBuilderScript(VMBuilder):
volume_id += 1
# Write the hostname; you could also take an FQDN argument for this as an example
hostname_file = "{}/etc/hostname".format(temp_dir)
hostname_file = "{}/etc/hostname".format(temporary_directory)
with open(hostname_file, "w") as fh:
fh.write("{}".format(vm_name))
# Fix the cloud-init.target since it's broken by default in Debian 11
cloudinit_target_file = "{}/etc/systemd/system/cloud-init.target".format(
temp_dir
temporary_directory
)
with open(cloudinit_target_file, "w") as fh:
# We lose our indent on these raw blocks to preserve the appearance of the files
@@ -559,7 +557,7 @@ After=multi-user.target
fh.write(data)
# Write the cloud-init configuration
ci_cfg_file = "{}/etc/cloud/cloud.cfg".format(temp_dir)
ci_cfg_file = "{}/etc/cloud/cloud.cfg".format(temporary_directory)
with open(ci_cfg_file, "w") as fh:
fh.write(
"""
@@ -620,15 +618,15 @@ After=multi-user.target
- arches: [default]
failsafe:
primary: {deb_mirror}
""".format(
deb_mirror=deb_mirror
)
)
"""
).format(deb_mirror=deb_mirror)
# Due to device ordering within the Libvirt XML configuration, the first Ethernet interface
# will always be on PCI bus ID 2, hence the name "ens2".
# Write a DHCP stanza for ens2
ens2_network_file = "{}/etc/network/interfaces.d/ens2".format(temp_dir)
ens2_network_file = "{}/etc/network/interfaces.d/ens2".format(
temporary_directory
)
with open(ens2_network_file, "w") as fh:
data = """auto ens2
iface ens2 inet dhcp
@@ -636,7 +634,7 @@ iface ens2 inet dhcp
fh.write(data)
# Write the DHCP config for ens2
dhclient_file = "{}/etc/dhcp/dhclient.conf".format(temp_dir)
dhclient_file = "{}/etc/dhcp/dhclient.conf".format(temporary_directory)
with open(dhclient_file, "w") as fh:
# We can use fstrings too, since PVC will always have Python 3.6+, though
# using format() might be preferable for clarity in some situations
@@ -656,7 +654,7 @@ interface "ens2" {{
fh.write(data)
# Write the GRUB configuration
grubcfg_file = "{}/etc/default/grub".format(temp_dir)
grubcfg_file = "{}/etc/default/grub".format(temporary_directory)
with open(grubcfg_file, "w") as fh:
data = """# Written by the PVC provisioner
GRUB_DEFAULT=0
@@ -673,7 +671,7 @@ GRUB_DISABLE_LINUX_UUID=false
fh.write(data)
# Do some tasks inside the chroot using the provided context manager
with chroot(temp_dir):
with chroot(temporary_directory):
# Install and update GRUB
os.system(
"grub-install --force /dev/rbd/{}/{}_{}".format(
@@ -706,17 +704,16 @@ GRUB_DISABLE_LINUX_UUID=false
"""
# Run any imports first
import os
from pvcapid.vmbuilder import open_zk
from pvcapid.Daemon import config
import daemon_lib.common as pvc_common
import daemon_lib.ceph as pvc_ceph
# Set the temp_dir we used in the prepare() and install() steps
# Set the tempdir we used in the prepare() and install() steps
temp_dir = "/tmp/target"
# Unmount the bound devfs
os.system("umount {}/dev".format(temp_dir))
os.system("umount {}/dev".format(temporary_directory))
# Use this construct for reversing the list, as the normal reverse() messes with the list
for volume in list(reversed(self.vm_data["volumes"])):

View File

@@ -1,33 +0,0 @@
#!/usr/bin/env python3
# pvc.py - PVC client command-line interface (stub testing interface)
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import pvc.pvc
#
# Main entry point
#
def main():
    """
    Run the PVC Click CLI with a fresh, empty context object and hand back
    whatever the CLI call returns.
    """
    context = {}
    return pvc.pvc.cli(obj=context)
if __name__ == "__main__":
main()

View File

@@ -1,20 +0,0 @@
from setuptools import setup
# Packages the CLI client needs at runtime
_install_requires = [
    "Click",
    "PyYAML",
    "lxml",
    "colorama",
    "requests",
    "requests-toolbelt",
]

# Expose the Click command group as the "pvc" console command
_entry_points = {
    "console_scripts": [
        "pvc = pvc.pvc:cli",
    ],
}

setup(
    name="pvc",
    version="0.9.63",
    packages=["pvc", "pvc.lib"],
    install_requires=_install_requires,
    entry_points=_entry_points,
)

View File

@@ -1,33 +0,0 @@
#!/usr/bin/env python3
# pvc.py - PVC client command-line interface (stub testing interface)
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from pvc.cli.cli import cli
#
# Main entry point
#
def main():
    """
    Run the PVC Click CLI with a fresh, empty context object and hand back
    whatever the CLI call returns.
    """
    context = {}
    return cli(obj=context)
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

View File

@@ -1,732 +0,0 @@
#!/usr/bin/env python3
# formatters.py - PVC Click CLI output formatters library
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2023 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from pvc.lib.node import format_info as node_format_info
from pvc.lib.node import format_list as node_format_list
from pvc.lib.vm import format_vm_tags as vm_format_tags
from pvc.lib.vm import format_vm_vcpus as vm_format_vcpus
from pvc.lib.vm import format_vm_memory as vm_format_memory
from pvc.lib.vm import format_vm_networks as vm_format_networks
from pvc.lib.vm import format_vm_volumes as vm_format_volumes
from pvc.lib.vm import format_info as vm_format_info
from pvc.lib.vm import format_list as vm_format_list
from pvc.lib.network import format_info as network_format_info
from pvc.lib.network import format_list as network_format_list
from pvc.lib.network import format_list_dhcp as network_format_dhcp_list
from pvc.lib.network import format_list_acl as network_format_acl_list
from pvc.lib.network import format_list_sriov_pf as network_format_sriov_pf_list
from pvc.lib.network import format_info_sriov_vf as network_format_sriov_vf_info
from pvc.lib.network import format_list_sriov_vf as network_format_sriov_vf_list
from pvc.lib.storage import format_raw_output as storage_format_raw
from pvc.lib.storage import format_info_benchmark as storage_format_benchmark_info
from pvc.lib.storage import format_list_benchmark as storage_format_benchmark_list
from pvc.lib.storage import format_list_osd as storage_format_osd_list
from pvc.lib.storage import format_list_pool as storage_format_pool_list
from pvc.lib.storage import format_list_volume as storage_format_volume_list
from pvc.lib.storage import format_list_snapshot as storage_format_snapshot_list
from pvc.lib.provisioner import format_list_template as provisioner_format_template_list
from pvc.lib.provisioner import format_list_userdata as provisioner_format_userdata_list
from pvc.lib.provisioner import format_list_script as provisioner_format_script_list
from pvc.lib.provisioner import format_list_ova as provisioner_format_ova_list
from pvc.lib.provisioner import format_list_profile as provisioner_format_profile_list
from pvc.lib.provisioner import format_list_task as provisioner_format_task_status
# Colour/attribute escape sequences shared by the formatters below. Each value
# is an ANSI SGR sequence "\033[<code>m"; "end" (code 0) resets all attributes.
ansii = {
    name: f"\033[{code}m"
    for name, code in (
        ("red", 91),
        ("blue", 94),
        ("cyan", 96),
        ("green", 92),
        ("yellow", 93),
        ("purple", 95),
        ("bold", 1),
        ("end", 0),
    )
}
def cli_cluster_status_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the full output of cli_cluster_status

    Builds a multi-line, colourized report containing the cluster health (with
    any health messages), the primary node, PVC version and upstream IP, and
    per-state counts of nodes, VMs and OSDs followed by the pool, volume,
    snapshot and network totals.

    CLI_CONFIG is part of the formatter signature but is not read here.
    data is the cluster status dictionary; every key is optional and falls
    back to "N/A", 0, or an unknown health value (-1).

    Returns the report as a single newline-joined string.
    """

    # Normalize data to local variables
    health = data.get("cluster_health", {}).get("health", -1)
    messages = data.get("cluster_health", {}).get("messages", None)
    maintenance = data.get("maintenance", "N/A")
    primary_node = data.get("primary_node", "N/A")
    pvc_version = data.get("pvc_version", "N/A")
    upstream_ip = data.get("upstream_ip", "N/A")
    total_nodes = data.get("nodes", {}).get("total", 0)
    total_vms = data.get("vms", {}).get("total", 0)
    total_networks = data.get("networks", 0)
    total_osds = data.get("osds", {}).get("total", 0)
    total_pools = data.get("pools", 0)
    total_volumes = data.get("volumes", 0)
    total_snapshots = data.get("snapshots", 0)

    # -1 marks an unknown health value; the string form is accepted as well so
    # that the colour selection and the "N/A" text always agree.
    health_unknown = health in (-1, "-1")

    if maintenance == "true" or health_unknown:
        health_colour = ansii["blue"]
    elif health > 90:
        health_colour = ansii["green"]
    elif health > 50:
        health_colour = ansii["yellow"]
    else:
        health_colour = ansii["red"]

    output = list()

    output.append(f"{ansii['bold']}PVC cluster status:{ansii['end']}")
    output.append("")

    if health_unknown:
        health = "N/A"
    else:
        health = f"{health}%"

    if maintenance == "true":
        health = f"{health} (maintenance on)"

    output.append(
        f"{ansii['purple']}Cluster health:{ansii['end']} {health_colour}{health}{ansii['end']}"
    )

    if messages is not None and len(messages) > 0:
        messages = "\n ".join(sorted(messages))
        output.append(f"{ansii['purple']}Health messages:{ansii['end']} {messages}")

    output.append("")

    output.append(f"{ansii['purple']}Primary node:{ansii['end']} {primary_node}")
    output.append(f"{ansii['purple']}PVC version:{ansii['end']} {pvc_version}")
    output.append(f"{ansii['purple']}Upstream IP:{ansii['end']} {upstream_ip}")
    output.append("")

    # Nodes: the healthy state is always listed first, then any other state
    # present in the data.
    node_counts = data.get("nodes", {})
    node_states = ["run,ready"]
    node_states.extend(
        [state for state in node_counts.keys() if state not in ["total", "run,ready"]]
    )

    nodes_strings = list()
    for state in node_states:
        if state in ["run,ready"]:
            state_colour = ansii["green"]
        elif state in ["run,flush", "run,unflush", "run,flushed"]:
            state_colour = ansii["blue"]
        elif "dead" in state or "stop" in state:
            state_colour = ansii["red"]
        else:
            state_colour = ansii["yellow"]

        # Always-listed states may be absent from the data; show them as 0
        nodes_strings.append(
            f"{node_counts.get(state, 0)}/{total_nodes} {state_colour}{state}{ansii['end']}"
        )

    nodes_string = ", ".join(nodes_strings)

    output.append(f"{ansii['purple']}Nodes:{ansii['end']} {nodes_string}")

    # VMs: "start" and "disable" are always listed first
    vm_counts = data.get("vms", {})
    vm_states = ["start", "disable"]
    vm_states.extend(
        [
            state
            for state in vm_counts.keys()
            if state not in ["total", "start", "disable"]
        ]
    )

    vms_strings = list()
    for state in vm_states:
        if state in ["start"]:
            state_colour = ansii["green"]
        elif state in ["migrate", "disable"]:
            state_colour = ansii["blue"]
        elif state in ["stop", "fail"]:
            state_colour = ansii["red"]
        else:
            state_colour = ansii["yellow"]

        vms_strings.append(
            f"{vm_counts.get(state, 0)}/{total_vms} {state_colour}{state}{ansii['end']}"
        )

    vms_string = ", ".join(vms_strings)

    output.append(f"{ansii['purple']}VMs:{ansii['end']} {vms_string}")

    # OSDs: "up,in" is always listed first
    osd_counts = data.get("osds", {})
    osd_states = ["up,in"]
    osd_states.extend(
        [state for state in osd_counts.keys() if state not in ["total", "up,in"]]
    )

    osds_strings = list()
    for state in osd_states:
        if state in ["up,in"]:
            state_colour = ansii["green"]
        elif state in ["down,out"]:
            state_colour = ansii["red"]
        else:
            state_colour = ansii["yellow"]

        osds_strings.append(
            f"{osd_counts.get(state, 0)}/{total_osds} {state_colour}{state}{ansii['end']}"
        )

    # OSD entries are space-separated (nodes and VMs use ", ")
    osds_string = " ".join(osds_strings)

    output.append(f"{ansii['purple']}OSDs:{ansii['end']} {osds_string}")

    output.append(f"{ansii['purple']}Pools:{ansii['end']} {total_pools}")

    output.append(f"{ansii['purple']}Volumes:{ansii['end']} {total_volumes}")

    output.append(f"{ansii['purple']}Snapshots:{ansii['end']} {total_snapshots}")

    output.append(f"{ansii['purple']}Networks:{ansii['end']} {total_networks}")

    output.append("")

    return "\n".join(output)
def cli_cluster_status_format_short(CLI_CONFIG, data):
    """
    Pretty format the health-only output of cli_cluster_status

    Produces the report heading, the colourized cluster health line and, when
    present, the sorted health messages.

    CLI_CONFIG is part of the formatter signature but is not read here.
    data is the cluster status dictionary; a missing health value is treated
    as unknown (-1) and a missing maintenance flag as "N/A".

    Returns the report as a single newline-joined string.
    """

    # Normalize data to local variables
    health = data.get("cluster_health", {}).get("health", -1)
    messages = data.get("cluster_health", {}).get("messages", None)
    maintenance = data.get("maintenance", "N/A")

    # -1 marks an unknown health value; the string form is accepted as well so
    # that the colour selection and the "N/A" text always agree.
    health_unknown = health in (-1, "-1")

    if maintenance == "true" or health_unknown:
        health_colour = ansii["blue"]
    elif health > 90:
        health_colour = ansii["green"]
    elif health > 50:
        health_colour = ansii["yellow"]
    else:
        health_colour = ansii["red"]

    output = list()

    output.append(f"{ansii['bold']}PVC cluster status:{ansii['end']}")
    output.append("")

    if health_unknown:
        health = "N/A"
    else:
        health = f"{health}%"

    if maintenance == "true":
        health = f"{health} (maintenance on)"

    output.append(
        f"{ansii['purple']}Cluster health:{ansii['end']} {health_colour}{health}{ansii['end']}"
    )

    if messages is not None and len(messages) > 0:
        messages = "\n ".join(sorted(messages))
        output.append(f"{ansii['purple']}Health messages:{ansii['end']} {messages}")

    output.append("")

    return "\n".join(output)
def cli_connection_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_connection_list

    data is an iterable of dicts with the keys "name", "description",
    "address", "port", "scheme" and "api_key". Returns a table (bold header
    plus one row per connection) as one newline-joined string.
    """
    # Set the fields data
    fields = {
        "name": {"header": "Name", "length": len("Name") + 1},
        "description": {"header": "Description", "length": len("Description") + 1},
        "address": {"header": "Address", "length": len("Address") + 1},
        "port": {"header": "Port", "length": len("Port") + 1},
        "scheme": {"header": "Scheme", "length": len("Scheme") + 1},
        "api_key": {"header": "API Key", "length": len("API Key") + 1},
    }

    # Parse each connection and adjust field lengths
    for connection in data:
        for field in fields:
            _length = len(str(connection[field]))
            if _length > fields[field]["length"]:
                fields[field]["length"] = _length + 1

    # Create the output object and define the line format
    output = list()
    line = "{bold}{name: <{lname}} {desc: <{ldesc}} {addr: <{laddr}} {port: <{lport}} {schm: <{lschm}} {akey: <{lakey}}{end}"

    # Add the header line
    output.append(
        line.format(
            bold=ansii["bold"],
            end=ansii["end"],
            name=fields["name"]["header"],
            lname=fields["name"]["length"],
            desc=fields["description"]["header"],
            ldesc=fields["description"]["length"],
            addr=fields["address"]["header"],
            laddr=fields["address"]["length"],
            port=fields["port"]["header"],
            lport=fields["port"]["length"],
            schm=fields["scheme"]["header"],
            lschm=fields["scheme"]["length"],
            akey=fields["api_key"]["header"],
            lakey=fields["api_key"]["length"],
        )
    )

    # Add a line per connection
    # Values are passed through str() because a non-string such as None (an
    # absent API key) rejects the "<" alignment spec with a TypeError; the
    # widths above are already measured on the str() form
    for connection in data:
        output.append(
            line.format(
                bold="",
                end="",
                name=str(connection["name"]),
                lname=fields["name"]["length"],
                desc=str(connection["description"]),
                ldesc=fields["description"]["length"],
                addr=str(connection["address"]),
                laddr=fields["address"]["length"],
                port=str(connection["port"]),
                lport=fields["port"]["length"],
                schm=str(connection["scheme"]),
                lschm=fields["scheme"]["length"],
                akey=str(connection["api_key"]),
                lakey=fields["api_key"]["length"],
            )
        )

    return "\n".join(output)
def cli_connection_detail_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_connection_detail

    data is an iterable of per-connection dicts (as built by
    cli_connection_detail_parser). Returns a table with a bold header and one
    row per connection, with the health cell coloured by its value.
    """
    # (data key, column header, value placeholder, width placeholder)
    columns = (
        ("name", "Name", "name", "lname"),
        ("description", "Description", "desc", "ldesc"),
        ("health", "Health", "hlth", "lhlth"),
        ("primary_node", "Primary", "prin", "lprin"),
        ("pvc_version", "Version", "vers", "lvers"),
        ("nodes", "Nodes", "nods", "lnods"),
        ("vms", "VMs", "vms", "lvms"),
        ("networks", "Networks", "nets", "lnets"),
        ("osds", "OSDs", "osds", "losds"),
        ("pools", "Pools", "pols", "lpols"),
        ("volumes", "Volumes", "vols", "lvols"),
        ("snapshots", "Snapshots", "snts", "lsnts"),
    )

    # Start every column at header width + 1, then widen to fit the data
    widths = {key: len(header) + 1 for key, header, _, _ in columns}
    for connection in data:
        for key in widths:
            value_length = len(str(connection[key]))
            if value_length > widths[key]:
                widths[key] = value_length + 1
    width_args = {width_ph: widths[key] for key, _, _, width_ph in columns}

    template = "{bold}{name: <{lname}} {desc: <{ldesc}} {chlth}{hlth: <{lhlth}}{endc} {prin: <{lprin}} {vers: <{lvers}} {nods: <{lnods}} {vms: <{lvms}} {nets: <{lnets}} {osds: <{losds}} {pols: <{lpols}} {vols: <{lvols}} {snts: <{lsnts}}{end}"

    # Header row: bold, with no colouring of the health cell
    header_args = {value_ph: header for _, header, value_ph, _ in columns}
    rows = [
        template.format(
            bold=ansii["bold"],
            end=ansii["end"],
            chlth="",
            endc="",
            **header_args,
            **width_args,
        )
    ]

    # One row per connection
    for connection in data:
        raw_health = connection["health"]
        if raw_health == "N/A":
            health_value = "N/A"
            health_colour = ansii["purple"]
        else:
            health_value = f"{raw_health}%"
            if connection["maintenance"] == "true":
                health_colour = ansii["blue"]
            elif raw_health > 90:
                health_colour = ansii["green"]
            elif raw_health > 50:
                health_colour = ansii["yellow"]
            else:
                health_colour = ansii["red"]

        row_args = {value_ph: connection[key] for key, _, value_ph, _ in columns}
        row_args["hlth"] = health_value
        rows.append(
            template.format(
                bold="",
                end="",
                chlth=health_colour,
                endc=ansii["end"],
                **row_args,
                **width_args,
            )
        )

    return "\n".join(rows)
def cli_node_info_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the basic output of cli_node_info

    Delegates to node_format_info with long_output=False and returns its result.
    """
    return node_format_info(CLI_CONFIG, data, long_output=False)
def cli_node_info_format_long(CLI_CONFIG, data):
    """
    Pretty format the full output of cli_node_info

    Delegates to node_format_info with long_output=True and returns its result.
    """
    return node_format_info(CLI_CONFIG, data, long_output=True)
def cli_node_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_node_list

    Delegates to node_format_list and returns its result.
    """
    return node_format_list(CLI_CONFIG, data)
def cli_vm_tag_get_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_vm_tag_get

    Delegates to vm_format_tags and returns its result.
    """
    return vm_format_tags(CLI_CONFIG, data)
def cli_vm_vcpu_get_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_vm_vcpu_get

    Delegates to vm_format_vcpus and returns its result.
    """
    return vm_format_vcpus(CLI_CONFIG, data)
def cli_vm_memory_get_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_vm_memory_get

    Delegates to vm_format_memory and returns its result.
    """
    return vm_format_memory(CLI_CONFIG, data)
def cli_vm_network_get_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_vm_network_get

    Delegates to vm_format_networks and returns its result.
    """
    return vm_format_networks(CLI_CONFIG, data)
def cli_vm_volume_get_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_vm_volume_get

    Delegates to vm_format_volumes and returns its result.
    """
    return vm_format_volumes(CLI_CONFIG, data)
def cli_vm_info_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the basic output of cli_vm_info

    Delegates to vm_format_info with long_output=False and returns its result.
    """
    return vm_format_info(CLI_CONFIG, data, long_output=False)
def cli_vm_info_format_long(CLI_CONFIG, data):
    """
    Pretty format the full output of cli_vm_info

    Delegates to vm_format_info with long_output=True and returns its result.
    """
    return vm_format_info(CLI_CONFIG, data, long_output=True)
def cli_vm_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_vm_list

    Delegates to vm_format_list and returns its result.
    """
    return vm_format_list(CLI_CONFIG, data)
def cli_network_info_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the basic output of cli_network_info

    Delegates to network_format_info with long_output=False, mirroring the
    node and VM "pretty" formatters; the full output is produced by
    cli_network_info_format_long.
    """
    return network_format_info(CLI_CONFIG, data, long_output=False)
def cli_network_info_format_long(CLI_CONFIG, data):
    """
    Pretty format the full output of cli_network_info

    Delegates to network_format_info with long_output=True and returns its result.
    """
    return network_format_info(CLI_CONFIG, data, long_output=True)
def cli_network_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_network_list

    Delegates to network_format_list and returns its result.
    """
    return network_format_list(CLI_CONFIG, data)
def cli_network_dhcp_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_network_dhcp_list

    Delegates to network_format_dhcp_list and returns its result.
    """
    return network_format_dhcp_list(CLI_CONFIG, data)
def cli_network_acl_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_network_acl_list

    Delegates to network_format_acl_list and returns its result.
    """
    return network_format_acl_list(CLI_CONFIG, data)
def cli_network_sriov_pf_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_network_sriov_pf_list

    Delegates to network_format_sriov_pf_list and returns its result.
    """
    return network_format_sriov_pf_list(CLI_CONFIG, data)
def cli_network_sriov_vf_info_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_network_sriov_vf_info

    Delegates to network_format_sriov_vf_info and returns its result.
    """
    return network_format_sriov_vf_info(CLI_CONFIG, data)
def cli_network_sriov_vf_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_network_sriov_vf_list

    Delegates to network_format_sriov_vf_list and returns its result.
    """
    return network_format_sriov_vf_list(CLI_CONFIG, data)
def cli_storage_status_format_raw(CLI_CONFIG, data):
    """
    Direct format the output of cli_storage_status

    Delegates to storage_format_raw and returns its result.
    """
    return storage_format_raw(CLI_CONFIG, data)
def cli_storage_util_format_raw(CLI_CONFIG, data):
    """
    Direct format the output of cli_storage_util

    Delegates to storage_format_raw (the same helper used for
    cli_storage_status) and returns its result.
    """
    return storage_format_raw(CLI_CONFIG, data)
def cli_storage_benchmark_info_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_storage_benchmark_info

    Delegates to storage_format_benchmark_info and returns its result.
    """
    return storage_format_benchmark_info(CLI_CONFIG, data)
def cli_storage_benchmark_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_storage_benchmark_list

    Delegates to storage_format_benchmark_list and returns its result.
    """
    return storage_format_benchmark_list(CLI_CONFIG, data)
def cli_storage_osd_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_storage_osd_list

    Delegates to storage_format_osd_list and returns its result.
    """
    return storage_format_osd_list(CLI_CONFIG, data)
def cli_storage_pool_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_storage_pool_list

    Delegates to storage_format_pool_list and returns its result.
    """
    return storage_format_pool_list(CLI_CONFIG, data)
def cli_storage_volume_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_storage_volume_list

    Delegates to storage_format_volume_list and returns its result.
    """
    return storage_format_volume_list(CLI_CONFIG, data)
def cli_storage_snapshot_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_storage_snapshot_list

    Delegates to storage_format_snapshot_list and returns its result.
    """
    return storage_format_snapshot_list(CLI_CONFIG, data)
def cli_provisioner_template_system_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_template_system_list

    Delegates to provisioner_format_template_list with template_type="system".
    """
    return provisioner_format_template_list(CLI_CONFIG, data, template_type="system")
def cli_provisioner_template_network_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_template_network_list

    Delegates to provisioner_format_template_list with template_type="network".
    """
    return provisioner_format_template_list(CLI_CONFIG, data, template_type="network")
def cli_provisioner_template_storage_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_template_storage_list

    Delegates to provisioner_format_template_list with template_type="storage".
    """
    return provisioner_format_template_list(CLI_CONFIG, data, template_type="storage")
def cli_provisioner_userdata_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_userdata_list

    Delegates to provisioner_format_userdata_list and returns its result.
    """
    return provisioner_format_userdata_list(CLI_CONFIG, data)
def cli_provisioner_script_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_script_list

    Delegates to provisioner_format_script_list and returns its result.
    """
    return provisioner_format_script_list(CLI_CONFIG, data)
def cli_provisioner_ova_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_ova_list

    Delegates to provisioner_format_ova_list and returns its result.
    """
    return provisioner_format_ova_list(CLI_CONFIG, data)
def cli_provisioner_profile_list_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_profile_list

    Delegates to provisioner_format_profile_list and returns its result.
    """
    return provisioner_format_profile_list(CLI_CONFIG, data)
def cli_provisioner_status_format_pretty(CLI_CONFIG, data):
    """
    Pretty format the output of cli_provisioner_status

    Delegates to provisioner_format_task_status and returns its result.
    """
    return provisioner_format_task_status(CLI_CONFIG, data)

View File

@@ -1,241 +0,0 @@
#!/usr/bin/env python3
# helpers.py - PVC Click CLI helper function library
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2023 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from click import echo as click_echo
from click import progressbar
from distutils.util import strtobool
from json import load as jload
from json import dump as jdump
from os import chmod, environ, getpid, path
from socket import gethostname
from sys import argv
from syslog import syslog, openlog, closelog, LOG_AUTH
from time import sleep
from yaml import load as yload
from yaml import BaseLoader
import pvc.lib.provisioner
DEFAULT_STORE_DATA = {"cfgfile": "/etc/pvc/pvcapid.yaml"}
DEFAULT_STORE_FILENAME = "pvc.json"
DEFAULT_API_PREFIX = "/api/v1"
DEFAULT_NODE_HOSTNAME = gethostname().split(".")[0]
def echo(config, message, newline=True, stderr=False):
    """
    Output a message with click.echo respecting our configuration

    config keys consulted: "colour" (truthy passes color=True to click,
    otherwise None), "silent" (suppress everything) and "quiet" (suppress
    only messages destined for stderr).
    """
    colour = True if config.get("colour", False) else None

    # Silent mode drops every message
    if config.get("silent", False):
        return

    # Quiet mode drops only stderr messages
    if config.get("quiet", False) and stderr:
        return

    click_echo(message=message, color=colour, nl=newline, err=stderr)
def audit():
    """
    Log an audit message to the local syslog AUTH facility

    The message records the command line (with the program name normalized
    to "pvc") and the USER environment variable of the invoking user.
    """
    # Build a new list rather than aliasing argv, so that the process-wide
    # sys.argv is not modified as a side effect of auditing
    args = ["pvc"] + list(argv[1:])
    pid = getpid()

    openlog(facility=LOG_AUTH, ident=f"{args[0]}[{pid}]")
    syslog(
        f"""client audit: command "{' '.join(args)}" by user {environ.get('USER', None)}"""
    )
    closelog()
def read_config_from_yaml(cfgfile):
    """
    Read the PVC API configuration from the local API configuration file

    Returns a 5-tuple (cfgfile, host, port, scheme, api_key). The first
    element is the configuration file path itself. If the file lacks the
    expected structure, the last four elements are all None so that callers
    can detect a bad configuration.
    """
    try:
        with open(cfgfile) as fh:
            api_config = yload(fh, Loader=BaseLoader)["pvc"]["api"]

        host = api_config["listen_address"]
        port = api_config["listen_port"]
        scheme = "https" if strtobool(api_config["ssl"]["enabled"]) else "http"
        api_key = (
            api_config["authentication"]["tokens"][0]["token"]
            if strtobool(api_config["authentication"]["enabled"])
            else None
        )
    except (KeyError, IndexError, TypeError, ValueError):
        # KeyError: a required key is missing
        # IndexError: authentication is enabled but the token list is empty
        # TypeError: the document (or a section) is empty or not a mapping
        # ValueError: an "enabled" value is not a recognized boolean string
        host = None
        port = None
        scheme = None
        api_key = None

    return cfgfile, host, port, scheme, api_key
def get_config(store_data, connection=None):
    """
    Load CLI configuration from store data

    Looks up connection in store_data; when it is absent or empty, falls back
    to the "local" connection described by DEFAULT_STORE_DATA. Returns the
    CLI configuration dict, or {"badcfg": True} when no usable configuration
    can be built.
    """
    if store_data is None:
        return {"badcfg": True}

    connection_details = store_data.get(connection, None)
    if not connection_details:
        connection = "local"
        connection_details = DEFAULT_STORE_DATA

    cfgfile = connection_details.get("cfgfile", None)
    if cfgfile is None:
        # This is a static configuration, get the details directly
        description = connection_details["description"]
        host = connection_details["host"]
        port = connection_details["port"]
        scheme = connection_details["scheme"]
        api_key = connection_details["api_key"]
    else:
        # File-backed configuration: the file must exist and parse fully
        if not path.isfile(cfgfile):
            return {"badcfg": True}
        description, host, port, scheme, api_key = read_config_from_yaml(cfgfile)
        if None in [description, host, port, scheme]:
            return {"badcfg": True}
        # Rewrite a wildcard listener to use localhost instead
        if host == "0.0.0.0":
            host = "127.0.0.1"

    config = {
        "debug": False,
        "connection": connection,
        "description": description,
        "api_host": f"{host}:{port}",
        "api_scheme": scheme,
        "api_key": api_key,
        "api_prefix": DEFAULT_API_PREFIX,
    }
    if connection == "local":
        config["verify_ssl"] = False
    else:
        config["verify_ssl"] = bool(
            strtobool(environ.get("PVC_CLIENT_VERIFY_SSL", "True"))
        )

    return config
def get_store(store_path):
    """
    Load store information from the store path

    Opens DEFAULT_STORE_FILENAME inside store_path and returns its parsed
    JSON content; if parsing fails for any reason, an empty dict is returned.
    Failure to open the file is not handled here.
    """
    store_location = f"{store_path}/{DEFAULT_STORE_FILENAME}"

    with open(store_location) as store_fh:
        try:
            return jload(store_fh)
        except Exception:
            return dict()
def update_store(store_path, store_data):
    """
    Update store information to the store path, creating it (with sensible permissions) if needed

    A missing store file is first created empty and chmod'ed to the octal
    mode given by the PVC_CLIENT_DB_PERMS environment variable (default
    "600"); store_data is then written as sorted, indented JSON.
    """
    store_file = f"{store_path}/{DEFAULT_STORE_FILENAME}"

    store_is_missing = not path.exists(store_file)
    if store_is_missing:
        # Create the file empty, then restrict it before any data lands in it
        with open(store_file, "w") as new_fh:
            new_fh.write("")
        store_mode = int(environ.get("PVC_CLIENT_DB_PERMS", "600"), 8)
        chmod(store_file, store_mode)

    with open(store_file, "w") as store_fh:
        jdump(store_data, store_fh, sort_keys=True, indent=4)
def wait_for_provisioner(CLI_CONFIG, task_id):
    """
    Wait for a provisioner task to complete

    Polls pvc.lib.provisioner.task_status once per second: first until the
    task leaves the "PENDING" state, then while it is "RUNNING", updating a
    progress bar from the task's "current"/"total" values. Returns a
    "<state>: <status>" summary string built from the final task status.
    """
    echo(CLI_CONFIG, f"Task ID: {task_id}")
    echo(CLI_CONFIG, "")

    # Wait for the task to start
    echo(CLI_CONFIG, "Waiting for task to start...", newline=False)
    while True:
        sleep(1)
        task_status = pvc.lib.provisioner.task_status(
            CLI_CONFIG, task_id, is_watching=True
        )
        if task_status.get("state") != "PENDING":
            break
        # echo() takes the configuration as its first argument
        echo(CLI_CONFIG, ".", newline=False)
    echo(CLI_CONFIG, " done.")
    echo(CLI_CONFIG, "")

    # Start following the task state, updating progress as we go
    total_task = task_status.get("total")
    with progressbar(length=total_task, show_eta=False) as bar:
        last_task = 0
        maxlen = 0
        while True:
            sleep(1)
            if task_status.get("state") != "RUNNING":
                break
            if task_status.get("current") > last_task:
                current_task = int(task_status.get("current"))
                bar.update(current_task - last_task)
                last_task = current_task
                # The extensive spaces at the end cause this to overwrite longer previous messages
                # str() guards against a task that reports no status text
                status_text = str(task_status.get("status"))
                curlen = len(status_text)
                if curlen > maxlen:
                    maxlen = curlen
                lendiff = maxlen - curlen
                overwrite_whitespace = " " * lendiff
                echo(
                    CLI_CONFIG,
                    " " + status_text + overwrite_whitespace,
                    newline=False,
                )
            task_status = pvc.lib.provisioner.task_status(
                CLI_CONFIG, task_id, is_watching=True
            )
        if task_status.get("state") == "SUCCESS":
            bar.update(total_task - last_task)

    echo(CLI_CONFIG, "")
    # Formatting (rather than concatenating) tolerates a missing state/status
    retdata = f"{task_status.get('state')}: {task_status.get('status')}"
    return retdata

View File

@@ -1,124 +0,0 @@
#!/usr/bin/env python3
# parsers.py - PVC Click CLI data parser function library
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2023 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from os import path
from re import sub
from pvc.cli.helpers import read_config_from_yaml, get_config
import pvc.lib.cluster
def cli_connection_list_parser(connections_config, show_keys_flag):
    """
    Parse connections_config into formatable data for cli_connection_list

    connections_config maps connection names to their details: either a
    {"cfgfile": path} entry, read via read_config_from_yaml, or a static
    entry with "description", "host", "port", "scheme" and "api_key".
    Entries whose cfgfile does not exist are skipped. Unless show_keys_flag
    is set, lowercase letters and digits of the API key are replaced by "x"
    in the returned data only; connections_config itself is never modified.
    """
    connections_data = list()

    for connection, details in connections_config.items():
        cfgfile = details.get("cfgfile", None)
        if cfgfile is not None:
            if not path.isfile(cfgfile):
                continue
            description, address, port, scheme, api_key = read_config_from_yaml(
                cfgfile
            )
        else:
            description = details["description"]
            address = details["host"]
            port = details["port"]
            scheme = details["scheme"]
            api_key = details["api_key"]

        # Mask a local copy; a connection without a key has nothing to mask
        if not show_keys_flag and api_key is not None:
            api_key = sub(r"[a-z0-9]", "x", api_key)

        connections_data.append(
            {
                "name": connection,
                "description": description,
                "address": address,
                "port": port,
                "scheme": scheme,
                "api_key": api_key,
            }
        )

    return connections_data
def cli_connection_detail_parser(connections_config):
    """
    Parse connections_config into formatable data for cli_connection_detail

    For every connection with a usable configuration, queries the cluster
    status via pvc.lib.cluster.get_info and produces one summary dict.
    Connections flagged "badcfg" by get_config are skipped; when get_info
    reports failure, every status field is filled with "N/A".
    """
    connections_data = list()

    for connection, _details in connections_config.items():
        cluster_config = get_config(connections_config, connection=connection)
        if cluster_config.get("badcfg", False):
            continue

        # Connect to each API and gather cluster status
        retcode, retdata = pvc.lib.cluster.get_info(cluster_config)

        entry = {
            "name": cluster_config["connection"],
            "description": cluster_config["description"],
        }
        if retcode == 0:
            # Create dummy data of N/A for all fields
            for status_field in (
                "health",
                "maintenance",
                "primary_node",
                "pvc_version",
                "nodes",
                "vms",
                "networks",
                "osds",
                "pools",
                "volumes",
                "snapshots",
            ):
                entry[status_field] = "N/A"
        else:
            # Normalize data into nice formattable version
            entry["health"] = retdata.get("cluster_health", {}).get("health", "N/A")
            entry["maintenance"] = retdata.get("maintenance", "N/A")
            entry["primary_node"] = retdata.get("primary_node", "N/A")
            entry["pvc_version"] = retdata.get("pvc_version", "N/A")
            entry["nodes"] = retdata.get("nodes", {}).get("total", "N/A")
            entry["vms"] = retdata.get("vms", {}).get("total", "N/A")
            entry["networks"] = retdata.get("networks", "N/A")
            entry["osds"] = retdata.get("osds", {}).get("total", "N/A")
            entry["pools"] = retdata.get("pools", "N/A")
            entry["volumes"] = retdata.get("volumes", "N/A")
            entry["snapshots"] = retdata.get("snapshots", "N/A")

        connections_data.append(entry)

    return connections_data

View File

@@ -1,64 +0,0 @@
#!/usr/bin/env python3
# waiters.py - PVC Click CLI output waiters library
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2023 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from time import sleep, time
from pvc.cli.helpers import echo
import pvc.lib.node
def cli_node_waiter(config, node, state_field, state_value):
    """
    Wait for state transitions for cli_node tasks

    {node} is the name of the node
    {state_field} is the node_info field to check for {state_value}
    {state_value} is the TRANSITIONAL value that, when no longer set, will terminate waiting

    Any error while polling is treated the same as "still transitioning".
    """
    # Sleep for this long between API polls
    sleep_time = 1
    # Print a dot after this many {sleep_time}s
    dot_time = 5

    t_start = time()

    echo(config, "Waiting...", newline=False)
    sleep(sleep_time)

    polls = 0
    while True:
        polls += 1
        try:
            _retcode, node_data = pvc.lib.node.node_info(config, node)
            transition_over = node_data[state_field] != state_value
        except Exception:
            transition_over = False

        if transition_over:
            break

        sleep(sleep_time)
        if polls % dot_time == 0:
            echo(config, ".", newline=False)

    t_end = time()
    echo(config, f" done. [{int(t_end - t_start)}s]")

View File

@@ -27,8 +27,8 @@ from requests_toolbelt.multipart.encoder import (
MultipartEncoderMonitor,
)
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import UploadProgressBar, call_api
import pvc.cli_lib.ansiprint as ansiprint
from pvc.cli_lib.common import UploadProgressBar, call_api
#
# Supplemental functions

View File

@@ -21,8 +21,8 @@
import json
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import call_api
import pvc.cli_lib.ansiprint as ansiprint
from pvc.cli_lib.common import call_api
def initialize(config, overwrite=False):

View File

@@ -20,8 +20,8 @@
###############################################################################
import re
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import call_api
import pvc.cli_lib.ansiprint as ansiprint
from pvc.cli_lib.common import call_api
def isValidMAC(macaddr):

View File

@@ -21,8 +21,8 @@
import time
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import call_api
import pvc.cli_lib.ansiprint as ansiprint
from pvc.cli_lib.common import call_api
#

View File

@@ -24,8 +24,8 @@ from requests_toolbelt.multipart.encoder import (
MultipartEncoderMonitor,
)
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import UploadProgressBar, call_api
import pvc.cli_lib.ansiprint as ansiprint
from pvc.cli_lib.common import UploadProgressBar, call_api
from ast import literal_eval

View File

@@ -22,8 +22,8 @@
import time
import re
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import call_api, format_bytes, format_metric
import pvc.cli_lib.ansiprint as ansiprint
from pvc.cli_lib.common import call_api, format_bytes, format_metric
#
@@ -677,7 +677,7 @@ def vm_networks_add(
from lxml.objectify import fromstring
from lxml.etree import tostring
from random import randint
import pvc.lib.network as pvc_network
import pvc.cli_lib.network as pvc_network
network_exists, _ = pvc_network.net_info(config, network)
if not network_exists:
@@ -1046,7 +1046,7 @@ def vm_volumes_add(config, vm, volume, disk_id, bus, disk_type, live, restart):
from lxml.objectify import fromstring
from lxml.etree import tostring
from copy import deepcopy
import pvc.lib.ceph as pvc_ceph
import pvc.cli_lib.ceph as pvc_ceph
if disk_type == "rbd":
# Verify that the provided volume is valid

View File

@@ -1,97 +0,0 @@
#!/usr/bin/env python3
# ansiprint.py - Printing function for formatted messages
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import datetime
# ANSII colours for output
def red():
    """Return the escape sequence "\\033[91m" used for red output."""
    return "\033[91m"
def blue():
    """Return the escape sequence "\\033[94m" used for blue output."""
    return "\033[94m"
def cyan():
    """Return the escape sequence "\\033[96m" used for cyan output."""
    return "\033[96m"
def green():
    """Return the escape sequence "\\033[92m" used for green output."""
    return "\033[92m"
def yellow():
    """Return the escape sequence "\\033[93m" used for yellow output."""
    return "\033[93m"
def purple():
    """Return the escape sequence "\\033[95m" used for purple output."""
    return "\033[95m"
def bold():
    """Return the escape sequence "\\033[1m" used for bold output."""
    return "\033[1m"
def end():
    """Return the escape sequence "\\033[0m" that ends colour/bold formatting."""
    return "\033[0m"
# Print function
def echo(message, prefix, state):
    """
    Print a message with a coloured prompt, a timestamp and an optional prefix

    state selects the prompt colour: "o" OK (green), "e" error (red),
    "w" warning (yellow), "t" tick (purple), "i" information (blue); any
    other value is bold. The special state "c" (continuation) prints no
    timestamp and no colour, with a blank prompt.
    """
    # Get the date
    timestamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S.%f")
    date = "{} - ".format(timestamp)
    endc = end()

    if state == "c":
        # Continuation
        date = ""
        colour = ""
        prompt = " "
    else:
        colour_for_state = {
            "o": green,  # OK
            "e": red,  # Error
            "w": yellow,  # Warning
            "t": purple,  # Tick
            "i": blue,  # Information
        }
        colour = colour_for_state.get(state, bold)()
        prompt = ">>> "

    # Append space to prefix
    if prefix != "":
        prefix += " "

    print(colour + prompt + endc + date + prefix + message)

View File

@@ -1,116 +0,0 @@
#!/usr/bin/env python3
# cluster.py - PVC CLI client function library, cluster management
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import json
from pvc.lib.common import call_api
def initialize(config, overwrite=False):
    """
    Initialize the PVC cluster

    API endpoint: POST /api/v1/initialize
    API arguments: overwrite, yes-i-really-mean-it
    API schema: {json_data_object}

    Returns (success, message), where success is True on HTTP 200.
    """
    response = call_api(
        config,
        "post",
        "/initialize",
        params={"yes-i-really-mean-it": "yes", "overwrite": overwrite},
    )

    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
def backup(config):
    """
    Get a JSON backup of the cluster

    API endpoint: GET /api/v1/backup
    API arguments:
    API schema: {json_data_object}

    Returns (True, backup data) on HTTP 200, otherwise (False, message).
    """
    response = call_api(config, "get", "/backup")

    if response.status_code != 200:
        return False, response.json().get("message", "")
    return True, response.json()
def restore(config, cluster_data):
    """
    Restore a JSON backup to the cluster

    API endpoint: POST /api/v1/restore
    API arguments: yes-i-really-mean-it
    API schema: {json_data_object}

    cluster_data is serialized to JSON and sent as the "cluster_data" form
    field. Returns (success, message), where success is True on HTTP 200.
    """
    response = call_api(
        config,
        "post",
        "/restore",
        params={"yes-i-really-mean-it": "yes"},
        data={"cluster_data": json.dumps(cluster_data)},
    )

    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
def maintenance_mode(config, state):
    """
    Enable or disable PVC cluster maintenance mode

    API endpoint: POST /api/v1/status
    API arguments: {state}={state}
    API schema: {json_data_object}

    Returns (success, message), where success is True on HTTP 200.
    """
    response = call_api(config, "post", "/status", params={"state": state})

    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
def get_info(config):
    """
    Get status of the PVC cluster

    API endpoint: GET /api/v1/status
    API arguments:
    API schema: {json_data_object}

    Returns (True, status data) on HTTP 200, otherwise (False, message).
    """
    response = call_api(config, "get", "/status")

    if response.status_code != 200:
        return False, response.json().get("message", "")
    return True, response.json()

View File

@@ -1,201 +0,0 @@
#!/usr/bin/env python3
# common.py - PVC CLI client function library, Common functions
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import math
import time
import requests
import click
from urllib3 import disable_warnings
def format_bytes(size_bytes):
    """
    Format a byte count as a short human-readable string (e.g. "10K")

    Uses the smallest 1024-based unit (B, K, M, G, T, P) for which the
    rounded-up value is below 10000. Returns "0B" if no unit satisfies that.
    """
    units = (
        ("B", 1),
        ("K", 1024),
        ("M", 1024**2),
        ("G", 1024**3),
        ("T", 1024**4),
        ("P", 1024**5),
    )

    for suffix, factor in units:
        scaled = int(math.ceil(size_bytes / factor))
        if scaled < 10000:
            return "{}{}".format(scaled, suffix)

    return "0B"
def format_metric(integer):
    """
    Format a count as a short human-readable string (e.g. "10K")

    Uses the smallest 1000-based unit ("", K, M, B, T, Q) for which the
    rounded-up value is below 10000. Returns "0" if no unit satisfies that.
    """
    units = (
        ("", 1),
        ("K", 1000),
        ("M", 1000**2),
        ("B", 1000**3),
        ("T", 1000**4),
        ("Q", 1000**5),
    )

    for suffix, factor in units:
        scaled = int(math.ceil(integer / factor))
        if scaled < 10000:
            return "{}{}".format(scaled, suffix)

    return "0"
class UploadProgressBar(object):
    """
    Click-based progress bar driven by an upload monitor callback.

    The file size is announced on creation; update() is then called with an
    object exposing `bytes_read` and advances the bar at most once a second.
    """

    def __init__(self, filename, end_message="", end_nl=True):
        total_size = os.path.getsize(filename)
        click.echo(
            "Uploading file (total size {})...".format(format_bytes(total_size))
        )

        self.length = total_size
        # Pretend the last refresh was a second ago so the first update draws
        self.time_last = int(round(time.time() * 1000)) - 1000
        self.bytes_last = 0
        self.bytes_diff = 0
        self.is_end = False

        self.end_message = end_message
        self.end_nl = end_nl
        # Without a trailing newline, pad the end message with a space instead
        self.end_suffix = "" if self.end_nl else " "

        self.bar = click.progressbar(length=self.length, show_eta=True)

    def _flush_pending(self):
        # Push the accumulated byte delta to the bar and reset the counter
        self.bar.update(self.bytes_diff)
        self.bytes_diff = 0

    def update(self, monitor):
        current = monitor.bytes_read
        self.bytes_diff += current - self.bytes_last
        # No progress since the previous callback is treated as end of upload
        if current == self.bytes_last:
            self.is_end = True
        self.bytes_last = current

        now_ms = int(round(time.time() * 1000))
        if now_ms - 1000 > self.time_last:
            self.time_last = now_ms
            self._flush_pending()

        if not self.is_end:
            return

        self._flush_pending()
        click.echo()
        click.echo()
        if self.end_message:
            click.echo(self.end_message + self.end_suffix, nl=self.end_nl)
class ErrorResponse(requests.Response):
    """
    Minimal stand-in response used when no real API response is available.

    It carries a fixed JSON payload and status code so callers can treat a
    local failure like any other API response.
    """

    def __init__(self, json_data, status_code):
        # Initialise the base class first so the standard Response attributes
        # (headers, reason, url, ...) exist; code that inspects them, such as
        # debug output printing response.headers, would otherwise raise
        # AttributeError on this object
        super().__init__()
        self.json_data = json_data
        self.status_code = status_code

    def json(self, **kwargs):
        """Return the stored payload; keyword arguments are accepted and ignored."""
        return self.json_data
def call_api(
    config,
    operation,
    request_uri,
    headers=None,
    params=None,
    data=None,
    files=None,
):
    """
    Perform a single HTTP request against the PVC API and return the response.

    :param config: dict providing "api_scheme", "api_host", "api_prefix",
                   "api_key", "verify_ssl" and "debug"
    :param operation: one of "get", "post", "put", "patch" or "delete"
    :param request_uri: path appended after the API prefix
    :param headers: optional extra request headers; the dict is copied and
                    never modified
    :param params: optional query parameters
    :param data: optional request body
    :param files: optional files; only sent for "post" and "put"
    :return: the requests response, or an ErrorResponse with status 500 if
             the request failed or the operation is not supported
    """
    # Set the connect timeout to 2 seconds but extremely long (48 hour) data timeout
    timeout = (2.05, 172800)

    # Craft the URI
    uri = "{}://{}{}{}".format(
        config["api_scheme"], config["api_host"], config["api_prefix"], request_uri
    )

    # Work on a private copy of the headers: mutating a shared default dict
    # (or the caller's dict) would carry the API key over into later calls
    request_headers = dict(headers) if headers else {}

    # Craft the authentication header if required
    if config["api_key"]:
        request_headers["X-Api-Key"] = config["api_key"]

    # Determine the request type and hit the API
    disable_warnings()

    if operation not in ("get", "post", "put", "patch", "delete"):
        # Previously this fell through every branch and left the response
        # unbound; report it like any other failed call instead
        message = "Invalid API operation: {}".format(operation)
        response = ErrorResponse({"message": message}, 500)
    else:
        try:
            request_kwargs = {
                "timeout": timeout,
                "headers": request_headers,
                "params": params,
                "data": data,
                "verify": config["verify_ssl"],
            }
            # Only body-carrying uploads accept files
            if operation in ("post", "put"):
                request_kwargs["files"] = files
            response = getattr(requests, operation)(uri, **request_kwargs)
        except Exception as e:
            message = "Failed to connect to the API: {}".format(e)
            response = ErrorResponse({"message": message}, 500)

    # Display debug output
    if config["debug"]:
        click.echo("API endpoint: {}".format(uri), err=True)
        click.echo("Response code: {}".format(response.status_code), err=True)
        # Tolerate response objects that do not carry headers
        click.echo(
            "Response headers: {}".format(getattr(response, "headers", {})),
            err=True,
        )
        click.echo(err=True)

    # Return the response object
    return response

File diff suppressed because it is too large Load Diff

View File

@@ -1,706 +0,0 @@
#!/usr/bin/env python3
# node.py - PVC CLI client function library, node management
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import time
import pvc.lib.ansiprint as ansiprint
from pvc.lib.common import call_api
#
# Primary functions
#
def node_coordinator_state(config, node, action):
    """
    Set the coordinator state (primary/secondary) of a node

    API endpoint: POST /api/v1/node/{node}/coordinator-state
    API arguments: state={action}
    API schema: {"message": "{data}"}
    """
    response = call_api(
        config,
        "post",
        "/node/{node}/coordinator-state".format(node=node),
        params={"state": action},
    )

    # Success is strictly an HTTP 200; the message is passed through either way
    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
def node_domain_state(config, node, action):
    """
    Set the domain state (flush/ready) of a node

    API endpoint: POST /api/v1/node/{node}/domain-state
    API arguments: state={action}
    API schema: {"message": "{data}"}
    """
    endpoint = "/node/{node}/domain-state".format(node=node)
    response = call_api(config, "post", endpoint, params={"state": action})

    # Success is strictly an HTTP 200; the message is passed through either way
    succeeded = response.status_code == 200
    return succeeded, response.json().get("message", "")
def view_node_log(config, node, lines=100):
    """
    Fetch the last {lines} log lines of a node from the API (the main CLI
    displays them in a pager)

    API endpoint: GET /node/{node}/log
    API arguments: lines={lines}
    API schema: {"name":"{node}","data":"{node_log}"}
    """
    endpoint = "/node/{node}/log".format(node=node)
    response = call_api(config, "get", endpoint, params={"lines": lines})

    if response.status_code != 200:
        return False, response.json().get("message", "")

    # Trim the returned buffer down to the requested number of lines
    all_lines = response.json()["data"].split("\n")
    return True, "\n".join(all_lines[-lines:])
def follow_node_log(config, node, lines=10):
    """
    Print the last {lines} log lines of a node, then keep polling the API
    and print lines that were not present in the previous poll

    API endpoint: GET /node/{node}/log
    API arguments: lines={lines}
    API schema: {"name":"{nodename}","data":"{node_log}"}
    """
    endpoint = "/node/{node}/log".format(node=node)

    # The initial fetch always asks for 200 lines to match the follow calls;
    # only the last `lines` of them are shown
    response = call_api(config, "get", endpoint, params={"lines": 200})
    if response.status_code != 200:
        return False, response.json().get("message", "")

    node_log = response.json()["data"]
    tail = node_log.split("\n")[-int(lines) :]

    # Print the initial data and begin following
    print("\n".join(tail), end="")
    print("\n", end="")

    while True:
        # Fetch the next window (200 lines per half-second; any more are skipped)
        try:
            response = call_api(config, "get", endpoint, params={"lines": 200})
            new_node_log = response.json()["data"]
        except Exception:
            # Any failure to fetch or decode ends the follow loop
            break

        # Lines from the previous poll, used to filter the new window
        known_lines = set(node_log.split("\n"))
        fresh_lines = [
            line for line in new_node_log.split("\n") if line not in known_lines
        ]

        # The new window becomes the baseline for the next iteration
        node_log = new_node_log

        if fresh_lines:
            print("\n".join(fresh_lines), end="")
            print("\n", end="")

        # Wait half a second
        time.sleep(0.5)

    return True, ""
def node_info(config, node):
    """
    Get information about a single node

    API endpoint: GET /api/v1/node/{node}
    API arguments:
    API schema: {json_data_object}
    """
    response = call_api(config, "get", "/node/{node}".format(node=node))

    if response.status_code != 200:
        return False, response.json().get("message", "")

    payload = response.json()

    # This shouldn't happen, but a non-list payload is returned as-is
    if not isinstance(payload, list):
        return True, payload

    # Anything other than exactly one entry means no exact match
    if len(payload) != 1:
        return False, "Node not found."

    # Return the single instance contained in the list
    return True, payload[0]
def node_list(
    config, limit, target_daemon_state, target_coordinator_state, target_domain_state
):
    """
    Get list information about nodes, optionally filtered by {limit} and by
    daemon, coordinator and domain state

    API endpoint: GET /api/v1/node
    API arguments: limit={limit}
    API schema: [{json_data_object},{json_data_object},etc.]
    """
    # Only filters that were actually supplied are sent to the API
    candidate_filters = (
        ("limit", limit),
        ("daemon_state", target_daemon_state),
        ("coordinator_state", target_coordinator_state),
        ("domain_state", target_domain_state),
    )
    params = {name: value for name, value in candidate_filters if value}

    response = call_api(config, "get", "/node", params=params)

    if response.status_code != 200:
        return False, response.json().get("message", "")
    return True, response.json()
#
# Output display functions
#
def getOutputColours(node_information):
    """
    Pick the ANSI colour prefixes used when displaying a node.

    Returns a 6-tuple of strings: health, daemon state, coordinator state,
    domain state, allocated memory and provisioned memory colours. The two
    memory colours are empty strings unless the value exceeds total memory.
    """
    # Health: integer thresholds, anything else (e.g. "N/A") is blue
    node_health = node_information.get("health", "N/A")
    health_painter = ansiprint.blue
    if isinstance(node_health, int):
        for ceiling, painter in (
            (50, ansiprint.red),
            (90, ansiprint.yellow),
            (100, ansiprint.green),
        ):
            if node_health <= ceiling:
                health_painter = painter
                break
    health_colour = health_painter()

    # Daemon state: "dead" is the only state drawn in bold
    daemon_state = node_information["daemon_state"]
    if daemon_state == "dead":
        daemon_state_colour = ansiprint.red() + ansiprint.bold()
    else:
        daemon_painters = {
            "run": ansiprint.green,
            "stop": ansiprint.red,
            "shutdown": ansiprint.yellow,
            "init": ansiprint.yellow,
        }
        daemon_state_colour = daemon_painters.get(daemon_state, ansiprint.blue)()

    # Coordinator state
    coordinator_painters = {
        "primary": ansiprint.green,
        "secondary": ansiprint.blue,
    }
    coordinator_state_colour = coordinator_painters.get(
        node_information["coordinator_state"], ansiprint.cyan
    )()

    # Domain state
    domain_state_colour = (
        ansiprint.green()
        if node_information["domain_state"] == "ready"
        else ansiprint.blue()
    )

    # Memory figures are highlighted only when they exceed the node total
    memory = node_information["memory"]
    mem_allocated_colour = (
        ansiprint.yellow() if memory["allocated"] > memory["total"] else ""
    )
    mem_provisioned_colour = (
        ansiprint.yellow() if memory["provisioned"] > memory["total"] else ""
    )

    return (
        health_colour,
        daemon_state_colour,
        coordinator_state_colour,
        domain_state_colour,
        mem_allocated_colour,
        mem_provisioned_colour,
    )
def format_info(config, node_information, long_output):
"""
Build the multi-line, ANSI-coloured detail view for a single node.

node_information is the node data dictionary; the keys read here are
"name", "pvc_version", "health", "health_details", "daemon_state",
"coordinator_state", "domain_state", "arch", "os", "kernel",
"domains_count", "vcpu", "load" and "memory". long_output selects the
more detailed view. config is not used in this function.

Returns the formatted lines joined with newlines.
"""
# Resolve the colour prefix for each coloured field up front
(
health_colour,
daemon_state_colour,
coordinator_state_colour,
domain_state_colour,
mem_allocated_colour,
mem_provisioned_colour,
) = getOutputColours(node_information)
# Format a nice output; do this line-by-line then concat the elements at the end
ainformation = []
# Basic information
ainformation.append(
"{}Name:{} {}".format(
ansiprint.purple(),
ansiprint.end(),
node_information["name"],
)
)
ainformation.append(
"{}PVC Version:{} {}".format(
ansiprint.purple(),
ansiprint.end(),
node_information["pvc_version"],
)
)
# Integer health is shown as a percentage; a missing value shows as "N/A"
node_health = node_information.get("health", "N/A")
if isinstance(node_health, int):
node_health_text = f"{node_health}%"
else:
node_health_text = node_health
ainformation.append(
"{}Health:{} {}{}{}".format(
ansiprint.purple(),
ansiprint.end(),
health_colour,
node_health_text,
ansiprint.end(),
)
)
# Long output lists every health plugin message; short output lists only
# plugins reporting a positive health_delta
node_health_details = node_information.get("health_details", [])
if long_output:
node_health_messages = "\n ".join(
[f"{plugin['name']}: {plugin['message']}" for plugin in node_health_details]
)
else:
node_health_messages = "\n ".join(
[
f"{plugin['name']}: {plugin['message']}"
for plugin in node_health_details
if int(plugin.get("health_delta", 0)) > 0
]
)
# The plugin details line is omitted entirely when there is nothing to show
if len(node_health_messages) > 0:
ainformation.append(
"{}Health Plugin Details:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_health_messages
)
)
ainformation.append("")
ainformation.append(
"{}Daemon State:{} {}{}{}".format(
ansiprint.purple(),
ansiprint.end(),
daemon_state_colour,
node_information["daemon_state"],
ansiprint.end(),
)
)
ainformation.append(
"{}Coordinator State:{} {}{}{}".format(
ansiprint.purple(),
ansiprint.end(),
coordinator_state_colour,
node_information["coordinator_state"],
ansiprint.end(),
)
)
ainformation.append(
"{}Domain State:{} {}{}{}".format(
ansiprint.purple(),
ansiprint.end(),
domain_state_colour,
node_information["domain_state"],
ansiprint.end(),
)
)
# NOTE(review): indentation is not visible in this view, so it cannot be
# told here how many of the following appends belong to the long_output
# branch - confirm against the original file before restructuring
if long_output:
ainformation.append("")
ainformation.append(
"{}Architecture:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["arch"]
)
)
ainformation.append(
"{}Operating System:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["os"]
)
)
ainformation.append(
"{}Kernel Version:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["kernel"]
)
)
ainformation.append("")
ainformation.append(
"{}Active VM Count:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["domains_count"]
)
)
ainformation.append(
"{}Host CPUs:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["vcpu"]["total"]
)
)
ainformation.append(
"{}vCPUs:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["vcpu"]["allocated"]
)
)
ainformation.append(
"{}Load:{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["load"]
)
)
ainformation.append(
"{}Total RAM (MiB):{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["memory"]["total"]
)
)
ainformation.append(
"{}Used RAM (MiB):{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["memory"]["used"]
)
)
ainformation.append(
"{}Free RAM (MiB):{} {}".format(
ansiprint.purple(), ansiprint.end(), node_information["memory"]["free"]
)
)
# The two values below carry a colour prefix chosen by getOutputColours
ainformation.append(
"{}Allocated RAM (MiB):{} {}{}{}".format(
ansiprint.purple(),
ansiprint.end(),
mem_allocated_colour,
node_information["memory"]["allocated"],
ansiprint.end(),
)
)
ainformation.append(
"{}Provisioned RAM (MiB):{} {}{}{}".format(
ansiprint.purple(),
ansiprint.end(),
mem_provisioned_colour,
node_information["memory"]["provisioned"],
ansiprint.end(),
)
)
# Join it all together
ainformation.append("")
return "\n".join(ainformation)
def format_list(config, node_list):
    """
    Render a list of node data dictionaries as an aligned, coloured table.

    The output consists of a section header line (Nodes / States /
    Resources / Memory), a column heading line, and one line per node
    sorted by node name. config is not used in this function.

    :param node_list: list of node dictionaries, or the literal string
                      "Node not found." which is returned unchanged
    :return: the table as a single newline-joined string
    """
    if node_list == "Node not found.":
        return node_list

    # Table layout: section title -> (value key, width key, minimum width,
    # column heading) for each column of that section
    sections = (
        (
            "Nodes ",
            (
                ("node_name", "node_name_length", 5, "Name"),
                ("node_pvc_version", "pvc_version_length", 8, "Version"),
                ("node_health", "health_length", 7, "Health"),
            ),
        ),
        (
            "States ",
            (
                ("node_daemon_state", "daemon_state_length", 7, "Daemon"),
                (
                    "node_coordinator_state",
                    "coordinator_state_length",
                    12,
                    "Coordinator",
                ),
                ("node_domain_state", "domain_state_length", 7, "Domain"),
            ),
        ),
        (
            "Resources ",
            (
                ("node_domains_count", "domains_count_length", 4, "VMs"),
                ("node_cpu_count", "cpu_count_length", 6, "vCPUs"),
                ("node_load", "load_length", 5, "Load"),
            ),
        ),
        (
            "Memory (M) ",
            (
                ("node_mem_total", "mem_total_length", 6, "Total"),
                ("node_mem_used", "mem_used_length", 5, "Used"),
                ("node_mem_free", "mem_free_length", 5, "Free"),
                ("node_mem_allocated", "mem_alloc_length", 6, "Alloc"),
                ("node_mem_provisioned", "mem_prov_length", 5, "Prov"),
            ),
        ),
    )
    columns = [column for _title, group in sections for column in group]

    # One template serves both the heading row and the data rows; the heading
    # row simply passes empty colour strings
    row_template = (
        "{bold}{node_name: <{node_name_length}} {node_pvc_version: <{pvc_version_length}} {health_colour}{node_health: <{health_length}}{end_colour} "
        "{daemon_state_colour}{node_daemon_state: <{daemon_state_length}}{end_colour} {coordinator_state_colour}{node_coordinator_state: <{coordinator_state_length}}{end_colour} {domain_state_colour}{node_domain_state: <{domain_state_length}}{end_colour} "
        "{node_domains_count: <{domains_count_length}} {node_cpu_count: <{cpu_count_length}} {node_load: <{load_length}} "
        "{node_mem_total: <{mem_total_length}} {node_mem_used: <{mem_used_length}} {node_mem_free: <{mem_free_length}} {mem_allocated_colour}{node_mem_allocated: <{mem_alloc_length}}{end_colour} {mem_provisioned_colour}{node_mem_provisioned: <{mem_prov_length}}{end_colour}{end_bold}"
    )

    def _row_values(node_information):
        # The values exactly as they are displayed in a data row
        node_health = node_information.get("health", "N/A")
        if isinstance(node_health, int):
            node_health = f"{node_health}%"
        memory = node_information["memory"]
        return {
            "node_name": node_information["name"],
            "node_pvc_version": node_information.get("pvc_version", "N/A"),
            "node_health": node_health,
            "node_daemon_state": node_information["daemon_state"],
            "node_coordinator_state": node_information["coordinator_state"],
            "node_domain_state": node_information["domain_state"],
            "node_domains_count": node_information["domains_count"],
            "node_cpu_count": node_information["vcpu"]["allocated"],
            "node_load": node_information["load"],
            "node_mem_total": memory["total"],
            "node_mem_used": memory["used"],
            "node_mem_free": memory["free"],
            "node_mem_allocated": memory["allocated"],
            "node_mem_provisioned": memory["provisioned"],
        }

    # Determine optimal column widths. Each width is measured from the value
    # that is actually displayed; the vCPUs column used to be measured from
    # "cpu_count" although the cell shows the allocated vCPU count
    widths = {width_key: minimum for _vk, width_key, minimum, _h in columns}
    for node_information in node_list:
        values = _row_values(node_information)
        for value_key, width_key, _minimum, _heading in columns:
            widths[width_key] = max(
                widths[width_key], len(str(values[value_key])) + 1
            )

    def _section_header(title, group):
        # A section spans its columns plus the single spaces between them;
        # the title is extended with dashes up to one short of that span
        span = sum(widths[width_key] for _vk, width_key, _m, _h in group)
        span += len(group) - 1
        return "{: <{}}".format(title + "-" * (span - 1 - len(title)), span)

    bold = ansiprint.bold()
    end = ansiprint.end()

    # Format the string (header)
    node_list_output = [
        "{}{}{}".format(
            bold,
            " ".join(_section_header(title, group) for title, group in sections),
            end,
        )
    ]
    node_list_output.append(
        row_template.format(
            bold=bold,
            end_bold=end,
            health_colour="",
            daemon_state_colour="",
            coordinator_state_colour="",
            domain_state_colour="",
            mem_allocated_colour="",
            mem_provisioned_colour="",
            end_colour="",
            **widths,
            **{value_key: heading for value_key, _wk, _m, heading in columns},
        )
    )

    # Format the string (elements)
    for node_information in sorted(node_list, key=lambda n: n["name"]):
        (
            health_colour,
            daemon_state_colour,
            coordinator_state_colour,
            domain_state_colour,
            mem_allocated_colour,
            mem_provisioned_colour,
        ) = getOutputColours(node_information)
        node_list_output.append(
            row_template.format(
                bold="",
                end_bold="",
                health_colour=health_colour,
                daemon_state_colour=daemon_state_colour,
                coordinator_state_colour=coordinator_state_colour,
                domain_state_colour=domain_state_colour,
                mem_allocated_colour=mem_allocated_colour,
                # Previously the allocated-memory colour was passed here, so
                # over-provisioning was never highlighted in the list
                mem_provisioned_colour=mem_provisioned_colour,
                end_colour=end,
                **widths,
                **_row_values(node_information),
            )
        )

    return "\n".join(node_list_output)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,102 +0,0 @@
#!/usr/bin/env python3
# zkhandler.py - Secure versioned ZooKeeper updates
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2022 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import uuid
# Exists function
def exists(zk_conn, key):
    """Return True if zk_conn.exists() reports a truthy result for key, else False."""
    return bool(zk_conn.exists(key))
# Child list function
def listchildren(zk_conn, key):
    """Return whatever zk_conn.get_children() yields for key."""
    return zk_conn.get_children(key)
# Delete key function
def deletekey(zk_conn, key, recursive=True):
    """Delete key through zk_conn, passing the recursive flag along; returns None."""
    delete = zk_conn.delete
    delete(key, recursive=recursive)
# Data read function
def readdata(zk_conn, key):
    """Return the first element of zk_conn.get(key), decoded as UTF-8 text."""
    raw_value = zk_conn.get(key)[0]
    return raw_value.decode("utf8")
# Data write function
def writedata(zk_conn, kv):
    """
    Write every key/value pair of kv in a single transaction.

    Keys are processed in sorted order. Missing keys are created; existing
    keys are updated and a check for the current version plus one is queued.
    Values are stringified and UTF-8 encoded.

    Returns True if the transaction committed, False if queuing a version
    check raised TypeError or the commit raised any exception.
    """
    transaction = zk_conn.transaction()

    for key in sorted(kv):
        already_present = zk_conn.exists(key)
        payload = str(kv[key]).encode("utf8")

        if not already_present:
            # New key: plain create
            transaction.create(key, payload)
            continue

        # Existing key: update it, then queue a check for the version
        # expected afterwards (current version plus one)
        current_version = zk_conn.get(key)[1].version
        transaction.set_data(key, payload)
        try:
            transaction.check(key, current_version + 1)
        except TypeError:
            print('Zookeeper key "{}" does not match expected version'.format(key))
            return False

    # Commit the transaction
    try:
        transaction.commit()
    except Exception:
        return False
    return True
# Write lock function
def writelock(zk_conn, key):
    """Return zk_conn.WriteLock() for key, identified by a fresh UUID1 string."""
    identifier = str(uuid.uuid1())
    return zk_conn.WriteLock("{}".format(key), identifier)
# Read lock function
def readlock(zk_conn, key):
    """Return zk_conn.ReadLock() for key, identified by a fresh UUID1 string."""
    identifier = str(uuid.uuid1())
    return zk_conn.ReadLock("{}".format(key), identifier)

View File

@@ -37,13 +37,13 @@ from distutils.util import strtobool
from functools import wraps
import pvc.lib.ansiprint as ansiprint
import pvc.lib.cluster as pvc_cluster
import pvc.lib.node as pvc_node
import pvc.lib.vm as pvc_vm
import pvc.lib.network as pvc_network
import pvc.lib.ceph as pvc_ceph
import pvc.lib.provisioner as pvc_provisioner
import pvc.cli_lib.ansiprint as ansiprint
import pvc.cli_lib.cluster as pvc_cluster
import pvc.cli_lib.node as pvc_node
import pvc.cli_lib.vm as pvc_vm
import pvc.cli_lib.network as pvc_network
import pvc.cli_lib.ceph as pvc_ceph
import pvc.cli_lib.provisioner as pvc_provisioner
myhostname = socket.gethostname().split(".")[0]

View File

@@ -3,7 +3,7 @@ from setuptools import setup
setup(
name="pvc",
version="0.9.63",
packages=["pvc.cli", "pvc.lib"],
packages=["pvc", "pvc.cli_lib"],
install_requires=[
"Click",
"PyYAML",
@@ -14,7 +14,7 @@ setup(
],
entry_points={
"console_scripts": [
"pvc = pvc.cli.cli:cli",
"pvc = pvc.pvc:cli",
],
},
)

View File

@@ -127,14 +127,16 @@ def getNodeInformation(zkhandler, node_name):
def secondary_node(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, "ERROR: No node named {} is present in the cluster.".format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
# Ensure node is a coordinator
daemon_mode = zkhandler.read(("node.mode", node))
if daemon_mode == "hypervisor":
return (
False,
"ERROR: Cannot change coordinator state on non-coordinator node {}".format(
'ERROR: Cannot change coordinator mode on non-coordinator node "{}"'.format(
node
),
)
@@ -142,14 +144,14 @@ def secondary_node(zkhandler, node):
# Ensure node is in run daemonstate
daemon_state = zkhandler.read(("node.state.daemon", node))
if daemon_state != "run":
return False, "ERROR: Node {} is not active".format(node)
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.read(("node.state.router", node))
if current_state == "secondary":
return True, "Node {} is already in secondary coordinator state.".format(node)
return True, 'Node "{}" is already in secondary coordinator mode.'.format(node)
retmsg = "Setting node {} in secondary coordinator state.".format(node)
retmsg = "Setting node {} in secondary coordinator mode.".format(node)
zkhandler.write([("base.config.primary_node", "none")])
return True, retmsg
@@ -158,14 +160,16 @@ def secondary_node(zkhandler, node):
def primary_node(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, "ERROR: No node named {} is present in the cluster.".format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
# Ensure node is a coordinator
daemon_mode = zkhandler.read(("node.mode", node))
if daemon_mode == "hypervisor":
return (
False,
"ERROR: Cannot change coordinator state on non-coordinator node {}".format(
'ERROR: Cannot change coordinator mode on non-coordinator node "{}"'.format(
node
),
)
@@ -173,14 +177,14 @@ def primary_node(zkhandler, node):
# Ensure node is in run daemonstate
daemon_state = zkhandler.read(("node.state.daemon", node))
if daemon_state != "run":
return False, "ERROR: Node {} is not active".format(node)
return False, 'ERROR: Node "{}" is not active'.format(node)
# Get current state
current_state = zkhandler.read(("node.state.router", node))
if current_state == "primary":
return True, "Node {} is already in primary coordinator state.".format(node)
return True, 'Node "{}" is already in primary coordinator mode.'.format(node)
retmsg = "Setting node {} in primary coordinator state.".format(node)
retmsg = "Setting node {} in primary coordinator mode.".format(node)
zkhandler.write([("base.config.primary_node", node)])
return True, retmsg
@@ -189,12 +193,14 @@ def primary_node(zkhandler, node):
def flush_node(zkhandler, node, wait=False):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, "ERROR: No node named {} is present in the cluster.".format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
if zkhandler.read(("node.state.domain", node)) == "flushed":
return True, "Node {} is already flushed.".format(node)
return True, "Hypervisor {} is already flushed.".format(node)
retmsg = "Removing node {} from active service.".format(node)
retmsg = "Flushing hypervisor {} of running VMs.".format(node)
# Add the new domain to Zookeeper
zkhandler.write([(("node.state.domain", node), "flush")])
@@ -202,7 +208,7 @@ def flush_node(zkhandler, node, wait=False):
if wait:
while zkhandler.read(("node.state.domain", node)) == "flush":
time.sleep(1)
retmsg = "Removed node {} from active service.".format(node)
retmsg = "Flushed hypervisor {} of running VMs.".format(node)
return True, retmsg
@@ -210,12 +216,14 @@ def flush_node(zkhandler, node, wait=False):
def ready_node(zkhandler, node, wait=False):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, "ERROR: No node named {} is present in the cluster.".format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
if zkhandler.read(("node.state.domain", node)) == "ready":
return True, "Node {} is already ready.".format(node)
return True, "Hypervisor {} is already ready.".format(node)
retmsg = "Restoring node {} to active service.".format(node)
retmsg = "Restoring hypervisor {} to active service.".format(node)
# Add the new domain to Zookeeper
zkhandler.write([(("node.state.domain", node), "unflush")])
@@ -223,7 +231,7 @@ def ready_node(zkhandler, node, wait=False):
if wait:
while zkhandler.read(("node.state.domain", node)) == "unflush":
time.sleep(1)
retmsg = "Restored node {} to active service.".format(node)
retmsg = "Restored hypervisor {} to active service.".format(node)
return True, retmsg
@@ -231,7 +239,9 @@ def ready_node(zkhandler, node, wait=False):
def get_node_log(zkhandler, node, lines=2000):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, "ERROR: No node named {} is present in the cluster.".format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
# Get the data from ZK
node_log = zkhandler.read(("logs.messages", node))
@@ -249,12 +259,14 @@ def get_node_log(zkhandler, node, lines=2000):
def get_info(zkhandler, node):
# Verify node is valid
if not common.verifyNode(zkhandler, node):
return False, "ERROR: No node named {} is present in the cluster.".format(node)
return False, 'ERROR: No node named "{}" is present in the cluster.'.format(
node
)
# Get information about node in a pretty format
node_information = getNodeInformation(zkhandler, node)
if not node_information:
return False, "ERROR: Could not get information about node {}.".format(node)
return False, 'ERROR: Could not get information about node "{}".'.format(node)
return True, node_information

View File

@@ -25,13 +25,13 @@ As part of these trends, Infrastructure-as-a-Service (IaaS) has become a critica
However, the current state of the free and open source virtualization ecosystem is lacking.
At the lower- to middle-end, projects like ProxMox provide an easy way to administer small virtualization clusters, but these projects tend to lack advanced redundancy facilities that are built-in by default. Ganeti, a former Google tool, was long-dead when PVC was initially conceived, but has recently been given new life by the FLOSS community, and was the inspiration for much of PVC's functionality. Harvester is also a newer player in the space, created by Rancher Labs after PVC was established, but its use of custom solutions for everything, especially the storage backend, gives us some pause.
At the lower end, projects like ProxMox provide an easy way to administer small virtualization clusters, but these projects tend to lack advanced redundancy facilities that are built-in by default. While there are some new contenders in this space, such as Harvester, the options are limited and their feature-sets and tool stacks can be cumbersome or unproven.
At the high-end, very large projects like OpenStack and CloudStack provide very advanced functionality, but these projects are sprawling and complicated for Administrators to use, and are very focused on large enterprise deployments, not suitable for smaller clusters and teams.
At the higher end, very large projects like OpenStack and CloudStack provide very advanced functionality, but these projects are sprawling and complicated for Administrators to use, and are very focused on large enterprise deployments, not suitable for smaller clusters and teams.
Finally, proprietary solutions dominate this space. VMWare and Nutanix are the two largest names, with these products providing functionality for both small and large clusters, but proprietary software limits both flexibility and freedom, and the costs associated with these solutions are immense.
PVC aims to bridge the gaps between these categories. Like the larger FLOSS and proprietary projects, PVC can scale up to very large cluster sizes, while remaining usable even for small clusters. Like the smaller FLOSS and proprietary projects, PVC aims to be very simple to use, with a fully programmable API, allowing administrators to get on with more important things. Like the other FLOSS solutions, PVC is free, both as in beer and as in speech, allowing the administrator to inspect, modify, and tailor it to their needs. And finally, PVC is built from the ground up, using standard, well-tested, and well-supported components, to support host-level redundancy at every layer, rather than this being an expensive, optional, or tacked-on feature.
PVC aims to bridge the gaps between these three categories. Like the larger FLOSS and proprietary projects, PVC can scale up to very large cluster sizes, while remaining usable even for small clusters. Like the smaller FLOSS and proprietary projects, PVC aims to be very simple to use, with a fully programmable API, allowing administrators to get on with more important things. Like the other FLOSS solutions, PVC is free, both as in beer and as in speech, allowing the administrator to inspect, modify, and tailor it to their needs. And finally, PVC is built from the ground up to support host-level redundancy at every layer, rather than this being an expensive, optional, or tacked-on feature.
In short, it is a Free Software, scalable, redundant, self-healing, and self-managing private cloud solution designed with administrator simplicity in mind.

View File

@@ -790,19 +790,6 @@ class NodeInstance(object):
self.flush_stopper = False
return
# Wait for a VM in "restart" or "shutdown" state to complete transition
while self.zkhandler.read(("domain.state", dom_uuid)) in [
"restart",
"shutdown",
]:
self.logger.out(
'Waiting 2s for VM state change completion for VM "{}"'.format(
dom_uuid
),
state="i",
)
time.sleep(2)
self.logger.out(
'Selecting target to migrate VM "{}"'.format(dom_uuid), state="i"
)
@@ -819,13 +806,11 @@ class NodeInstance(object):
if target_node is None:
self.logger.out(
'Failed to find migration target for running VM "{}"; shutting down and setting autostart flag'.format(
'Failed to find migration target for VM "{}"; shutting down and setting autostart flag'.format(
dom_uuid
),
state="e",
)
if self.zkhandler.read(("domain.state", dom_uuid)) in ["start"]:
self.zkhandler.write(
[
(("domain.state", dom_uuid), "shutdown"),

View File

@@ -1,54 +1,31 @@
#!/usr/bin/env bash
set -o errexit
if [[ -z ${1} ]]; then
echo "Please specify a cluster to run tests against."
exit 1
fi
test_cluster="${1}"
shift
if [[ ${1} == "--test-dangerously" ]]; then
test_dangerously="y"
else
test_dangerously=""
fi
# Echo the pvc invocation (prefixed with "> ") for the test log, run pvc
# quietly against the cluster named in ${test_cluster} with the caller's
# arguments, then pause for one second.
# NOTE(review): this text is a rendered diff, so the "--connection" and
# "--cluster" pairs below appear to be the two sides of one change rather
# than commands meant to run back to back — confirm against the real file.
_pvc() {
echo "> pvc --connection ${test_cluster} $@"
pvc --quiet --connection ${test_cluster} "$@"
echo "> pvc --cluster ${test_cluster} $@"
pvc --quiet --cluster ${test_cluster} "$@"
sleep 1
}
time_start=$(date +%s)
set -o errexit
pushd $( git rev-parse --show-toplevel ) &>/dev/null
# Cluster tests
_pvc connection list
_pvc connection detail
_pvc cluster maintenance on
_pvc cluster maintenance off
_pvc cluster status
_pvc maintenance on
_pvc maintenance off
backup_tmp=$(mktemp)
_pvc cluster backup --file ${backup_tmp}
if [[ -n ${test_dangerously} ]]; then
# This is dangerous, so don't test it unless option given
_pvc cluster restore --yes --file ${backup_tmp}
fi
_pvc task backup --file ${backup_tmp}
_pvc task restore --yes --file ${backup_tmp}
rm ${backup_tmp} || true
# Provisioner tests
_pvc provisioner profile list test || true
_pvc provisioner template system add --vcpus 1 --vram 1024 --serial --vnc --vnc-bind 0.0.0.0 --node-limit hv1 --node-selector mem --node-autostart --migration-method live system-test || true
_pvc provisioner template network add network-test || true
_pvc provisioner template network vni add network-test 10000 || true
_pvc provisioner template storage add storage-test || true
_pvc provisioner template storage disk add --pool vms --size 8 --filesystem ext4 --mountpoint / storage-test sda || true
_pvc provisioner script add script-test $( find . -name "3-debootstrap.py" ) || true
_pvc provisioner profile add --profile-type provisioner --system-template system-test --network-template network-test --storage-template storage-test --userdata empty --script script-test --script-arg deb_release=bullseye test || true
_pvc provisioner profile list test
_pvc provisioner create --wait testx test
sleep 30
@@ -59,7 +36,7 @@ _pvc vm shutdown --yes --wait testx
_pvc vm start testx
sleep 30
_pvc vm stop --yes testx
_pvc vm disable --yes testx
_pvc vm disable testx
_pvc vm undefine --yes testx
_pvc vm define --target hv3 --tag pvc-test ${vm_tmp}
_pvc vm start testx
@@ -72,21 +49,21 @@ _pvc vm unmigrate --wait testx
sleep 5
_pvc vm move --wait --target hv1 testx
sleep 5
_pvc vm meta testx --limit hv1 --node-selector vms --method live --profile test --no-autostart
_pvc vm meta testx --limit hv1 --selector vms --method live --profile test --no-autostart
_pvc vm tag add testx mytag
_pvc vm tag get testx
_pvc vm list --tag mytag
_pvc vm tag remove testx mytag
_pvc vm network get testx
_pvc vm vcpu set --no-restart testx 4
_pvc vm vcpu set testx 4
_pvc vm vcpu get testx
_pvc vm memory set --no-restart testx 4096
_pvc vm memory set testx 4096
_pvc vm memory get testx
_pvc vm vcpu set --no-restart testx 2
_pvc vm vcpu set testx 2
_pvc vm memory set testx 2048 --restart --yes
sleep 15
sleep 5
_pvc vm list testx
_pvc vm info --format long testx
_pvc vm info --long testx
rm ${vm_tmp} || true
# Node tests
@@ -100,7 +77,6 @@ _pvc node flush --wait hv1
_pvc node ready --wait hv1
_pvc node list hv1
_pvc node info hv1
sleep 15
# Network tests
_pvc network add 10001 --description testing --type managed --domain testing.local --ipnet 10.100.100.0/24 --gateway 10.100.100.1 --dhcp --dhcp-start 10.100.100.100 --dhcp-end 10.100.100.199
@@ -108,7 +84,7 @@ sleep 5
_pvc vm network add --restart --yes testx 10001
sleep 30
_pvc vm network remove --restart --yes testx 10001
sleep 15
sleep 5
_pvc network acl add 10001 --in --description test-acl --order 0 --rule "'ip daddr 10.0.0.0/8 counter'"
_pvc network acl list 10001
@@ -119,34 +95,31 @@ _pvc network dhcp remove --yes 10001 12:34:56:78:90:ab
_pvc network modify --domain test10001.local 10001
_pvc network list
_pvc network info --format long 10001
_pvc network info --long 10001
# Network-VM interaction tests
_pvc vm network add testx 10001 --model virtio --restart --yes
sleep 30
_pvc vm network get testx
_pvc vm network remove testx 10001 --restart --yes
sleep 15
sleep 5
_pvc network remove --yes 10001
# Storage tests
_pvc storage status
_pvc storage util
if [[ -n ${test_dangerously} ]]; then
# This is dangerous, so don't test it unless option given
_pvc storage osd set noout
_pvc storage osd out 0
_pvc storage osd in 0
_pvc storage osd unset noout
fi
_pvc storage osd list
_pvc storage pool add testing 64 --replcfg "copies=3,mincopies=2"
sleep 5
_pvc storage pool list
_pvc storage volume add testing testx 1G
_pvc storage volume resize --yes testing testx 2G
_pvc storage volume rename --yes testing testx testerX
_pvc storage volume resize testing testx 2G
_pvc storage volume rename testing testx testerX
_pvc storage volume clone testing testerX testerY
_pvc storage volume list --pool testing
_pvc storage volume snapshot add testing testerX asnapshotX
@@ -159,7 +132,7 @@ _pvc vm volume add testx --type rbd --disk-id sdh --bus scsi testing/testerY --r
sleep 30
_pvc vm volume get testx
_pvc vm volume remove testx testing/testerY --restart --yes
sleep 15
sleep 5
_pvc storage volume remove --yes testing testerY
_pvc storage volume remove --yes testing testerX
@@ -169,14 +142,6 @@ _pvc storage pool remove --yes testing
_pvc vm stop --yes testx
_pvc vm remove --yes testx
_pvc provisioner profile remove --yes test
_pvc provisioner script remove --yes script-test
_pvc provisioner template system remove --yes system-test
_pvc provisioner template network remove --yes network-test
_pvc provisioner template storage remove --yes storage-test
popd
time_end=$(date +%s)
echo