Initial commit of PVC Bootstrap system

Adds the PVC Bootstrap system, which allows the automated deployment of
one or more PVC clusters.
This commit is contained in:
2021-12-29 22:31:01 -05:00
commit 379262a74f
37 changed files with 4513 additions and 0 deletions

View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
# ansible.py - PVC Cluster Auto-bootstrap Ansible libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import pvcbootstrapd.lib.git as git
import ansible_runner
import tempfile
from time import sleep
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
def run_bootstrap(config, cspec, cluster, nodes):
"""
Run an Ansible bootstrap against a cluster
"""
logger.debug(nodes)
# Construct our temporary INI inventory string
logger.info("Constructing virtual Ansible inventory")
base_yaml = cspec["clusters"][cluster.name]["base_yaml"]
local_domain = base_yaml.get("local_domain")
inventory = [f"""[{cluster.name}]"""]
for node in nodes:
inventory.append(
f"""{node.name}.{local_domain} ansible_host={node.host_ipaddr}"""
)
inventory = "\n".join(inventory)
logger.debug(inventory)
# Waiting 30 seconds to ensure everything is booted an stabilized
logger.info("Waiting 60s before starting Ansible bootstrap.")
sleep(60)
# Run the Ansible playbooks
with tempfile.TemporaryDirectory(prefix="pvc-ansible-bootstrap_") as pdir:
try:
r = ansible_runner.run(
private_data_dir=f"{pdir}",
inventory=inventory,
limit=f"{cluster.name}",
playbook=f"{config['ansible_path']}/pvc.yml",
extravars={
"ansible_ssh_private_key_file": config["ansible_keyfile"],
"bootstrap": "yes",
},
forks=len(nodes),
verbosity=2,
)
logger.info("Final status:")
logger.info("{}: {}".format(r.status, r.rc))
logger.info(r.stats)
if r.rc == 0:
git.commit_repository(config)
git.push_repository(config)
except Exception as e:
logger.warning(f"Error: {e}")

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python3
# dataclasses.py - PVC Cluster Auto-bootstrap dataclasses
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from dataclasses import dataclass
@dataclass
class Cluster:
"""
An instance of a Cluster
"""
id: int
name: str
state: str
@dataclass
class Node:
"""
An instance of a Node
"""
id: int
cluster: str
state: str
name: str
nid: int
bmc_macaddr: str
bmc_iapddr: str
host_macaddr: str
host_ipaddr: str

View File

@ -0,0 +1,267 @@
#!/usr/bin/env python3
# db.py - PVC Cluster Auto-bootstrap database libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import sqlite3
import contextlib
from pvcbootstrapd.lib.dataclasses import Cluster, Node
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
#
# Database functions
#
@contextlib.contextmanager
def dbconn(db_path):
conn = sqlite3.connect(db_path)
conn.execute("PRAGMA foreign_keys = 1")
cur = conn.cursor()
yield cur
conn.commit()
conn.close()
def init_database(config):
db_path = config["database_path"]
if not os.path.isfile(db_path):
print("First run: initializing database.")
# Initializing the database
with dbconn(db_path) as cur:
# Table listing all clusters
cur.execute(
"""CREATE TABLE clusters
(id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT UNIQUE NOT NULL,
state TEXT NOT NULL)"""
)
# Table listing all nodes
# FK: cluster -> clusters.id
cur.execute(
"""CREATE TABLE nodes
(id INTEGER PRIMARY KEY AUTOINCREMENT,
cluster INTEGER NOT NULL,
state TEXT NOT NULL,
name TEXT UNIQUE NOT NULL,
nodeid INTEGER NOT NULL,
bmc_macaddr TEXT NOT NULL,
bmc_ipaddr TEXT NOT NULL,
host_macaddr TEXT NOT NULL,
host_ipaddr TEXT NOT NULL,
CONSTRAINT cluster_col FOREIGN KEY (cluster) REFERENCES clusters(id) ON DELETE CASCADE )"""
)
#
# Cluster functions
#
def get_cluster(config, cid=None, name=None):
if cid is None and name is None:
return None
elif cid is not None:
findfield = "id"
datafield = cid
elif name is not None:
findfield = "name"
datafield = name
with dbconn(config["database_path"]) as cur:
cur.execute(f"""SELECT * FROM clusters WHERE {findfield} = ?""", (datafield,))
rows = cur.fetchall()
if len(rows) > 0:
row = rows[0]
else:
return None
return Cluster(row[0], row[1], row[2])
def add_cluster(config, cspec, name, state):
with dbconn(config["database_path"]) as cur:
cur.execute(
"""INSERT INTO clusters
(name, state)
VALUES
(?, ?)""",
(name, state),
)
logger.info(f"New cluster {name} added, populating bootstrap nodes from cspec")
for bmcmac in cspec["clusters"][name]["cspec_yaml"]["bootstrap"]:
hostname = cspec["clusters"][name]["cspec_yaml"]["bootstrap"][bmcmac]["node"][
"hostname"
]
add_node(
config,
name,
hostname,
int("".join(filter(str.isdigit, hostname))),
"init",
bmcmac,
"",
"",
"",
)
logger.info(f"Added node {hostname}")
return get_cluster(config, name=name)
def update_cluster_state(config, name, state):
with dbconn(config["database_path"]) as cur:
cur.execute(
"""UPDATE clusters
SET state = ?
WHERE name = ?""",
(state, name),
)
return get_cluster(config, name=name)
#
# Node functions
#
def get_node(config, cluster_name, nid=None, name=None, bmc_macaddr=None):
cluster = get_cluster(config, name=cluster_name)
if nid is None and name is None and bmc_macaddr is None:
return None
elif nid is not None:
findfield = "id"
datafield = nid
elif bmc_macaddr is not None:
findfield = "bmc_macaddr"
datafield = bmc_macaddr
elif name is not None:
findfield = "name"
datafield = name
with dbconn(config["database_path"]) as cur:
cur.execute(
f"""SELECT * FROM nodes WHERE {findfield} = ? AND cluster = ?""",
(datafield, cluster.id),
)
rows = cur.fetchall()
if len(rows) > 0:
row = rows[0]
else:
return None
return Node(
row[0], cluster.name, row[2], row[3], row[4], row[5], row[6], row[7], row[8]
)
def get_nodes_in_cluster(config, cluster_name):
cluster = get_cluster(config, name=cluster_name)
with dbconn(config["database_path"]) as cur:
cur.execute("""SELECT * FROM nodes WHERE cluster = ?""", (cluster.id,))
rows = cur.fetchall()
node_list = list()
for row in rows:
node_list.append(
Node(
row[0],
cluster.name,
row[2],
row[3],
row[4],
row[5],
row[6],
row[7],
row[8],
)
)
return node_list
def add_node(
config,
cluster_name,
name,
nodeid,
state,
bmc_macaddr,
bmc_ipaddr,
host_macaddr,
host_ipaddr,
):
cluster = get_cluster(config, name=cluster_name)
with dbconn(config["database_path"]) as cur:
cur.execute(
"""INSERT INTO nodes
(cluster, state, name, nodeid, bmc_macaddr, bmc_ipaddr, host_macaddr, host_ipaddr)
VALUES
(?, ?, ?, ?, ?, ?, ?, ?)""",
(
cluster.id,
state,
name,
nodeid,
bmc_macaddr,
bmc_ipaddr,
host_macaddr,
host_ipaddr,
),
)
return get_node(config, cluster_name, name=name)
def update_node_state(config, cluster_name, name, state):
cluster = get_cluster(config, name=cluster_name)
with dbconn(config["database_path"]) as cur:
cur.execute(
"""UPDATE nodes
SET state = ?
WHERE name = ? AND cluster = ?""",
(state, name, cluster.id),
)
return get_node(config, cluster_name, name=name)
def update_node_addresses(
config, cluster_name, name, bmc_macaddr, bmc_ipaddr, host_macaddr, host_ipaddr
):
cluster = get_cluster(config, name=cluster_name)
with dbconn(config["database_path"]) as cur:
cur.execute(
"""UPDATE nodes
SET bmc_macaddr = ?, bmc_ipaddr = ?, host_macaddr = ?, host_ipaddr = ?
WHERE name = ? AND cluster = ?""",
(bmc_macaddr, bmc_ipaddr, host_macaddr, host_ipaddr, name, cluster.id),
)
return get_node(config, cluster_name, name=name)

View File

@ -0,0 +1,99 @@
#!/usr/bin/env python3
# dnsmasq.py - PVC Cluster Auto-bootstrap DNSMasq instance
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os
import subprocess
import signal
from threading import Thread
class DNSMasq:
"""
Implementes a daemonized instance of DNSMasq for providing DHCP and TFTP services
The DNSMasq instance listens on the configured 'dhcp_address', and instead of a "real"
leases database forwards requests to the 'dnsmasq-lease.py' script. This script will
then hit the pvcbootstrapd '/checkin' API endpoint to perform actions.
TFTP is provided to automate the bootstrap of a node, providing the pvc-installer
over TFTP as well as a seed configuration which is created by the API.
"""
def __init__(self, config):
self.environment = {
"API_URI": f"http://{config['api_address']}:{config['api_port']}/checkin/dnsmasq"
}
self.dnsmasq_cmd = [
"/usr/sbin/dnsmasq",
"--bogus-priv",
"--no-hosts",
"--dhcp-authoritative",
"--filterwin2k",
"--expand-hosts",
"--domain-needed",
f"--domain={config['dhcp_domain']}",
f"--local=/{config['dhcp_domain']}/",
"--log-facility=-",
"--log-dhcp",
"--keep-in-foreground",
f"--dhcp-script={os.getcwd()}/pvcbootstrapd/dnsmasq-lease.py",
"--bind-interfaces",
f"--listen-address={config['dhcp_address']}",
f"--dhcp-option=3,{config['dhcp_gateway']}",
f"--dhcp-range={config['dhcp_lease_start']},{config['dhcp_lease_end']},{config['dhcp_lease_time']}",
"--enable-tftp",
f"--tftp-root={config['tftp_root_path']}/",
# This block of dhcp-match, tag-if, and dhcp-boot statements sets the following TFTP setup:
# If the machine sends client-arch 0, and is not tagged iPXE, load undionly.kpxe (chainload)
# If the machine sends client-arch 7 or 9, and is not tagged iPXE, load ipxe.efi (chainload)
# If the machine sends the iPXE option, load boot.ipxe (iPXE boot configuration)
"--dhcp-match=set:o_bios,option:client-arch,0",
"--dhcp-match=set:o_uefi,option:client-arch,7",
"--dhcp-match=set:o_uefi,option:client-arch,9",
"--dhcp-match=set:ipxe,175",
"--tag-if=set:bios,tag:!ipxe,tag:o_bios",
"--tag-if=set:uefi,tag:!ipxe,tag:o_uefi",
"--dhcp-boot=tag:bios,undionly.kpxe",
"--dhcp-boot=tag:uefi,ipxe.efi",
"--dhcp-boot=tag:ipxe,boot.ipxe",
]
if config["debug"]:
self.dnsmasq_cmd.append("--leasefile-ro")
print(self.dnsmasq_cmd)
self.stdout = subprocess.PIPE
def execute(self):
self.proc = subprocess.Popen(
self.dnsmasq_cmd,
env=self.environment,
)
def start(self):
self.thread = Thread(target=self.execute, args=())
self.thread.start()
def stop(self):
self.proc.send_signal(signal.SIGTERM)
def reload(self):
self.proc.send_signal(signal.SIGHUP)

View File

@ -0,0 +1,213 @@
#!/usr/bin/env python3
# git.py - PVC Cluster Auto-bootstrap Git repository libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os.path
import git
import yaml
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
def init_repository(config):
"""
Clone the Ansible git repository
"""
try:
git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no"
if not os.path.exists(config["ansible_path"]):
print(
f"First run: cloning repository {config['ansible_remote']} branch {config['ansible_branch']} to {config['ansible_path']}"
)
git.Repo.clone_from(
config["ansible_remote"],
config["ansible_path"],
branch=config["ansible_branch"],
env=dict(GIT_SSH_COMMAND=git_ssh_cmd),
)
g = git.cmd.Git(f"{config['ansible_path']}")
g.checkout(config["ansible_branch"])
g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
except Exception as e:
print(f"Error: {e}")
def pull_repository(config):
"""
Pull (with rebase) the Ansible git repository
"""
logger.info(f"Updating local configuration repository {config['ansible_path']}")
try:
git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no"
g = git.cmd.Git(f"{config['ansible_path']}")
g.pull(rebase=True, env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
except Exception as e:
logger.warn(e)
def commit_repository(config):
"""
Commit uncommitted changes to the Ansible git repository
"""
logger.info(
f"Committing changes to local configuration repository {config['ansible_path']}"
)
try:
g = git.cmd.Git(f"{config['ansible_path']}")
g.add("--all")
commit_env = {
"GIT_COMMITTER_NAME": "PVC Bootstrap",
"GIT_COMMITTER_EMAIL": "git@pvcbootstrapd",
}
g.commit(
"-m",
"Automated commit from PVC Bootstrap Ansible subsystem",
author="PVC Bootstrap <git@pvcbootstrapd>",
env=commit_env,
)
except Exception as e:
logger.warn(e)
def push_repository(config):
"""
Push changes to the default remote
"""
logger.info(
f"Pushing changes from local configuration repository {config['ansible_path']}"
)
try:
git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no"
g = git.Repo(f"{config['ansible_path']}")
origin = g.remote(name="origin")
origin.push(env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
except Exception as e:
logger.warn(e)
def load_cspec_yaml(config):
"""
Load the bootstrap group_vars for all known clusters
"""
# Pull down the repository
pull_repository(config)
# Load our clusters file and read the clusters from it
clusters_file = f"{config['ansible_path']}/{config['ansible_clusters_file']}"
logger.info(f"Loading cluster configuration from file '{clusters_file}'")
with open(clusters_file, "r") as clustersfh:
clusters = yaml.load(clustersfh, Loader=yaml.SafeLoader).get("clusters", list())
# Define a base cpec
cspec = {
"bootstrap": dict(),
"hooks": dict(),
"clusters": dict(),
}
# Read each cluster's cspec and update the base cspec
logger.info("Loading per-cluster specifications")
for cluster in clusters:
cspec["clusters"][cluster] = dict()
cspec["clusters"][cluster]["bootstrap_nodes"] = list()
cspec_file = f"{config['ansible_path']}/group_vars/{cluster}/{config['ansible_cspec_files_bootstrap']}"
if os.path.exists(cspec_file):
with open(cspec_file, "r") as cpsecfh:
try:
cspec_yaml = yaml.load(cpsecfh, Loader=yaml.SafeLoader)
except Exception as e:
logger.warn(
f"Failed to load {config['ansible_cspec_files_bootstrap']} for cluster {cluster}: {e}"
)
continue
cspec["clusters"][cluster]["cspec_yaml"] = cspec_yaml
# Convert the MAC address keys to lowercase
# DNSMasq operates with lowercase keys, but often these are written with uppercase.
# Convert them to lowercase to prevent discrepancies later on.
cspec_yaml["bootstrap"] = {
k.lower(): v for k, v in cspec_yaml["bootstrap"].items()
}
# Load in the YAML for the cluster
base_yaml = load_base_yaml(config, cluster)
cspec["clusters"][cluster]["base_yaml"] = base_yaml
pvc_yaml = load_pvc_yaml(config, cluster)
cspec["clusters"][cluster]["pvc_yaml"] = pvc_yaml
# Set per-node values from elsewhere
for node in cspec_yaml["bootstrap"]:
cspec["clusters"][cluster]["bootstrap_nodes"].append(
cspec_yaml["bootstrap"][node]["node"]["hostname"]
)
# Set the cluster value automatically
cspec_yaml["bootstrap"][node]["node"]["cluster"] = cluster
# Set the domain value automatically via base config
cspec_yaml["bootstrap"][node]["node"]["domain"] = base_yaml[
"local_domain"
]
# Set the node FQDN value automatically
cspec_yaml["bootstrap"][node]["node"][
"fqdn"
] = f"{cspec_yaml['bootstrap'][node]['node']['hostname']}.{cspec_yaml['bootstrap'][node]['node']['domain']}"
# Append bootstrap entries to the main dictionary
cspec["bootstrap"] = {**cspec["bootstrap"], **cspec_yaml["bootstrap"]}
# Append hooks to the main dictionary (per-cluster)
if cspec_yaml.get("hooks"):
cspec["hooks"][cluster] = cspec_yaml["hooks"]
logger.info("Finished loading per-cluster specifications")
return cspec
def load_base_yaml(config, cluster):
"""
Load the base.yml group_vars for a cluster
"""
base_file = f"{config['ansible_path']}/group_vars/{cluster}/{config['ansible_cspec_files_base']}"
with open(base_file, "r") as varsfile:
base_yaml = yaml.load(varsfile, Loader=yaml.SafeLoader)
return base_yaml
def load_pvc_yaml(config, cluster):
"""
Load the pvc.yml group_vars for a cluster
"""
pvc_file = f"{config['ansible_path']}/group_vars/{cluster}/{config['ansible_cspec_files_pvc']}"
with open(pvc_file, "r") as varsfile:
pvc_yaml = yaml.load(varsfile, Loader=yaml.SafeLoader)
return pvc_yaml

View File

@ -0,0 +1,316 @@
#!/usr/bin/env python3
# hooks.py - PVC Cluster Auto-bootstrap Hook libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import pvcbootstrapd.lib.db as db
import json
import tempfile
import paramiko
import contextlib
import requests
from re import match
from time import sleep
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
@contextlib.contextmanager
def run_paramiko(config, node_address):
ssh_client = paramiko.SSHClient()
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh_client.connect(
hostname=node_address,
username=config["deploy_username"],
key_filename=config["ansible_keyfile"],
)
yield ssh_client
ssh_client.close()
def run_hook_osddb(config, targets, args):
"""
Add an OSD DB defined by args['disk']
"""
for node in targets:
node_name = node.name
node_address = node.host_ipaddr
device = args["disk"]
logger.info(f"Creating OSD DB on node {node_name} device {device}")
# Using a direct command on the target here is somewhat messy, but avoids many
# complexities of determining a valid API listen address, etc.
pvc_cmd_string = f"pvc storage osd create-db-vg --yes {node_name} {device}"
with run_paramiko(config, node_address) as c:
stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
logger.debug(stdout.readlines())
logger.debug(stderr.readlines())
def run_hook_osd(config, targets, args):
"""
Add an OSD defined by args['disk'] with weight args['weight']
"""
for node in targets:
node_name = node.name
node_address = node.host_ipaddr
device = args["disk"]
weight = args.get("weight", 1)
ext_db_flag = args.get("ext_db", False)
ext_db_ratio = args.get("ext_db_ratio", 0.05)
logger.info(f"Creating OSD on node {node_name} device {device} weight {weight}")
# Using a direct command on the target here is somewhat messy, but avoids many
# complexities of determining a valid API listen address, etc.
pvc_cmd_string = (
f"pvc storage osd add --yes {node_name} {device} --weight {weight}"
)
if ext_db_flag:
pvc_cmd_string = f"{pvc_cmd_string} --ext-db --ext-db-ratio {ext_db_ratio}"
with run_paramiko(config, node_address) as c:
stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
logger.debug(stdout.readlines())
logger.debug(stderr.readlines())
def run_hook_pool(config, targets, args):
"""
Add an pool defined by args['name'] on device tier args['tier']
"""
for node in targets:
node_name = node.name
node_address = node.host_ipaddr
name = args["name"]
pgs = args.get("pgs", "64")
tier = args.get("tier", "default") # Does nothing yet
logger.info(
f"Creating storage pool on node {node_name} name {name} pgs {pgs} tier {tier}"
)
# Using a direct command on the target here is somewhat messy, but avoids many
# complexities of determining a valid API listen address, etc.
pvc_cmd_string = f"pvc storage pool add {name} {pgs}"
with run_paramiko(config, node_address) as c:
stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
logger.debug(stdout.readlines())
logger.debug(stderr.readlines())
# This only runs once on whatever the first node is
break
def run_hook_network(config, targets, args):
"""
Add an network defined by args (many)
"""
for node in targets:
node_name = node.name
node_address = node.host_ipaddr
vni = args["vni"]
description = args["description"]
nettype = args["type"]
mtu = args.get("mtu", None)
pvc_cmd_string = (
f"pvc network add {vni} --description {description} --type {nettype}"
)
if mtu is not None and mtu not in ["auto", "default"]:
pvc_cmd_string = f"{pvc_cmd_string} --mtu {mtu}"
if nettype == "managed":
domain = args["domain"]
pvc_cmd_string = f"{pvc_cmd_string} --domain {domain}"
dns_servers = args.get("dns_servers", [])
for dns_server in dns_servers:
pvc_cmd_string = f"{pvc_cmd_string} --dns-server {dns_server}"
is_ip4 = args["ip4"]
if is_ip4:
ip4_network = args["ip4_network"]
pvc_cmd_string = f"{pvc_cmd_string} --ipnet {ip4_network}"
ip4_gateway = args["ip4_gateway"]
pvc_cmd_string = f"{pvc_cmd_string} --gateway {ip4_gateway}"
ip4_dhcp = args["ip4_dhcp"]
if ip4_dhcp:
pvc_cmd_string = f"{pvc_cmd_string} --dhcp"
ip4_dhcp_start = args["ip4_dhcp_start"]
ip4_dhcp_end = args["ip4_dhcp_end"]
pvc_cmd_string = f"{pvc_cmd_string} --dhcp-start {ip4_dhcp_start} --dhcp-end {ip4_dhcp_end}"
else:
pvc_cmd_string = f"{pvc_cmd_string} --no-dhcp"
is_ip6 = args["ip6"]
if is_ip6:
ip6_network = args["ip6_network"]
pvc_cmd_string = f"{pvc_cmd_string} --ipnet6 {ip6_network}"
ip6_gateway = args["ip6_gateway"]
pvc_cmd_string = f"{pvc_cmd_string} --gateway6 {ip6_gateway}"
logger.info(f"Creating network on node {node_name} VNI {vni} type {nettype}")
with run_paramiko(config, node_address) as c:
stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
logger.debug(stdout.readlines())
logger.debug(stderr.readlines())
# This only runs once on whatever the first node is
break
def run_hook_script(config, targets, args):
"""
Run a script on the targets
"""
for node in targets:
node_name = node.name
node_address = node.host_ipaddr
script = args.get("script", None)
source = args.get("source", None)
path = args.get("path", None)
logger.info(f"Running script on node {node_name}")
with run_paramiko(config, node_address) as c:
if script is not None:
remote_path = "/tmp/pvcbootstrapd.hook"
with tempfile.NamedTemporaryFile(mode="w") as tf:
tf.write(script)
tf.seek(0)
# Send the file to the remote system
tc = c.open_sftp()
tc.put(tf.name, remote_path)
tc.chmod(remote_path, 0o755)
tc.close()
elif source == "local":
if not match(r"^/", path):
path = config["ansible_path"] + "/" + path
remote_path = "/tmp/pvcbootstrapd.hook"
if path is None:
continue
tc = c.open_sftp()
tc.put(path, remote_path)
tc.chmod(remote_path, 0o755)
tc.close()
elif source == "remote":
remote_path = path
stdin, stdout, stderr = c.exec_command(remote_path)
logger.debug(stdout.readlines())
logger.debug(stderr.readlines())
def run_hook_webhook(config, targets, args):
"""
Send an HTTP requests (no targets)
"""
logger.info(f"Running webhook against {args['uri']}")
# Get the body data
data = json.dumps(args["body"])
headers = {"content-type": "application/json"}
# Craft up a Requests endpoint set for this
requests_actions = {
"get": requests.get,
"post": requests.post,
"put": requests.put,
"patch": requests.patch,
"delete": requests.delete,
"options": requests.options,
}
action = args["action"]
result = requests_actions[action](args["uri"], headers=headers, data=data)
logger.info(f"Result: {result}")
hook_functions = {
"osddb": run_hook_osddb,
"osd": run_hook_osd,
"pool": run_hook_pool,
"network": run_hook_network,
"script": run_hook_script,
"webhook": run_hook_webhook,
}
def run_hooks(config, cspec, cluster, nodes):
"""
Run an Ansible bootstrap against a cluster
"""
# Waiting 30 seconds to ensure everything is booted an stabilized
logger.info("Waiting 300s before starting hook run.")
sleep(300)
cluster_hooks = cspec["hooks"][cluster.name]
cluster_nodes = db.get_nodes_in_cluster(config, cluster.name)
for hook in cluster_hooks:
hook_target = hook.get("target", "all")
hook_name = hook.get("name")
logger.info(f"Running hook on {hook_target}: {hook_name}")
if "all" in hook_target:
target_nodes = cluster_nodes
else:
target_nodes = [node for node in cluster_nodes if node.name in hook_target]
hook_type = hook.get("type")
hook_args = hook.get("args")
if hook_type is None or hook_args is None:
logger.warning("Invalid hook: missing required configuration")
continue
# Run the hook function
try:
hook_functions[hook_type](config, target_nodes, hook_args)
except Exception as e:
logger.warning(f"Error running hook: {e}")
# Wait 5s between hooks
sleep(5)
# Restart nodes to complete setup
hook_functions['script'](config, cluster_nodes, {'script': '#!/usr/bin/env bash\necho bootstrapped | sudo tee /etc/pvc-install.hooks\nsudo reboot'})

View File

@ -0,0 +1,86 @@
#!/usr/bin/env python3
# host.py - PVC Cluster Auto-bootstrap host libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from celery.utils.log import get_task_logger
import pvcbootstrapd.lib.db as db
logger = get_task_logger(__name__)
def installer_init(config, cspec, data):
bmc_macaddr = data["bmc_macaddr"]
bmc_ipaddr = data["bmc_ipaddr"]
host_macaddr = data["host_macaddr"]
host_ipaddr = data["host_ipaddr"]
cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
cspec_hostname = cspec["bootstrap"][bmc_macaddr]["node"]["hostname"]
cluster = db.get_cluster(config, name=cspec_cluster)
if cluster is None:
cluster = db.add_cluster(config, cspec, cspec_cluster, "provisioning")
logger.debug(cluster)
db.update_node_addresses(
config,
cspec_cluster,
cspec_hostname,
bmc_macaddr,
bmc_ipaddr,
host_macaddr,
host_ipaddr,
)
db.update_node_state(config, cspec_cluster, cspec_hostname, "installing")
node = db.get_node(config, cspec_cluster, name=cspec_hostname)
logger.debug(node)
def installer_complete(config, cspec, data):
bmc_macaddr = data["bmc_macaddr"]
cspec_hostname = cspec["bootstrap"][bmc_macaddr]["node"]["hostname"]
cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
db.update_node_state(config, cspec_cluster, cspec_hostname, "installed")
node = db.get_node(config, cspec_cluster, name=cspec_hostname)
logger.debug(node)
def set_boot_state(config, cspec, data, state):
bmc_macaddr = data["bmc_macaddr"]
bmc_ipaddr = data["bmc_ipaddr"]
host_macaddr = data["host_macaddr"]
host_ipaddr = data["host_ipaddr"]
cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
cspec_hostname = cspec["bootstrap"][bmc_macaddr]["node"]["hostname"]
db.update_node_addresses(
config,
cspec_cluster,
cspec_hostname,
bmc_macaddr,
bmc_ipaddr,
host_macaddr,
host_ipaddr,
)
db.update_node_state(config, cspec_cluster, cspec_hostname, state)
node = db.get_node(config, cspec_cluster, name=cspec_hostname)
logger.debug(node)

View File

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# lib.py - PVC Cluster Auto-bootstrap libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
from jinja2 import Template
#
# Worker Functions - PXE/Installer Per-host Templates
#
def add_pxe(config, cspec_node, host_macaddr):
# Generate a per-client iPXE configuration for this host
destination_filename = (
f"{config['tftp_host_path']}/mac-{host_macaddr.replace(':', '')}.ipxe"
)
template_filename = f"{config['tftp_root_path']}/host-ipxe.j2"
with open(template_filename, "r") as tfh:
template = Template(tfh.read())
imgargs_host_list = cspec_node.get("config", {}).get("kernel_options")
if imgargs_host_list is not None:
imgargs_host = " ".join(imgargs_host_list)
else:
imgargs_host = None
rendered = template.render(imgargs_host=imgargs_host)
with open(destination_filename, "w") as dfh:
dfh.write(rendered)
dfh.write("\n")
def add_preseed(config, cspec_node, host_macaddr, system_drive_target):
# Generate a per-client Installer configuration for this host
destination_filename = (
f"{config['tftp_host_path']}/mac-{host_macaddr.replace(':', '')}.preseed"
)
template_filename = f"{config['tftp_root_path']}/host-preseed.j2"
with open(template_filename, "r") as tfh:
template = Template(tfh.read())
add_packages_list = cspec_node.get("config", {}).get("packages")
if add_packages_list is not None:
add_packages = ",".join(add_packages_list)
else:
add_packages = None
# We use the dhcp_address here to allow the listen_address to be 0.0.0.0
rendered = template.render(
debrelease=cspec_node.get("config", {}).get("release"),
debmirror=cspec_node.get("config", {}).get("mirror"),
addpkglist=add_packages,
filesystem=cspec_node.get("config", {}).get("filesystem"),
skip_blockcheck=False,
fqdn=cspec_node["node"]["fqdn"],
target_disk=system_drive_target,
pvcbootstrapd_checkin_uri=f"http://{config['dhcp_address']}:{config['api_port']}/checkin/host",
)
with open(destination_filename, "w") as dfh:
dfh.write(rendered)
dfh.write("\n")

View File

@ -0,0 +1,151 @@
#!/usr/bin/env python3
# lib.py - PVC Cluster Auto-bootstrap libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import pvcbootstrapd.lib.db as db
import pvcbootstrapd.lib.git as git
import pvcbootstrapd.lib.redfish as redfish
import pvcbootstrapd.lib.host as host
import pvcbootstrapd.lib.ansible as ansible
import pvcbootstrapd.lib.hooks as hooks
from time import sleep
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
#
# Worker Functions - Checkins (Celery root tasks)
#
def dnsmasq_checkin(config, data):
"""
Handle checkins from DNSMasq
"""
logger.debug(f"data = {data}")
# This is an add event; what we do depends on some stuff
if data["action"] in ["add"]:
logger.info(
f"Receiving 'add' checkin from DNSMasq for MAC address '{data['macaddr']}'"
)
cspec = git.load_cspec_yaml(config)
is_in_bootstrap_map = True if data["macaddr"] in cspec["bootstrap"] else False
if is_in_bootstrap_map:
if (
cspec["bootstrap"][data["macaddr"]]["bmc"].get("redfish", None)
is not None
):
if cspec["bootstrap"][data["macaddr"]]["bmc"]["redfish"]:
is_redfish = True
else:
is_redfish = False
else:
is_redfish = redfish.check_redfish(config, data)
logger.info(f"Is device '{data['macaddr']}' Redfish capable? {is_redfish}")
if is_redfish:
redfish.redfish_init(config, cspec, data)
else:
logger.warn(f"Device '{data['macaddr']}' not in bootstrap map; ignoring.")
return
# This is a tftp event; a node installer has booted
if data["action"] in ["tftp"]:
logger.info(
f"Receiving 'tftp' checkin from DNSMasq for IP address '{data['destaddr']}'"
)
return
def host_checkin(config, data):
"""
Handle checkins from the PVC node
"""
logger.info(f"Registering checkin for host {data['hostname']}")
logger.debug(f"data = {data}")
cspec = git.load_cspec_yaml(config)
bmc_macaddr = data["bmc_macaddr"]
cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
if data["action"] in ["install-start"]:
# Node install has started
logger.info(f"Registering install start for host {data['hostname']}")
host.installer_init(config, cspec, data)
elif data["action"] in ["install-complete"]:
# Node install has finished
logger.info(f"Registering install complete for host {data['hostname']}")
host.installer_complete(config, cspec, data)
elif data["action"] in ["system-boot_initial"]:
# Node has booted for the first time and can begin Ansible runs once all nodes up
logger.info(f"Registering first boot for host {data['hostname']}")
target_state = "booted-initial"
host.set_boot_state(config, cspec, data, target_state)
sleep(1)
all_nodes = db.get_nodes_in_cluster(config, cspec_cluster)
ready_nodes = [node for node in all_nodes if node.state == target_state]
# Continue once all nodes are in the booted-initial state
logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
if len(ready_nodes) >= len(all_nodes):
cluster = db.update_cluster_state(config, cspec_cluster, "ansible-running")
ansible.run_bootstrap(config, cspec, cluster, ready_nodes)
elif data["action"] in ["system-boot_configured"]:
# Node has been booted after Ansible run and can begin hook runs
logger.info(f"Registering post-Ansible boot for host {data['hostname']}")
target_state = "booted-configured"
host.set_boot_state(config, cspec, data, target_state)
sleep(1)
all_nodes = db.get_nodes_in_cluster(config, cspec_cluster)
ready_nodes = [node for node in all_nodes if node.state == target_state]
# Continue once all nodes are in the booted-configured state
logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
if len(ready_nodes) >= len(all_nodes):
cluster = db.update_cluster_state(config, cspec_cluster, "hooks-running")
hooks.run_hooks(config, cspec, cluster, ready_nodes)
elif data["action"] in ["system-boot_completed"]:
# Node has been fully configured and can be shut down for the final time
logger.info(f"Registering post-hooks boot for host {data['hostname']}")
target_state = "booted-completed"
host.set_boot_state(config, cspec, data, target_state)
sleep(1)
all_nodes = db.get_nodes_in_cluster(config, cspec_cluster)
ready_nodes = [node for node in all_nodes if node.state == target_state]
logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
if len(ready_nodes) >= len(all_nodes):
cluster = db.update_cluster_state(config, cspec_cluster, "completed")
# Hosts will now power down ready for real activation in production

View File

@ -0,0 +1,835 @@
#!/usr/bin/env python3
# redfish.py - PVC Cluster Auto-bootstrap Redfish libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
# Refs:
# https://downloads.dell.com/manuals/all-products/esuprt_software/esuprt_it_ops_datcentr_mgmt/dell-management-solution-resources_white-papers11_en-us.pdf
# https://downloads.dell.com/solutions/dell-management-solution-resources/RESTfulSerConfig-using-iDRAC-REST%20API%28DTC%20copy%29.pdf
import requests
import urllib3
import json
import re
import math
from time import sleep
from celery.utils.log import get_task_logger
import pvcbootstrapd.lib.installer as installer
import pvcbootstrapd.lib.db as db
logger = get_task_logger(__name__)
#
# Helper Classes
#
class AuthenticationException(Exception):
def __init__(self, error=None, response=None):
if error is not None:
self.short_message = error
else:
self.short_message = "Generic authentication failure"
if response is not None:
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
if rinfo.get("Message") is not None:
self.full_message = rinfo["Message"]
self.res_message = rinfo["Resolution"]
self.severity = rinfo["Severity"]
self.message_id = rinfo["MessageId"]
else:
self.full_message = ""
self.res_message = ""
self.severity = "Fatal"
self.message_id = rinfo["MessageId"]
self.status_code = response.status_code
else:
self.status_code = None
def __str__(self):
if self.status_code is not None:
message = f"{self.short_message}: {self.full_message} {self.res_message} (HTTP Code: {self.status_code}, Severity: {self.severity}, ID: {self.message_id})"
else:
message = f"{self.short_message}"
return str(message)
class RedfishSession:
def __init__(self, host, username, password):
# Disable urllib3 warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Perform login
login_payload = {"UserName": username, "Password": password}
login_uri = f"{host}/redfish/v1/Sessions"
login_headers = {"content-type": "application/json"}
self.host = None
login_response = None
tries = 1
max_tries = 25
while tries < max_tries:
logger.info(f"Trying to log in to Redfish ({tries}/{max_tries - 1})...")
try:
login_response = requests.post(
login_uri,
data=json.dumps(login_payload),
headers=login_headers,
verify=False,
timeout=5,
)
break
except Exception:
sleep(2)
tries += 1
if login_response is None:
logger.error("Failed to log in to Redfish")
return
if login_response.status_code not in [200, 201]:
raise AuthenticationException("Login failed", response=login_response)
logger.info(f"Logged in to Redfish at {host} successfully")
self.host = host
self.token = login_response.headers.get("X-Auth-Token")
self.headers = {"content-type": "application/json", "x-auth-token": self.token}
logout_uri = login_response.headers.get("Location")
if re.match(r"^/", logout_uri):
self.logout_uri = f"{host}{logout_uri}"
else:
self.logout_uri = logout_uri
def __del__(self):
if self.host is None:
return
logout_headers = {
"Content-Type": "application/json",
"X-Auth-Token": self.token,
}
logout_response = requests.delete(
self.logout_uri, headers=logout_headers, verify=False, timeout=15
)
if logout_response.status_code not in [200, 201]:
raise AuthenticationException("Logout failed", response=logout_response)
logger.info(f"Logged out of Redfish at {self.host} successfully")
def get(self, uri):
url = f"{self.host}{uri}"
response = requests.get(url, headers=self.headers, verify=False)
if response.status_code in [200, 201]:
return response.json()
else:
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
if rinfo.get("Message") is not None:
message = f"{rinfo['Message']} {rinfo['Resolution']}"
severity = rinfo["Severity"]
message_id = rinfo["MessageId"]
else:
message = rinfo
severity = "Error"
message_id = "N/A"
logger.warn(f"! Error: GET request to {url} failed")
logger.warn(
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
)
logger.warn(f"! Details: {message}")
return None
def delete(self, uri):
url = f"{self.host}{uri}"
response = requests.delete(url, headers=self.headers, verify=False)
if response.status_code in [200, 201]:
return response.json()
else:
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
if rinfo.get("Message") is not None:
message = f"{rinfo['Message']} {rinfo['Resolution']}"
severity = rinfo["Severity"]
message_id = rinfo["MessageId"]
else:
message = rinfo
severity = "Error"
message_id = "N/A"
logger.warn(f"! Error: DELETE request to {url} failed")
logger.warn(
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
)
logger.warn(f"! Details: {message}")
return None
def post(self, uri, data):
url = f"{self.host}{uri}"
payload = json.dumps(data)
response = requests.post(url, data=payload, headers=self.headers, verify=False)
if response.status_code in [200, 201]:
return response.json()
else:
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
if rinfo.get("Message") is not None:
message = f"{rinfo['Message']} {rinfo['Resolution']}"
severity = rinfo["Severity"]
message_id = rinfo["MessageId"]
else:
message = rinfo
severity = "Error"
message_id = "N/A"
logger.warn(f"! Error: POST request to {url} failed")
logger.warn(
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
)
logger.warn(f"! Details: {message}")
return None
def put(self, uri, data):
url = f"{self.host}{uri}"
payload = json.dumps(data)
response = requests.put(url, data=payload, headers=self.headers, verify=False)
if response.status_code in [200, 201]:
return response.json()
else:
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
if rinfo.get("Message") is not None:
message = f"{rinfo['Message']} {rinfo['Resolution']}"
severity = rinfo["Severity"]
message_id = rinfo["MessageId"]
else:
message = rinfo
severity = "Error"
message_id = "N/A"
logger.warn(f"! Error: PUT request to {url} failed")
logger.warn(
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
)
logger.warn(f"! Details: {message}")
return None
def patch(self, uri, data):
url = f"{self.host}{uri}"
payload = json.dumps(data)
response = requests.patch(url, data=payload, headers=self.headers, verify=False)
if response.status_code in [200, 201]:
return response.json()
else:
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
if rinfo.get("Message") is not None:
message = f"{rinfo['Message']} {rinfo['Resolution']}"
severity = rinfo["Severity"]
message_id = rinfo["MessageId"]
else:
message = rinfo
severity = "Error"
message_id = "N/A"
logger.warn(f"! Error: PATCH request to {url} failed")
logger.warn(
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
)
logger.warn(f"! Details: {message}")
return None
#
# Helper functions
#
def format_bytes_tohuman(databytes):
"""
Format a string of bytes into a human-readable value (using base-1000)
"""
# Matrix of human-to-byte values
byte_unit_matrix = {
"B": 1,
"KB": 1000,
"MB": 1000 * 1000,
"GB": 1000 * 1000 * 1000,
"TB": 1000 * 1000 * 1000 * 1000,
"PB": 1000 * 1000 * 1000 * 1000 * 1000,
"EB": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
}
datahuman = ""
for unit in sorted(byte_unit_matrix, key=byte_unit_matrix.get, reverse=True):
if unit in ["TB", "PB", "EB"]:
# Handle the situation where we might want to round to integer values
# for some entries (2TB) but not others (e.g. 1.92TB). We round if the
# result is within +/- 2% of the integer value, otherwise we use two
# decimal places.
new_bytes = databytes / byte_unit_matrix[unit]
new_bytes_plustwopct = new_bytes * 1.02
new_bytes_minustwopct = new_bytes * 0.98
cieled_bytes = int(math.ceil(databytes / byte_unit_matrix[unit]))
rounded_bytes = round(databytes / byte_unit_matrix[unit], 2)
if (
cieled_bytes > new_bytes_minustwopct
and cieled_bytes < new_bytes_plustwopct
):
new_bytes = cieled_bytes
else:
new_bytes = rounded_bytes
# Round up if 5 or more digits
if new_bytes > 999:
# We can jump down another level
continue
else:
# We're at the end, display with this size
datahuman = "{}{}".format(new_bytes, unit)
return datahuman
def get_system_drive_target(session, cspec_node, storage_root):
"""
Determine the system drive target for the installer
"""
# Handle an invalid >2 number of system disks, use only first 2
if len(cspec_node["config"]["system_disks"]) > 2:
cspec_drives = cspec_node["config"]["system_disks"][0:2]
else:
cspec_drives = cspec_node["config"]["system_disks"]
# If we have no storage root, we just return the first entry from
# the cpsec_drives as-is and hope the administrator has the right
# format here.
if storage_root is None:
return cspec_drives[0]
# We proceed with Redfish configuration to determine the disks
else:
storage_detail = session.get(storage_root)
# Grab a full list of drives
drive_list = list()
for storage_member in storage_detail["Members"]:
storage_member_root = storage_member["@odata.id"]
storage_member_detail = session.get(storage_member_root)
for drive in storage_member_detail["Drives"]:
drive_root = drive["@odata.id"]
drive_detail = session.get(drive_root)
drive_list.append(drive_detail)
system_drives = list()
# Iterate through each drive and include those that match
for cspec_drive in cspec_drives:
if re.match(r"^\/dev", cspec_drive) or re.match(r"^detect:", cspec_drive):
# We only match the first drive that has these conditions for use in the preseed config
logger.info(
"Found a drive with a 'detect:' string or Linux '/dev' path, using it for bootstrap."
)
return cspec_drive
# Match any chassis-ID spec drives
for drive in drive_list:
# Like "Disk.Bay.2:Enclosure.Internal.0-1:RAID.Integrated.1-1"
drive_name = drive["Id"].split(":")[0]
# Craft up the cspec version of this
cspec_drive_name = f"Drive.Bay.{cspec_drive}"
if drive_name == cspec_drive_name:
system_drives.append(drive)
# We found a single drive, so determine its actual detect string
if len(system_drives) == 1:
logger.info(
"Found a single drive matching the requested chassis ID, using it as the system disk."
)
# Get the model's first word
drive_model = system_drives[0].get("Model", "INVALID").split()[0]
# Get and convert the size in bytes value to human
drive_size_bytes = system_drives[0].get("CapacityBytes", 0)
drive_size_human = format_bytes_tohuman(drive_size_bytes)
# Get the drive ID out of all the valid entries
# How this works is that, for each non-array disk, we must find what position our exact disk is
# So for example, say we want disk 3 out of 4, and all 4 are the same size and model and not in
# another (RAID) volume. This will give us an index of 2. Then in the installer this will match
# the 3rd list entry from "lsscsi". This is probably an unneccessary hack, since people will
# probably just give the first disk if they want one, or 2 disks if they want a RAID-1, but this
# is here just in case
idx = 0
for drive in drive_list:
list_drive_model = drive.get("Model", "INVALID").split()[0]
list_drive_size_bytes = drive.get("CapacityBytes", 0)
list_drive_in_array = (
False
if drive.get("Links", {})
.get("Volumes", [""])[0]
.get("@odata.id")
.split("/")[-1]
== drive.get("Id")
else True
)
if (
drive_model == list_drive_model
and drive_size_bytes == list_drive_size_bytes
and not list_drive_in_array
):
index = idx
idx += 1
drive_id = index
# Create the target string
system_drive_target = f"detect:{drive_model}:{drive_size_human}:{drive_id}"
# We found two drives, so create a RAID-1 array then determine the volume's detect string
elif len(system_drives) == 2:
logger.info(
"Found two drives matching the requested chassis IDs, creating a RAID-1 and using it as the system disk."
)
drive_one = system_drives[0]
drive_one_id = drive_one.get("Id", "INVALID")
drive_one_path = drive_one.get("@odata.id", "INVALID")
drive_one_controller = drive_one_id.split(":")[-1]
drive_two = system_drives[1]
drive_two_id = drive_two.get("Id", "INVALID")
drive_two_path = drive_two.get("@odata.id", "INVALID")
drive_two_controller = drive_two_id.split(":")[-1]
# Determine that the drives are on the same controller
if drive_one_controller != drive_two_controller:
logger.error(
"Two drives are not on the same controller; this should not happen"
)
return None
# Get the controller details
controller_root = f"{storage_root}/{drive_one_controller}"
controller_detail = session.get(controller_root)
# Get the name of the controller (for crafting the detect string)
controller_name = controller_detail.get("Name", "INVALID").split()[0]
# Get the volume root for the controller
controller_volume_root = controller_detail.get("Volumes", {}).get(
"@odata.id"
)
# Get the pre-creation list of volumes on the controller
controller_volumes_pre = [
volume["@odata.id"]
for volume in session.get(controller_volume_root).get("Members", [])
]
# Create the RAID-1 volume
payload = {
"VolumeType": "Mirrored",
"Drives": [
{"@odata.id": drive_one_path},
{"@odata.id": drive_two_path},
],
}
session.post(controller_volume_root, payload)
# Wait for the volume to be created
new_volume_list = []
while len(new_volume_list) < 1:
sleep(5)
controller_volumes_post = [
volume["@odata.id"]
for volume in session.get(controller_volume_root).get("Members", [])
]
new_volume_list = list(
set(controller_volumes_post).difference(controller_volumes_pre)
)
new_volume_root = new_volume_list[0]
# Get the IDX of the volume out of any others
volume_id = 0
for idx, volume in enumerate(controller_volumes_post):
if volume == new_volume_root:
volume_id = idx
break
# Get and convert the size in bytes value to human
volume_detail = session.get(new_volume_root)
volume_size_bytes = volume_detail.get("CapacityBytes", 0)
volume_size_human = format_bytes_tohuman(volume_size_bytes)
# Create the target string
system_drive_target = (
f"detect:{controller_name}:{volume_size_human}:{volume_id}"
)
# We found too few or too many drives, error
else:
system_drive_target = None
return system_drive_target
#
# Redfish Task functions
#
def set_indicator_state(session, system_root, redfish_vendor, state):
"""
Set the system indicator LED to the desired state (on/off)
"""
state_values_write = {
"Dell": {
"on": "Blinking",
"off": "Off",
},
"default": {
"on": "Lit",
"off": "Off",
},
}
state_values_read = {
"default": {
"on": "Lit",
"off": "Off",
},
}
try:
# Allow vendor-specific overrides
if redfish_vendor not in state_values_write:
redfish_vendor = "default"
# Allow nice names ("on"/"off")
if state in state_values_write[redfish_vendor]:
state = state_values_write[redfish_vendor][state]
# Get current state
system_detail = session.get(system_root)
current_state = system_detail["IndicatorLED"]
except KeyError:
return False
try:
state_read = state
# Allow vendor-specific overrides
if redfish_vendor not in state_values_read:
redfish_vendor = "default"
# Allow nice names ("on"/"off")
if state_read in state_values_read[redfish_vendor]:
state_read = state_values_read[redfish_vendor][state]
if state_read == current_state:
return False
except KeyError:
return False
session.patch(system_root, {"IndicatorLED": state})
return True
def set_power_state(session, system_root, redfish_vendor, state):
"""
Set the system power state to the desired state
"""
state_values = {
"default": {
"on": "On",
"off": "ForceOff",
},
}
try:
# Allow vendor-specific overrides
if redfish_vendor not in state_values:
redfish_vendor = "default"
# Allow nice names ("on"/"off")
if state in state_values[redfish_vendor]:
state = state_values[redfish_vendor][state]
# Get current state, target URI, and allowable values
system_detail = session.get(system_root)
current_state = system_detail["PowerState"]
power_root = system_detail["Actions"]["#ComputerSystem.Reset"]["target"]
power_choices = system_detail["Actions"]["#ComputerSystem.Reset"][
"ResetType@Redfish.AllowableValues"
]
except KeyError:
return False
# Remap some namings so we can check the current state against the target state
if state in ["ForceOff"]:
target_state = "Off"
else:
target_state = state
if target_state == current_state:
return False
if state not in power_choices:
return False
session.post(power_root, {"ResetType": state})
return True
def set_boot_override(session, system_root, redfish_vendor, target):
"""
Set the system boot override to the desired target
"""
try:
system_detail = session.get(system_root)
boot_targets = system_detail["Boot"]["BootSourceOverrideSupported"]
except KeyError:
return False
if target not in boot_targets:
return False
session.patch(system_root, {"Boot": {"BootSourceOverrideTarget": target}})
return True
def check_redfish(config, data):
"""
Validate that a BMC is Redfish-capable
"""
headers = {"Content-Type": "application/json"}
logger.info("Checking for Redfish response...")
count = 0
while True:
try:
count += 1
if count > 30:
retcode = 500
logger.warn("Aborted after 300s; device too slow or not booting.")
break
resp = requests.get(
f"https://{data['ipaddr']}/redfish/v1",
headers=headers,
verify=False,
timeout=10,
)
retcode = resp.retcode
break
except Exception:
logger.info(f"Attempt {count}...")
continue
if retcode == 200:
return True
else:
return False
#
# Entry function
#
def redfish_init(config, cspec, data):
"""
Initialize a new node with Redfish
"""
bmc_ipaddr = data["ipaddr"]
bmc_macaddr = data["macaddr"]
bmc_host = f"https://{bmc_ipaddr}"
cspec_node = cspec["bootstrap"][bmc_macaddr]
logger.debug(f"cspec_node = {cspec_node}")
bmc_username = cspec_node["bmc"]["username"]
bmc_password = cspec_node["bmc"]["password"]
host_macaddr = ""
host_ipaddr = ""
cspec_cluster = cspec_node["node"]["cluster"]
cspec_hostname = cspec_node["node"]["hostname"]
cluster = db.get_cluster(config, name=cspec_cluster)
if cluster is None:
cluster = db.add_cluster(config, cspec, cspec_cluster, "provisioning")
logger.debug(cluster)
db.update_node_state(config, cspec_cluster, cspec_hostname, "characterzing")
db.update_node_addresses(
config,
cspec_cluster,
cspec_hostname,
bmc_macaddr,
bmc_ipaddr,
host_macaddr,
host_ipaddr,
)
node = db.get_node(config, cspec_cluster, name=cspec_hostname)
logger.debug(node)
# Create the session and log in
session = RedfishSession(bmc_host, bmc_username, bmc_password)
if session.host is None:
logger.info("Aborting Redfish configuration; reboot BMC to try again.")
del session
return
logger.info("Characterizing node...")
# Get Refish bases
redfish_base_root = "/redfish/v1"
redfish_base_detail = session.get(redfish_base_root)
redfish_vendor = list(redfish_base_detail["Oem"].keys())[0]
redfish_name = redfish_base_detail["Name"]
redfish_version = redfish_base_detail["RedfishVersion"]
systems_base_root = redfish_base_detail["Systems"]["@odata.id"].rstrip("/")
systems_base_detail = session.get(systems_base_root)
system_root = systems_base_detail["Members"][0]["@odata.id"].rstrip("/")
# Force off the system and turn on the indicator
set_power_state(session, system_root, redfish_vendor, "off")
set_indicator_state(session, system_root, redfish_vendor, "on")
# Get the system details
system_detail = session.get(system_root)
system_sku = system_detail["SKU"].strip()
system_serial = system_detail["SerialNumber"].strip()
system_power_state = system_detail["PowerState"].strip()
system_indicator_state = system_detail["IndicatorLED"].strip()
system_health_state = system_detail["Status"]["Health"].strip()
# Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address
try:
ethernet_root = system_detail["EthernetInterfaces"]["@odata.id"].rstrip("/")
ethernet_detail = session.get(ethernet_root)
first_interface_root = ethernet_detail["Members"][0]["@odata.id"].rstrip("/")
first_interface_detail = session.get(first_interface_root)
# Something went wrong, so fall back
except KeyError:
first_interface_detail = dict()
# Try to get the MAC address directly from the interface detail (Redfish standard)
if first_interface_detail.get("MACAddress") is not None:
bootstrap_mac_address = first_interface_detail["MACAddress"].strip().lower()
# Try to get the MAC address from the HostCorrelation->HostMACAddress (HP DL360x G8)
elif len(system_detail.get("HostCorrelation", {}).get("HostMACAddress", [])) > 0:
bootstrap_mac_address = (
system_detail["HostCorrelation"]["HostMACAddress"][0].strip().lower()
)
# We can't find it, so use a dummy value
else:
logger.error("Could not find a valid MAC address for the bootstrap interface.")
return
# Display the system details
logger.info("Found details from node characterization:")
logger.info(f"> System Manufacturer: {redfish_vendor}")
logger.info(f"> System Redfish Version: {redfish_version}")
logger.info(f"> System Redfish Name: {redfish_name}")
logger.info(f"> System SKU: {system_sku}")
logger.info(f"> System Serial: {system_serial}")
logger.info(f"> Power State: {system_power_state}")
logger.info(f"> Indicator LED: {system_indicator_state}")
logger.info(f"> Health State: {system_health_state}")
logger.info(f"> Bootstrap NIC MAC: {bootstrap_mac_address}")
# Update node host MAC address
host_macaddr = bootstrap_mac_address
node = db.update_node_addresses(
config,
cspec_cluster,
cspec_hostname,
bmc_macaddr,
bmc_ipaddr,
host_macaddr,
host_ipaddr,
)
logger.debug(node)
logger.info("Determining system disk...")
storage_root = system_detail.get("Storage", {}).get("@odata.id")
system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
if system_drive_target is None:
logger.error(
"No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and try again."
)
return
logger.info(f"Found system disk {system_drive_target}")
# Create our preseed configuration
logger.info("Creating node boot configurations...")
installer.add_pxe(config, cspec_node, host_macaddr)
installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target)
# Adjust any BIOS settings
logger.info("Adjusting BIOS settings...")
bios_root = system_detail.get("Bios", {}).get("@odata.id")
if bios_root is not None:
bios_detail = session.get(bios_root)
bios_attributes = list(bios_detail["Attributes"].keys())
for setting, value in cspec_node["bmc"].get("bios_settings", {}).items():
if setting not in bios_attributes:
continue
payload = {"Attributes": {setting: value}}
session.patch(f"{bios_root}/Settings", payload)
# Set boot override to Pxe for the installer boot
logger.info("Setting temporary PXE boot...")
set_boot_override(session, system_root, redfish_vendor, "Pxe")
# Turn on the system
logger.info("Powering on node...")
set_power_state(session, system_root, redfish_vendor, "on")
node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")
logger.info("Waiting for completion of node and cluster installation...")
# Wait for the system to install and be configured
while node.state != "booted-completed":
sleep(60)
# Keep the Redfish session alive
session.get(redfish_base_root)
# Refresh our node state
node = db.get_node(config, cspec_cluster, name=cspec_hostname)
# Graceful shutdown of the machine
set_power_state(session, system_root, redfish_vendor, "GracefulShutdown")
system_power_state = "On"
while system_power_state != "Off":
sleep(5)
# Refresh our power state from the system details
system_detail = session.get(system_root)
system_power_state = system_detail["PowerState"].strip()
# Turn off the indicator to indicate bootstrap has completed
set_indicator_state(session, system_root, redfish_vendor, "off")
# We must delete the session
del session
return

View File

@ -0,0 +1,45 @@
#!/usr/bin/env python3
# tftp.py - PVC Cluster Auto-bootstrap TFTP preparation libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################
import os.path
import shutil
def build_tftp_repository(config):
# Generate an installer config
build_cmd = f"{config['ansible_path']}/pvc-installer/buildpxe.sh -o {config['tftp_root_path']} -u {config['deploy_username']}"
print(f"Building TFTP contents via pvc-installer command: {build_cmd}")
os.system(build_cmd)
def init_tftp(config):
"""
Prepare a TFTP root
"""
if not os.path.exists(config["tftp_root_path"]):
print("First run: building TFTP root and contents - this will take some time!")
os.makedirs(config["tftp_root_path"])
os.makedirs(config["tftp_host_path"])
shutil.copyfile(
f"{config['ansible_keyfile']}.pub", f"{config['tftp_root_path']}/keys.txt"
)
build_tftp_repository(config)