Initial commit of PVC Bootstrap system
Adds the PVC Bootstrap system, which allows the automated deployment of one or more PVC clusters.
0    bootstrap-daemon/pvcbootstrapd/lib/__init__.py  Normal file
78   bootstrap-daemon/pvcbootstrapd/lib/ansible.py  Executable file
@@ -0,0 +1,78 @@
#!/usr/bin/env python3

# ansible.py - PVC Cluster Auto-bootstrap Ansible libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import pvcbootstrapd.lib.git as git

import ansible_runner
import tempfile

from time import sleep
from celery.utils.log import get_task_logger


logger = get_task_logger(__name__)


def run_bootstrap(config, cspec, cluster, nodes):
    """
    Run an Ansible bootstrap against a cluster
    """
    logger.debug(nodes)

    # Construct our temporary INI inventory string
    logger.info("Constructing virtual Ansible inventory")
    base_yaml = cspec["clusters"][cluster.name]["base_yaml"]
    local_domain = base_yaml.get("local_domain")
    inventory = [f"""[{cluster.name}]"""]
    for node in nodes:
        inventory.append(
            f"""{node.name}.{local_domain} ansible_host={node.host_ipaddr}"""
        )
    inventory = "\n".join(inventory)
    logger.debug(inventory)

    # Wait 60 seconds to ensure everything is booted and stabilized
    logger.info("Waiting 60s before starting Ansible bootstrap.")
    sleep(60)

    # Run the Ansible playbooks
    with tempfile.TemporaryDirectory(prefix="pvc-ansible-bootstrap_") as pdir:
        try:
            r = ansible_runner.run(
                private_data_dir=f"{pdir}",
                inventory=inventory,
                limit=f"{cluster.name}",
                playbook=f"{config['ansible_path']}/pvc.yml",
                extravars={
                    "ansible_ssh_private_key_file": config["ansible_keyfile"],
                    "bootstrap": "yes",
                },
                forks=len(nodes),
                verbosity=2,
            )
            logger.info("Final status:")
            logger.info("{}: {}".format(r.status, r.rc))
            logger.info(r.stats)
            if r.rc == 0:
                git.commit_repository(config)
                git.push_repository(config)
        except Exception as e:
            logger.warning(f"Error: {e}")
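For reference, the inventory string built by run_bootstrap() is a plain INI group with one line per node. With a hypothetical cluster named cluster1, a local_domain of example.tld, and two nodes, the rendered inventory would look like:

[cluster1]
hv1.example.tld ansible_host=10.100.0.1
hv2.example.tld ansible_host=10.100.0.2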
50   bootstrap-daemon/pvcbootstrapd/lib/dataclasses.py  Executable file
@@ -0,0 +1,50 @@
#!/usr/bin/env python3

# dataclasses.py - PVC Cluster Auto-bootstrap dataclasses
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

from dataclasses import dataclass


@dataclass
class Cluster:
    """
    An instance of a Cluster
    """

    id: int
    name: str
    state: str


@dataclass
class Node:
    """
    An instance of a Node
    """

    id: int
    cluster: str
    state: str
    name: str
    nid: int
    bmc_macaddr: str
    bmc_ipaddr: str
    host_macaddr: str
    host_ipaddr: str
267  bootstrap-daemon/pvcbootstrapd/lib/db.py  Executable file
@@ -0,0 +1,267 @@
#!/usr/bin/env python3

# db.py - PVC Cluster Auto-bootstrap database libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import os
import sqlite3
import contextlib

from pvcbootstrapd.lib.dataclasses import Cluster, Node

from celery.utils.log import get_task_logger


logger = get_task_logger(__name__)


#
# Database functions
#
@contextlib.contextmanager
def dbconn(db_path):
    conn = sqlite3.connect(db_path)
    conn.execute("PRAGMA foreign_keys = 1")
    cur = conn.cursor()
    yield cur
    conn.commit()
    conn.close()


def init_database(config):
    db_path = config["database_path"]
    if not os.path.isfile(db_path):
        print("First run: initializing database.")
        # Initialize the database
        with dbconn(db_path) as cur:
            # Table listing all clusters
            cur.execute(
                """CREATE TABLE clusters
                   (id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT UNIQUE NOT NULL,
                    state TEXT NOT NULL)"""
            )
            # Table listing all nodes
            # FK: cluster -> clusters.id
            cur.execute(
                """CREATE TABLE nodes
                   (id INTEGER PRIMARY KEY AUTOINCREMENT,
                    cluster INTEGER NOT NULL,
                    state TEXT NOT NULL,
                    name TEXT UNIQUE NOT NULL,
                    nodeid INTEGER NOT NULL,
                    bmc_macaddr TEXT NOT NULL,
                    bmc_ipaddr TEXT NOT NULL,
                    host_macaddr TEXT NOT NULL,
                    host_ipaddr TEXT NOT NULL,
                    CONSTRAINT cluster_col FOREIGN KEY (cluster) REFERENCES clusters(id) ON DELETE CASCADE)"""
            )


#
# Cluster functions
#
def get_cluster(config, cid=None, name=None):
    if cid is None and name is None:
        return None
    elif cid is not None:
        findfield = "id"
        datafield = cid
    elif name is not None:
        findfield = "name"
        datafield = name

    with dbconn(config["database_path"]) as cur:
        cur.execute(f"""SELECT * FROM clusters WHERE {findfield} = ?""", (datafield,))
        rows = cur.fetchall()

    if len(rows) > 0:
        row = rows[0]
    else:
        return None

    return Cluster(row[0], row[1], row[2])


def add_cluster(config, cspec, name, state):
    with dbconn(config["database_path"]) as cur:
        cur.execute(
            """INSERT INTO clusters
               (name, state)
               VALUES
               (?, ?)""",
            (name, state),
        )

    logger.info(f"New cluster {name} added, populating bootstrap nodes from cspec")
    for bmcmac in cspec["clusters"][name]["cspec_yaml"]["bootstrap"]:
        hostname = cspec["clusters"][name]["cspec_yaml"]["bootstrap"][bmcmac]["node"][
            "hostname"
        ]
        add_node(
            config,
            name,
            hostname,
            int("".join(filter(str.isdigit, hostname))),
            "init",
            bmcmac,
            "",
            "",
            "",
        )
        logger.info(f"Added node {hostname}")

    return get_cluster(config, name=name)


def update_cluster_state(config, name, state):
    with dbconn(config["database_path"]) as cur:
        cur.execute(
            """UPDATE clusters
               SET state = ?
               WHERE name = ?""",
            (state, name),
        )

    return get_cluster(config, name=name)


#
# Node functions
#
def get_node(config, cluster_name, nid=None, name=None, bmc_macaddr=None):
    cluster = get_cluster(config, name=cluster_name)

    if nid is None and name is None and bmc_macaddr is None:
        return None
    elif nid is not None:
        findfield = "id"
        datafield = nid
    elif bmc_macaddr is not None:
        findfield = "bmc_macaddr"
        datafield = bmc_macaddr
    elif name is not None:
        findfield = "name"
        datafield = name

    with dbconn(config["database_path"]) as cur:
        cur.execute(
            f"""SELECT * FROM nodes WHERE {findfield} = ? AND cluster = ?""",
            (datafield, cluster.id),
        )
        rows = cur.fetchall()

    if len(rows) > 0:
        row = rows[0]
    else:
        return None

    return Node(
        row[0], cluster.name, row[2], row[3], row[4], row[5], row[6], row[7], row[8]
    )


def get_nodes_in_cluster(config, cluster_name):
    cluster = get_cluster(config, name=cluster_name)

    with dbconn(config["database_path"]) as cur:
        cur.execute("""SELECT * FROM nodes WHERE cluster = ?""", (cluster.id,))
        rows = cur.fetchall()

    node_list = list()
    for row in rows:
        node_list.append(
            Node(
                row[0],
                cluster.name,
                row[2],
                row[3],
                row[4],
                row[5],
                row[6],
                row[7],
                row[8],
            )
        )

    return node_list


def add_node(
    config,
    cluster_name,
    name,
    nodeid,
    state,
    bmc_macaddr,
    bmc_ipaddr,
    host_macaddr,
    host_ipaddr,
):
    cluster = get_cluster(config, name=cluster_name)

    with dbconn(config["database_path"]) as cur:
        cur.execute(
            """INSERT INTO nodes
               (cluster, state, name, nodeid, bmc_macaddr, bmc_ipaddr, host_macaddr, host_ipaddr)
               VALUES
               (?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                cluster.id,
                state,
                name,
                nodeid,
                bmc_macaddr,
                bmc_ipaddr,
                host_macaddr,
                host_ipaddr,
            ),
        )

    return get_node(config, cluster_name, name=name)


def update_node_state(config, cluster_name, name, state):
    cluster = get_cluster(config, name=cluster_name)

    with dbconn(config["database_path"]) as cur:
        cur.execute(
            """UPDATE nodes
               SET state = ?
               WHERE name = ? AND cluster = ?""",
            (state, name, cluster.id),
        )

    return get_node(config, cluster_name, name=name)


def update_node_addresses(
    config, cluster_name, name, bmc_macaddr, bmc_ipaddr, host_macaddr, host_ipaddr
):
    cluster = get_cluster(config, name=cluster_name)

    with dbconn(config["database_path"]) as cur:
        cur.execute(
            """UPDATE nodes
               SET bmc_macaddr = ?, bmc_ipaddr = ?, host_macaddr = ?, host_ipaddr = ?
               WHERE name = ? AND cluster = ?""",
            (bmc_macaddr, bmc_ipaddr, host_macaddr, host_ipaddr, name, cluster.id),
        )

    return get_node(config, cluster_name, name=name)
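Every query in this module goes through the dbconn() context manager above, which commits and closes the connection on exit. A minimal usage sketch (the database path is hypothetical; the daemon reads it from config["database_path"]):

# Minimal sketch: list all clusters using the dbconn() helper defined above.
with dbconn("/var/lib/pvcbootstrapd/pvcbootstrapd.sql") as cur:
    cur.execute("SELECT * FROM clusters")
    for row in cur.fetchall():
        print(row)  # e.g. (1, 'cluster1', 'provisioning')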
99   bootstrap-daemon/pvcbootstrapd/lib/dnsmasq.py  Executable file
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

# dnsmasq.py - PVC Cluster Auto-bootstrap DNSMasq instance
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import os
import subprocess
import signal

from threading import Thread


class DNSMasq:
    """
    Implements a daemonized instance of DNSMasq for providing DHCP and TFTP services

    The DNSMasq instance listens on the configured 'dhcp_address', and instead of a "real"
    leases database forwards requests to the 'dnsmasq-lease.py' script. This script will
    then hit the pvcbootstrapd '/checkin' API endpoint to perform actions.

    TFTP is provided to automate the bootstrap of a node, providing the pvc-installer
    over TFTP as well as a seed configuration which is created by the API.
    """

    def __init__(self, config):
        self.environment = {
            "API_URI": f"http://{config['api_address']}:{config['api_port']}/checkin/dnsmasq"
        }
        self.dnsmasq_cmd = [
            "/usr/sbin/dnsmasq",
            "--bogus-priv",
            "--no-hosts",
            "--dhcp-authoritative",
            "--filterwin2k",
            "--expand-hosts",
            "--domain-needed",
            f"--domain={config['dhcp_domain']}",
            f"--local=/{config['dhcp_domain']}/",
            "--log-facility=-",
            "--log-dhcp",
            "--keep-in-foreground",
            f"--dhcp-script={os.getcwd()}/pvcbootstrapd/dnsmasq-lease.py",
            "--bind-interfaces",
            f"--listen-address={config['dhcp_address']}",
            f"--dhcp-option=3,{config['dhcp_gateway']}",
            f"--dhcp-range={config['dhcp_lease_start']},{config['dhcp_lease_end']},{config['dhcp_lease_time']}",
            "--enable-tftp",
            f"--tftp-root={config['tftp_root_path']}/",
            # This block of dhcp-match, tag-if, and dhcp-boot statements sets up the following TFTP behaviour:
            # If the machine sends client-arch 0, and is not tagged iPXE, load undionly.kpxe (chainload)
            # If the machine sends client-arch 7 or 9, and is not tagged iPXE, load ipxe.efi (chainload)
            # If the machine sends the iPXE option, load boot.ipxe (iPXE boot configuration)
            "--dhcp-match=set:o_bios,option:client-arch,0",
            "--dhcp-match=set:o_uefi,option:client-arch,7",
            "--dhcp-match=set:o_uefi,option:client-arch,9",
            "--dhcp-match=set:ipxe,175",
            "--tag-if=set:bios,tag:!ipxe,tag:o_bios",
            "--tag-if=set:uefi,tag:!ipxe,tag:o_uefi",
            "--dhcp-boot=tag:bios,undionly.kpxe",
            "--dhcp-boot=tag:uefi,ipxe.efi",
            "--dhcp-boot=tag:ipxe,boot.ipxe",
        ]
        if config["debug"]:
            self.dnsmasq_cmd.append("--leasefile-ro")

        print(self.dnsmasq_cmd)
        self.stdout = subprocess.PIPE

    def execute(self):
        self.proc = subprocess.Popen(
            self.dnsmasq_cmd,
            env=self.environment,
        )

    def start(self):
        self.thread = Thread(target=self.execute, args=())
        self.thread.start()

    def stop(self):
        self.proc.send_signal(signal.SIGTERM)

    def reload(self):
        self.proc.send_signal(signal.SIGHUP)
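To clarify the lease-forwarding design described in the class docstring: dnsmasq invokes the --dhcp-script handler with positional arguments (action, MAC address, IP address, and optionally hostname), and the DNSMasq class above injects API_URI into that handler's environment. A minimal sketch of such a handler, assuming the shipped dnsmasq-lease.py follows this contract (it may differ in detail):

#!/usr/bin/env python3
# Minimal sketch of a dnsmasq --dhcp-script handler; the real dnsmasq-lease.py
# shipped with pvcbootstrapd may differ in its exact fields and actions.
import os
import sys
import requests

# dnsmasq calls: <script> <action> <macaddr> <ipaddr> [<hostname>]
data = {
    "action": sys.argv[1],  # "add", "old", or "del" for lease events
    "macaddr": sys.argv[2] if len(sys.argv) > 2 else None,
    "ipaddr": sys.argv[3] if len(sys.argv) > 3 else None,
}

# Forward the event to the pvcbootstrapd checkin endpoint set by DNSMasq above
requests.post(os.environ["API_URI"], json=data)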
213  bootstrap-daemon/pvcbootstrapd/lib/git.py  Executable file
@@ -0,0 +1,213 @@
#!/usr/bin/env python3

# git.py - PVC Cluster Auto-bootstrap Git repository libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import os.path
import git
import yaml

from celery.utils.log import get_task_logger


logger = get_task_logger(__name__)


def init_repository(config):
    """
    Clone the Ansible git repository
    """
    try:
        git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no"
        if not os.path.exists(config["ansible_path"]):
            print(
                f"First run: cloning repository {config['ansible_remote']} branch {config['ansible_branch']} to {config['ansible_path']}"
            )
            git.Repo.clone_from(
                config["ansible_remote"],
                config["ansible_path"],
                branch=config["ansible_branch"],
                env=dict(GIT_SSH_COMMAND=git_ssh_cmd),
            )

        g = git.cmd.Git(f"{config['ansible_path']}")
        g.checkout(config["ansible_branch"])
        g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
    except Exception as e:
        print(f"Error: {e}")


def pull_repository(config):
    """
    Pull (with rebase) the Ansible git repository
    """
    logger.info(f"Updating local configuration repository {config['ansible_path']}")
    try:
        git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no"
        g = git.cmd.Git(f"{config['ansible_path']}")
        g.pull(rebase=True, env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
        g.submodule("update", "--init", env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
    except Exception as e:
        logger.warn(e)


def commit_repository(config):
    """
    Commit uncommitted changes to the Ansible git repository
    """
    logger.info(
        f"Committing changes to local configuration repository {config['ansible_path']}"
    )

    try:
        g = git.cmd.Git(f"{config['ansible_path']}")
        g.add("--all")
        commit_env = {
            "GIT_COMMITTER_NAME": "PVC Bootstrap",
            "GIT_COMMITTER_EMAIL": "git@pvcbootstrapd",
        }
        g.commit(
            "-m",
            "Automated commit from PVC Bootstrap Ansible subsystem",
            author="PVC Bootstrap <git@pvcbootstrapd>",
            env=commit_env,
        )
    except Exception as e:
        logger.warn(e)


def push_repository(config):
    """
    Push changes to the default remote
    """
    logger.info(
        f"Pushing changes from local configuration repository {config['ansible_path']}"
    )

    try:
        git_ssh_cmd = f"ssh -i {config['ansible_keyfile']} -o StrictHostKeyChecking=no"
        g = git.Repo(f"{config['ansible_path']}")
        origin = g.remote(name="origin")
        origin.push(env=dict(GIT_SSH_COMMAND=git_ssh_cmd))
    except Exception as e:
        logger.warn(e)


def load_cspec_yaml(config):
    """
    Load the bootstrap group_vars for all known clusters
    """
    # Pull down the repository
    pull_repository(config)

    # Load our clusters file and read the clusters from it
    clusters_file = f"{config['ansible_path']}/{config['ansible_clusters_file']}"
    logger.info(f"Loading cluster configuration from file '{clusters_file}'")
    with open(clusters_file, "r") as clustersfh:
        clusters = yaml.load(clustersfh, Loader=yaml.SafeLoader).get("clusters", list())

    # Define a base cspec
    cspec = {
        "bootstrap": dict(),
        "hooks": dict(),
        "clusters": dict(),
    }

    # Read each cluster's cspec and update the base cspec
    logger.info("Loading per-cluster specifications")
    for cluster in clusters:
        cspec["clusters"][cluster] = dict()
        cspec["clusters"][cluster]["bootstrap_nodes"] = list()

        cspec_file = f"{config['ansible_path']}/group_vars/{cluster}/{config['ansible_cspec_files_bootstrap']}"
        if os.path.exists(cspec_file):
            with open(cspec_file, "r") as cspecfh:
                try:
                    cspec_yaml = yaml.load(cspecfh, Loader=yaml.SafeLoader)
                except Exception as e:
                    logger.warn(
                        f"Failed to load {config['ansible_cspec_files_bootstrap']} for cluster {cluster}: {e}"
                    )
                    continue

            cspec["clusters"][cluster]["cspec_yaml"] = cspec_yaml

            # Convert the MAC address keys to lowercase
            # DNSMasq operates with lowercase keys, but often these are written with uppercase.
            # Convert them to lowercase to prevent discrepancies later on.
            cspec_yaml["bootstrap"] = {
                k.lower(): v for k, v in cspec_yaml["bootstrap"].items()
            }

            # Load in the YAML for the cluster
            base_yaml = load_base_yaml(config, cluster)
            cspec["clusters"][cluster]["base_yaml"] = base_yaml
            pvc_yaml = load_pvc_yaml(config, cluster)
            cspec["clusters"][cluster]["pvc_yaml"] = pvc_yaml

            # Set per-node values from elsewhere
            for node in cspec_yaml["bootstrap"]:
                cspec["clusters"][cluster]["bootstrap_nodes"].append(
                    cspec_yaml["bootstrap"][node]["node"]["hostname"]
                )

                # Set the cluster value automatically
                cspec_yaml["bootstrap"][node]["node"]["cluster"] = cluster

                # Set the domain value automatically via base config
                cspec_yaml["bootstrap"][node]["node"]["domain"] = base_yaml[
                    "local_domain"
                ]

                # Set the node FQDN value automatically
                cspec_yaml["bootstrap"][node]["node"][
                    "fqdn"
                ] = f"{cspec_yaml['bootstrap'][node]['node']['hostname']}.{cspec_yaml['bootstrap'][node]['node']['domain']}"

            # Append bootstrap entries to the main dictionary
            cspec["bootstrap"] = {**cspec["bootstrap"], **cspec_yaml["bootstrap"]}

            # Append hooks to the main dictionary (per-cluster)
            if cspec_yaml.get("hooks"):
                cspec["hooks"][cluster] = cspec_yaml["hooks"]

    logger.info("Finished loading per-cluster specifications")
    return cspec


def load_base_yaml(config, cluster):
    """
    Load the base.yml group_vars for a cluster
    """
    base_file = f"{config['ansible_path']}/group_vars/{cluster}/{config['ansible_cspec_files_base']}"
    with open(base_file, "r") as varsfile:
        base_yaml = yaml.load(varsfile, Loader=yaml.SafeLoader)

    return base_yaml


def load_pvc_yaml(config, cluster):
    """
    Load the pvc.yml group_vars for a cluster
    """
    pvc_file = f"{config['ansible_path']}/group_vars/{cluster}/{config['ansible_cspec_files_pvc']}"
    with open(pvc_file, "r") as varsfile:
        pvc_yaml = yaml.load(varsfile, Loader=yaml.SafeLoader)

    return pvc_yaml
316  bootstrap-daemon/pvcbootstrapd/lib/hooks.py  Executable file
@@ -0,0 +1,316 @@
#!/usr/bin/env python3

# hooks.py - PVC Cluster Auto-bootstrap Hook libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import pvcbootstrapd.lib.db as db

import json
import tempfile
import paramiko
import contextlib
import requests

from re import match
from time import sleep
from celery.utils.log import get_task_logger


logger = get_task_logger(__name__)


@contextlib.contextmanager
def run_paramiko(config, node_address):
    ssh_client = paramiko.SSHClient()
    ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh_client.connect(
        hostname=node_address,
        username=config["deploy_username"],
        key_filename=config["ansible_keyfile"],
    )
    yield ssh_client
    ssh_client.close()


def run_hook_osddb(config, targets, args):
    """
    Add an OSD DB defined by args['disk']
    """
    for node in targets:
        node_name = node.name
        node_address = node.host_ipaddr

        device = args["disk"]

        logger.info(f"Creating OSD DB on node {node_name} device {device}")

        # Using a direct command on the target here is somewhat messy, but avoids many
        # complexities of determining a valid API listen address, etc.
        pvc_cmd_string = f"pvc storage osd create-db-vg --yes {node_name} {device}"

        with run_paramiko(config, node_address) as c:
            stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
            logger.debug(stdout.readlines())
            logger.debug(stderr.readlines())


def run_hook_osd(config, targets, args):
    """
    Add an OSD defined by args['disk'] with weight args['weight']
    """
    for node in targets:
        node_name = node.name
        node_address = node.host_ipaddr

        device = args["disk"]
        weight = args.get("weight", 1)
        ext_db_flag = args.get("ext_db", False)
        ext_db_ratio = args.get("ext_db_ratio", 0.05)

        logger.info(f"Creating OSD on node {node_name} device {device} weight {weight}")

        # Using a direct command on the target here is somewhat messy, but avoids many
        # complexities of determining a valid API listen address, etc.
        pvc_cmd_string = (
            f"pvc storage osd add --yes {node_name} {device} --weight {weight}"
        )
        if ext_db_flag:
            pvc_cmd_string = f"{pvc_cmd_string} --ext-db --ext-db-ratio {ext_db_ratio}"

        with run_paramiko(config, node_address) as c:
            stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
            logger.debug(stdout.readlines())
            logger.debug(stderr.readlines())


def run_hook_pool(config, targets, args):
    """
    Add a pool defined by args['name'] on device tier args['tier']
    """
    for node in targets:
        node_name = node.name
        node_address = node.host_ipaddr

        name = args["name"]
        pgs = args.get("pgs", "64")
        tier = args.get("tier", "default")  # Does nothing yet

        logger.info(
            f"Creating storage pool on node {node_name} name {name} pgs {pgs} tier {tier}"
        )

        # Using a direct command on the target here is somewhat messy, but avoids many
        # complexities of determining a valid API listen address, etc.
        pvc_cmd_string = f"pvc storage pool add {name} {pgs}"

        with run_paramiko(config, node_address) as c:
            stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
            logger.debug(stdout.readlines())
            logger.debug(stderr.readlines())

        # This only runs once on whatever the first node is
        break


def run_hook_network(config, targets, args):
    """
    Add a network defined by args (many)
    """
    for node in targets:
        node_name = node.name
        node_address = node.host_ipaddr

        vni = args["vni"]
        description = args["description"]
        nettype = args["type"]
        mtu = args.get("mtu", None)

        pvc_cmd_string = (
            f"pvc network add {vni} --description {description} --type {nettype}"
        )

        if mtu is not None and mtu not in ["auto", "default"]:
            pvc_cmd_string = f"{pvc_cmd_string} --mtu {mtu}"

        if nettype == "managed":
            domain = args["domain"]
            pvc_cmd_string = f"{pvc_cmd_string} --domain {domain}"

            dns_servers = args.get("dns_servers", [])
            for dns_server in dns_servers:
                pvc_cmd_string = f"{pvc_cmd_string} --dns-server {dns_server}"

            is_ip4 = args["ip4"]
            if is_ip4:
                ip4_network = args["ip4_network"]
                pvc_cmd_string = f"{pvc_cmd_string} --ipnet {ip4_network}"

                ip4_gateway = args["ip4_gateway"]
                pvc_cmd_string = f"{pvc_cmd_string} --gateway {ip4_gateway}"

                ip4_dhcp = args["ip4_dhcp"]
                if ip4_dhcp:
                    pvc_cmd_string = f"{pvc_cmd_string} --dhcp"
                    ip4_dhcp_start = args["ip4_dhcp_start"]
                    ip4_dhcp_end = args["ip4_dhcp_end"]
                    pvc_cmd_string = f"{pvc_cmd_string} --dhcp-start {ip4_dhcp_start} --dhcp-end {ip4_dhcp_end}"
                else:
                    pvc_cmd_string = f"{pvc_cmd_string} --no-dhcp"

            is_ip6 = args["ip6"]
            if is_ip6:
                ip6_network = args["ip6_network"]
                pvc_cmd_string = f"{pvc_cmd_string} --ipnet6 {ip6_network}"

                ip6_gateway = args["ip6_gateway"]
                pvc_cmd_string = f"{pvc_cmd_string} --gateway6 {ip6_gateway}"

        logger.info(f"Creating network on node {node_name} VNI {vni} type {nettype}")

        with run_paramiko(config, node_address) as c:
            stdin, stdout, stderr = c.exec_command(pvc_cmd_string)
            logger.debug(stdout.readlines())
            logger.debug(stderr.readlines())

        # This only runs once on whatever the first node is
        break


def run_hook_script(config, targets, args):
    """
    Run a script on the targets
    """
    for node in targets:
        node_name = node.name
        node_address = node.host_ipaddr

        script = args.get("script", None)
        source = args.get("source", None)
        path = args.get("path", None)

        logger.info(f"Running script on node {node_name}")

        with run_paramiko(config, node_address) as c:
            if script is not None:
                remote_path = "/tmp/pvcbootstrapd.hook"
                with tempfile.NamedTemporaryFile(mode="w") as tf:
                    tf.write(script)
                    tf.seek(0)

                    # Send the file to the remote system
                    tc = c.open_sftp()
                    tc.put(tf.name, remote_path)
                    tc.chmod(remote_path, 0o755)
                    tc.close()
            elif source == "local":
                if not match(r"^/", path):
                    path = config["ansible_path"] + "/" + path

                remote_path = "/tmp/pvcbootstrapd.hook"
                if path is None:
                    continue

                tc = c.open_sftp()
                tc.put(path, remote_path)
                tc.chmod(remote_path, 0o755)
                tc.close()
            elif source == "remote":
                remote_path = path

            stdin, stdout, stderr = c.exec_command(remote_path)
            logger.debug(stdout.readlines())
            logger.debug(stderr.readlines())


def run_hook_webhook(config, targets, args):
    """
    Send an HTTP request (no targets)
    """
    logger.info(f"Running webhook against {args['uri']}")

    # Get the body data
    data = json.dumps(args["body"])
    headers = {"content-type": "application/json"}

    # Craft up a Requests endpoint set for this
    requests_actions = {
        "get": requests.get,
        "post": requests.post,
        "put": requests.put,
        "patch": requests.patch,
        "delete": requests.delete,
        "options": requests.options,
    }
    action = args["action"]

    result = requests_actions[action](args["uri"], headers=headers, data=data)

    logger.info(f"Result: {result}")


hook_functions = {
    "osddb": run_hook_osddb,
    "osd": run_hook_osd,
    "pool": run_hook_pool,
    "network": run_hook_network,
    "script": run_hook_script,
    "webhook": run_hook_webhook,
}


def run_hooks(config, cspec, cluster, nodes):
    """
    Run the defined hooks against a freshly-bootstrapped cluster
    """
    # Wait 300 seconds to ensure everything is booted and stabilized
    logger.info("Waiting 300s before starting hook run.")
    sleep(300)

    cluster_hooks = cspec["hooks"][cluster.name]

    cluster_nodes = db.get_nodes_in_cluster(config, cluster.name)

    for hook in cluster_hooks:
        hook_target = hook.get("target", "all")
        hook_name = hook.get("name")
        logger.info(f"Running hook on {hook_target}: {hook_name}")

        if "all" in hook_target:
            target_nodes = cluster_nodes
        else:
            target_nodes = [node for node in cluster_nodes if node.name in hook_target]

        hook_type = hook.get("type")
        hook_args = hook.get("args")

        if hook_type is None or hook_args is None:
            logger.warning("Invalid hook: missing required configuration")
            continue

        # Run the hook function
        try:
            hook_functions[hook_type](config, target_nodes, hook_args)
        except Exception as e:
            logger.warning(f"Error running hook: {e}")

        # Wait 5s between hooks
        sleep(5)

    # Restart nodes to complete setup
    hook_functions['script'](config, cluster_nodes, {'script': '#!/usr/bin/env bash\necho bootstrapped | sudo tee /etc/pvc-install.hooks\nsudo reboot'})
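The hooks themselves are declared in each cluster's bootstrap group_vars file, loaded by git.load_cspec_yaml() and consumed here via hook.get("name"), hook.get("type"), hook.get("target"), and hook.get("args"). A hypothetical YAML entry, matching the argument names read by run_hook_osd() and run_hook_webhook() (the device path and URI are invented for illustration):

hooks:
  - name: create-osds        # free-form label, used only for logging
    type: osd                # selects run_hook_osd via hook_functions
    target: all              # "all", or a list of node names
    args:
      disk: /dev/nvme0n1
      weight: 2
      ext_db: false
  - name: notify-deployed
    type: webhook
    target: all
    args:
      action: post
      uri: https://deploy.example.tld/notify
      body:
        status: bootstrapped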
86   bootstrap-daemon/pvcbootstrapd/lib/host.py  Executable file
@@ -0,0 +1,86 @@
#!/usr/bin/env python3

# host.py - PVC Cluster Auto-bootstrap host libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

from celery.utils.log import get_task_logger

import pvcbootstrapd.lib.db as db


logger = get_task_logger(__name__)


def installer_init(config, cspec, data):
    bmc_macaddr = data["bmc_macaddr"]
    bmc_ipaddr = data["bmc_ipaddr"]
    host_macaddr = data["host_macaddr"]
    host_ipaddr = data["host_ipaddr"]
    cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
    cspec_hostname = cspec["bootstrap"][bmc_macaddr]["node"]["hostname"]

    cluster = db.get_cluster(config, name=cspec_cluster)
    if cluster is None:
        cluster = db.add_cluster(config, cspec, cspec_cluster, "provisioning")
    logger.debug(cluster)

    db.update_node_addresses(
        config,
        cspec_cluster,
        cspec_hostname,
        bmc_macaddr,
        bmc_ipaddr,
        host_macaddr,
        host_ipaddr,
    )
    db.update_node_state(config, cspec_cluster, cspec_hostname, "installing")
    node = db.get_node(config, cspec_cluster, name=cspec_hostname)
    logger.debug(node)


def installer_complete(config, cspec, data):
    bmc_macaddr = data["bmc_macaddr"]
    cspec_hostname = cspec["bootstrap"][bmc_macaddr]["node"]["hostname"]
    cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]

    db.update_node_state(config, cspec_cluster, cspec_hostname, "installed")
    node = db.get_node(config, cspec_cluster, name=cspec_hostname)
    logger.debug(node)


def set_boot_state(config, cspec, data, state):
    bmc_macaddr = data["bmc_macaddr"]
    bmc_ipaddr = data["bmc_ipaddr"]
    host_macaddr = data["host_macaddr"]
    host_ipaddr = data["host_ipaddr"]
    cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]
    cspec_hostname = cspec["bootstrap"][bmc_macaddr]["node"]["hostname"]

    db.update_node_addresses(
        config,
        cspec_cluster,
        cspec_hostname,
        bmc_macaddr,
        bmc_ipaddr,
        host_macaddr,
        host_ipaddr,
    )
    db.update_node_state(config, cspec_cluster, cspec_hostname, state)
    node = db.get_node(config, cspec_cluster, name=cspec_hostname)
    logger.debug(node)
81   bootstrap-daemon/pvcbootstrapd/lib/installer.py  Executable file
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

# installer.py - PVC Cluster Auto-bootstrap installer libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

from jinja2 import Template


#
# Worker Functions - PXE/Installer Per-host Templates
#
def add_pxe(config, cspec_node, host_macaddr):
    # Generate a per-client iPXE configuration for this host
    destination_filename = (
        f"{config['tftp_host_path']}/mac-{host_macaddr.replace(':', '')}.ipxe"
    )
    template_filename = f"{config['tftp_root_path']}/host-ipxe.j2"

    with open(template_filename, "r") as tfh:
        template = Template(tfh.read())

    imgargs_host_list = cspec_node.get("config", {}).get("kernel_options")
    if imgargs_host_list is not None:
        imgargs_host = " ".join(imgargs_host_list)
    else:
        imgargs_host = None

    rendered = template.render(imgargs_host=imgargs_host)

    with open(destination_filename, "w") as dfh:
        dfh.write(rendered)
        dfh.write("\n")


def add_preseed(config, cspec_node, host_macaddr, system_drive_target):
    # Generate a per-client Installer configuration for this host
    destination_filename = (
        f"{config['tftp_host_path']}/mac-{host_macaddr.replace(':', '')}.preseed"
    )
    template_filename = f"{config['tftp_root_path']}/host-preseed.j2"

    with open(template_filename, "r") as tfh:
        template = Template(tfh.read())

    add_packages_list = cspec_node.get("config", {}).get("packages")
    if add_packages_list is not None:
        add_packages = ",".join(add_packages_list)
    else:
        add_packages = None

    # We use the dhcp_address here to allow the listen_address to be 0.0.0.0
    rendered = template.render(
        debrelease=cspec_node.get("config", {}).get("release"),
        debmirror=cspec_node.get("config", {}).get("mirror"),
        addpkglist=add_packages,
        filesystem=cspec_node.get("config", {}).get("filesystem"),
        skip_blockcheck=False,
        fqdn=cspec_node["node"]["fqdn"],
        target_disk=system_drive_target,
        pvcbootstrapd_checkin_uri=f"http://{config['dhcp_address']}:{config['api_port']}/checkin/host",
    )

    with open(destination_filename, "w") as dfh:
        dfh.write(rendered)
        dfh.write("\n")
151  bootstrap-daemon/pvcbootstrapd/lib/lib.py  Executable file
@@ -0,0 +1,151 @@
#!/usr/bin/env python3

# lib.py - PVC Cluster Auto-bootstrap libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import pvcbootstrapd.lib.db as db
import pvcbootstrapd.lib.git as git
import pvcbootstrapd.lib.redfish as redfish
import pvcbootstrapd.lib.host as host
import pvcbootstrapd.lib.ansible as ansible
import pvcbootstrapd.lib.hooks as hooks

from time import sleep
from celery.utils.log import get_task_logger


logger = get_task_logger(__name__)


#
# Worker Functions - Checkins (Celery root tasks)
#
def dnsmasq_checkin(config, data):
    """
    Handle checkins from DNSMasq
    """
    logger.debug(f"data = {data}")

    # This is an add event; what we do depends on some stuff
    if data["action"] in ["add"]:
        logger.info(
            f"Receiving 'add' checkin from DNSMasq for MAC address '{data['macaddr']}'"
        )
        cspec = git.load_cspec_yaml(config)
        is_in_bootstrap_map = True if data["macaddr"] in cspec["bootstrap"] else False
        if is_in_bootstrap_map:
            if (
                cspec["bootstrap"][data["macaddr"]]["bmc"].get("redfish", None)
                is not None
            ):
                if cspec["bootstrap"][data["macaddr"]]["bmc"]["redfish"]:
                    is_redfish = True
                else:
                    is_redfish = False
            else:
                is_redfish = redfish.check_redfish(config, data)

            logger.info(f"Is device '{data['macaddr']}' Redfish capable? {is_redfish}")
            if is_redfish:
                redfish.redfish_init(config, cspec, data)
        else:
            logger.warn(f"Device '{data['macaddr']}' not in bootstrap map; ignoring.")

        return

    # This is a tftp event; a node installer has booted
    if data["action"] in ["tftp"]:
        logger.info(
            f"Receiving 'tftp' checkin from DNSMasq for IP address '{data['destaddr']}'"
        )
        return


def host_checkin(config, data):
    """
    Handle checkins from the PVC node
    """
    logger.info(f"Registering checkin for host {data['hostname']}")
    logger.debug(f"data = {data}")
    cspec = git.load_cspec_yaml(config)
    bmc_macaddr = data["bmc_macaddr"]
    cspec_cluster = cspec["bootstrap"][bmc_macaddr]["node"]["cluster"]

    if data["action"] in ["install-start"]:
        # Node install has started
        logger.info(f"Registering install start for host {data['hostname']}")
        host.installer_init(config, cspec, data)

    elif data["action"] in ["install-complete"]:
        # Node install has finished
        logger.info(f"Registering install complete for host {data['hostname']}")
        host.installer_complete(config, cspec, data)

    elif data["action"] in ["system-boot_initial"]:
        # Node has booted for the first time and can begin Ansible runs once all nodes up
        logger.info(f"Registering first boot for host {data['hostname']}")
        target_state = "booted-initial"

        host.set_boot_state(config, cspec, data, target_state)
        sleep(1)

        all_nodes = db.get_nodes_in_cluster(config, cspec_cluster)
        ready_nodes = [node for node in all_nodes if node.state == target_state]

        # Continue once all nodes are in the booted-initial state
        logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
        if len(ready_nodes) >= len(all_nodes):
            cluster = db.update_cluster_state(config, cspec_cluster, "ansible-running")

            ansible.run_bootstrap(config, cspec, cluster, ready_nodes)

    elif data["action"] in ["system-boot_configured"]:
        # Node has been booted after the Ansible run and can begin hook runs
        logger.info(f"Registering post-Ansible boot for host {data['hostname']}")
        target_state = "booted-configured"

        host.set_boot_state(config, cspec, data, target_state)
        sleep(1)

        all_nodes = db.get_nodes_in_cluster(config, cspec_cluster)
        ready_nodes = [node for node in all_nodes if node.state == target_state]

        # Continue once all nodes are in the booted-configured state
        logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
        if len(ready_nodes) >= len(all_nodes):
            cluster = db.update_cluster_state(config, cspec_cluster, "hooks-running")

            hooks.run_hooks(config, cspec, cluster, ready_nodes)

    elif data["action"] in ["system-boot_completed"]:
        # Node has been fully configured and can be shut down for the final time
        logger.info(f"Registering post-hooks boot for host {data['hostname']}")
        target_state = "booted-completed"

        host.set_boot_state(config, cspec, data, target_state)
        sleep(1)

        all_nodes = db.get_nodes_in_cluster(config, cspec_cluster)
        ready_nodes = [node for node in all_nodes if node.state == target_state]

        logger.info(f"Ready: {len(ready_nodes)} All: {len(all_nodes)}")
        if len(ready_nodes) >= len(all_nodes):
            cluster = db.update_cluster_state(config, cspec_cluster, "completed")

            # Hosts will now power down ready for real activation in production
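For illustration, each node-side checkin handled above is a JSON POST to the daemon's /checkin/host endpoint. A minimal sketch of the payload, using the field names read by host_checkin() (the endpoint address, port, hostname, and all values are invented for the example):

# Hypothetical node-side checkin; field names follow host_checkin() above,
# while the endpoint address and all values are invented for illustration.
import requests

data = {
    "action": "install-start",
    "hostname": "hv1",
    "bmc_macaddr": "ab:cd:ef:01:23:45",
    "bmc_ipaddr": "10.199.199.10",
    "host_macaddr": "ab:cd:ef:01:23:46",
    "host_ipaddr": "10.199.199.11",
}
requests.post("http://10.199.199.254:9999/checkin/host", json=data)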
835  bootstrap-daemon/pvcbootstrapd/lib/redfish.py  Executable file
@ -0,0 +1,835 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# redfish.py - PVC Cluster Auto-bootstrap Redfish libraries
|
||||
# Part of the Parallel Virtual Cluster (PVC) system
|
||||
#
|
||||
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
# Refs:
|
||||
# https://downloads.dell.com/manuals/all-products/esuprt_software/esuprt_it_ops_datcentr_mgmt/dell-management-solution-resources_white-papers11_en-us.pdf
|
||||
# https://downloads.dell.com/solutions/dell-management-solution-resources/RESTfulSerConfig-using-iDRAC-REST%20API%28DTC%20copy%29.pdf
|
||||
|
||||
import requests
|
||||
import urllib3
|
||||
import json
|
||||
import re
|
||||
import math
|
||||
from time import sleep
|
||||
from celery.utils.log import get_task_logger
|
||||
|
||||
import pvcbootstrapd.lib.installer as installer
|
||||
import pvcbootstrapd.lib.db as db
|
||||
|
||||
|
||||
logger = get_task_logger(__name__)
|
||||
|
||||
|
||||
#
|
||||
# Helper Classes
|
||||
#
|
||||
class AuthenticationException(Exception):
|
||||
def __init__(self, error=None, response=None):
|
||||
if error is not None:
|
||||
self.short_message = error
|
||||
else:
|
||||
self.short_message = "Generic authentication failure"
|
||||
|
||||
if response is not None:
|
||||
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
|
||||
if rinfo.get("Message") is not None:
|
||||
self.full_message = rinfo["Message"]
|
||||
self.res_message = rinfo["Resolution"]
|
||||
self.severity = rinfo["Severity"]
|
||||
self.message_id = rinfo["MessageId"]
|
||||
else:
|
||||
self.full_message = ""
|
||||
self.res_message = ""
|
||||
self.severity = "Fatal"
|
||||
self.message_id = rinfo["MessageId"]
|
||||
self.status_code = response.status_code
|
||||
else:
|
||||
self.status_code = None
|
||||
|
||||
def __str__(self):
|
||||
if self.status_code is not None:
|
||||
message = f"{self.short_message}: {self.full_message} {self.res_message} (HTTP Code: {self.status_code}, Severity: {self.severity}, ID: {self.message_id})"
|
||||
else:
|
||||
message = f"{self.short_message}"
|
||||
return str(message)
|
||||
|
||||
|
||||
class RedfishSession:
|
||||
def __init__(self, host, username, password):
|
||||
# Disable urllib3 warnings
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
# Perform login
|
||||
login_payload = {"UserName": username, "Password": password}
|
||||
login_uri = f"{host}/redfish/v1/Sessions"
|
||||
login_headers = {"content-type": "application/json"}
|
||||
|
||||
self.host = None
|
||||
login_response = None
|
||||
|
||||
tries = 1
|
||||
max_tries = 25
|
||||
while tries < max_tries:
|
||||
logger.info(f"Trying to log in to Redfish ({tries}/{max_tries - 1})...")
|
||||
try:
|
||||
login_response = requests.post(
|
||||
login_uri,
|
||||
data=json.dumps(login_payload),
|
||||
headers=login_headers,
|
||||
verify=False,
|
||||
timeout=5,
|
||||
)
|
||||
break
|
||||
except Exception:
|
||||
sleep(2)
|
||||
tries += 1
|
||||
|
||||
if login_response is None:
|
||||
logger.error("Failed to log in to Redfish")
|
||||
return
|
||||
|
||||
if login_response.status_code not in [200, 201]:
|
||||
raise AuthenticationException("Login failed", response=login_response)
|
||||
logger.info(f"Logged in to Redfish at {host} successfully")
|
||||
|
||||
self.host = host
|
||||
self.token = login_response.headers.get("X-Auth-Token")
|
||||
self.headers = {"content-type": "application/json", "x-auth-token": self.token}
|
||||
|
||||
logout_uri = login_response.headers.get("Location")
|
||||
if re.match(r"^/", logout_uri):
|
||||
self.logout_uri = f"{host}{logout_uri}"
|
||||
else:
|
||||
self.logout_uri = logout_uri
|
||||
|
||||
def __del__(self):
|
||||
if self.host is None:
|
||||
return
|
||||
|
||||
logout_headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Auth-Token": self.token,
|
||||
}
|
||||
|
||||
logout_response = requests.delete(
|
||||
self.logout_uri, headers=logout_headers, verify=False, timeout=15
|
||||
)
|
||||
|
||||
if logout_response.status_code not in [200, 201]:
|
||||
raise AuthenticationException("Logout failed", response=logout_response)
|
||||
logger.info(f"Logged out of Redfish at {self.host} successfully")
|
||||
|
||||
def get(self, uri):
|
||||
url = f"{self.host}{uri}"
|
||||
|
||||
response = requests.get(url, headers=self.headers, verify=False)
|
||||
|
||||
if response.status_code in [200, 201]:
|
||||
return response.json()
|
||||
else:
|
||||
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
|
||||
if rinfo.get("Message") is not None:
|
||||
message = f"{rinfo['Message']} {rinfo['Resolution']}"
|
||||
severity = rinfo["Severity"]
|
||||
message_id = rinfo["MessageId"]
|
||||
else:
|
||||
message = rinfo
|
||||
severity = "Error"
|
||||
message_id = "N/A"
|
||||
logger.warn(f"! Error: GET request to {url} failed")
|
||||
logger.warn(
|
||||
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
|
||||
)
|
||||
logger.warn(f"! Details: {message}")
|
||||
return None
|
||||
|
||||
def delete(self, uri):
|
||||
url = f"{self.host}{uri}"
|
||||
|
||||
response = requests.delete(url, headers=self.headers, verify=False)
|
||||
|
||||
if response.status_code in [200, 201]:
|
||||
return response.json()
|
||||
else:
|
||||
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
|
||||
if rinfo.get("Message") is not None:
|
||||
message = f"{rinfo['Message']} {rinfo['Resolution']}"
|
||||
severity = rinfo["Severity"]
|
||||
message_id = rinfo["MessageId"]
|
||||
else:
|
||||
message = rinfo
|
||||
severity = "Error"
|
||||
message_id = "N/A"
|
||||
|
||||
logger.warn(f"! Error: DELETE request to {url} failed")
|
||||
logger.warn(
|
||||
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
|
||||
)
|
||||
logger.warn(f"! Details: {message}")
|
||||
return None
|
||||
|
||||
def post(self, uri, data):
|
||||
url = f"{self.host}{uri}"
|
||||
payload = json.dumps(data)
|
||||
|
||||
response = requests.post(url, data=payload, headers=self.headers, verify=False)
|
||||
|
||||
if response.status_code in [200, 201]:
|
||||
return response.json()
|
||||
else:
|
||||
rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
|
||||
if rinfo.get("Message") is not None:
|
||||
message = f"{rinfo['Message']} {rinfo['Resolution']}"
|
||||
severity = rinfo["Severity"]
|
||||
message_id = rinfo["MessageId"]
|
||||
else:
|
||||
message = rinfo
|
||||
severity = "Error"
|
||||
message_id = "N/A"
|
||||
|
||||
logger.warn(f"! Error: POST request to {url} failed")
|
||||
logger.warn(
|
||||
f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
|
||||
)
|
||||
logger.warn(f"! Details: {message}")
|
||||
return None
|
||||
|
||||
    def put(self, uri, data):
        url = f"{self.host}{uri}"
        payload = json.dumps(data)

        response = requests.put(url, data=payload, headers=self.headers, verify=False)

        if response.status_code in [200, 201]:
            return response.json()
        else:
            rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
            if rinfo.get("Message") is not None:
                message = f"{rinfo['Message']} {rinfo['Resolution']}"
                severity = rinfo["Severity"]
                message_id = rinfo["MessageId"]
            else:
                message = rinfo
                severity = "Error"
                message_id = "N/A"

            logger.warning(f"! Error: PUT request to {url} failed")
            logger.warning(
                f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
            )
            logger.warning(f"! Details: {message}")
            return None

    def patch(self, uri, data):
        url = f"{self.host}{uri}"
        payload = json.dumps(data)

        response = requests.patch(url, data=payload, headers=self.headers, verify=False)

        if response.status_code in [200, 201]:
            return response.json()
        else:
            rinfo = response.json()["error"]["@Message.ExtendedInfo"][0]
            if rinfo.get("Message") is not None:
                message = f"{rinfo['Message']} {rinfo['Resolution']}"
                severity = rinfo["Severity"]
                message_id = rinfo["MessageId"]
            else:
                message = rinfo
                severity = "Error"
                message_id = "N/A"

            logger.warning(f"! Error: PATCH request to {url} failed")
            logger.warning(
                f"! HTTP Code: {response.status_code} Severity: {severity} ID: {message_id}"
            )
            logger.warning(f"! Details: {message}")
            return None

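# A minimal usage sketch of the REST wrappers above (host and credentials
# are hypothetical, not from the source). Each verb returns the decoded
# JSON body on HTTP 200/201 and None on failure, so callers can chain
# lookups against the returned dictionaries:
#
#   session = RedfishSession("https://10.10.10.10", "root", "password")
#   base = session.get("/redfish/v1")
#   if base is not None:
#       systems = session.get(base["Systems"]["@odata.id"])
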
#
# Helper functions
#
def format_bytes_tohuman(databytes):
    """
    Format a string of bytes into a human-readable value (using base-1000)
    """
    # Matrix of human-to-byte values
    byte_unit_matrix = {
        "B": 1,
        "KB": 1000,
        "MB": 1000 * 1000,
        "GB": 1000 * 1000 * 1000,
        "TB": 1000 * 1000 * 1000 * 1000,
        "PB": 1000 * 1000 * 1000 * 1000 * 1000,
        "EB": 1000 * 1000 * 1000 * 1000 * 1000 * 1000,
    }

    datahuman = ""
    for unit in sorted(byte_unit_matrix, key=byte_unit_matrix.get, reverse=True):
        if unit in ["TB", "PB", "EB"]:
            # Handle the situation where we might want to round to integer values
            # for some entries (2TB) but not others (e.g. 1.92TB). We round if the
            # result is within +/- 2% of the integer value, otherwise we use two
            # decimal places.
            new_bytes = databytes / byte_unit_matrix[unit]
            new_bytes_plustwopct = new_bytes * 1.02
            new_bytes_minustwopct = new_bytes * 0.98
            ceiled_bytes = int(math.ceil(databytes / byte_unit_matrix[unit]))
            rounded_bytes = round(databytes / byte_unit_matrix[unit], 2)
            if (
                ceiled_bytes > new_bytes_minustwopct
                and ceiled_bytes < new_bytes_plustwopct
            ):
                new_bytes = ceiled_bytes
            else:
                new_bytes = rounded_bytes
        else:
            # Smaller units are always ceiled to integer values
            new_bytes = int(math.ceil(databytes / byte_unit_matrix[unit]))

        # Jump down a level if the value needs 4 or more digits at this unit
        if new_bytes > 999:
            # We can jump down another level
            continue
        else:
            # We're at the end, display with this size
            datahuman = "{}{}".format(new_bytes, unit)

    return datahuman


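# Worked examples of the +/- 2% rounding above (byte counts hypothetical):
# 2_000_000_000_000 B divides to exactly 2.0 TB, and its ceiling (2) falls
# inside the 2% window, so the integer form is used; 1_920_000_000_000 B
# divides to 1.92 TB, whose ceiling (2) falls outside the window, so two
# decimal places are kept.
#
#   format_bytes_tohuman(2_000_000_000_000)  # -> "2TB"
#   format_bytes_tohuman(1_920_000_000_000)  # -> "1.92TB"
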
def get_system_drive_target(session, cspec_node, storage_root):
    """
    Determine the system drive target for the installer
    """
    # Handle an invalid >2 number of system disks, use only first 2
    if len(cspec_node["config"]["system_disks"]) > 2:
        cspec_drives = cspec_node["config"]["system_disks"][0:2]
    else:
        cspec_drives = cspec_node["config"]["system_disks"]

    # If we have no storage root, we just return the first entry from
    # the cspec_drives as-is and hope the administrator has the right
    # format here.
    if storage_root is None:
        return cspec_drives[0]
    # We proceed with Redfish configuration to determine the disks
    else:
        storage_detail = session.get(storage_root)

        # Grab a full list of drives
        drive_list = list()
        for storage_member in storage_detail["Members"]:
            storage_member_root = storage_member["@odata.id"]
            storage_member_detail = session.get(storage_member_root)
            for drive in storage_member_detail["Drives"]:
                drive_root = drive["@odata.id"]
                drive_detail = session.get(drive_root)
                drive_list.append(drive_detail)

        system_drives = list()

        # Iterate through each drive and include those that match
        for cspec_drive in cspec_drives:
            if re.match(r"^\/dev", cspec_drive) or re.match(r"^detect:", cspec_drive):
                # We only match the first drive that has these conditions for use in the preseed config
                logger.info(
                    "Found a drive with a 'detect:' string or Linux '/dev' path, using it for bootstrap."
                )
                return cspec_drive

            # Match any chassis-ID spec drives
            for drive in drive_list:
                # Like "Disk.Bay.2:Enclosure.Internal.0-1:RAID.Integrated.1-1"
                drive_name = drive["Id"].split(":")[0]
                # Craft up the cspec version of this
                cspec_drive_name = f"Disk.Bay.{cspec_drive}"
                if drive_name == cspec_drive_name:
                    system_drives.append(drive)

        # We found a single drive, so determine its actual detect string
        if len(system_drives) == 1:
            logger.info(
                "Found a single drive matching the requested chassis ID, using it as the system disk."
            )

            # Get the model's first word
            drive_model = system_drives[0].get("Model", "INVALID").split()[0]
            # Get and convert the size in bytes value to human
            drive_size_bytes = system_drives[0].get("CapacityBytes", 0)
            drive_size_human = format_bytes_tohuman(drive_size_bytes)
            # Get the drive ID out of all the valid entries
            # How this works is that, for each non-array disk, we must find what position our exact disk is
            # So for example, say we want disk 3 out of 4, and all 4 are the same size and model and not in
            # another (RAID) volume. This will give us an index of 2. Then in the installer this will match
            # the 3rd list entry from "lsscsi". This is probably an unnecessary hack, since people will
            # probably just give the first disk if they want one, or 2 disks if they want a RAID-1, but this
            # is here just in case
            index = 0  # Default to the first position if no match is found
            idx = 0
            for drive in drive_list:
                list_drive_model = drive.get("Model", "INVALID").split()[0]
                list_drive_size_bytes = drive.get("CapacityBytes", 0)
                list_drive_in_array = (
                    False
                    if drive.get("Links", {})
                    .get("Volumes", [""])[0]
                    .get("@odata.id")
                    .split("/")[-1]
                    == drive.get("Id")
                    else True
                )
                if (
                    drive_model == list_drive_model
                    and drive_size_bytes == list_drive_size_bytes
                    and not list_drive_in_array
                ):
                    index = idx
                idx += 1
            drive_id = index

            # Create the target string
            system_drive_target = f"detect:{drive_model}:{drive_size_human}:{drive_id}"

        # We found two drives, so create a RAID-1 array then determine the volume's detect string
        elif len(system_drives) == 2:
            logger.info(
                "Found two drives matching the requested chassis IDs, creating a RAID-1 and using it as the system disk."
            )

            drive_one = system_drives[0]
            drive_one_id = drive_one.get("Id", "INVALID")
            drive_one_path = drive_one.get("@odata.id", "INVALID")
            drive_one_controller = drive_one_id.split(":")[-1]
            drive_two = system_drives[1]
            drive_two_id = drive_two.get("Id", "INVALID")
            drive_two_path = drive_two.get("@odata.id", "INVALID")
            drive_two_controller = drive_two_id.split(":")[-1]

            # Determine that the drives are on the same controller
            if drive_one_controller != drive_two_controller:
                logger.error(
                    "Two drives are not on the same controller; this should not happen"
                )
                return None

            # Get the controller details
            controller_root = f"{storage_root}/{drive_one_controller}"
            controller_detail = session.get(controller_root)

            # Get the name of the controller (for crafting the detect string)
            controller_name = controller_detail.get("Name", "INVALID").split()[0]

            # Get the volume root for the controller
            controller_volume_root = controller_detail.get("Volumes", {}).get(
                "@odata.id"
            )

            # Get the pre-creation list of volumes on the controller
            controller_volumes_pre = [
                volume["@odata.id"]
                for volume in session.get(controller_volume_root).get("Members", [])
            ]

            # Create the RAID-1 volume
            payload = {
                "VolumeType": "Mirrored",
                "Drives": [
                    {"@odata.id": drive_one_path},
                    {"@odata.id": drive_two_path},
                ],
            }
            session.post(controller_volume_root, payload)

            # Wait for the volume to be created
            new_volume_list = []
            while len(new_volume_list) < 1:
                sleep(5)
                controller_volumes_post = [
                    volume["@odata.id"]
                    for volume in session.get(controller_volume_root).get("Members", [])
                ]
                new_volume_list = list(
                    set(controller_volumes_post).difference(controller_volumes_pre)
                )
            new_volume_root = new_volume_list[0]

            # Get the IDX of the volume out of any others
            volume_id = 0
            for idx, volume in enumerate(controller_volumes_post):
                if volume == new_volume_root:
                    volume_id = idx
                    break

            # Get and convert the size in bytes value to human
            volume_detail = session.get(new_volume_root)
            volume_size_bytes = volume_detail.get("CapacityBytes", 0)
            volume_size_human = format_bytes_tohuman(volume_size_bytes)

            # Create the target string
            system_drive_target = (
                f"detect:{controller_name}:{volume_size_human}:{volume_id}"
            )

        # We found no matching drives, so error out
        else:
            system_drive_target = None

        return system_drive_target


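# A sketch of the three cspec "system_disks" forms handled above (values
# hypothetical): Linux device paths and existing detect strings are passed
# through untouched, while chassis bay IDs (as strings) are resolved via
# Redfish, and RAID-1'd if two are given.
#
#   {"config": {"system_disks": ["/dev/sda"]}}              # used as-is
#   {"config": {"system_disks": ["detect:INTEL:400GB:0"]}}  # used as-is
#   {"config": {"system_disks": ["1", "2"]}}                # -> RAID-1 of bays 1+2
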
#
# Redfish Task functions
#
def set_indicator_state(session, system_root, redfish_vendor, state):
    """
    Set the system indicator LED to the desired state (on/off)
    """
    state_values_write = {
        "Dell": {
            "on": "Blinking",
            "off": "Off",
        },
        "default": {
            "on": "Lit",
            "off": "Off",
        },
    }

    state_values_read = {
        "default": {
            "on": "Lit",
            "off": "Off",
        },
    }

    try:
        # Allow vendor-specific overrides
        if redfish_vendor not in state_values_write:
            redfish_vendor = "default"
        # Allow nice names ("on"/"off")
        if state in state_values_write[redfish_vendor]:
            state = state_values_write[redfish_vendor][state]

        # Get current state
        system_detail = session.get(system_root)
        current_state = system_detail["IndicatorLED"]
    except KeyError:
        return False

    try:
        state_read = state
        # Allow vendor-specific overrides
        if redfish_vendor not in state_values_read:
            redfish_vendor = "default"
        # Allow nice names ("on"/"off")
        if state_read in state_values_read[redfish_vendor]:
            state_read = state_values_read[redfish_vendor][state_read]

        if state_read == current_state:
            return False
    except KeyError:
        return False

    session.patch(system_root, {"IndicatorLED": state})

    return True


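# Example call (system path hypothetical): on a Dell BMC, turning the
# indicator "on" PATCHes {"IndicatorLED": "Blinking"}, since the Dell
# locate function blinks rather than using the default "Lit" value:
#
#   set_indicator_state(session, "/redfish/v1/Systems/System.Embedded.1", "Dell", "on")
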
def set_power_state(session, system_root, redfish_vendor, state):
    """
    Set the system power state to the desired state
    """
    state_values = {
        "default": {
            "on": "On",
            "off": "ForceOff",
        },
    }

    try:
        # Allow vendor-specific overrides
        if redfish_vendor not in state_values:
            redfish_vendor = "default"
        # Allow nice names ("on"/"off")
        if state in state_values[redfish_vendor]:
            state = state_values[redfish_vendor][state]

        # Get current state, target URI, and allowable values
        system_detail = session.get(system_root)
        current_state = system_detail["PowerState"]
        power_root = system_detail["Actions"]["#ComputerSystem.Reset"]["target"]
        power_choices = system_detail["Actions"]["#ComputerSystem.Reset"][
            "ResetType@Redfish.AllowableValues"
        ]
    except KeyError:
        return False

    # Remap some namings so we can check the current state against the target state
    if state in ["ForceOff"]:
        target_state = "Off"
    else:
        target_state = state

    if target_state == current_state:
        return False

    if state not in power_choices:
        return False

    session.post(power_root, {"ResetType": state})

    return True


def set_boot_override(session, system_root, redfish_vendor, target):
    """
    Set the system boot override to the desired target
    """
    try:
        system_detail = session.get(system_root)
        boot_targets = system_detail["Boot"]["BootSourceOverrideSupported"]
    except KeyError:
        return False

    if target not in boot_targets:
        return False

    session.patch(system_root, {"Boot": {"BootSourceOverrideTarget": target}})

    return True


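# The entry function below chains these task helpers for a bootstrap; a
# condensed sketch of that sequence (session and system_root assumed to be
# obtained as in redfish_init):
#
#   set_power_state(session, system_root, redfish_vendor, "off")
#   set_indicator_state(session, system_root, redfish_vendor, "on")
#   set_boot_override(session, system_root, redfish_vendor, "Pxe")
#   set_power_state(session, system_root, redfish_vendor, "on")
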
def check_redfish(config, data):
    """
    Validate that a BMC is Redfish-capable
    """
    headers = {"Content-Type": "application/json"}
    logger.info("Checking for Redfish response...")
    count = 0
    while True:
        try:
            count += 1
            if count > 30:
                retcode = 500
                logger.warning("Aborted after 300s; device too slow or not booting.")
                break
            resp = requests.get(
                f"https://{data['ipaddr']}/redfish/v1",
                headers=headers,
                verify=False,
                timeout=10,
            )
            retcode = resp.status_code
            break
        except Exception:
            logger.info(f"Attempt {count}...")
            # Pace the retries so that 30 attempts covers the advertised 300s
            sleep(10)
            continue

    if retcode == 200:
        return True
    else:
        return False


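# Usage sketch (address hypothetical): the probe above retries for roughly
# 300 seconds before giving up, so a BMC that is still booting has time to
# come up.
#
#   if check_redfish(config, {"ipaddr": "10.10.10.10"}):
#       logger.info("BMC is Redfish-capable")
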
#
# Entry function
#
def redfish_init(config, cspec, data):
    """
    Initialize a new node with Redfish
    """
    bmc_ipaddr = data["ipaddr"]
    bmc_macaddr = data["macaddr"]
    bmc_host = f"https://{bmc_ipaddr}"

    cspec_node = cspec["bootstrap"][bmc_macaddr]
    logger.debug(f"cspec_node = {cspec_node}")

    bmc_username = cspec_node["bmc"]["username"]
    bmc_password = cspec_node["bmc"]["password"]

    host_macaddr = ""
    host_ipaddr = ""

    cspec_cluster = cspec_node["node"]["cluster"]
    cspec_hostname = cspec_node["node"]["hostname"]

    cluster = db.get_cluster(config, name=cspec_cluster)
    if cluster is None:
        cluster = db.add_cluster(config, cspec, cspec_cluster, "provisioning")

    logger.debug(cluster)

    db.update_node_state(config, cspec_cluster, cspec_hostname, "characterizing")
    db.update_node_addresses(
        config,
        cspec_cluster,
        cspec_hostname,
        bmc_macaddr,
        bmc_ipaddr,
        host_macaddr,
        host_ipaddr,
    )
    node = db.get_node(config, cspec_cluster, name=cspec_hostname)
    logger.debug(node)

    # Create the session and log in
    session = RedfishSession(bmc_host, bmc_username, bmc_password)
    if session.host is None:
        logger.info("Aborting Redfish configuration; reboot BMC to try again.")
        del session
        return

    logger.info("Characterizing node...")
    # Get Redfish bases
    redfish_base_root = "/redfish/v1"
    redfish_base_detail = session.get(redfish_base_root)

    redfish_vendor = list(redfish_base_detail["Oem"].keys())[0]
    redfish_name = redfish_base_detail["Name"]
    redfish_version = redfish_base_detail["RedfishVersion"]

    systems_base_root = redfish_base_detail["Systems"]["@odata.id"].rstrip("/")
    systems_base_detail = session.get(systems_base_root)

    system_root = systems_base_detail["Members"][0]["@odata.id"].rstrip("/")

    # Force off the system and turn on the indicator
    set_power_state(session, system_root, redfish_vendor, "off")
    set_indicator_state(session, system_root, redfish_vendor, "on")

    # Get the system details
    system_detail = session.get(system_root)

    system_sku = system_detail["SKU"].strip()
    system_serial = system_detail["SerialNumber"].strip()
    system_power_state = system_detail["PowerState"].strip()
    system_indicator_state = system_detail["IndicatorLED"].strip()
    system_health_state = system_detail["Status"]["Health"].strip()

    # Walk down the EthernetInterfaces construct to get the bootstrap interface MAC address
    try:
        ethernet_root = system_detail["EthernetInterfaces"]["@odata.id"].rstrip("/")
        ethernet_detail = session.get(ethernet_root)
        first_interface_root = ethernet_detail["Members"][0]["@odata.id"].rstrip("/")
        first_interface_detail = session.get(first_interface_root)
    # Something went wrong, so fall back
    except KeyError:
        first_interface_detail = dict()

    # Try to get the MAC address directly from the interface detail (Redfish standard)
    if first_interface_detail.get("MACAddress") is not None:
        bootstrap_mac_address = first_interface_detail["MACAddress"].strip().lower()
    # Try to get the MAC address from the HostCorrelation->HostMACAddress (HP DL360x G8)
    elif len(system_detail.get("HostCorrelation", {}).get("HostMACAddress", [])) > 0:
        bootstrap_mac_address = (
            system_detail["HostCorrelation"]["HostMACAddress"][0].strip().lower()
        )
    # We can't find it, so error out
    else:
        logger.error("Could not find a valid MAC address for the bootstrap interface.")
        return

    # Display the system details
    logger.info("Found details from node characterization:")
    logger.info(f"> System Manufacturer: {redfish_vendor}")
    logger.info(f"> System Redfish Version: {redfish_version}")
    logger.info(f"> System Redfish Name: {redfish_name}")
    logger.info(f"> System SKU: {system_sku}")
    logger.info(f"> System Serial: {system_serial}")
    logger.info(f"> Power State: {system_power_state}")
    logger.info(f"> Indicator LED: {system_indicator_state}")
    logger.info(f"> Health State: {system_health_state}")
    logger.info(f"> Bootstrap NIC MAC: {bootstrap_mac_address}")

    # Update node host MAC address
    host_macaddr = bootstrap_mac_address
    node = db.update_node_addresses(
        config,
        cspec_cluster,
        cspec_hostname,
        bmc_macaddr,
        bmc_ipaddr,
        host_macaddr,
        host_ipaddr,
    )
    logger.debug(node)

    logger.info("Determining system disk...")
    storage_root = system_detail.get("Storage", {}).get("@odata.id")
    system_drive_target = get_system_drive_target(session, cspec_node, storage_root)
    if system_drive_target is None:
        logger.error(
            "No valid drives found; configure a single system drive as a 'detect:' string or Linux '/dev' path instead and try again."
        )
        return
    logger.info(f"Found system disk {system_drive_target}")

    # Create our preseed configuration
    logger.info("Creating node boot configurations...")
    installer.add_pxe(config, cspec_node, host_macaddr)
    installer.add_preseed(config, cspec_node, host_macaddr, system_drive_target)

    # Adjust any BIOS settings
    logger.info("Adjusting BIOS settings...")
    bios_root = system_detail.get("Bios", {}).get("@odata.id")
    if bios_root is not None:
        bios_detail = session.get(bios_root)
        bios_attributes = list(bios_detail["Attributes"].keys())
        for setting, value in cspec_node["bmc"].get("bios_settings", {}).items():
            if setting not in bios_attributes:
                continue

            payload = {"Attributes": {setting: value}}
            session.patch(f"{bios_root}/Settings", payload)

    # Set boot override to Pxe for the installer boot
    logger.info("Setting temporary PXE boot...")
    set_boot_override(session, system_root, redfish_vendor, "Pxe")

    # Turn on the system
    logger.info("Powering on node...")
    set_power_state(session, system_root, redfish_vendor, "on")

    node = db.update_node_state(config, cspec_cluster, cspec_hostname, "pxe-booting")

    logger.info("Waiting for completion of node and cluster installation...")
    # Wait for the system to install and be configured
    while node.state != "booted-completed":
        sleep(60)
        # Keep the Redfish session alive
        session.get(redfish_base_root)
        # Refresh our node state
        node = db.get_node(config, cspec_cluster, name=cspec_hostname)

    # Gracefully shut down the machine once installation completes
    set_power_state(session, system_root, redfish_vendor, "GracefulShutdown")
    system_power_state = "On"
    while system_power_state != "Off":
        sleep(5)
        # Refresh our power state from the system details
        system_detail = session.get(system_root)
        system_power_state = system_detail["PowerState"].strip()

    # Turn off the indicator to indicate bootstrap has completed
    set_indicator_state(session, system_root, redfish_vendor, "off")

    # We must delete the session to log out of the BMC
    del session
    return
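# A hypothetical cspec "bootstrap" entry exercising the keys read above,
# keyed by the BMC MAC address received from DHCP (all values illustrative):
#
#   cspec["bootstrap"]["ab:cd:ef:01:23:45"] = {
#       "bmc": {
#           "username": "root",
#           "password": "password",
#           "bios_settings": {"BootMode": "Bios"},  # applied only if supported
#       },
#       "node": {"cluster": "cluster1", "hostname": "hv1"},
#       "config": {"system_disks": ["1", "2"]},
#   }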
45
bootstrap-daemon/pvcbootstrapd/lib/tftp.py
Executable file
@ -0,0 +1,45 @@
#!/usr/bin/env python3

# tftp.py - PVC Cluster Auto-bootstrap TFTP preparation libraries
# Part of the Parallel Virtual Cluster (PVC) system
#
# Copyright (C) 2018-2021 Joshua M. Boniface <joshua@boniface.me>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
###############################################################################

import os
import shutil


def build_tftp_repository(config):
    # Build the TFTP/PXE installer contents via the pvc-installer script
    build_cmd = f"{config['ansible_path']}/pvc-installer/buildpxe.sh -o {config['tftp_root_path']} -u {config['deploy_username']}"
    print(f"Building TFTP contents via pvc-installer command: {build_cmd}")
    os.system(build_cmd)


def init_tftp(config):
    """
    Prepare a TFTP root
    """
    if not os.path.exists(config["tftp_root_path"]):
        print("First run: building TFTP root and contents - this will take some time!")
        os.makedirs(config["tftp_root_path"])
        os.makedirs(config["tftp_host_path"])
        shutil.copyfile(
            f"{config['ansible_keyfile']}.pub", f"{config['tftp_root_path']}/keys.txt"
        )

        build_tftp_repository(config)
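# The config keys consumed by this module, shown with hypothetical values:
#
#   config = {
#       "ansible_path": "/var/lib/pvcbootstrapd/repo",
#       "ansible_keyfile": "/var/lib/pvcbootstrapd/id_ed25519",
#       "deploy_username": "deploy",
#       "tftp_root_path": "/srv/tftp/pvc-installer",
#       "tftp_host_path": "/srv/tftp/pvc-installer/host",
#   }
#   init_tftp(config)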