Add Ceph OSD cpuset tuning options

Allows an administrator to pin Ceph OSDs to dedicated CPUs with the cpuset
tool, for situations where CPU contention from VMs or other system tasks
may be negatively affecting OSD performance. This is optional, advanced
tuning and is disabled by default.
2023-09-01 15:42:27 -04:00
parent 45424a28ce
commit 6e48d6fe84
7 changed files with 221 additions and 32 deletions
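
The new tasks are driven by two inventory variables. A minimal sketch of the expected group_vars entries, with hypothetical hostnames and CPU numbers (the key names are taken from the selectattr and osd_cset lookups below):

    pvc_shield_osds_enable: yes
    pvc_shield_osds_cset:
      - hostname: hv1
        osd_cset:
          - 0
          - 1
          - 2
          - 3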

View File

@@ -117,4 +117,55 @@
    - ceph-mon@{{ ansible_hostname }}
    - ceph-mgr@{{ ansible_hostname }}

# System OSD CPU shielding activation
- block:
    - name: install packages
      apt:
        name:
          - cpuset
          - numactl
        state: latest

    - name: install ceph-osd-cpuset script
      template:
        src: ceph/ceph-osd-cpuset.j2
        dest: /usr/local/sbin/ceph-osd-cpuset
        mode: 0755

    - name: install ceph-osd-cpuset service unit
      template:
        src: ceph/ceph-osd-cpuset.service.j2
        dest: /etc/systemd/system/ceph-osd-cpuset.service
      register: systemd_file_cpuset

    - name: create ceph-osd override dropin directory
      file:
        dest: /etc/systemd/system/ceph-osd@.service.d
        state: directory

    - name: install ceph-osd override dropin
      template:
        src: ceph/ceph-osd-cpuset.conf.j2
        dest: /etc/systemd/system/ceph-osd@.service.d/cpuset.conf
      register: systemd_file_osd

    - name: reload systemd to apply previous changes
      command: "systemctl daemon-reload"
      when: systemd_file_cpuset.changed or systemd_file_osd.changed

    - name: enable ceph-osd-cpuset service
      service:
        name: ceph-osd-cpuset
        enabled: yes

    - debug:
        msg: "NOTICE: cpuset configs have NOT been applied to the running system. This node must be rebooted to apply these changes."
      when: systemd_file_cpuset.changed or systemd_file_osd.changed
  tags: pvc-ceph-cpuset
  when:
    - pvc_shield_osds_enable is defined
    - pvc_shield_osds_enable
    - pvc_shield_osds_cset is defined
    - pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) | list | count > 0

- meta: flush_handlers
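
To roll out just this tuning, the block's tag can be targeted; a sketch, assuming a hypothetical inventory and playbook name:

    ansible-playbook -i hosts.yml pvc.yml -l hv1 -t pvc-ceph-cpuset

As the debug task warns, the cpusets only take effect once the node is rebooted.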

View File

@@ -23,17 +23,9 @@
  when: newhost is defined and newhost
  tags: always

# General blacklisting of modules
- name: add module blacklist
  template:
    src: system/blacklist.j2
    dest: /etc/modprobe.d/blacklist.conf

# Logrotate configuration
- name: add logrotate configuration
  template:
    src: system/pvc.j2
    dest: /etc/logrotate.d/pvc

# Install system tweaks
- include: system/main.yml
  tags: pvc-system

# Install base databases (coordinators only)
- include: ceph/main.yml

View File

@@ -0,0 +1,14 @@
---
# General blacklisting of modules
- name: add module blacklist
  template:
    src: system/blacklist.j2
    dest: /etc/modprobe.d/blacklist.conf

# Logrotate configuration
- name: add logrotate configuration
  template:
    src: system/pvc.j2
    dest: /etc/logrotate.d/pvc

- meta: flush_handlers

View File

@@ -0,0 +1,5 @@
# ceph-osd@.service overrides for cpuset
# {{ ansible_managed }}
[Service]
ExecStart =
ExecStart = /usr/bin/cset proc --set=osd --exec /usr/bin/ceph-osd -- -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
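
The empty ExecStart= line is deliberate: in a systemd drop-in, an empty assignment clears the ExecStart list inherited from the stock ceph-osd@.service, so the cset-wrapped command replaces the packaged invocation rather than running in addition to it.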

View File

@@ -0,0 +1,63 @@
#!/bin/bash

# PVC Ceph OSD cpuset preparation script
# {{ ansible_managed }}

# This script prepares the cpusets for use by Ceph OSDs, VMs, and other system resources.
# Libvirt does not make this easy, as it provides no way to globally restrict its CPUs, so we
# must use this trickery.

{% set cset_host = pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) | list %}
A_OSD_CPUS=( {{ cset_host[0]['osd_cset'] | join(' ') }} )
A_SYS_CPUS=()

CPU_INFO="$( lscpu )"

# First, we must determine how many NUMA nodes we have
# (parsed from the lscpu "NUMA node(s):" line)
NUMA_COUNT="$( grep '^NUMA node(s)' <<<"${CPU_INFO}" | awk '{ print $NF }' )"

# If we have 1 NUMA node, our SYS_MEMS is 0; otherwise it's 0-X
# This is needed to explicitly set our memspec during the set
if [[ ${NUMA_COUNT} -eq 1 ]]; then
    SYS_MEMS="0"
else
    SYS_MEMS="0-$(( ${NUMA_COUNT} - 1 ))"
fi

# We must determine which NUMA nodes our OSD CPUs are in for the memspec during the set
# (parsed from the lscpu "NUMA nodeX CPU(s):" lines)
A_OSD_MEMS=()
for CPU in "${A_OSD_CPUS[@]}"; do
    NODE="$( grep -E '^NUMA node[0-9]+ CPU' <<<"${CPU_INFO}" | grep -w "${CPU}" | awk '{ print $2 }' | sed 's/node//' )"
    if [[ ! " ${A_OSD_MEMS[*]} " =~ " ${NODE} " ]]; then
        A_OSD_MEMS+=( ${NODE} )
    fi
done

# Determine our CPU count
CPU_COUNT="$( grep '^CPU(s)' <<<"${CPU_INFO}" | awk '{ print $NF }' )"
echo "CPU count: ${CPU_COUNT}"

# Loop through all the CPUs in the count; any that are not in OSD_CPUS go into the SYS_CPUS array
for i in $( seq 0 $(( ${CPU_COUNT} - 1 )) ); do
    if [[ ! " ${A_OSD_CPUS[*]} " =~ " ${i} " ]]; then
        A_SYS_CPUS+=( ${i} )
    fi
done

# Convert arrays into CSV
OSD_MEMS="$( IFS=, ; echo "${A_OSD_MEMS[*]}" )"
OSD_CPUS="$( IFS=, ; echo "${A_OSD_CPUS[*]}" )"
SYS_CPUS="$( IFS=, ; echo "${A_SYS_CPUS[*]}" )"

echo "OSD CPUs: ${OSD_CPUS}"
echo "OSD Mems: ${OSD_MEMS}"
echo "System/VM CPUs: ${SYS_CPUS}"
echo "System/VM Mems: ${SYS_MEMS}"

# Create the system cpuset and move everything currently running into it
/usr/bin/cset set --cpu=${SYS_CPUS} --mem=${SYS_MEMS} system
/usr/bin/cset proc --move --force --threads root --toset=system

# Create our Libvirt cpuset (identical to the system cpuset)
/usr/bin/cset set --cpu=${SYS_CPUS} --mem=${SYS_MEMS} machine

# Create our OSD cpuset
/usr/bin/cset set --cpu=${OSD_CPUS} --mem=${OSD_MEMS} osd
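
After a reboot, the shield can be verified with cset itself; a sketch using the set names created above (flag spellings per the cset manpages):

    cset set --list
    cset proc --list --set=osd

The first should show the system, machine, and osd sets with their CPU and memory assignments; the second should show the ceph-osd processes confined to the osd set.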

View File

@@ -0,0 +1,13 @@
# PVC Ceph OSD cpuset service unit
# {{ ansible_managed }}
{% set cset_host = pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) %}
[Unit]
Description = Ceph OSD cpuset shield creation
Before = ceph-osd@.service libvirtd.service

[Service]
Type = oneshot
ExecStart = /usr/local/sbin/ceph-osd-cpuset

[Install]
WantedBy = ceph.target
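
The Before= ordering ensures the cpusets exist before any OSD instance or libvirt starts, and WantedBy=ceph.target hooks the oneshot into Ceph's normal startup sequence at boot.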