Add Ceph OSD cpuset tuning options
Allows an administrator to pin Ceph OSD processes to a dedicated set of CPUs using the cpuset tool, for situations where CPU contention with VMs or other system tasks is negatively affecting OSD performance. This is optional, advanced tuning and is disabled by default.
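The feature is driven entirely by two group_vars settings. A minimal sketch of enabling it might look like the following (the hostname and CPU IDs are placeholders, and the exact group_vars file is up to the administrator; the variable names and the hostname/osd_cset keys are taken from the role's conditions and templates below):

# Optional tuning; nothing happens unless both variables are set and this node matches an entry
pvc_shield_osds_enable: true
pvc_shield_osds_cset:
  # One entry per node to shield; osd_cset lists the CPU IDs reserved for ceph-osd processes
  - hostname: hv1        # placeholder; must match the node's inventory_hostname
    osd_cset:
      - 0
      - 1
      - 8
      - 9

Nodes without a matching entry are left untouched, and as the debug task below warns, a reboot is required before the cpuset shield actually takes effect.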
@@ -117,4 +117,55 @@
     - ceph-mon@{{ ansible_hostname }}
     - ceph-mgr@{{ ansible_hostname }}

+# System OSD CPU shielding activation
+- block:
+    - name: install packages
+      apt:
+        name:
+          - cpuset
+          - numactl
+        state: latest
+
+    - name: install ceph-osd-cpuset script
+      template:
+        src: ceph/ceph-osd-cpuset.j2
+        dest: /usr/local/sbin/ceph-osd-cpuset
+        mode: 0755
+
+    - name: install ceph-osd-cpuset service unit
+      template:
+        src: ceph/ceph-osd-cpuset.service.j2
+        dest: /etc/systemd/system/ceph-osd-cpuset.service
+      register: systemd_file_cpuset
+
+    - name: create ceph-osd override dropin directory
+      file:
+        dest: /etc/systemd/system/ceph-osd@.service.d
+        state: directory
+
+    - name: install ceph-osd override dropin
+      template:
+        src: ceph/ceph-osd-cpuset.conf.j2
+        dest: /etc/systemd/system/ceph-osd@.service.d/cpuset.conf
+      register: systemd_file_osd
+
+    - name: reload systemd to apply previous changes
+      command: "systemctl daemon-reload"
+      when: systemd_file_cpuset.changed or systemd_file_osd.changed
+
+    - name: enable ceph-osd-cpuset service
+      service:
+        name: ceph-osd-cpuset
+        enabled: yes
+
+    - debug:
+        msg: "NOTICE: cpuset configs have NOT been applied to the running system. This node must be rebooted to apply these changes."
+      when: systemd_file_cpuset.changed or systemd_file_osd.changed
+  tags: pvc-ceph-cpuset
+  when:
+    - pvc_shield_osds_enable is defined
+    - pvc_shield_osds_enable
+    - pvc_shield_osds_cset is defined
+    - pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) | list | count > 0
+
 - meta: flush_handlers
@@ -23,17 +23,9 @@
   when: newhost is defined and newhost
   tags: always

-# General blacklisting of modules
-- name: add module blacklist
-  template:
-    src: system/blacklist.j2
-    dest: /etc/modprobe.d/blacklist.conf
-
-# Logrotate configuration
-- name: add logrotate configuration
-  template:
-    src: system/pvc.j2
-    dest: /etc/logrotate.d/pvc
+# Install system tweaks
+- include: system/main.yml
+  tags: pvc-system

 # Install base databases (coordinators only)
 - include: ceph/main.yml
roles/pvc/tasks/system/main.yml (new file)
@@ -0,0 +1,14 @@
---
# General blacklisting of modules
- name: add module blacklist
  template:
    src: system/blacklist.j2
    dest: /etc/modprobe.d/blacklist.conf

# Logrotate configuration
- name: add logrotate configuration
  template:
    src: system/pvc.j2
    dest: /etc/logrotate.d/pvc

- meta: flush_handlers
roles/pvc/templates/ceph/ceph-osd-cpuset.conf.j2 (new file)
@@ -0,0 +1,5 @@
# ceph-osd@.service overrides for cpuset
# {{ ansible_managed }}
[Service]
ExecStart =
ExecStart = /usr/bin/cset proc --set=osd --exec /usr/bin/ceph-osd -- -f --cluster ${CLUSTER} --id %i --setuser ceph --setgroup ceph
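The blank ExecStart = line clears the command inherited from the stock ceph-osd@.service unit; the second ExecStart then re-launches the OSD wrapped in cset proc --exec --set=osd, so each daemon starts inside the osd cpuset created by the script below. Only the cpuset placement changes; the ceph-osd arguments mirror the stock unit's own command line.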
roles/pvc/templates/ceph/ceph-osd-cpuset.j2 (new executable file)
@@ -0,0 +1,63 @@
#!/bin/bash
# PVC Ceph OSD cpuset preparation script
# {{ ansible_managed }}

# This script is designed to prepare the cpusets for use by Ceph OSDs, VMs, and other system resources.
# Libvirt provides no way to globally restrict its CPUs, so we must resort to this trickery.
{% set cset_host = pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) %}

A_OSD_CPUS=( {{ cset_host[0]['osd_cset'] | join(' ') }} )
A_SYS_CPUS=()

CPU_INFO="$( lscpu )"

# First, we must determine how many NUMA nodes we have
NUMA_COUNT="$( grep '^NUMA node(s)' <<<"${CPU_INFO}" | awk '{ print $NF }' )"

# If we have 1 NUMA node, our SYS_MEMS is 0; otherwise it's 0-X
# This is needed to explicitly set our memspec during the set
if [[ ${NUMA_COUNT} -eq 1 ]]; then
    SYS_MEMS="0"
else
    SYS_MEMS="0-$(( ${NUMA_COUNT} - 1 ))"
fi

# We must determine which NUMA nodes our OSD CPUs are in for the memspec during the set
A_OSD_MEMS=()
for CPU in ${A_OSD_CPUS[@]}; do
    NODE="$( grep -E '^NUMA node[0-9]+ CPU' <<<"${CPU_INFO}" | grep -w "${CPU}" | awk '{ print $2 }' | sed 's/node//' )"
    if [[ ! " ${A_OSD_MEMS[*]} " =~ " ${NODE} " ]]; then
        A_OSD_MEMS+=( $NODE )
    fi
done

# Determine our CPU count
CPU_COUNT="$( grep '^CPU(s)' <<<"${CPU_INFO}" | awk '{ print $NF }' )"
echo "CPU count: ${CPU_COUNT}"

# Loop through all the CPUs in the count; if they are not in OSD_CPUS, add them to the SYS_CPUS array
for i in $( seq 0 $(( ${CPU_COUNT} - 1 )) ); do
    if [[ ! " ${A_OSD_CPUS[*]} " =~ " ${i} " ]]; then
        A_SYS_CPUS+=( $i )
    fi
done

# Convert arrays into CSV
OSD_MEMS="$( IFS=, ; echo "${A_OSD_MEMS[*]}" )"
OSD_CPUS="$( IFS=, ; echo "${A_OSD_CPUS[*]}" )"
SYS_CPUS="$( IFS=, ; echo "${A_SYS_CPUS[*]}" )"

echo "OSD CPUs: ${OSD_CPUS}"
echo "OSD Mems: ${OSD_MEMS}"
echo "System/VM CPUs: ${SYS_CPUS}"
echo "System/VM Mems: ${SYS_MEMS}"

# Create the system cpuset and move everything currently running into it
/usr/bin/cset set --cpu=${SYS_CPUS} --mem=${SYS_MEMS} system
/usr/bin/cset proc --move --force --threads root --toset=system

# Create our Libvirt cpuset (identical to the system cpuset)
/usr/bin/cset set --cpu=${SYS_CPUS} --mem=${SYS_MEMS} machine

# Create our OSD cpuset
/usr/bin/cset set --cpu=${OSD_CPUS} --mem=${OSD_MEMS} osd
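As a rough worked example (the numbers are hypothetical): on a single-socket, 16-thread node with osd_cset set to 0, 1, 8 and 9, NUMA_COUNT is 1, so the script reports OSD CPUs 0,1,8,9 and System/VM CPUs 2,3,4,5,6,7,10,11,12,13,14,15, all on mems 0. Everything already running is moved into the system set, libvirt is given the identical machine set for its guests, and the ceph-osd units (via the drop-in above) get the reserved osd set. After the reboot noted earlier, cset set --list should show all three sets with these CPU assignments.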
roles/pvc/templates/ceph/ceph-osd-cpuset.service.j2 (new file)
@@ -0,0 +1,13 @@
# PVC Ceph OSD cpuset service unit
# {{ ansible_managed }}
{% set cset_host = pvc_shield_osds_cset | selectattr('hostname', 'equalto', inventory_hostname) %}
[Unit]
Description = Ceph OSD cpuset shield creation
Before = ceph-osd@.service libvirtd.service

[Service]
Type = oneshot
ExecStart = /usr/local/sbin/ceph-osd-cpuset

[Install]
WantedBy = ceph.target