Skip to content

Commit

Permalink
Add preflight OS and other checks
Browse files Browse the repository at this point in the history
- Implemented OS preflight checks to validate system requirements before Ceph cluster creation.
- Checks include:
  - OS version (RHEL 9+ required)
  - SELinux enforcing mode
  - Firewalld installation and status
  - Required package availability (rpcbind, podman, firewalld)
  - Podman version check (>= 3.3)
  - RHEL software profile validation
  - Tuned profile check
  - CPU, RAM, Swap, and Filesystem (part of other checks)
  • Loading branch information
Kushal-deb committed Feb 6, 2025
1 parent 1d3efbc commit 6e47331
Show file tree
Hide file tree
Showing 6 changed files with 277 additions and 0 deletions.
2 changes: 2 additions & 0 deletions ceph_defaults/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ infra_pkgs:
- podman
- lvm2
- sos
- rpcbind
- firewalld
client_group: clients
10 changes: 10 additions & 0 deletions cephadm-preflight.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
become: true
gather_facts: true
vars:
preflight_results: []
repos_4_to_disable:
- rhceph-4-tools-for-rhel-{{ ansible_facts['distribution_major_version'] }}-{{ ansible_facts['architecture'] }}-rpms
- rhceph-4-mon-for-rhel-{{ ansible_facts['distribution_major_version'] }}-{{ ansible_facts['architecture'] }}-rpms
Expand All @@ -45,6 +46,12 @@
import_role:
name: ceph_defaults

# Statically import the OS-level preflight checks defined in
# tasks/preflight_system_checks.yml.
- name: Run preflight system checks
import_tasks: tasks/preflight_system_checks.yml

# Statically import the CPU, RAM, swap and filesystem checks defined in
# tasks/preflight_cpu_mem_checks.yml.
- name: Run CPU and memory checks
import_tasks: tasks/preflight_cpu_mem_checks.yml

- name: redhat family of OS related tasks
when: ansible_facts['os_family'] == 'RedHat'
block:
Expand Down Expand Up @@ -320,6 +327,9 @@
- docker-ce-cli
- containerd.io

# Statically import tasks/generate_report.yml, which renders the entries
# accumulated in preflight_results into a text report.
- name: Generate preflight check report
import_tasks: tasks/generate_report.yml

- name: set insecure container registry in /etc/containers/registries.conf
ansible.builtin.import_playbook: cephadm-set-container-insecure-registries.yml
when: set_insecure_registries | default(false) | bool
12 changes: 12 additions & 0 deletions tasks/generate_report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Render templates/preflight_report.j2 into ./ceph_preflight_report.txt.
# The task is delegated to localhost, executed only once for the play, and
# run without privilege escalation.
- name: Generate preflight check report
  ansible.builtin.template:
    src: templates/preflight_report.j2
    dest: ./ceph_preflight_report.txt
  become: false
  run_once: true
  delegate_to: localhost

# Tell the operator where the rendered report was written.
- name: Notify user about report location
  ansible.builtin.debug:
    msg: >-
      Preflight check report has been generated at ./ceph_preflight_report.txt (local machine)
118 changes: 118 additions & 0 deletions tasks/preflight_cpu_mem_checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
---
# x86-64-v2 (the microarchitecture level RHEL 9 is built for) is defined by
# CMPXCHG16B, LAHF/SAHF, POPCNT, SSE3, SSSE3, SSE4.1 and SSE4.2. AVX2 belongs
# to x86-64-v3, so testing for it would reject CPUs that fully satisfy v2.
# The flag names below are the ones Linux exposes in /proc/cpuinfo
# (pni == SSE3, cx16 == CMPXCHG16B, lahf_lm == LAHF/SAHF in long mode).
# Prints "yes" when every flag is present, otherwise "no".
- name: Check CPU supports x86-64-v2 for RHEL 9
  ansible.builtin.shell: |
    flags=" $(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2) "
    for flag in cx16 lahf_lm popcnt pni sse4_1 sse4_2 ssse3; do
      case "$flags" in
        *" $flag "*) ;;
        *) echo no; exit 0 ;;
      esac
    done
    echo yes
  register: cpu_supports_x86_64_v2
  changed_when: false

# Record the outcome before failing so the FAIL entry exists in
# preflight_results; this matches the store-then-fail order used by the
# filesystem checks in this file.
- name: Store CPU instruction set check result
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {
        'Check': 'CPU x86-64-v2',
        'Result': 'PASS' if cpu_supports_x86_64_v2.stdout == 'yes' else 'FAIL',
        'Reason': 'CPU lacks one or more x86-64-v2 instruction set extensions' if cpu_supports_x86_64_v2.stdout != 'yes' else 'N/A'
      }
    ] }}"

- name: Fail if CPU does not support x86-64-v2
  ansible.builtin.fail:
    msg: "CPU does not support x86-64-v2, which RHEL 9 requires (SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CMPXCHG16B, LAHF/SAHF)."
  when: cpu_supports_x86_64_v2.stdout != "yes"
  register: cpu_fail

# Ask nproc for the number of processing units; the task never fails or
# reports a change, so an unusable result simply evaluates to 0 below.
- name: Check available CPU cores
  ansible.builtin.shell: "nproc"
  changed_when: false
  failed_when: false
  register: cpu_cores

# Append a PASS/FAIL entry for the "at least 4 cores" requirement.
- name: Store CPU Core Check
  ansible.builtin.set_fact:
    preflight_results: >-
      {{ preflight_results + [{
           'Check': 'CPU Cores >= 4',
           'Result': ((cpu_cores.stdout | int) >= 4) | ternary('PASS', 'FAIL'),
           'Reason': ((cpu_cores.stdout | int) < 4)
                     | ternary('System has only ' ~ cpu_cores.stdout ~ ' cores, required: 4', 'N/A')
         }] }}

# Read the "total" column of the Mem: row printed by `free -m` (value in MB).
- name: Check total RAM available
  ansible.builtin.shell: "free -m | awk '/Mem:/ {print $2}'"
  changed_when: false
  failed_when: false
  register: total_ram

# Append a PASS/FAIL entry for the 8192 MB minimum.
- name: Store RAM Check Result
  ansible.builtin.set_fact:
    preflight_results: >-
      {{ preflight_results + [{
           'Check': 'Minimum RAM (8GB)',
           'Result': ((total_ram.stdout | int) >= 8192) | ternary('PASS', 'FAIL'),
           'Reason': ((total_ram.stdout | int) < 8192)
                     | ternary('System has only ' ~ total_ram.stdout ~ ' MB RAM, required: 8192MB', 'N/A')
         }] }}

# Read the "total" column of the Swap: row printed by `free -m` (value in MB).
- name: Check total swap available
  ansible.builtin.shell: "free -m | awk '/Swap:/ {print $2}'"
  changed_when: false
  failed_when: false
  register: total_swap

# Required swap is 1.5 times the RAM total registered in total_ram,
# rounded to a whole number of MB.
- name: Calculate required swap space
  ansible.builtin.set_fact:
    required_swap: "{{ (((total_ram.stdout | int) * 1.5) | round) | int }}"

# Append a PASS/FAIL entry comparing available swap with the requirement.
- name: Store Swap Space Check
  ansible.builtin.set_fact:
    preflight_results: >-
      {{ preflight_results + [{
           'Check': 'Swap Space (1.5x RAM)',
           'Result': ((total_swap.stdout | int) >= (required_swap | int)) | ternary('PASS', 'FAIL'),
           'Reason': ((total_swap.stdout | int) < (required_swap | int))
                     | ternary('System has only ' ~ (total_swap.stdout | int) ~ ' MB Swap, required: ' ~ (required_swap | int) ~ ' MB', 'N/A')
         }] }}

# findmnt --mountpoint prints the target only when /var itself is a mount
# point and prints nothing otherwise. Grepping `df` output for '/var' would
# also match unrelated mounts located below /var (for example
# /var/lib/containers/...), producing a false PASS.
- name: Check if /var is a separate partition
  ansible.builtin.command: findmnt --noheadings --output TARGET --mountpoint /var
  register: var_partition
  changed_when: false
  failed_when: false

# Report the root filesystem size as a bare integer number of GiB.
# Human-readable `df -h` output such as "50G" or "1.5T" cannot be parsed by
# the `int` filter (it yields 0), which made the >= 100 comparison always
# fail; -BG forces GiB units and tr strips everything except the digits.
- name: Check root filesystem size
  ansible.builtin.shell: "df -BG --output=size / | tail -n 1 | tr -dc '0-9'"
  register: root_fs_size
  changed_when: false
  failed_when: false

- name: Store Root Filesystem Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {
        'Check': 'Root Filesystem >= 100GB',
        'Result': 'PASS' if root_fs_size.stdout | int >= 100 else 'FAIL',
        'Reason': 'Root FS is only ' ~ root_fs_size.stdout ~ 'GB, required: 100GB' if root_fs_size.stdout | int < 100 else 'N/A'
      }
    ] }}"

- name: Store /var Partition Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {
        'Check': '/var is a separate partition',
        'Result': 'PASS' if var_partition.stdout != '' else 'FAIL',
        'Reason': '/var is not a separate partition' if var_partition.stdout == '' else 'N/A'
      }
    ] }}"

# Both results are stored above before either failure is raised, so the
# recorded entries are complete even when the host is aborted here.
- name: Fail if /var is not a separate partition
  ansible.builtin.fail:
    msg: "/var is not a separate partition, which is required for Ceph."
  when: var_partition.stdout == ''
  register: var_fail

- name: Fail if Root Filesystem is less than 100GB
  ansible.builtin.fail:
    msg: "Root filesystem is only {{ root_fs_size.stdout }}GB, but 100GB is required."
  when: root_fs_size.stdout | int < 100
  register: root_fs_fail

124 changes: 124 additions & 0 deletions tasks/preflight_system_checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
---
# Record the OS version result first so a FAIL entry exists before the host
# is aborted. The entry always carries a 'Reason' key because the report
# template reads item['Reason'] for every failed check.
- name: Store OS check result
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {
        'Check': 'OS Version',
        'Result': 'PASS' if ansible_facts['distribution_major_version'] | int >= 9 else 'FAIL',
        'Reason': 'Detected ' ~ ansible_facts['distribution'] ~ ' ' ~ ansible_facts['distribution_version'] ~ ', required: major version 9 or later' if ansible_facts['distribution_major_version'] | int < 9 else 'N/A'
      }
    ] }}"

# Only Red Hat Enterprise Linux hosts older than major version 9 abort here;
# other distributions are recorded above but not failed.
- name: Ensure OS is RHEL 9+
  ansible.builtin.fail:
    msg: "Ceph requires RHEL 9+. Detected: {{ ansible_facts['distribution'] }} {{ ansible_facts['distribution_version'] }}"
  when:
    - ansible_facts['distribution'] == 'RedHat'
    - ansible_facts['distribution_major_version'] | int < 9

# Put SELinux into enforcing mode with the targeted policy. The task is
# never reported as changed and is treated as failed whenever the state
# returned by the module is not 'enforcing'.
- name: Ensure SELinux is set to Enforcing mode
  ansible.posix.selinux:
    state: enforcing
    policy: targeted
  changed_when: false
  register: selinux_status
  failed_when: selinux_status.state != 'enforcing'

# Append the SELinux entry to the accumulated preflight results.
- name: Store SELinux check result
  ansible.builtin.set_fact:
    preflight_results: >-
      {{ preflight_results + [{
           'Check': 'SELinux',
           'Result': (selinux_status.state == 'enforcing') | ternary('PASS', 'FAIL'),
           'Reason': selinux_status.failed
                     | ternary('SELinux was not in enforcing mode and could not be enforced automatically', 'N/A')
         }] }}

# Install every package listed in infra_pkgs (provided by ceph_defaults).
- name: Ensure required packages are installed
  ansible.builtin.package:
    name: "{{ infra_pkgs }}"
    state: present

# Start and enable firewalld. The module fails on its own when the unit
# cannot be started. Its returned `status` dictionary is collected from
# `systemctl show` before the unit is started, so it must not be used to
# judge the final state: a unit that was just started would still show up
# as inactive.
- name: Ensure firewalld is enabled and running
  ansible.builtin.systemd:
    name: firewalld
    state: started
    enabled: true
  register: firewall_status

# Query the live unit state after the start attempt. The command is
# read-only, so it also runs in check mode.
- name: Verify firewalld is active
  ansible.builtin.command: systemctl is-active firewalld
  register: firewalld_active
  changed_when: false
  failed_when: false
  check_mode: false

# Store the result (including a 'Reason' key, which the report template
# reads for failed checks) before failing the host.
- name: Store Firewalld check result
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {
        'Check': 'Firewalld Running',
        'Result': 'PASS' if (firewalld_active.stdout | default('')) == 'active' else 'FAIL',
        'Reason': 'Firewalld is required for ceph but is not running' if (firewalld_active.stdout | default('')) != 'active' else 'N/A'
      }
    ] }}"

- name: Fail if firewalld is not running
  ansible.builtin.fail:
    msg: "Firewalld is required for ceph but is not running"
  when: (firewalld_active.stdout | default('')) != 'active'
  register: firewall_fail

# Populate ansible_facts.packages with the packages installed on the host.
- name: Collect installed package facts
  ansible.builtin.package_facts:
    manager: auto

# True when a 'podman' entry is present in the gathered package facts.
- name: Check if Podman is installed
  ansible.builtin.set_fact:
    podman_installed: "{{ 'podman' in ansible_facts['packages'] }}"

# Use the version of the first listed podman package, or the
# NOT_INSTALLED marker when podman is absent.
- name: Extract Podman version
  ansible.builtin.set_fact:
    podman_version: "{{ (ansible_facts['packages']['podman'] | first)['version'] if podman_installed else 'NOT_INSTALLED' }}"

# Abort when podman is installed but older than 3.3.
- name: Check if podman version is less than 3.3
  ansible.builtin.fail:
    msg: "Podman version must be >=3.3, but detected: {{ podman_version }}"
  when: podman_installed and (podman_version is version('3.3', '<'))
  register: podman_fail

# List the consumed subscriptions; the output is searched below for the
# expected profile strings.
- name: check RHEL software profile
  ansible.builtin.command: subscription-manager list --consumed
  register: rhel_profile
  changed_when: false

- name: Debug RHEL profile
  ansible.builtin.debug:
    msg: "Detected RHEL Profile: {{ rhel_profile.stdout }}"

# The profile is considered valid when both expected strings occur in the
# subscription-manager output.
- name: Validate RHEL software profile
  ansible.builtin.set_fact:
    rhel_profile_valid: "{{ 'Server' in rhel_profile.stdout and 'File and Storage Server' in rhel_profile.stdout }}"

# The result is derived from rhel_profile_valid and stored BEFORE the fail
# task. Reading `rhel_fail.failed` afterwards cannot work: when the fail
# task is skipped its registered result has no `failed` key, and when it
# fires the host is aborted before the store task would run.
- name: Record and enforce the RHEL software profile
  vars:
    rhel_profile_msg: "Incorrect RHEL software profile. Expected: 'Server' with 'File and Storage Server', but detected: {{ rhel_profile.stdout | default('UNKNOWN') }}"
  block:
    - name: Store RHEL Profile check
      ansible.builtin.set_fact:
        preflight_results: "{{ preflight_results + [
          {
            'Check': 'RHEL Profile',
            'Result': 'PASS' if rhel_profile_valid | bool else 'FAIL',
            'Reason': 'N/A' if rhel_profile_valid | bool else rhel_profile_msg
          }
        ] }}"

    - name: Fail if RHEL profile is incorrect
      ansible.builtin.fail:
        msg: "{{ rhel_profile_msg }}"
      when: not (rhel_profile_valid | bool)
      register: rhel_fail

# Ask tuned which profile is active. The task never fails, so a missing or
# broken tuned-adm is handled below as "profile not detected".
- name: Get current tuned profile
  ansible.builtin.command: tuned-adm active
  register: tuned_profile
  changed_when: false
  failed_when: false

- name: Debug tuned profile output
  ansible.builtin.debug:
    msg: "Detected tuned profile: {{ tuned_profile.stdout | default('UNKNOWN') }}"

# The result is computed from the command output and stored BEFORE the fail
# task. Reading `tuned_fail.failed` afterwards cannot work: a skipped fail
# task registers no `failed` key, and a triggered one aborts the host before
# the store task would run. stdout is defaulted because the command is
# allowed to fail and may then not provide it.
- name: Record and enforce the tuned profile
  vars:
    tuned_profile_ok: "{{ 'throughput-performance' in (tuned_profile.stdout | default('')) }}"
    tuned_profile_msg: "Incorrect tuned profile. Expected: 'throughput-performance', but detected: {{ tuned_profile.stdout | default('UNKNOWN') }}"
  block:
    - name: Store Tuned Profile Check
      ansible.builtin.set_fact:
        preflight_results: "{{ preflight_results + [
          {
            'Check': 'Tuned Profile',
            'Result': 'PASS' if tuned_profile_ok | bool else 'FAIL',
            'Reason': 'N/A' if tuned_profile_ok | bool else tuned_profile_msg
          }
        ] }}"

    - name: Fail if tuned profile is incorrect
      ansible.builtin.fail:
        msg: "{{ tuned_profile_msg }}"
      when: not (tuned_profile_ok | bool)
      register: tuned_fail

# The CPU, memory, swap and filesystem checks are not imported from this
# file: cephadm-preflight.yml already imports
# tasks/preflight_cpu_mem_checks.yml itself ("Run CPU and memory checks").
# Importing them here as well ran every check twice and appended each entry
# to preflight_results a second time.
11 changes: 11 additions & 0 deletions templates/preflight_report.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{# Preflight report covering every host of the play.
   The rendering task runs only once, so iterating over ansible_play_hosts_all
   and reading each host's preflight_results through hostvars is what makes
   the report include all nodes instead of just the host the task ran for.
   'Reason' is read with .get() because some checks store no reason. #}
Preflight Check Report
======================

{% for host in ansible_play_hosts_all %}
Node: {{ (hostvars[host]['ansible_facts'] | default({})).get('hostname', host) }}
--------------------------------------
{% for item in hostvars[host]['preflight_results'] | default([]) %}
- {{ item['Check'] }}: {{ item['Result'] }}
{% if item['Result'] == 'FAIL' and item.get('Reason', 'N/A') != 'N/A' %}
-> Reason: {{ item['Reason'] }}
{% endif %}
{% endfor %}

{% endfor %}

0 comments on commit 6e47331

Please sign in to comment.