diff --git a/utils/create_slurm_repo.yml b/utils/create_slurm_repo.yml
new file mode 100644
index 0000000000..24372fefaf
--- /dev/null
+++ b/utils/create_slurm_repo.yml
@@ -0,0 +1,23 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+
+# Entry point for the create_slurm_repo role. The play targets localhost; the
+# role itself delegates its repository tasks to the first `apache_server` host.
+- name: Build Slurm RPMs and create the Slurm RPM repository
+  hosts: localhost
+  gather_facts: false  # the role collects only the package facts it needs
+  any_errors_fatal: true
+  roles:
+    - create_slurm_repo
diff --git a/utils/roles/create_slurm_repo/tasks/build_rpm.yml b/utils/roles/create_slurm_repo/tasks/build_rpm.yml
new file mode 100644
index 0000000000..013c6a3bb5
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/build_rpm.yml
@@ -0,0 +1,36 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Verify Slurm tarball exists
+  register: slurm_tarball_check  # read by the guard task that follows
+  ansible.builtin.stat:
+    path: "{{ slurm_base }}/{{ slurm_tarball }}"
+
+- name: Fail if Slurm tarball is missing
+  when: not slurm_tarball_check.stat.exists
+  ansible.builtin.fail:
+    msg: "Slurm source tarball not found at {{ slurm_base }}/{{ slurm_tarball }}."
+
+- name: Run rpmbuild for Slurm (This task may take few minutes)
+  ansible.builtin.shell:  # shell, not command: the build output is redirected
+    cmd: >
+      rpmbuild -ta {{ slurm_base }}/{{ slurm_tarball }}
+      --define "_topdir {{ rpmbuild_dir }}"
+      --define "_configure_args {{ slurm_configure_args | join(' ') }}
+      --with-nvml={{ slurm_nvml_config.path }}"
+      > "{{ log_dir }}/rpm_build.log" 2>&1
+    chdir: "{{ slurm_base }}"
+  register: rpmbuild_result
+  failed_when: rpmbuild_result.rc != 0
+  changed_when: rpmbuild_result.rc == 0
diff --git a/utils/roles/create_slurm_repo/tasks/clean_up.yml b/utils/roles/create_slurm_repo/tasks/clean_up.yml
new file mode 100644
index 0000000000..b4b757fb12
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/clean_up.yml
@@ -0,0 +1,32 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Announce cleanup
+  ansible.builtin.debug:
+    msg: "Cleaning up Slurm build environment at {{ slurm_base }}"
+
+- name: Check if Slurm base directory exists
+  register: slurm_dir  # stat result gates the removal below
+  ansible.builtin.stat:
+    path: "{{ slurm_base }}"
+
+- name: Remove Slurm base directory
+  when: slurm_dir.stat.exists
+  ansible.builtin.file:
+    state: absent
+    path: "{{ slurm_base }}"
+
+- name: Confirm cleanup complete
+  ansible.builtin.debug:
+    msg: 'Cleanup complete.'
diff --git a/utils/roles/create_slurm_repo/tasks/copy_rpms.yml b/utils/roles/create_slurm_repo/tasks/copy_rpms.yml
new file mode 100644
index 0000000000..317691f8ee
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/copy_rpms.yml
@@ -0,0 +1,49 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Remove existing destination directory if present
+  ansible.builtin.file:
+    path: "{{ dest_repo_path }}"
+    state: absent  # start from an empty repo directory on every run
+
+- name: Create destination directory
+  ansible.builtin.file:
+    path: "{{ dest_repo_path }}"
+    state: directory
+    owner: apache
+    group: apache
+    mode: "{{ repo_dir_mode }}"
+
+- name: Copy RPMs from source to destination
+  ansible.builtin.copy:
+    src: "{{ source_rpm_path }}/"  # trailing slash: copy the contents only
+    dest: "{{ dest_repo_path }}/"
+    owner: apache
+    group: apache
+    mode: '0644'
+    remote_src: false  # src is read from the control node
+
+- name: Install createrepo if not installed
+  ansible.builtin.dnf:
+    name: createrepo
+    state: present
+
+- name: Create repo metadata
+  ansible.builtin.command: createrepo --update "{{ dest_repo_path }}"
+  args:
+    creates: "{{ dest_repo_path }}/repodata"
+
+- name: Display success message  # URL path follows dest_repo_path under html_repo_path
+  ansible.builtin.debug:
+    msg: "Slurm RPM repository successfully created and accessible at http://{{ ansible_host | default(inventory_hostname) }}/{{ dest_repo_path | relpath(html_repo_path) }}/"
diff --git a/utils/roles/create_slurm_repo/tasks/create_dirs.yml b/utils/roles/create_slurm_repo/tasks/create_dirs.yml
new file mode 100644
index 0000000000..11c15f3e73
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/create_dirs.yml
@@ -0,0 +1,23 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Ensure base, rpmbuild, and log directories exist
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    mode: "{{ repo_dir_mode }}"
+  loop:
+    - "{{ slurm_base }}"
+    - "{{ rpmbuild_dir }}"
+    - "{{ log_dir }}"
diff --git a/utils/roles/create_slurm_repo/tasks/download_tarball.yml b/utils/roles/create_slurm_repo/tasks/download_tarball.yml
new file mode 100644
index 0000000000..7e1ee163d6
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/download_tarball.yml
@@ -0,0 +1,35 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Ensure Slurm base directory exists
+  ansible.builtin.file:
+    path: "{{ slurm_base }}"
+    state: directory
+    mode: "{{ repo_dir_mode }}"
+
+# A failed get_url stops the host immediately, so a follow-up task guarded by
+# `download_result is failed` can never run; the message is raised from rescue.
+- name: Download Slurm source tarball
+  block:
+    - name: Fetch Slurm source tarball
+      ansible.builtin.get_url:
+        url: "{{ slurm_download_url }}"
+        dest: "{{ slurm_base }}/{{ slurm_tarball }}"
+        mode: '0644'
+        force: true  # always re-download, even if the file is already there
+      register: download_result
+  rescue:
+    - name: Verify download success
+      ansible.builtin.fail:
+        msg: "Failed to download {{ slurm_tarball }} from {{ slurm_download_url }}"
diff --git a/utils/roles/create_slurm_repo/tasks/install_apache.yml b/utils/roles/create_slurm_repo/tasks/install_apache.yml
new file mode 100644
index 0000000000..5666bab176
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/install_apache.yml
@@ -0,0 +1,60 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Ensure Apache (httpd) is installed
+  ansible.builtin.dnf:
+    name: httpd
+    state: present
+
+- name: Enable and start Apache service
+  ansible.builtin.service:
+    name: httpd
+    state: started
+    enabled: true
+
+- name: Allow HTTP service in the firewall
+  ansible.posix.firewalld:
+    service: http
+    permanent: true
+    state: enabled
+    immediate: true  # apply to the running firewall as well as the saved config
+
+- name: Confirm Apache is listening on port 80
+  ansible.builtin.wait_for:
+    port: "{{ [apache_config.port] | flatten | first | int }}"  # accepts list or scalar; `| int` on a list gives 0
+    state: started
+    timeout: 10
+  changed_when: false
+
+- name: Ensure Apache is installed and running
+  ansible.builtin.dnf:
+    name: httpd
+    state: present
+
+- name: Ensure /var/www/html exists
+  ansible.builtin.file:
+    path: "{{ html_repo_path }}"
+    state: directory
+    owner: apache
+    group: apache
+    mode: "{{ repo_dir_mode }}"
+
+- name: Restore SELinux context on /var/www/html (if enabled)
+  ansible.builtin.command: restorecon -Rv {{ html_repo_path }}
+  when: ansible_selinux is defined and ansible_selinux.status == "enabled"  # needs gathered facts
+  changed_when: false
+
+- name: Display success message
+  ansible.builtin.debug:
+    msg: "Apache successfully installed and serving"
diff --git a/utils/roles/create_slurm_repo/tasks/install_dependencies.yml b/utils/roles/create_slurm_repo/tasks/install_dependencies.yml
new file mode 100644
index 0000000000..7dd3e17ca8
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/install_dependencies.yml
@@ -0,0 +1,36 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+# `name: "*"` with `state: latest` upgrades packages rather than refreshing
+# metadata; `update_only` limits it to packages that are already installed.
+- name: Upgrade all installed packages to the latest version
+  ansible.builtin.dnf:
+    name: "*"
+    state: latest
+    update_only: true
+
+# A failed install stops the host immediately, so a follow-up task guarded by
+# `install_result is failed` can never run; the message is raised from rescue.
+- name: Install Slurm build dependencies
+  block:
+    - name: Install Slurm build dependency packages
+      ansible.builtin.dnf:
+        name: "{{ slurm_dependencies.common_pkg + slurm_dependencies.tools_pkg + slurm_dependencies.slurm_pkg }}"
+        state: present
+      register: install_result
+  rescue:
+    - name: Fail play if dependency installation failed
+      ansible.builtin.fail:
+        msg: "Slurm dependency installation failed on {{ inventory_hostname }}!"
diff --git a/utils/roles/create_slurm_repo/tasks/install_sshpass.yml b/utils/roles/create_slurm_repo/tasks/install_sshpass.yml
new file mode 100644
index 0000000000..fefa12e0b0
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/install_sshpass.yml
@@ -0,0 +1,27 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+- name: Install sshpass
+  ansible.builtin.dnf:
+    state: present
+    name: sshpass
+
+- name: Verify sshpass installation
+  changed_when: false  # version query only; never reported as a change
+  register: sshpass_version
+  ansible.builtin.command: sshpass -V
+
+- name: Display sshpass installation status
+  ansible.builtin.debug:
+    msg: 'sshpass installed successfully'
diff --git a/utils/roles/create_slurm_repo/tasks/main.yml b/utils/roles/create_slurm_repo/tasks/main.yml
new file mode 100644
index 0000000000..6ef3a57ba6
--- /dev/null
+++ b/utils/roles/create_slurm_repo/tasks/main.yml
@@ -0,0 +1,56 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Stage 1 - build steps, run on the play host (localhost, the control node)
+- name: Install sshpass
+  ansible.builtin.include_tasks: install_sshpass.yml
+
+- name: Clean up previous build
+  ansible.builtin.include_tasks: clean_up.yml
+
+- name: Create directory structure
+  ansible.builtin.include_tasks: create_dirs.yml
+
+- name: Install dependencies
+  ansible.builtin.include_tasks: install_dependencies.yml
+
+- name: Download Slurm source tarball
+  ansible.builtin.include_tasks: download_tarball.yml
+
+- name: Build Slurm RPM package
+  ansible.builtin.include_tasks: build_rpm.yml
+
+# Stage 2 - repository publishing, delegated to the first apache_server host
+- name: Collect installed rpm package facts
+  ansible.builtin.package_facts:
+  run_once: true
+  delegate_to: "{{ groups['apache_server'][0] }}"
+
+- name: Check if Apache is already installed on apache_server
+  run_once: true
+  delegate_to: "{{ groups['apache_server'][0] }}"
+  ansible.builtin.set_fact:
+    apache_installed: "{{ apache_pkg in ansible_facts.packages }}"
+
+- name: Slurm Repo creation tasks on Apache Server
+  run_once: true
+  delegate_to: "{{ groups['apache_server'][0] }}"
+  block:
+
+    - name: Install and configure Apache (only if not installed)
+      when: not apache_installed | bool
+      ansible.builtin.include_tasks: install_apache.yml
+
+    - name: Copy Slurm RPMs and create repository
+      ansible.builtin.include_tasks: copy_rpms.yml
diff --git a/utils/roles/create_slurm_repo/vars/main.yml b/utils/roles/create_slurm_repo/vars/main.yml
new file mode 100644
index 0000000000..29251ca1c0
--- /dev/null
+++ b/utils/roles/create_slurm_repo/vars/main.yml
@@ -0,0 +1,97 @@
+# Copyright 2025 Dell Inc. or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# ---- Slurm Build Configuration ----
+slurm_version: "25.05.2"
+slurm_tarball: "slurm-{{ slurm_version }}.tar.bz2"
+slurm_download_url: "https://download.schedmd.com/slurm/{{ slurm_tarball }}"
+dir_name: "slurm_{{ slurm_version }}"  # names the build and log directories
+repo_dir_mode: "0755"
+
+# ---- Apache Config ----
+apache_config:
+  # Scalar, not a list: install_apache.yml passes this value through `| int`.
+  port: 80
+
+# ---- Package Groups ----
+slurm_dependencies:  # the three lists are concatenated by install_dependencies.yml
+  common_pkg:
+    - wget
+    - git
+    - make
+    - gcc
+    - gcc-c++
+    - rpm-build
+    - autoconf
+    - automake
+    - python3
+    - python3-devel
+    - perl
+    - perl-devel
+    - readline-devel
+    - zlib-devel
+    - pam-devel
+    - dbus-devel
+    - hwloc-devel
+    - libbpf-devel
+
+  tools_pkg:
+    - ucx
+    - ucx-devel
+    - openmpi
+    - openmpi-devel
+    - pmix
+    - pmix-devel
+
+  slurm_pkg:
+    - jansson-devel
+    - libcurl
+    - libcurl-devel
+    - json-c
+    - json-c-devel
+    - libyaml
+    - libyaml-devel
+    - http-parser-devel
+    - openssl-devel
+    - mariadb-devel
+    - systemd-devel
+    - munge
+    - munge-devel
+
+apache_pkg: httpd  # key looked up in the package facts by tasks/main.yml
+
+# ----- slurm configs -----
+slurm_configure_args:  # joined with spaces into the rpmbuild `_configure_args` macro
+  - "--with-ucx"
+  - "--with-pmix"
+  - "--enable-cgroupv2"
+  - "--with-yaml"
+  - "--with-jwt"
+  - "--with-libcurl"
+  - "--without-hdf5"
+
+slurm_nvml_config:
+  path: "/usr/local/cuda"  # appended to the configure args as --with-nvml=<path>
+
+# ----- paths -----
+base_dir: "/opt/omnia"
+slurm_base: "{{ base_dir }}/{{ dir_name }}"
+log_dir: "{{ base_dir }}/log/{{ dir_name }}"
+rpmbuild_dir: "{{ slurm_base }}/rpmbuild"
+
+
+# ----- rpms copy path -----
+source_rpm_path: "{{ rpmbuild_dir }}/RPMS/x86_64"  # same path as before, built from rpmbuild_dir
+html_repo_path: "/var/www/html"
+dest_repo_path: "{{ html_repo_path }}/slurm_rpms"