From 7241690330c209698e293eea6bd3b846674f9542 Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Fri, 11 Aug 2023 20:36:29 -0400
Subject: [PATCH 1/7] Added native support for RHEL based distros

Adds the dependency task file for RHEL-family hosts plus two firewalld
task files. configure-firewall.yml creates a permanent `top500` zone with
target ACCEPT, adds every address in `host_ips` except the node's own
default IPv4 address as a source, and restarts firewalld.
reset-firewall.yml removes the zone again, restarts firewalld and deletes
the leftover top500.xml.old file.

---
 dependencies/rhel-based.yml     | 15 +++++++++++++++
 firewall/configure-firewall.yml | 30 ++++++++++++++++++++++++++++++
 firewall/reset-firewall.yml     | 20 ++++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100755 dependencies/rhel-based.yml
 create mode 100755 firewall/configure-firewall.yml
 create mode 100755 firewall/reset-firewall.yml

diff --git a/dependencies/rhel-based.yml b/dependencies/rhel-based.yml
new file mode 100755
index 0000000..0db3ae3
--- /dev/null
+++ b/dependencies/rhel-based.yml
@@ -0,0 +1,15 @@
+- name: Update dnf cache.
+  ansible.builtin.dnf:
+    update_cache: true
+  become: true
+
+- name: Install dependencies.
+  ansible.builtin.package:
+    name:
+      - '@Development Tools'
+      - gcc-gfortran
+      # Fedora, at least Fedora Server, doesn't install this
+      # with the Development Tools group for some reason
+      - gcc-g++
+    state: present
+  become: true
diff --git a/firewall/configure-firewall.yml b/firewall/configure-firewall.yml
new file mode 100755
index 0000000..776a50e
--- /dev/null
+++ b/firewall/configure-firewall.yml
@@ -0,0 +1,30 @@
+- name: Creating new custom firewall zone.
+  ansible.posix.firewalld:
+    zone: top500
+    permanent: true
+    state: present
+  become: true
+
+- name: Setting custom firewall zone to accept connections.
+  ansible.posix.firewalld:
+    zone: top500
+    target: ACCEPT
+    state: enabled
+    permanent: true
+  become: true
+
+- name: Adding nodes as trusted sources in the firewall.
+  ansible.posix.firewalld:
+    source: "{{ item }}"
+    zone: top500
+    state: enabled
+    permanent: true
+  loop: "{{ host_ips }}"
+  when: item != ansible_default_ipv4.address
+  become: true
+
+- name: Restarting firewall for changes to take effect.
+  ansible.builtin.service:
+    name: firewalld
+    state: restarted
+  become: true
diff --git a/firewall/reset-firewall.yml b/firewall/reset-firewall.yml
new file mode 100755
index 0000000..c127598
--- /dev/null
+++ b/firewall/reset-firewall.yml
@@ -0,0 +1,20 @@
+# Remove our custom firewall zone since we don't need it anymore
+- name: Reverting firewall back to its original state.
+  ansible.posix.firewalld:
+    zone: top500
+    state: absent
+    permanent: true
+  become: true
+
+- name: Restarting firewall for changes to take effect.
+  ansible.builtin.service:
+    name: firewalld
+    state: restarted
+  become: true
+
+# When removing a custom firewall zone, a .xml.old file will sometimes remain
+- name: Cleaning up firewall rules.
+  ansible.builtin.file:
+    path: /etc/firewalld/zones/top500.xml.old
+    state: absent
+  become: true
From b1658fa92ab6a98d6b871b60c9a3a21665e89a3a Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Fri, 11 Aug 2023 20:37:28 -0400
Subject: [PATCH 2/7] Added support for Arch-based distros

---
 dependencies/arch-based.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100755 dependencies/arch-based.yml

diff --git a/dependencies/arch-based.yml b/dependencies/arch-based.yml
new file mode 100755
index 0000000..b61e5dd
--- /dev/null
+++ b/dependencies/arch-based.yml
@@ -0,0 +1,12 @@
+- name: Update pacman cache.
+  community.general.pacman:
+    update_cache: true
+  become: true
+
+- name: Install dependencies.
+  ansible.builtin.package:
+    name:
+      - base-devel
+      - gcc-fortran
+    state: present
+  become: true
From a334a05b4d00ea91c9cfd94ae015bdbe7965215f Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Fri, 11 Aug 2023 20:39:34 -0400
Subject: [PATCH 3/7] Moved Debian-based dependencies to its own file

---
 dependencies/debian-based.yml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100755 dependencies/debian-based.yml

diff --git a/dependencies/debian-based.yml b/dependencies/debian-based.yml
new file mode 100755
index 0000000..6b0ae69
--- /dev/null
+++ b/dependencies/debian-based.yml
@@ -0,0 +1,14 @@
+- name: Update apt cache.
+  ansible.builtin.apt:
+    update_cache: true
+    cache_valid_time: 600
+  become: true
+
+- name: Install dependencies.
+  ansible.builtin.package:
+    name:
+      - build-essential
+      - gfortran
+      - automake
+    state: present
+  become: true
From 826f71ab2dcd86da556d97474f5487eb411f5fec Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Fri, 11 Aug 2023 20:40:42 -0400
Subject: [PATCH 4/7] Adds patches to ATLAS to fix issues with grep 3.8+

The three patch tasks edit CONFIG/include/atlas_sys.h and
CONFIG/src/probe_comp.c under "{{ hpl_root }}/tmp/ATLAS". They only run
while the marker file "{{ hpl_root }}/tmp/ATLAS_PATCHING_COMPLETE" does
not exist; the final task touches that marker. All paths are written as
"{{ hpl_root }}/tmp/..." so they resolve correctly when hpl_root has no
trailing slash.

---
 patch-atlas-src.yml | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100755 patch-atlas-src.yml

diff --git a/patch-atlas-src.yml b/patch-atlas-src.yml
new file mode 100755
index 0000000..5808941
--- /dev/null
+++ b/patch-atlas-src.yml
@@ -0,0 +1,40 @@
+- name: Checking to see if patches need to be applied.
+  ansible.builtin.stat:
+    path: "{{ hpl_root }}/tmp/ATLAS_PATCHING_COMPLETE"
+  register: no_patches_needed
+
+# Fixes fgrep warning being added to output and causing
+# the ATLAS configuration to fail to identify aspects of
+# the given node
+- name: Patching ATLAS source code (Part 1).
+  ansible.builtin.replace:
+    path: "{{ hpl_root }}/tmp/ATLAS/CONFIG/include/atlas_sys.h"
+    regexp: '2>&1'
+    replace: '2>/dev/null'
+  when: not no_patches_needed.stat.exists
+
+# Fixes 'free(): invalid pointer' error during
+# ATLAS configuration
+- name: Patching ATLAS source code (Part 2).
+  ansible.builtin.lineinfile:
+    path: "{{ hpl_root }}/tmp/ATLAS/CONFIG/src/probe_comp.c"
+    insertbefore: 'free\(cmnd\)'
+    line: ' printf("\n");'
+    firstmatch: true
+  when: not no_patches_needed.stat.exists
+
+# Fixes 'corrupted size vs. prev_size' error during
+# ATLAS configuration
+- name: Patching ATLAS source code (Part 3).
+  ansible.builtin.lineinfile:
+    path: "{{ hpl_root }}/tmp/ATLAS/CONFIG/include/atlas_sys.h"
+    insertbefore: 'if \(fgets\(sout, len, fpin\)\)'
+    line: ' printf("\n");'
+    firstmatch: true
+  when: not no_patches_needed.stat.exists
+
+- name: Create 'ATLAS_PATCHING_COMPLETE' file.
+  ansible.builtin.file:
+    path: "{{ hpl_root }}/tmp/ATLAS_PATCHING_COMPLETE"
+    state: touch
+    mode: "0644"
From b1b3f3a654b5ab3151dfd5ca669a819bb9aa3ea8 Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Fri, 11 Aug 2023 20:41:16 -0400
Subject: [PATCH 5/7] Updated main.yml to reflect changes

Dependencies are now included from a per-family task file selected by
ansible_os_family (RedHat, Debian, Archlinux). patch-atlas-src.yml is
included when the installed grep package is version 3.8 or newer. On
RedHat hosts the firewall task files are included to configure the
firewall and to reset it after the benchmark run.

---
 main.yml | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/main.yml b/main.yml
index 5ad0c2a..237d0c5 100644
--- a/main.yml
+++ b/main.yml
@@ -12,21 +12,14 @@
   vars_files: ['config.yml']
 
   tasks:
-    - name: Update apt cache.
-      ansible.builtin.apt:
-        update_cache: true
-        cache_valid_time: 600
+    - include_tasks: dependencies/rhel-based.yml
+      when: ansible_os_family == 'RedHat'
+
+    - include_tasks: dependencies/debian-based.yml
       when: ansible_os_family == 'Debian'
-      become: true
 
-    - name: Install dependencies.
-      ansible.builtin.package:
-        name:
-          - build-essential
-          - gfortran
-          - automake
-        state: present
-      become: true
+    - include_tasks: dependencies/arch-based.yml
+      when: ansible_os_family == 'Archlinux'
 
     - name: Create required temporary directories.
       ansible.builtin.file:
@@ -89,6 +82,24 @@
         remote_src: true
         creates: "{{ hpl_root }}/tmp/ATLAS/README"
 
+    # The source code for the configure script that ATLAS uses makes calls
+    # with fgrep to find information about the system from its various
+    # outputs to set the configuration accordingly. However,
+    # grep versions 3.8+ throw the following warning when calling fgrep:
+    # 'fgrep: warning: fgrep is obsolescent; using grep -F'
+    # This occurs as output from fgrep which causes the ATLAS configuration
+    # to get invalid return values, causing the configuration to fail
+    # due to it assuming the first value is the number it's looking for.
+    #
+    # If the version of grep is 3.8 or newer, we have to patch the ATLAS
+    # source code to get rid of the warning produced by fgrep.
+    - name: Checking installed software.
+      package_facts:
+        manager: auto
+
+    - include_tasks: patch-atlas-src.yml
+      when: ansible_facts.packages['grep'][0].version is version("3.8", ">=")
+
     - name: Install ATLAS (takes a LONG time).
       ansible.builtin.command: "{{ item }}"
       args:
@@ -188,6 +199,11 @@
       loop: "{{ groups['cluster'] }}"
       become: true
 
+    # If this is not done, the nodes will fail to connect to each other
+    # causing the playbook to hang at 'Run the benchmark.'
+    - include_tasks: firewall/configure-firewall.yml
+      when: ansible_os_family == "RedHat"
+
 - name: Run linpack benchmark.
   hosts: cluster
   become: false
@@ -217,5 +233,8 @@
       register: mpirun_output
       run_once: true
 
+    - include_tasks: firewall/reset-firewall.yml
+      when: ansible_os_family == "RedHat"
+
     - name: Output the results.
       debug: var=mpirun_output.stdout
From ae8f6d8111c2459019fd4a0537f774f42a2182f7 Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Fri, 11 Aug 2023 20:41:54 -0400
Subject: [PATCH 6/7] Updated README to reflect new OS support

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 43176e8..c0c5740 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,13 @@ Currently supported OSes:
   - Ubuntu (20.04+)
   - Raspberry Pi OS (11+)
   - Debian (11+)
+  - Rocky Linux (9+)
+  - AlmaLinux (9+)
+  - CentOS Stream (9+)
+  - RHEL (9+)
+  - Fedora (38+)
+  - Arch Linux
+  - Manjaro
 
 Other OSes may need a few tweaks to work correctly. You can also run the playbook inside Docker (see the note under 'Benchmarking - Single Node'), but performance will be artificially limited.
 
@@ -33,6 +40,10 @@ Each host should be reachable via SSH using the username set in `ansible_user`.
 
 Tweak other settings inside `config.yml` as desired (the most important being `hpl_root`—this is where the compiled MPI, ATLAS, and HPL benchmarking code will live).
 
+> **Note**:
+> The names of the nodes inside `hosts.ini` must match the hostname of their corresponding node; otherwise, the benchmark will hang when you try to run it in a cluster.
+> For example, if you have `node-01.local` in your `hosts.ini`, your host's hostname should be `node-01` and not something else like `raspberry-pi`.
+
 Then run the benchmarking playbook inside this directory:
 
 ```
From 9c619f88e362fd9e585f816fe7207927e5b2825a Mon Sep 17 00:00:00 2001
From: DarkAssassin23 <15916504+DarkAssassin23@users.noreply.github.com>
Date: Sat, 19 Aug 2023 13:13:18 -0400
Subject: [PATCH 7/7] Moved firewall configuration to benchmark play

---
 main.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/main.yml b/main.yml
index 237d0c5..19ce1ea 100644
--- a/main.yml
+++ b/main.yml
@@ -199,11 +199,6 @@
       loop: "{{ groups['cluster'] }}"
       become: true
 
-    # If this is not done, the nodes will fail to connect to each other
-    # causing the playbook to hang at 'Run the benchmark.'
-    - include_tasks: firewall/configure-firewall.yml
-      when: ansible_os_family == "RedHat"
-
 - name: Run linpack benchmark.
   hosts: cluster
   become: false
@@ -226,6 +221,11 @@
         dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/rpi/HPL.dat"
         mode: 0644
 
+    # If this is not done, the nodes will fail to connect to each other
+    # causing the playbook to hang at 'Run the benchmark.'
+    - include_tasks: firewall/configure-firewall.yml
+      when: ansible_os_family == "RedHat"
+
     - name: Run the benchmark.
       ansible.builtin.command: mpirun -f cluster-hosts ./xhpl
       args: