diff --git a/README.md b/README.md index 90ab849..1db8988 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,13 @@ Currently supported OSes: - Ubuntu (20.04+) - Raspberry Pi OS (11+) - Debian (11+) + - Rocky Linux (9+) + - AlmaLinux (9+) + - CentOS Stream (9+) + - RHEL (9+) + - Fedora (38+) + - Arch Linux + - Manjaro Other OSes may need a few tweaks to work correctly. You can also run the playbook inside Docker (see the note under 'Benchmarking - Single Node'), but performance will be artificially limited. @@ -33,6 +40,10 @@ Each host should be reachable via SSH using the username set in `ansible_user`. Tweak other settings inside `config.yml` as desired (the most important being `hpl_root`—this is where the compiled MPI, ATLAS, and HPL benchmarking code will live). +> **Note**: +> The names of the nodes inside `hosts.ini` must match the hostname of their corresponding node; otherwise, the benchmark will hang when you try to run it in a cluster. +> For example, if you have `node-01.local` in your `hosts.ini`, your host's hostname should be `node-01` and not something else like `raspberry-pi`. + Then run the benchmarking playbook inside this directory: ``` diff --git a/dependencies/arch-based.yml b/dependencies/arch-based.yml new file mode 100755 index 0000000..b61e5dd --- /dev/null +++ b/dependencies/arch-based.yml @@ -0,0 +1,12 @@ +- name: Update pacman cache. + community.general.pacman: + update_cache: true + become: true + +- name: Install dependencies. + ansible.builtin.package: + name: + - base-devel + - gcc-fortran + state: present + become: true \ No newline at end of file diff --git a/dependencies/debian-based.yml b/dependencies/debian-based.yml new file mode 100755 index 0000000..6b0ae69 --- /dev/null +++ b/dependencies/debian-based.yml @@ -0,0 +1,14 @@ +- name: Update apt cache. + ansible.builtin.apt: + update_cache: true + cache_valid_time: 600 + become: true + +- name: Install dependencies.
+ ansible.builtin.package: + name: + - build-essential + - gfortran + - automake + state: present + become: true \ No newline at end of file diff --git a/dependencies/rhel-based.yml b/dependencies/rhel-based.yml new file mode 100755 index 0000000..0db3ae3 --- /dev/null +++ b/dependencies/rhel-based.yml @@ -0,0 +1,15 @@ +- name: Update dnf cache. + ansible.builtin.dnf: + update_cache: true + become: true + +- name: Install dependencies. + ansible.builtin.package: + name: + - '@Development Tools' + - gcc-gfortran + # Fedora, at least Fedora Server, doesn't install this + # with the Development Tools group for some reason + - gcc-g++ + state: present + become: true \ No newline at end of file diff --git a/firewall/configure-firewall.yml b/firewall/configure-firewall.yml new file mode 100755 index 0000000..776a50e --- /dev/null +++ b/firewall/configure-firewall.yml @@ -0,0 +1,30 @@ +- name: Creating new custom firewall zone. + ansible.posix.firewalld: + zone: top500 + permanent: true + state: present + become: true + +- name: Setting custom firewall zone to accept connections. + ansible.posix.firewalld: + zone: top500 + target: ACCEPT + state: enabled + permanent: true + become: true + +- name: Adding nodes as trusted sources in the firewall. + ansible.posix.firewalld: + source: "{{ item }}" + zone: top500 + state: enabled + permanent: true + loop: "{{ host_ips }}" + when: item != ansible_default_ipv4.address + become: true + +- name: Restarting firewall for changes to take effect. + ansible.builtin.service: + name: firewalld + state: restarted + become: true \ No newline at end of file diff --git a/firewall/reset-firewall.yml b/firewall/reset-firewall.yml new file mode 100755 index 0000000..c127598 --- /dev/null +++ b/firewall/reset-firewall.yml @@ -0,0 +1,20 @@ +# Remove our custom firewall zone since we don't need it anymore +- name: Reverting firewall back to its original state.
+ ansible.posix.firewalld: + zone: top500 + state: absent + permanent: true + become: true + +- name: Restarting firewall for changes to take effect. + ansible.builtin.service: + name: firewalld + state: restarted + become: true + +# When removing a custom firewall zone, a .xml.old file will sometimes remain +- name: Cleaning up firewall rules. + ansible.builtin.file: + path: /etc/firewalld/zones/top500.xml.old + state: absent + become: true \ No newline at end of file diff --git a/main.yml b/main.yml index 5ad0c2a..19ce1ea 100644 --- a/main.yml +++ b/main.yml @@ -12,21 +12,14 @@ vars_files: ['config.yml'] tasks: - - name: Update apt cache. - ansible.builtin.apt: - update_cache: true - cache_valid_time: 600 + - include_tasks: dependencies/rhel-based.yml + when: ansible_os_family == 'RedHat' + + - include_tasks: dependencies/debian-based.yml when: ansible_os_family == 'Debian' - become: true - - name: Install dependencies. - ansible.builtin.package: - name: - - build-essential - - gfortran - - automake - state: present - become: true + - include_tasks: dependencies/arch-based.yml + when: ansible_os_family == 'Archlinux' - name: Create required temporary directories. ansible.builtin.file: @@ -89,6 +82,24 @@ remote_src: true creates: "{{ hpl_root }}/tmp/ATLAS/README" + # The source code for the configure script that ATLAS uses makes calls + # with fgrep to find information about the system from its various + # outputs to set the configuration accordingly. However, + # grep versions 3.8+ throw the following warning when calling fgrep: + # 'fgrep: warning: fgrep is obsolescent; using grep -F' + # This occurs as output from fgrep which causes the ATLAS configuration + # to get invalid return values, causing the configuration to fail + # due to it assuming the first value is the number it's looking for. + # + # If the version of grep is 3.8 or newer, we have to patch the ATLAS + # source code to get rid of the warning produced by fgrep.
+ - name: Checking installed software. + package_facts: + manager: auto + + - include_tasks: patch-atlas-src.yml + when: ansible_facts.packages['grep'][0].version is version("3.8", ">=") + - name: Install ATLAS (takes a LONG time). ansible.builtin.command: "{{ item }}" args: @@ -210,6 +221,11 @@ dest: "{{ hpl_root }}/tmp/hpl-2.3/bin/rpi/HPL.dat" mode: 0644 + # If this is not done, the nodes will fail to connect to each other + # causing the playbook to hang at 'Run the benchmark.' + - include_tasks: firewall/configure-firewall.yml + when: ansible_os_family == "RedHat" + - name: Run the benchmark. ansible.builtin.command: mpirun -f cluster-hosts ./xhpl args: @@ -217,5 +233,8 @@ register: mpirun_output run_once: true + - include_tasks: firewall/reset-firewall.yml + when: ansible_os_family == "RedHat" + - name: Output the results. debug: var=mpirun_output.stdout diff --git a/patch-atlas-src.yml b/patch-atlas-src.yml new file mode 100755 index 0000000..5808941 --- /dev/null +++ b/patch-atlas-src.yml @@ -0,0 +1,40 @@ +- name: Checking to see if patches need to be applied. + ansible.builtin.stat: + path: "{{ hpl_root }}/tmp/ATLAS_PATCHING_COMPLETE" + register: no_patches_needed + +# Fixes fgrep warning being added to output and causing +# the ATLAS configuration to fail to identify aspects of +# the given node +- name: Patching ATLAS source code (Part 1). + ansible.builtin.replace: + path: "{{ hpl_root }}/tmp/ATLAS/CONFIG/include/atlas_sys.h" + regexp: '2>&1' + replace: '2>/dev/null' + when: not no_patches_needed.stat.exists + +# Fixes 'free(): invalid pointer' error during +# ATLAS configuration +- name: Patching ATLAS source code (Part 2). + ansible.builtin.lineinfile: + path: "{{ hpl_root }}/tmp/ATLAS/CONFIG/src/probe_comp.c" + insertbefore: 'free\(cmnd\)' + line: ' printf("\n");' + firstmatch: true + when: not no_patches_needed.stat.exists + +# Fixes 'corrupted size vs.
prev_size' error during +# ATLAS configuration +- name: Patching ATLAS source code (Part 3). + ansible.builtin.lineinfile: + path: "{{ hpl_root }}/tmp/ATLAS/CONFIG/include/atlas_sys.h" + insertbefore: 'if \(fgets\(sout, len, fpin\)\)' + line: ' printf("\n");' + firstmatch: true + when: not no_patches_needed.stat.exists + +- name: Create 'ATLAS_PATCHING_COMPLETE' file. + ansible.builtin.file: + path: "{{ hpl_root }}/tmp/ATLAS_PATCHING_COMPLETE" + state: touch + mode: "0644" \ No newline at end of file