diff --git a/README.md b/README.md
index ad9c570..95da5f5 100644
--- a/README.md
+++ b/README.md
@@ -789,6 +789,17 @@ _**Example** (dual-socket 4-core Intel Nehalem with HT turned off)_: `check_hw_
+##### check_hw_numa
+`check_hw_numa [numa-nodes] [NPS]`
+
+`check_hw_numa` compares the number of [NUMA](https://en.wikipedia.org/wiki/Non-uniform_memory_access) nodes detected on the system and, optionally, the number of NUMA nodes per socket (NPS) to the specified values to ensure that the expected system topology is enabled. For CPUs with a configurable NPS setting, the second parameter can be used to verify proper BIOS configuration. Either parameter may be passed as an empty string to skip that comparison.
+
+_**Example** (dual-socket AMD EPYC system with NPS=2)_:  `check_hw_numa 4 2`
+
+
+
+
+
##### check_hw_eth
`check_hw_eth device`
@@ -1247,7 +1258,7 @@ function check_stuff_works() {
die 1 "Stuff is not working"
return 1
fi
-
+
# check passed
return 0
}
diff --git a/scripts/lbnl_hw.nhc b/scripts/lbnl_hw.nhc
index e73d197..a19d64a 100644
--- a/scripts/lbnl_hw.nhc
+++ b/scripts/lbnl_hw.nhc
@@ -7,6 +7,8 @@
HW_SOCKETS=0
HW_CORES=0
HW_THREADS=0
+HW_NUMA_NODES=0
+HW_NUMA_NPS=0
HW_RAM_TOTAL=0
HW_RAM_FREE=0
HW_SWAP_TOTAL=0
@@ -24,7 +26,7 @@ MCELOG_MAX_UNCORRECTED_RATE="${MCELOG_MAX_UNCORRECTED_RATE:-0}"
# Read hardware information from /proc and /sys files.
function nhc_hw_gather_data() {
- local LINE CORES SIBLINGS MHZ PROCESSOR PHYS_ID PORT INDEX DEV
+ local LINE CORES SIBLINGS MHZ PROCESSOR PHYS_ID PORT INDEX DEV NODES
local -a FIELD PHYS_IDS
# Gather CPU info
@@ -66,6 +68,16 @@ function nhc_hw_gather_data() {
fi
dbg "Got $HW_SOCKETS $MHZ MHz processors ($HW_CORES cores, $HW_THREADS threads)"
+    # Gather NUMA info.  The kernel exposes one nodeN directory per NUMA node
+    # under /sys/devices/system/node.
+    if [[ -d /sys/devices/system/node ]]; then
+        # Globbing is re-enabled only for this expansion, then disabled again.
+        set +f
+        NODES=(/sys/devices/system/node/node[0-9]*)
+        set -f
+        # When nothing matches, the pattern is left in place as a literal
+        # string; only trust the count if the first entry really exists.
+        if [[ -d "${NODES[0]}" ]]; then
+            HW_NUMA_NODES=${#NODES[@]}
+        fi
+        # NPS cannot be derived without a socket count; skipping the division
+        # avoids a "division by 0" error when HW_SOCKETS is still 0.
+        if [[ $HW_SOCKETS -gt 0 ]]; then
+            HW_NUMA_NPS=$((HW_NUMA_NODES / HW_SOCKETS))
+        fi
+    fi
+    dbg "Got $HW_NUMA_NODES NUMA node(s) (NPS: $HW_NUMA_NPS)"
+
# Gather memory info
if [[ -e /proc/meminfo ]]; then
while read -a FIELD ; do
@@ -169,6 +181,26 @@ function check_hw_cpuinfo() {
return 0
}
+# Verify the NUMA topology against expected values: the NUMA node count ($1)
+# and the NUMA nodes-per-socket (NPS) value ($2).  An empty or omitted
+# argument skips the corresponding comparison.  Returns 0 when every supplied
+# value matches; otherwise reports the mismatch via die() and returns 1.
+function check_hw_numa() {
+    local NUMA_NODES=$1 NUMA_NPS=$2
+
+    # HW_NUMA_NODES starts out as 0; gather hardware data while it still is.
+    [[ $HW_NUMA_NODES -ne 0 ]] || nhc_hw_gather_data
+
+    # Compare the total NUMA node count, if one was requested.
+    if [[ -n "$NUMA_NODES" ]]; then
+        if [[ $NUMA_NODES -ne $HW_NUMA_NODES ]]; then
+            die 1 "$FUNCNAME:  Actual NUMA nodes count ($HW_NUMA_NODES) does not match expected ($NUMA_NODES)."
+            return 1
+        fi
+    fi
+
+    # Compare the nodes-per-socket value, if one was requested.
+    if [[ -n "$NUMA_NPS" ]]; then
+        if [[ $NUMA_NPS -ne $HW_NUMA_NPS ]]; then
+            die 1 "$FUNCNAME:  Actual Nodes Per Socket (NPS) value ($HW_NUMA_NPS) does not match expected ($NUMA_NPS)."
+            return 1
+        fi
+    fi
+
+    return 0
+}
+
# Check RAM size against minimum ($1) and maximum ($2) allowable size in kB. To
# require an exact amount of RAM, pass the same value for both parameters.
# The optional fudge factor ($3) allows a certain amount of variance to be tolerated.
@@ -462,7 +494,7 @@ function check_hw_mcelog() {
die 1 "$MSG"
return 1
fi
-
+
# If none of the above thresholds was met, return success.
return 0
else
diff --git a/test/test_lbnl_hw.nhc b/test/test_lbnl_hw.nhc
index ef607f2..0944250 100644
--- a/test/test_lbnl_hw.nhc
+++ b/test/test_lbnl_hw.nhc
@@ -1,8 +1,9 @@
# Tests for lbnl_hw.nhc
-plan $((11+7+13+13+13+4+4+4+10+3+6+6)) "lbnl_hw.nhc" && {
+plan $((12+7+1+13+13+13+4+4+4+10+3+6+6)) "lbnl_hw.nhc" && {
is "`type -t nhc_hw_gather_data 2>&1`" 'function' 'nhc_hw_gather_data() loaded properly'
is "`type -t check_hw_cpuinfo 2>&1`" 'function' 'check_hw_cpuinfo() loaded properly'
+ is "`type -t check_hw_numa 2>&1`" 'function' 'check_hw_numa() loaded properly'
is "`type -t check_hw_physmem 2>&1`" 'function' 'check_hw_physmem() loaded properly'
is "`type -t check_hw_swap 2>&1`" 'function' 'check_hw_swap() loaded properly'
is "`type -t check_hw_mem 2>&1`" 'function' 'check_hw_mem() loaded properly'
@@ -42,6 +43,15 @@ plan $((11+7+13+13+13+4+4+4+10+3+6+6)) "lbnl_hw.nhc" && {
is $? 1 "Invalid test hardware: Non-existent CPU"
+ # NUMA data: 2 NUMA nodes, 1 NUMA node per socket
+ HW_NUMA_NODES=2
+ HW_NUMA_NPS=1
+
+ # valid test
+ check_hw_numa 2 1
+ is $? 0 "Valid test hardware: 2 NUMA nodes, 1 NPS"
+
+
# Memory data: 32GB RAM, 18GB swap
HW_RAM_FREE=27828840
HW_RAM_TOTAL=32857508