From 1d7aee252331d463a3a848c80ef6820768376b89 Mon Sep 17 00:00:00 2001
From: Dmitrii Kuvaiskii
Date: Wed, 31 Jul 2024 11:03:51 -0700
Subject: [PATCH] [PAL/vm-common] Check the "break" variable less frequently in `delay()`

Previously, the `delay()` function accessed the "break out of loop early"
variable `continue_gate` on every iteration of its busy-wait loop, i.e.
basically on every CPU cycle. This variable is typically a global variable,
causing high contention on multi-core workloads. This manifested e.g. in
the Candle Quantized LLaMA app.

This commit fixes this by checking the variable less frequently. The
current heuristic is to check it every 1 ms.

Signed-off-by: Dmitrii Kuvaiskii
---
 pal/src/host/vm-common/kernel_time.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pal/src/host/vm-common/kernel_time.c b/pal/src/host/vm-common/kernel_time.c
index d6a192f68..61f1eca03 100644
--- a/pal/src/host/vm-common/kernel_time.c
+++ b/pal/src/host/vm-common/kernel_time.c
@@ -58,9 +58,14 @@ int delay(uint64_t delay_us, bool* continue_gate) {
     uint64_t curr_tsc = get_tsc();
     uint64_t wait_until_tsc = curr_tsc + delay_us * g_tsc_mhz;
 
+    uint64_t next_gate_check_tsc = curr_tsc + 1000 * g_tsc_mhz; /* check every 1ms */
+
     while (curr_tsc < wait_until_tsc) {
-        if (continue_gate && __atomic_load_n(continue_gate, __ATOMIC_ACQUIRE))
-            break;
+        if (curr_tsc > next_gate_check_tsc) {
+            if (continue_gate && __atomic_load_n(continue_gate, __ATOMIC_ACQUIRE))
+                break;
+            next_gate_check_tsc = curr_tsc + 1000 * g_tsc_mhz;
+        }
         CPU_RELAX();
         curr_tsc = get_tsc();
     }