From 4d12dee6f7dc80259f171157d98bf21ff8625fbc Mon Sep 17 00:00:00 2001 From: David Chisnall Date: Fri, 12 Apr 2024 14:10:31 +0100 Subject: [PATCH] Tickless scheduler This commit incorporates several changes: Timers are now not set for a regular tick, they are set when a thread may be preempted. Specifically, the timer is set to the next timeout that will trigger a scheduling operation. This avoids timers triggering a switch to the scheduler to do nothing (resume the currently running thread). This means that if a thread sleeps for ten ticks while another runs, we will get one timer interrupt ten ticks in the future, rather than ten interrupts one tick apart. This means that ticks are now calculated retroactively based on elapsed time, rather than counted on each context switch. This, in turn, necessitates some small API changes. We previously conflated two things: - Sleep for N * (tick duration) - Yield and allow lower-priority threads to run for, at most, N * (tick duration) These are now deconflated by adding a second parameter to thread_sleep. Most sleeps are of the second form and so this is the default. This reduces the time taken to run the test suite on Sonata by around 30% and in the Ibex SAFE simulator by 13%. --- sdk/core/allocator/main.cc | 2 +- sdk/core/scheduler/main.cc | 37 ++++-- sdk/core/scheduler/thread.h | 72 +++++++++--- sdk/core/scheduler/timer.h | 109 ++++++++++++++++-- sdk/core/switcher/entry.S | 6 + sdk/include/FreeRTOS-Compat/task.h | 2 +- .../platform/generic-riscv/platform-timer.hh | 40 ++++--- sdk/include/thread.h | 27 ++++- sdk/lib/debug/debug.cc | 7 ++ tests/locks-test.cc | 2 + 10 files changed, 249 insertions(+), 55 deletions(-) diff --git a/sdk/core/allocator/main.cc b/sdk/core/allocator/main.cc index 29a652b1..7a47e821 100644 --- a/sdk/core/allocator/main.cc +++ b/sdk/core/allocator/main.cc @@ -291,7 +291,7 @@ namespace // Drop and reacquire the lock while yielding. // Sleep for a single tick. 
g.unlock(); - Timeout smallSleep{0}; + Timeout smallSleep{1}; thread_sleep(&smallSleep); if (!reacquire_lock(timeout, g, smallSleep.elapsed)) { diff --git a/sdk/core/scheduler/main.cc b/sdk/core/scheduler/main.cc index 88442be3..f8b50785 100644 --- a/sdk/core/scheduler/main.cc +++ b/sdk/core/scheduler/main.cc @@ -270,15 +270,20 @@ namespace sched ExceptionGuard g{[=]() { sched_panic(mcause, mepc, mtval); }}; + bool tick = false; switch (mcause) { // Explicit yield call case MCAUSE_ECALL_MACHINE: - schedNeeded = true; + { + schedNeeded = true; + Thread *currentThread = Thread::current_get(); + tick = currentThread && currentThread->is_ready(); break; + } case MCAUSE_INTR | MCAUSE_MTIME: - Timer::do_interrupt(); schedNeeded = true; + tick = true; break; case MCAUSE_INTR | MCAUSE_MEXTERN: schedNeeded = false; @@ -293,6 +298,7 @@ namespace sched std::tie(schedNeeded, std::ignore, std::ignore) = futex_wake(Capability{&word}.address()); }); + tick = schedNeeded; break; case MCAUSE_THREAD_EXIT: // Make the current thread non-runnable. @@ -305,13 +311,23 @@ namespace sched // We cannot continue exiting this thread, make sure we will // pick a new one. schedNeeded = true; + tick = true; sealedTStack = nullptr; break; default: sched_panic(mcause, mepc, mtval); } + if (tick || !Thread::any_ready()) + { + Timer::expiretimers(); + } auto newContext = schedNeeded ? Thread::schedule(sealedTStack) : sealedTStack; +#if 0 + Debug::log("Thread: {}", + Thread::current_get() ? 
Thread::current_get()->id_get() : 0); +#endif + Timer::update(); if constexpr (Accounting) { @@ -351,7 +367,7 @@ namespace sched if (shouldYield) { - Thread::yield_interrupt_enabled(); + yield(); } return ret; @@ -419,14 +435,17 @@ SystickReturn __cheri_compartment("sched") thread_systemtick_get() } __cheriot_minimum_stack(0x80) int __cheri_compartment("sched") - thread_sleep(Timeout *timeout) + thread_sleep(Timeout *timeout, uint32_t flags) { STACK_CHECK(0x80); if (!check_timeout_pointer(timeout)) { return -EINVAL; } - Thread::current_get()->suspend(timeout, nullptr, true); + ////Debug::log("Thread {} sleeping for {} ticks", + /// Thread::current_get()->id_get(), timeout->remaining); + Thread *current = Thread::current_get(); + current->suspend(timeout, nullptr, true, !(flags & ThreadSleepNoEarlyWake)); return 0; } @@ -468,8 +487,10 @@ __cheriot_minimum_stack(0xa0) int futex_timed_wait(Timeout *timeout, // If we try to block ourself, that's a mistake. if ((owningThread == currentThread) || (owningThread == nullptr)) { - Debug::log("futex_timed_wait: invalid owning thread {}", - owningThread); + Debug::log("futex_timed_wait: thread {} acquiring PI futex with " + "invalid owning thread {}", + currentThread->id_get(), + owningThreadID); return -EINVAL; } Debug::log("Thread {} boosting priority of {} for futex {}", @@ -550,7 +571,7 @@ __cheriot_minimum_stack(0x90) int futex_wake(uint32_t *address, uint32_t count) if (shouldYield) { - Thread::yield_interrupt_enabled(); + yield(); } return woke; diff --git a/sdk/core/scheduler/thread.h b/sdk/core/scheduler/thread.h index e5b29614..81a93838 100644 --- a/sdk/core/scheduler/thread.h +++ b/sdk/core/scheduler/thread.h @@ -18,6 +18,8 @@ namespace // thread structures. 
class MultiWaiterInternal; + uint64_t expiry_time_for_timeout(uint32_t timeout); + template class ThreadImpl final : private utils::NoCopyNoMove { @@ -115,23 +117,18 @@ namespace } /** - * When yielding inside the scheduler compartment, we almost always want - * to re-enable interrupts before ecall. If we don't, then a thread with - * interrupt enabled can just call a scheduler function with a long - * timeout, essentially gaining the ability to indefinitely block - * interrupts. Worse, if this is the only thread, then it blocks - * interrupts forever for the whole system. + * Returns true if any thread is ready to run. */ - static void yield_interrupt_enabled() + static bool any_ready() { - __asm volatile("ecall"); + return priorityMap != 0; } static uint32_t yield_timed() { uint64_t ticksAtStart = ticksSinceBoot; - yield_interrupt_enabled(); + yield(); uint64_t elapsed = ticksSinceBoot - ticksAtStart; if (elapsed > std::numeric_limits::max()) @@ -152,11 +149,12 @@ namespace */ bool suspend(Timeout *t, ThreadImpl **newSleepQueue, - bool yieldUnconditionally = false) + bool yieldUnconditionally = false, + bool yieldNotSleep = false) { if (t->remaining != 0) { - suspend(t->remaining, newSleepQueue); + suspend(t->remaining, newSleepQueue, yieldNotSleep); } if ((t->remaining != 0) || yieldUnconditionally) { @@ -188,6 +186,7 @@ namespace OriginalPriority(priority), expiryTime(-1), state(ThreadState::Suspended), + isYielding(false), sleepQueue(nullptr), tStackPtr(tstack) { @@ -212,7 +211,7 @@ namespace // We must be suspended. Debug::Assert(state == ThreadState::Suspended, "Waking thread that is in state {}, not suspended", - state); + static_cast(state)); // First, remove self from the timer waiting list. timer_list_remove(&waitingList); if (sleepQueue != nullptr) @@ -233,11 +232,18 @@ namespace schedule = true; } } + // If this is the same priority as the current thread, we may need + // to update the timer. 
+ if (priority >= highestPriority) + { + schedule = true; + } if (reason == WakeReason::Timer || reason == WakeReason::Delete) { multiWaiter = nullptr; } list_insert(&priorityList[priority]); + isYielding = false; return schedule; } @@ -278,11 +284,14 @@ namespace * waiting on a resource, add it to the list of that resource. No * matter what, it has to be added to the timer list. */ - void suspend(uint32_t waitTicks, ThreadImpl **newSleepQueue) + void suspend(uint32_t waitTicks, + ThreadImpl **newSleepQueue, + bool yieldNotSleep = false) { + isYielding = yieldNotSleep; Debug::Assert(state == ThreadState::Ready, "Suspending thread that is in state {}, not ready", - state); + static_cast(state)); list_remove(&priorityList[priority]); state = ThreadState::Suspended; priority_map_remove(); @@ -291,8 +300,7 @@ namespace list_insert(newSleepQueue); sleepQueue = newSleepQueue; } - expiryTime = - (waitTicks == UINT32_MAX ? -1 : ticksSinceBoot + waitTicks); + expiryTime = expiry_time_for_timeout(waitTicks); timer_list_insert(&waitingList); } @@ -407,7 +415,7 @@ namespace Debug::Assert(state == ThreadState::Suspended, "Inserting thread into timer list that is in state " "{}, not suspended", - state); + static_cast(state)); if (head == nullptr) { timerNext = timerPrev = *headPtr = this; @@ -511,6 +519,29 @@ namespace return priority; } + bool is_ready() + { + return state == ThreadState::Ready; + } + + bool is_yielding() + { + return isYielding; + } + + /** + * Returns true if there are other runnable threads with the same + * priority as this thread. + */ + bool has_priority_peers() + { + Debug::Assert(state == ThreadState::Ready, + "Checking for peers on thread that is in state {}, " + "not ready", + static_cast(state)); + return next != this; + } + ~ThreadImpl() { // We have static definition of threads. We only create threads in @@ -616,7 +647,12 @@ namespace uint8_t priority; /// The original priority level for this thread. This never changes. 
const uint8_t OriginalPriority; - ThreadState state; + ThreadState state : 2; + /** + * If the thread is yielding, it may be scheduled before its timeout + * expires, as long as no other threads are runnable. + */ + bool isYielding : 1; }; using Thread = ThreadImpl; diff --git a/sdk/core/scheduler/timer.h b/sdk/core/scheduler/timer.h index a86120b5..2c13ffbd 100644 --- a/sdk/core/scheduler/timer.h +++ b/sdk/core/scheduler/timer.h @@ -26,36 +26,96 @@ namespace IsTimer, "Platform's timer implementation does not meet the required interface"); + /** + * Timer interface. Provides generic timer functionality to the scheduler, + * wrapping the platform's timer device. + */ class Timer final : private TimerCore { + inline static uint64_t lastTickTime = 0; + inline static uint64_t zeroTickTime = 0; + inline static uint32_t accumulatedTickError = 0; + public: + /** + * Perform any setup necessary for the timer device. + */ static void interrupt_setup() { static_assert(TIMERCYCLES_PER_TICK <= UINT32_MAX, "Cycles per tick can't be represented in 32 bits. " "Double check your platform config"); init(); - setnext(TIMERCYCLES_PER_TICK); + zeroTickTime = time(); } - static void do_interrupt() - { - ++Thread::ticksSinceBoot; + /** + * Expose the timer device's method for returning the current time. + */ + using TimerCore::time; - expiretimers(); - setnext(TIMERCYCLES_PER_TICK); + /** + * Update the timer to fire the next timeout for the thread at the + * front of the queue, or disable the timer if there are no threads + * blocked with a timeout and no threads with the same priority. + * + * The scheduler is a simple RTOS scheduler that does not allow any + * thread to run if a higher-priority thread is runnable. This means + * that we need a timer interrupt in one of two situations: + * + * - We have a thread of the same priority as the current thread and + * we are going to round-robin schedule it. 
+ * - We have a thread of a higher priority than the current thread + that is currently sleeping on a timeout and need it to preempt the + current thread when its timeout expires. + * + * We currently over-approximate the second condition by making the + timer fire independent of the priority. If this is ever changed, + some care must be taken to ensure that dynamic priority propagation + via priority-inheriting futexes behaves correctly. + * + * This should be called after scheduling has changed the list of + waiting threads. + */ + static void update() + { + auto *thread = Thread::current_get(); + bool waitingListIsEmpty = ((Thread::waitingList == nullptr) || + (Thread::waitingList->expiryTime == -1)); + bool threadHasNoPeers = + (thread == nullptr) || (!thread->has_priority_peers()); + if (waitingListIsEmpty && threadHasNoPeers) + { + clear(); + } + else + { + uint64_t nextTimer = waitingListIsEmpty + ? time() + TIMERCYCLES_PER_TICK + : Thread::waitingList->expiryTime; + setnext(nextTimer); + } } - private: + /** + * Wake any threads that were sleeping until a timeout before the + * current time. This also wakes yielded threads if there are no + * runnable threads. + * + * This should be called when a timer interrupt fires. + */ static void expiretimers() { + uint64_t now = time(); + Thread::ticksSinceBoot = + (now - zeroTickTime) / TIMERCYCLES_PER_TICK; if (Thread::waitingList == nullptr) { return; } for (Thread *iter = Thread::waitingList;;) { - if (iter->expiryTime <= Thread::ticksSinceBoot) + if (iter->expiryTime <= now) { Thread *iterNext = iter->timerNext; @@ -72,6 +132,39 @@ namespace break; } } + // If there are no runnable threads, try to wake a yielded thread + if (!Thread::any_ready()) + { + // Look at the first thread. If it is not yielding, there may + // be another thread behind it that is, but that's fine. We + // don't want to encounter situations where (with a + // high-priority A and a low-priority B): + // + // 1. 
A yields for 5 ticks. + // 2. B starts and does a blocking operation (e.g. try_lock) + // with a 1-tick timeout. + // 3. A wakes up and prevents B from running even though we're + // still in its 5-tick yield period. + if (Thread *head = Thread::waitingList) + { + if (head->is_yielding()) + { + Debug::log("Woke thread {} {} cycles early", + head->id_get(), + int64_t(head->expiryTime) - now); + head->ready(Thread::WakeReason::Timer); + } + } + } } }; + + uint64_t expiry_time_for_timeout(uint32_t timeout) + { + if (timeout == -1) + { + return -1; + } + return Timer::time() + (timeout * TIMERCYCLES_PER_TICK); + } } // namespace diff --git a/sdk/core/switcher/entry.S b/sdk/core/switcher/entry.S index 8836d088..716248da 100644 --- a/sdk/core/switcher/entry.S +++ b/sdk/core/switcher/entry.S @@ -970,7 +970,13 @@ __Z13thread_id_getv: // Load the trusted stack pointer into a register that we will clobber in // the next instruction when we load the thread ID. cspecialr ca0, mtdc + cgettag a1, ca0 + // If this is a null pointer, don't try to dereference it and report that + // we are thread 0. This permits the debug code to work even from things + // that are not real threads. 
+ beqz a1, .Lend clh a0, TrustedStack_offset_threadID(ca0) +.Lend: cret diff --git a/sdk/include/FreeRTOS-Compat/task.h b/sdk/include/FreeRTOS-Compat/task.h index 5aac5878..398ff58b 100644 --- a/sdk/include/FreeRTOS-Compat/task.h +++ b/sdk/include/FreeRTOS-Compat/task.h @@ -55,7 +55,7 @@ static inline BaseType_t xTaskCheckForTimeOut(TimeOut_t *pxTimeOut, static inline void vTaskDelay(const TickType_t xTicksToDelay) { struct Timeout timeout = {0, xTicksToDelay}; - thread_sleep(&timeout); + thread_sleep(&timeout, ThreadSleepNoEarlyWake); } /** diff --git a/sdk/include/platform/generic-riscv/platform-timer.hh b/sdk/include/platform/generic-riscv/platform-timer.hh index 377f3233..d0173b5c 100644 --- a/sdk/include/platform/generic-riscv/platform-timer.hh +++ b/sdk/include/platform/generic-riscv/platform-timer.hh @@ -38,6 +38,22 @@ class StandardClint : private utils::NoCopyNoMove 2 * sizeof(uint32_t)); } + static uint64_t time() + { + // The timer is little endian, so the high 32 bits are after the low 32 bits. + // We can't do atomic 64-bit loads and so we have to read these separately. + volatile uint32_t *timerHigh = pmtimer + 1; + uint32_t timeLow, timeHigh; + + // Read the current time. Loop until the high 32 bits are stable. + do + { + timeHigh = *timerHigh; + timeLow = *pmtimer; + } while (timeHigh != *timerHigh); + return (uint64_t(timeHigh) << 32) | timeLow; + } + /** * Set the timer up for the next timer interrupt. We need to: * 1. read the current MTIME, @@ -51,28 +67,22 @@ class StandardClint : private utils::NoCopyNoMove * interrupt. Writing to any half is enough to clear the interrupt, * which is also why 2. is important. */ - static void setnext(uint32_t cycles) + static void setnext(uint64_t nextTime) { /// the high 32 bits of the 64-bit MTIME register volatile uint32_t *pmtimercmphigh = pmtimercmp + 1; - /// the low 32 bits - volatile uint32_t *pmtimerhigh = pmtimer + 1; uint32_t curmtimehigh, curmtime, curmtimenew; - // Read the current time. 
Loop until the high 32 bits are stable. - do - { - curmtimehigh = *pmtimerhigh; - curmtime = *pmtimer; - } while (curmtimehigh != *pmtimerhigh); - - // Add tick cycles to current time. Handle carry bit. - curmtimehigh += __builtin_add_overflow(curmtime, cycles, &curmtimenew); - // Write the new MTIMECMP value, at which the next interrupt fires. *pmtimercmphigh = -1; // Prevent spurious interrupts. - *pmtimercmp = curmtimenew; - *pmtimercmphigh = curmtimehigh; + *pmtimercmp = nextTime; + *pmtimercmphigh = nextTime >> 32; + } + + static void clear() + { + volatile uint32_t *pmtimercmphigh = pmtimercmp + 1; + *pmtimercmphigh = -1; // Prevent spurious interrupts. } private: diff --git a/sdk/include/thread.h b/sdk/include/thread.h index 3c5430a1..ac3c8db3 100644 --- a/sdk/include/thread.h +++ b/sdk/include/thread.h @@ -23,6 +23,17 @@ typedef struct [[cheri::interrupt_state(disabled)]] SystickReturn __cheri_compartment("sched") thread_systemtick_get(void); +enum ThreadSleepFlags : uint32_t +{ + /** + * Sleep for up to the specified timeout, but wake early if there are no + * other runnable threads. This allows a high-priority thread to yield for + * a fixed number of ticks for lower-priority threads to run, but does not + * prevent it from resuming early. + */ + ThreadSleepNoEarlyWake = 1 << 0, +}; + /** * Sleep for at most the specified timeout (see `timeout.h`). * @@ -34,9 +45,17 @@ typedef struct * but reports the time spent sleeping. This requires a cross-domain call and * return in addition to the overheads of `yield` and so `yield` should be * preferred in contexts where the elapsed time is not required. + * + * The `flags` parameter is a bitwise OR of `ThreadSleepFlags`. + * + * A sleeping thread may be woken early if no other threads are runnable. The + * thread with the earliest timeout will be woken first. If you are using + * `thread_sleep` to elapse real time, pass `ThreadSleepNoEarlyWake` as the + * flags argument to prevent early wakeups. 
+ * */ [[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched") - thread_sleep(struct Timeout *timeout); + thread_sleep(struct Timeout *timeout, uint32_t flags __if_cxx(= 0)); /** * Return the thread ID of the current running thread. @@ -119,7 +138,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds) // In simulation builds, just yield once but don't bother trying to do // anything sensible with time. Timeout t = {0, 1}; - thread_sleep(&t); + thread_sleep(&t, 0); return milliseconds; #else static const uint32_t CyclesPerMillisecond = CPU_TIMER_HZ / 1'000; @@ -133,7 +152,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds) while ((end > current) && (end - current > MS_PER_TICK)) { Timeout t = {0, ((uint32_t)(end - current)) / CyclesPerTick}; - thread_sleep(&t); + thread_sleep(&t, ThreadSleepNoEarlyWake); current = rdcycle64(); } // Spin for the remaining time. @@ -142,7 +161,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds) current = rdcycle64(); } current = rdcycle64(); - return (current - start) * CyclesPerMillisecond; + return (current - start) / CyclesPerMillisecond; #endif } diff --git a/sdk/lib/debug/debug.cc b/sdk/lib/debug/debug.cc index 07561643..2642d9fb 100644 --- a/sdk/lib/debug/debug.cc +++ b/sdk/lib/debug/debug.cc @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT #include +#include using namespace CHERI; @@ -335,7 +336,13 @@ debug_log_message_write(const char *context, DebugPrinter printer; printer.write("\x1b[35m"); printer.write(context); +#if 0 + printer.write(" [Thread "); + printer.write(thread_id_get()); + printer.write("]\033[0m: "); +#else printer.write("\033[0m: "); +#endif printer.format(format, messages, messageCount); printer.write("\n"); } diff --git a/tests/locks-test.cc b/tests/locks-test.cc index ffd8da27..20798c81 100644 --- a/tests/locks-test.cc +++ b/tests/locks-test.cc @@ -59,7 +59,9 @@ namespace "Trying to acquire lock spuriously succeeded"); if 
constexpr (!std::is_same_v) { +#ifndef SIMULATION TEST(t.elapsed >= 1, "Sleep slept for {} ticks", t.elapsed); +#endif } }