diff --git a/sdk/core/allocator/main.cc b/sdk/core/allocator/main.cc
index 29a652b1..7a47e821 100644
--- a/sdk/core/allocator/main.cc
+++ b/sdk/core/allocator/main.cc
@@ -291,7 +291,7 @@ namespace
 						// Drop and reacquire the lock while yielding.
 						// Sleep for a single tick.
 						g.unlock();
-						Timeout smallSleep{0};
+						Timeout smallSleep{1};
 						thread_sleep(&smallSleep);
 						if (!reacquire_lock(timeout, g, smallSleep.elapsed))
 						{
diff --git a/sdk/core/scheduler/main.cc b/sdk/core/scheduler/main.cc
index 88442be3..f563d102 100644
--- a/sdk/core/scheduler/main.cc
+++ b/sdk/core/scheduler/main.cc
@@ -270,15 +270,20 @@ namespace sched
 
 		ExceptionGuard g{[=]() { sched_panic(mcause, mepc, mtval); }};
 
+		bool tick = false;
 		switch (mcause)
 		{
 			// Explicit yield call
 			case MCAUSE_ECALL_MACHINE:
-				schedNeeded = true;
+			{
+				schedNeeded           = true;
+				Thread *currentThread = Thread::current_get();
+				tick = currentThread && currentThread->is_ready();
 				break;
+			}
 			case MCAUSE_INTR | MCAUSE_MTIME:
-				Timer::do_interrupt();
 				schedNeeded = true;
+				tick        = true;
 				break;
 			case MCAUSE_INTR | MCAUSE_MEXTERN:
 				schedNeeded = false;
@@ -293,6 +298,7 @@ namespace sched
 					  std::tie(schedNeeded, std::ignore, std::ignore) =
 					    futex_wake(Capability{&word}.address());
 				  });
+				tick = schedNeeded;
 				break;
 			case MCAUSE_THREAD_EXIT:
 				// Make the current thread non-runnable.
@@ -305,13 +311,23 @@ namespace sched
 				// We cannot continue exiting this thread, make sure we will
 				// pick a new one.
 				schedNeeded  = true;
+				tick         = true;
 				sealedTStack = nullptr;
 				break;
 			default:
 				sched_panic(mcause, mepc, mtval);
 		}
+		if (tick || !Thread::any_ready())
+		{
+			Timer::expiretimers();
+		}
 		auto newContext =
 		  schedNeeded ? Thread::schedule(sealedTStack) : sealedTStack;
+#if 0
+		Debug::log("Thread: {}",
+		           Thread::current_get() ? Thread::current_get()->id_get() : 0);
+#endif
+		Timer::update();
 
 		if constexpr (Accounting)
 		{
@@ -419,14 +435,17 @@ SystickReturn __cheri_compartment("sched") thread_systemtick_get()
 }
 
 __cheriot_minimum_stack(0x80) int __cheri_compartment("sched")
-  thread_sleep(Timeout *timeout)
+  thread_sleep(Timeout *timeout, uint32_t flags)
 {
 	STACK_CHECK(0x80);
 	if (!check_timeout_pointer(timeout))
 	{
 		return -EINVAL;
 	}
-	Thread::current_get()->suspend(timeout, nullptr, true);
+	////Debug::log("Thread {} sleeping for {} ticks",
+	/// Thread::current_get()->id_get(), timeout->remaining);
+	Thread *current = Thread::current_get();
+	current->suspend(timeout, nullptr, true, !(flags & ThreadSleepNoEarlyWake));
 	return 0;
 }
 
@@ -468,8 +487,10 @@ __cheriot_minimum_stack(0xa0) int futex_timed_wait(Timeout        *timeout,
 		// If we try to block ourself, that's a mistake.
 		if ((owningThread == currentThread) || (owningThread == nullptr))
 		{
-			Debug::log("futex_timed_wait: invalid owning thread {}",
-			           owningThread);
+			Debug::log("futex_timed_wait: thread {} acquiring PI futex with "
+			           "invalid owning thread {}",
+			           currentThread->id_get(),
+			           owningThreadID);
 			return -EINVAL;
 		}
 		Debug::log("Thread {} boosting priority of {} for futex {}",
diff --git a/sdk/core/scheduler/thread.h b/sdk/core/scheduler/thread.h
index e5b29614..fc140904 100644
--- a/sdk/core/scheduler/thread.h
+++ b/sdk/core/scheduler/thread.h
@@ -18,6 +18,8 @@ namespace
 	// thread structures.
 	class MultiWaiterInternal;
 
+	uint64_t expiry_time_for_timeout(uint32_t timeout);
+
 	template<size_t NPrios>
 	class ThreadImpl final : private utils::NoCopyNoMove
 	{
@@ -114,6 +116,11 @@ namespace
 			return schedTStack;
 		}
 
+		static bool any_ready()
+		{
+			return priorityMap != 0;
+		}
+
 		/**
 		 * When yielding inside the scheduler compartment, we almost always want
 		 * to re-enable interrupts before ecall. If we don't, then a thread with
@@ -152,11 +159,12 @@ namespace
 		 */
 		bool suspend(Timeout     *t,
 		             ThreadImpl **newSleepQueue,
-		             bool         yieldUnconditionally = false)
+		             bool         yieldUnconditionally = false,
+		             bool         yieldNotSleep        = false)
 		{
 			if (t->remaining != 0)
 			{
-				suspend(t->remaining, newSleepQueue);
+				suspend(t->remaining, newSleepQueue, yieldNotSleep);
 			}
 			if ((t->remaining != 0) || yieldUnconditionally)
 			{
@@ -188,6 +196,7 @@ namespace
 		    OriginalPriority(priority),
 		    expiryTime(-1),
 		    state(ThreadState::Suspended),
+		    isYielding(false),
 		    sleepQueue(nullptr),
 		    tStackPtr(tstack)
 		{
@@ -212,7 +221,7 @@ namespace
 			// We must be suspended.
 			Debug::Assert(state == ThreadState::Suspended,
 			              "Waking thread that is in state {}, not suspended",
-			              state);
+			              static_cast<ThreadState>(state));
 			// First, remove self from the timer waiting list.
 			timer_list_remove(&waitingList);
 			if (sleepQueue != nullptr)
@@ -233,11 +242,18 @@ namespace
 					schedule        = true;
 				}
 			}
+			// If this is the same priority as the current thread, we may need
+			// to update the timer.
+			if (priority >= highestPriority)
+			{
+				schedule = true;
+			}
 			if (reason == WakeReason::Timer || reason == WakeReason::Delete)
 			{
 				multiWaiter = nullptr;
 			}
 			list_insert(&priorityList[priority]);
+			isYielding = false;
 
 			return schedule;
 		}
@@ -278,11 +294,14 @@ namespace
 		 * waiting on a resource, add it to the list of that resource. No
 		 * matter what, it has to be added to the timer list.
 		 */
-		void suspend(uint32_t waitTicks, ThreadImpl **newSleepQueue)
+		void suspend(uint32_t     waitTicks,
+		             ThreadImpl **newSleepQueue,
+		             bool         yieldNotSleep = false)
 		{
+			isYielding = yieldNotSleep;
 			Debug::Assert(state == ThreadState::Ready,
 			              "Suspending thread that is in state {}, not ready",
-			              state);
+			              static_cast<ThreadState>(state));
 			list_remove(&priorityList[priority]);
 			state = ThreadState::Suspended;
 			priority_map_remove();
@@ -291,8 +310,7 @@ namespace
 				list_insert(newSleepQueue);
 				sleepQueue = newSleepQueue;
 			}
-			expiryTime =
-			  (waitTicks == UINT32_MAX ? -1 : ticksSinceBoot + waitTicks);
+			expiryTime = expiry_time_for_timeout(waitTicks);
 
 			timer_list_insert(&waitingList);
 		}
@@ -407,7 +425,7 @@ namespace
 			Debug::Assert(state == ThreadState::Suspended,
 			              "Inserting thread into timer list that is in state "
 			              "{}, not suspended",
-			              state);
+			              static_cast<ThreadState>(state));
 			if (head == nullptr)
 			{
 				timerNext = timerPrev = *headPtr = this;
@@ -511,6 +529,29 @@ namespace
 			return priority;
 		}
 
+		bool is_ready()
+		{
+			return state == ThreadState::Ready;
+		}
+
+		bool is_yielding()
+		{
+			return isYielding;
+		}
+
+		/**
+		 * Returns true if there are other runnable threads with the same
+		 * priority as this thread.
+		 */
+		bool has_priority_peers()
+		{
+			Debug::Assert(state == ThreadState::Ready,
+			              "Checking for peers on thread that is in state {}, "
+			              "not ready",
+			              static_cast<ThreadState>(state));
+			return next != this;
+		}
+
 		~ThreadImpl()
 		{
 			// We have static definition of threads. We only create threads in
@@ -616,7 +657,12 @@ namespace
 		uint8_t priority;
 		/// The original priority level for this thread.  This never changes.
 		const uint8_t OriginalPriority;
-		ThreadState   state;
+		ThreadState   state : 2;
+		/**
+		 * If the thread is yielding, it may be scheduled before its timeout
+		 * expires, as long as no other threads are runnable.
+		 */
+		bool isYielding : 1;
 	};
 
 	using Thread = ThreadImpl<ThreadPrioNum>;
diff --git a/sdk/core/scheduler/timer.h b/sdk/core/scheduler/timer.h
index a86120b5..b56b8f0e 100644
--- a/sdk/core/scheduler/timer.h
+++ b/sdk/core/scheduler/timer.h
@@ -28,6 +28,9 @@ namespace
 
 	class Timer final : private TimerCore
 	{
+		inline static uint64_t lastTickTime         = 0;
+		inline static uint32_t accumulatedTickError = 0;
+
 		public:
 		static void interrupt_setup()
 		{
@@ -35,27 +38,62 @@ namespace
 			              "Cycles per tick can't be represented in 32 bits. "
 			              "Double check your platform config");
 			init();
-			setnext(TIMERCYCLES_PER_TICK);
 		}
 
-		static void do_interrupt()
+		using TimerCore::time;
+
+		static void update()
 		{
-			++Thread::ticksSinceBoot;
+			auto *thread             = Thread::current_get();
+			bool  waitingListIsEmpty = ((Thread::waitingList == nullptr) ||
+                                       (Thread::waitingList->expiryTime == -1));
+			bool  threadHasNoPeers =
+			  (thread == nullptr) || (!thread->has_priority_peers());
+			if (waitingListIsEmpty && threadHasNoPeers)
+			{
+				clear();
+			}
+			else
+			{
+				uint64_t nextTimer = waitingListIsEmpty
+				                       ? time() + TIMERCYCLES_PER_TICK
+				                       : Thread::waitingList->expiryTime;
+				setnext(nextTimer);
+			}
+		}
 
-			expiretimers();
-			setnext(TIMERCYCLES_PER_TICK);
+		static uint64_t update_tick()
+		{
+			uint64_t now     = time();
+			uint32_t elapsed = now - lastTickTime;
+			int32_t  error   = elapsed % TIMERCYCLES_PER_TICK;
+			if (elapsed < TIMERCYCLES_PER_TICK)
+			{
+				error = TIMERCYCLES_PER_TICK - error;
+			}
+			accumulatedTickError += error;
+			int32_t errorDirection = accumulatedTickError < 0 ? -1 : 1;
+			int32_t absoluteError  = accumulatedTickError * errorDirection;
+			if (absoluteError >= TIMERCYCLES_PER_TICK)
+			{
+				Thread::ticksSinceBoot += errorDirection;
+				accumulatedTickError += TIMERCYCLES_PER_TICK * -errorDirection;
+			}
+			lastTickTime = now;
+			Thread::ticksSinceBoot += elapsed / TIMERCYCLES_PER_TICK;
+			return now;
 		}
 
-		private:
 		static void expiretimers()
 		{
+			uint64_t now = update_tick();
 			if (Thread::waitingList == nullptr)
 			{
 				return;
 			}
 			for (Thread *iter = Thread::waitingList;;)
 			{
-				if (iter->expiryTime <= Thread::ticksSinceBoot)
+				if (iter->expiryTime <= now)
 				{
 					Thread *iterNext = iter->timerNext;
 
@@ -72,6 +110,39 @@ namespace
 					break;
 				}
 			}
+			if (!Thread::any_ready())
+			{
+				for (Thread *iter = Thread::waitingList; iter;)
+				{
+					if (iter->is_yielding())
+					{
+						Debug::log("Woke thread {} {} cycles early",
+						           iter->id_get(),
+						           int64_t(iter->expiryTime) - now);
+						Thread *iterNext = iter->timerNext;
+						iter->ready(Thread::WakeReason::Timer);
+						iter = iterNext;
+						if (Thread::waitingList == nullptr ||
+						    iter == Thread::waitingList)
+						{
+							break;
+						}
+					}
+					else
+					{
+						break;
+					}
+				}
+			}
 		}
 	};
+
+	uint64_t expiry_time_for_timeout(uint32_t timeout)
+	{
+		if (timeout == -1)
+		{
+			return -1;
+		}
+		return Timer::time() + (timeout * TIMERCYCLES_PER_TICK);
+	}
 } // namespace
diff --git a/sdk/core/switcher/entry.S b/sdk/core/switcher/entry.S
index 8836d088..30f94659 100644
--- a/sdk/core/switcher/entry.S
+++ b/sdk/core/switcher/entry.S
@@ -970,7 +970,13 @@ __Z13thread_id_getv:
 	// Load the trusted stack pointer into a register that we will clobber in
 	// the next instruction when we load the thread ID.
 	cspecialr          ca0, mtdc
+	//cgettag            a1, ca0
+	// If this is a null pointer, don't try to dereference it and report that
+	// we are thread 0.  This permits the debug code to work even from things
+	// that are not real threads.
+	//beqz               a1, .Lend
 	clh                a0, TrustedStack_offset_threadID(ca0)
+.Lend:
 	cret
 
 
diff --git a/sdk/include/FreeRTOS-Compat/task.h b/sdk/include/FreeRTOS-Compat/task.h
index 5aac5878..2517c027 100644
--- a/sdk/include/FreeRTOS-Compat/task.h
+++ b/sdk/include/FreeRTOS-Compat/task.h
@@ -55,7 +55,7 @@ static inline BaseType_t xTaskCheckForTimeOut(TimeOut_t  *pxTimeOut,
 static inline void vTaskDelay(const TickType_t xTicksToDelay)
 {
 	struct Timeout timeout = {0, xTicksToDelay};
-	thread_sleep(&timeout);
+	thread_sleep(&timeout, 0);
 }
 
 /**
diff --git a/sdk/include/platform/generic-riscv/platform-timer.hh b/sdk/include/platform/generic-riscv/platform-timer.hh
index 377f3233..91461c0c 100644
--- a/sdk/include/platform/generic-riscv/platform-timer.hh
+++ b/sdk/include/platform/generic-riscv/platform-timer.hh
@@ -38,6 +38,21 @@ class StandardClint : private utils::NoCopyNoMove
 		         2 * sizeof(uint32_t));
 	}
 
+	static uint64_t time()
+	{
+		/// the low 32 bits
+		volatile uint32_t *timerHigh = pmtimer + 1;
+		uint32_t           timeLow, timeHigh;
+
+		// Read the current time. Loop until the high 32 bits are stable.
+		do
+		{
+			timeHigh = *timerHigh;
+			timeLow  = *pmtimer;
+		} while (timeHigh != *timerHigh);
+		return (uint64_t(timeHigh) << 32) | timeLow;
+	}
+
 	/**
 	 * Set the timer up for the next timer interrupt. We need to:
 	 * 1. read the current MTIME,
@@ -51,28 +66,22 @@ class StandardClint : private utils::NoCopyNoMove
 	 * interrupt. Writing to any half is enough to clear the interrupt,
 	 * which is also why 2. is important.
 	 */
-	static void setnext(uint32_t cycles)
+	static void setnext(uint64_t nextTime)
 	{
 		/// the high 32 bits of the 64-bit MTIME register
 		volatile uint32_t *pmtimercmphigh = pmtimercmp + 1;
-		/// the low 32 bits
-		volatile uint32_t *pmtimerhigh = pmtimer + 1;
 		uint32_t           curmtimehigh, curmtime, curmtimenew;
 
-		// Read the current time. Loop until the high 32 bits are stable.
-		do
-		{
-			curmtimehigh = *pmtimerhigh;
-			curmtime     = *pmtimer;
-		} while (curmtimehigh != *pmtimerhigh);
-
-		// Add tick cycles to current time. Handle carry bit.
-		curmtimehigh += __builtin_add_overflow(curmtime, cycles, &curmtimenew);
-
 		// Write the new MTIMECMP value, at which the next interrupt fires.
 		*pmtimercmphigh = -1; // Prevent spurious interrupts.
-		*pmtimercmp     = curmtimenew;
-		*pmtimercmphigh = curmtimehigh;
+		*pmtimercmp     = nextTime;
+		*pmtimercmphigh = nextTime >> 32;
+	}
+
+	static void clear()
+	{
+		volatile uint32_t *pmtimercmphigh = pmtimercmp + 1;
+		*pmtimercmphigh                   = -1; // Prevent spurious interrupts.
 	}
 
 	private:
diff --git a/sdk/include/thread.h b/sdk/include/thread.h
index 3c5430a1..5e8d03aa 100644
--- a/sdk/include/thread.h
+++ b/sdk/include/thread.h
@@ -23,6 +23,17 @@ typedef struct
 [[cheri::interrupt_state(disabled)]] SystickReturn __cheri_compartment("sched")
   thread_systemtick_get(void);
 
+enum ThreadSleepFlags : uint32_t
+{
+	/**
+	 * Sleep for up to the specified timeout, but wake early if there are no
+	 * other runnable threads.  This allows a high-priority thread to yield for
+	 * a fixed number of ticks for lower-priority threads to run, but does not
+	 * prevent it from resuming early.
+	 */
+	ThreadSleepNoEarlyWake = 1 << 0,
+};
+
 /**
  * Sleep for at most the specified timeout (see `timeout.h`).
  *
@@ -34,9 +45,17 @@ typedef struct
  * but reports the time spent sleeping.  This requires a cross-domain call and
  * return in addition to the overheads of `yield` and so `yield` should be
  * preferred in contexts where the elapsed time is not required.
+ *
+ * The `flags` parameter is a bitwise OR of `ThreadSleepFlags`.
+ *
+ * A sleeping thread may be woken early if no other threads are runnable.  The
+ * thread with the earliest timeout will be woken first.  If you are using
+ * `thread_sleep` to elapse real time, pass `ThreadSleepNoEarlyWake` as the
+ * flags argument to prevent early wakeups.
+ *
  */
 [[cheri::interrupt_state(disabled)]] int __cheri_compartment("sched")
-  thread_sleep(struct Timeout *timeout);
+  thread_sleep(struct Timeout *timeout, uint32_t flags __if_cxx(= 0));
 
 /**
  * Return the thread ID of the current running thread.
@@ -119,7 +138,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds)
 	// In simulation builds, just yield once but don't bother trying to do
 	// anything sensible with time.
 	Timeout t = {0, 1};
-	thread_sleep(&t);
+	thread_sleep(&t, 0);
 	return milliseconds;
 #else
 	static const uint32_t CyclesPerMillisecond = CPU_TIMER_HZ / 1'000;
@@ -133,7 +152,7 @@ static inline uint64_t thread_millisecond_wait(uint32_t milliseconds)
 	while ((end > current) && (end - current > MS_PER_TICK))
 	{
 		Timeout t = {0, ((uint32_t)(end - current)) / CyclesPerTick};
-		thread_sleep(&t);
+		thread_sleep(&t, ThreadSleepNoEarlyWake);
 		current = rdcycle64();
 	}
 	// Spin for the remaining time.
diff --git a/sdk/lib/debug/debug.cc b/sdk/lib/debug/debug.cc
index 07561643..2642d9fb 100644
--- a/sdk/lib/debug/debug.cc
+++ b/sdk/lib/debug/debug.cc
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: MIT
 
 #include <debug.hh>
+#include <thread.h>
 
 using namespace CHERI;
 
@@ -335,7 +336,13 @@ debug_log_message_write(const char          *context,
 	DebugPrinter printer;
 	printer.write("\x1b[35m");
 	printer.write(context);
+#if 0
+	printer.write(" [Thread ");
+	printer.write(thread_id_get());
+	printer.write("]\033[0m: ");
+#else
 	printer.write("\033[0m: ");
+#endif
 	printer.format(format, messages, messageCount);
 	printer.write("\n");
 }
diff --git a/tests/locks-test.cc b/tests/locks-test.cc
index ffd8da27..20798c81 100644
--- a/tests/locks-test.cc
+++ b/tests/locks-test.cc
@@ -59,7 +59,9 @@ namespace
 		     "Trying to acquire lock spuriously succeeded");
 		if constexpr (!std::is_same_v<Lock, FlagLockPriorityInherited>)
 		{
+#ifndef SIMULATION
 			TEST(t.elapsed >= 1, "Sleep slept for {} ticks", t.elapsed);
+#endif
 		}
 	}