From 4064d36dfefe1a5001b83beb99b4461b4479ea5d Mon Sep 17 00:00:00 2001
From: Neil Dhar
Date: Wed, 13 Nov 2024 18:07:53 -0800
Subject: [PATCH] Add fast path for JIT to JIT calls

Summary:
Add a fast path for calling another JIT function from the JIT without
needing the indirection through `_sh_dispatch_call`.

Reviewed By: avp

Differential Revision: D64702363

fbshipit-source-id: 45e3360fb5a4aaaf72c7940761f12dd598d5db90
---
 include/hermes/VM/Callable.h     |  1 +
 include/hermes/VM/CodeBlock.h    |  2 +
 include/hermes/VM/GCCell.h       |  3 ++
 lib/VM/JIT/RuntimeOffsets.h      |  7 +++
 lib/VM/JIT/arm64/JitEmitter.cpp  | 77 ++++++++++++++++++++++++++++----
 lib/VM/JIT/arm64/JitHandlers.cpp |  5 ++-
 6 files changed, 84 insertions(+), 11 deletions(-)

diff --git a/include/hermes/VM/Callable.h b/include/hermes/VM/Callable.h
index 5f63d34ccbd..a9c093dce49 100644
--- a/include/hermes/VM/Callable.h
+++ b/include/hermes/VM/Callable.h
@@ -1000,6 +1000,7 @@ class NativeConstructor final : public NativeFunction {
 class JSFunction : public Callable {
   using Super = Callable;
   friend void JSFunctionBuildMeta(const GCCell *cell, Metadata::Builder &mb);
+  friend struct RuntimeOffsets;
 
   /// CodeBlock to execute when called.
   CodeBlock *codeBlock_;
diff --git a/include/hermes/VM/CodeBlock.h b/include/hermes/VM/CodeBlock.h
index 954d8d28f71..ea7363b8674 100644
--- a/include/hermes/VM/CodeBlock.h
+++ b/include/hermes/VM/CodeBlock.h
@@ -37,6 +37,8 @@ typedef HermesValue (*JITCompiledFunctionPtr)(Runtime *runtime);
 class CodeBlock final
     : private llvh::TrailingObjects {
   friend TrailingObjects;
+  friend struct RuntimeOffsets;
+
   /// Points to the runtime module with the information required for this code
   /// block.
   RuntimeModule *const runtimeModule_;
diff --git a/include/hermes/VM/GCCell.h b/include/hermes/VM/GCCell.h
index 1f0eab4ef6c..8cf6c179b8d 100644
--- a/include/hermes/VM/GCCell.h
+++ b/include/hermes/VM/GCCell.h
@@ -60,6 +60,7 @@ class KindAndSize {
   }
 
  private:
+  friend struct RuntimeOffsets;
   using RawType = CompressedPointer::RawType;
   static constexpr size_t kNumBits = sizeof(RawType) * 8;
   static constexpr size_t kNumKindBits = 8;
@@ -87,6 +88,8 @@ static_assert(
 /// traversal in a contiguous space: given a pointer to the head, you
 /// can get the size, and thus get to the head of the next cell.
 class GCCell {
+  friend struct RuntimeOffsets;
+
   /// Either contains the CellKind and size of this cell, or a forwarding
   /// pointer.
   union {
diff --git a/lib/VM/JIT/RuntimeOffsets.h b/lib/VM/JIT/RuntimeOffsets.h
index e0db157eaa3..8812632c4af 100644
--- a/lib/VM/JIT/RuntimeOffsets.h
+++ b/lib/VM/JIT/RuntimeOffsets.h
@@ -8,6 +8,7 @@
 #ifndef HERMES_VM_JIT_X86_64_RUNTIMEOFFSETS_H
 #define HERMES_VM_JIT_X86_64_RUNTIMEOFFSETS_H
 
+#include "hermes/VM/Callable.h"
 #include "hermes/VM/Runtime.h"
 
 namespace hermes {
@@ -28,6 +29,12 @@ struct RuntimeOffsets {
   static constexpr uint32_t nativeStackSize =
       offsetof(Runtime, overflowGuard_) +
       offsetof(StackOverflowGuard, nativeStackSize);
+
+  static constexpr uint32_t codeBlockJitPtr = offsetof(CodeBlock, JITCompiled_);
+  static constexpr uint32_t jsFunctionCodeBlock =
+      offsetof(JSFunction, codeBlock_);
+  /// Kind byte offset; can't use offsetof because KindAndSize uses bitfields.
+  static constexpr uint32_t kindAndSizeKind = KindAndSize::kNumSizeBits / 8;
 };
 
 #pragma GCC diagnostic pop
diff --git a/lib/VM/JIT/arm64/JitEmitter.cpp b/lib/VM/JIT/arm64/JitEmitter.cpp
index 2c554b60129..f71ac225eb7 100644
--- a/lib/VM/JIT/arm64/JitEmitter.cpp
+++ b/lib/VM/JIT/arm64/JitEmitter.cpp
@@ -164,6 +164,20 @@ void emit_sh_ljs_bool(a64::Assembler &a, const a64::GpX inOut) {
   a.movk(inOut, baseBool.raw >> kHV_NumDataBits, kHV_NumDataBits);
 }
 
+/// For a register \p xIn containing a pointer to a GCCell, retrieve its
+/// CellKind (a single byte) and store it in \p wOut.
+/// \p wOut and \p xIn may refer to the same register.
+void emit_gccell_get_kind(
+    a64::Assembler &a,
+    const a64::GpW &wOut,
+    const a64::GpX &xIn) {
+  a.ldrb(
+      wOut,
+      a64::Mem(
+          xIn,
+          offsetof(SHGCCell, kindAndSize) + RuntimeOffsets::kindAndSizeKind));
+}
+
 /// For a register \p dInput, which contains a double, check whether it is a
 /// valid signed 64-bit integer.
 /// CPU flags are updated. b_eq on success.
@@ -3670,20 +3684,65 @@ void Emitter::callImpl(FR frRes, FR frCallee) {
   }
 #endif
 
+  auto hwCallee = getOrAllocFRInGpX(frCallee, true);
+  auto hwTemp = allocTempGpX();
+  auto xTemp = hwTemp.a64GpX();
   syncAllFRTempExcept(FR());
   freeAllFRTempExcept({});
-
-  a.mov(a64::x0, xRuntime);
-  loadFrameAddr(
-      a64::x1,
-      FR(frameRegs_.size() + hbc::StackFrameLayout::CalleeClosureOrCB));
-  EMIT_RUNTIME_CALL_WITHOUT_SAVED_IP(
-      *this,
-      SHLegacyValue(*)(SHRuntime *, SHLegacyValue *),
-      _jit_dispatch_call);
+  freeReg(hwTemp);
   HWReg hwRes = getOrAllocFRInAnyReg(frRes, false, HWReg::gpX(0));
   frUpdatedWithHW(frRes, hwRes);
+
+  auto slowPathLab = newSlowPathLabel();
+  auto contLab = newContLabel();
+
+  // Check if the callee is a JSFunction we have already JITted.
+  emit_sh_ljs_is_object(a, xTemp, hwCallee.a64GpX());
+  a.b_ne(slowPathLab);
+
+  // We can now use any temp registers we want, because everything has been
+  // sync'd and we are done with hwCallee.
+  emit_sh_ljs_get_pointer(a, a64::x0, hwCallee.a64GpX());
+  emit_gccell_get_kind(a, a64::w1, a64::x0);
+
+  // Check if it is a JSFunction.
+  a.cmp(a64::w1, CellKind::JSFunctionKind);
+  a.b_ne(slowPathLab);
+
+  // Check if the JSFunction has already been JIT compiled.
+  a.ldr(a64::x1, a64::Mem(a64::x0, RuntimeOffsets::jsFunctionCodeBlock));
+  a.ldr(a64::x1, a64::Mem(a64::x1, RuntimeOffsets::codeBlockJitPtr));
+  a.cbz(a64::x1, slowPathLab);
+
+  // We have a JIT compiled function, call it.
+  a.mov(a64::x0, xRuntime);
+  a.blr(a64::x1);
+  a.bind(contLab);
+  // NOTE: this does the move for both the slow and fast paths.
   movHWFromHW(hwRes, HWReg::gpX(0));
+
+  slowPaths_.push_back(
+      {.slowPathLab = slowPathLab,
+       .contLab = contLab,
+       .frRes = frRes,
+       .frInput1 = frCallee,
+       .emit = [](Emitter &em, SlowPath &sl) {
+         em.comment(
+             "// Slow path: CallImpl r%u, r%u",
+             sl.frRes.index(),
+             sl.frInput1.index());
+         em.a.bind(sl.slowPathLab);
+         em.a.mov(a64::x0, xRuntime);
+         em.loadFrameAddr(
+             a64::x1,
+             FR(em.frameRegs_.size() +
+                hbc::StackFrameLayout::CalleeClosureOrCB));
+         EMIT_RUNTIME_CALL_WITHOUT_SAVED_IP(
+             em,
+             SHLegacyValue(*)(SHRuntime *, SHLegacyValue *),
+             _jit_dispatch_call);
+         em.a.b(sl.contLab);
+       }});
 }
 
 void Emitter::call(FR frRes, FR frCallee, uint32_t argc) {
diff --git a/lib/VM/JIT/arm64/JitHandlers.cpp b/lib/VM/JIT/arm64/JitHandlers.cpp
index cc313d9e8cc..57388dd8aa7 100644
--- a/lib/VM/JIT/arm64/JitHandlers.cpp
+++ b/lib/VM/JIT/arm64/JitHandlers.cpp
@@ -272,8 +272,9 @@ SHLegacyValue _jit_dispatch_call(
   auto *callTarget = toPHV(callTargetSHLV);
   if (vmisa<JSFunction>(*callTarget)) {
     JSFunction *jsFunc = vmcast<JSFunction>(*callTarget);
-    if (auto *fnPtr = jsFunc->getCodeBlock()->getJITCompiled())
-      return fnPtr(&runtime);
+    assert(
+        !jsFunc->getCodeBlock()->getJITCompiled() &&
+        "Calls to JITted code should go directly.");
     CallResult<HermesValue> result = jsFunc->_interpret(runtime);
     if (LLVM_UNLIKELY(result == ExecutionStatus::EXCEPTION))
       _sh_throw_current(getSHRuntime(runtime));