From 420717ee369d44906f5ba6bbf94ee28058c56037 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sat, 30 Sep 2023 15:38:28 +0200 Subject: [PATCH] Split off patched code into separate .S file and disable subsections-via-symbols for it (#92555) * [amd64/arm64] Split off patched code into separate .S file and disable subsections-via-symbols for it * [amd64/arm64] Split off patched code into separate .asm file [arm64] Move JIT_UpdateWriteBarrierState out of the patched region to match implementation in .S file * Remove NO_SUBSECTIONS_VIA_SYMBOLS --- src/coreclr/vm/CMakeLists.txt | 4 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 182 ----------------- src/coreclr/vm/amd64/jithelpers_fast.S | 239 ---------------------- src/coreclr/vm/amd64/patchedcode.S | 245 +++++++++++++++++++++++ src/coreclr/vm/amd64/patchedcode.asm | 202 +++++++++++++++++++ src/coreclr/vm/arm64/asmhelpers.S | 232 +-------------------- src/coreclr/vm/arm64/asmhelpers.asm | 231 +-------------------- src/coreclr/vm/arm64/patchedcode.S | 232 +++++++++++++++++++++ src/coreclr/vm/arm64/patchedcode.asm | 239 ++++++++++++++++++++++ 9 files changed, 926 insertions(+), 880 deletions(-) create mode 100644 src/coreclr/vm/amd64/patchedcode.S create mode 100644 src/coreclr/vm/amd64/patchedcode.asm create mode 100644 src/coreclr/vm/arm64/patchedcode.S create mode 100644 src/coreclr/vm/arm64/patchedcode.asm diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index d8054a250af41..5856acd7650da 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -652,6 +652,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/JitHelpers_InlineGetThread.asm ${ARCH_SOURCES_DIR}/JitHelpers_SingleAppDomain.asm ${ARCH_SOURCES_DIR}/JitHelpers_Slow.asm + ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm @@ -683,6 +684,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm ${ARCH_SOURCES_DIR}/CrtHelpers.asm + ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm ) @@ -705,6 +707,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/jithelpers_fastwritebarriers.S ${ARCH_SOURCES_DIR}/jithelpers_singleappdomain.S ${ARCH_SOURCES_DIR}/jithelpers_slow.S + ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/redirectedhandledjitcase.S ${ARCH_SOURCES_DIR}/theprestubamd64.S @@ -738,6 +741,7 @@ else(CLR_CMAKE_TARGET_WIN32) ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerarm64.S ${ARCH_SOURCES_DIR}/crthelpers.S + ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index dd5b891a44134..0f1b71b5ee93b 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -50,188 +50,6 @@ endif extern JIT_InternalThrow:proc -; Mark start of the code region that we patch at runtime -LEAF_ENTRY JIT_PatchedCodeStart, _TEXT - ret -LEAF_END JIT_PatchedCodeStart, _TEXT - - -; This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow -; or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ -; change at runtime as the GC changes. 
Initially it should simply be a copy of the -; larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created -; enough space to copy that code in. -LEAF_ENTRY JIT_WriteBarrier, _TEXT - align 16 - -ifdef _DEBUG - ; In debug builds, this just contains jump to the debug version of the write barrier by default - mov rax, JIT_WriteBarrier_Debug - jmp rax -endif - -ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - ; JIT_WriteBarrier_WriteWatch_PostGrow64 - - ; Regarding patchable constants: - ; - 64-bit constants have to be loaded into a register - ; - The constants have to be aligned to 8 bytes so that they can be patched easily - ; - The constant loads have been located to minimize NOP padding required to align the constants - ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special - ; non-volatile calling convention, this should be changed to use just one register. - - ; Do the move into the GC . It is correct to take an AV here, the EH code - ; figures out that this came from a WriteBarrier and correctly maps it back - ; to the managed method which called the WriteBarrier (see setup in - ; InitializeExceptionHandling, vm\exceptionhandling.cpp). - mov [rcx], rdx - - ; Update the write watch table if necessary - mov rax, rcx - mov r8, 0F0F0F0F0F0F0F0F0h - shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift - NOP_2_BYTE ; padding for alignment of constant - mov r9, 0F0F0F0F0F0F0F0F0h - add rax, r8 - cmp byte ptr [rax], 0h - jne CheckCardTable - mov byte ptr [rax], 0FFh - - NOP_3_BYTE ; padding for alignment of constant - - ; Check the lower and upper ephemeral region bounds - CheckCardTable: - cmp rdx, r9 - jb Exit - - NOP_3_BYTE ; padding for alignment of constant - - mov r8, 0F0F0F0F0F0F0F0F0h - - cmp rdx, r8 - jae Exit - - nop ; padding for alignment of constant - - mov rax, 0F0F0F0F0F0F0F0F0h - - ; Touch the card table entry, if not already dirty. - shr rcx, 0Bh - cmp byte ptr [rcx + rax], 0FFh - jne UpdateCardTable - REPRET - - UpdateCardTable: - mov byte ptr [rcx + rax], 0FFh -ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - mov rax, 0F0F0F0F0F0F0F0F0h - shr rcx, 0Ah - cmp byte ptr [rcx + rax], 0FFh - jne UpdateCardBundleTable - REPRET - - UpdateCardBundleTable: - mov byte ptr [rcx + rax], 0FFh -endif - ret - - align 16 - Exit: - REPRET - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - -else - ; JIT_WriteBarrier_PostGrow64 - - ; Do the move into the GC . It is correct to take an AV here, the EH code - ; figures out that this came from a WriteBarrier and correctly maps it back - ; to the managed method which called the WriteBarrier (see setup in - ; InitializeExceptionHandling, vm\exceptionhandling.cpp). - mov [rcx], rdx - - NOP_3_BYTE ; padding for alignment of constant - - ; Can't compare a 64 bit immediate, so we have to move them into a - ; register. Values of these immediates will be patched at runtime. - ; By using two registers we can pipeline better. Should we decide to use - ; a special non-volatile calling convention, this should be changed to - ; just one. 
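The immediates above are patched exactly as the comment says: each 0F0F0F0F0F0F0F0F0h placeholder is an 8-byte constant embedded in a mov, and because the barrier keeps those constants 8-byte aligned (see the padding notes in the write-watch variant), the VM can retarget one with a single atomic store while other threads keep executing the barrier. A minimal C sketch of that patching step; the helper name is hypothetical, not the VM's actual API, and it assumes the code page was already made writable:

    #include <stdint.h>

    // Retarget one patchable barrier constant. 'imm' points at the 8-byte
    // immediate operand of a mov reg, imm64; its 8-byte alignment makes this
    // single store atomic, so concurrent executions never see a torn pointer.
    static void patch_barrier_constant(uint64_t *imm, uint64_t new_value)
    {
        *(volatile uint64_t *)imm = new_value;
    }

The mov instructions that follow load those placeholders.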
- - mov rax, 0F0F0F0F0F0F0F0F0h - - ; Check the lower and upper ephemeral region bounds - cmp rdx, rax - jb Exit - - nop ; padding for alignment of constant - - mov r8, 0F0F0F0F0F0F0F0F0h - - cmp rdx, r8 - jae Exit - - nop ; padding for alignment of constant - - mov rax, 0F0F0F0F0F0F0F0F0h - - ; Touch the card table entry, if not already dirty. - shr rcx, 0Bh - cmp byte ptr [rcx + rax], 0FFh - jne UpdateCardTable - REPRET - - UpdateCardTable: - mov byte ptr [rcx + rax], 0FFh -ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - mov rax, 0F0F0F0F0F0F0F0F0h - shr rcx, 0Ah - cmp byte ptr [rcx + rax], 0FFh - jne UpdateCardBundleTable - REPRET - - UpdateCardBundleTable: - mov byte ptr [rcx + rax], 0FFh -endif - ret - - align 16 - Exit: - REPRET -endif - - ; make sure this is bigger than any of the others - align 16 - nop -LEAF_END_MARKED JIT_WriteBarrier, _TEXT - -; Mark start of the code region that we patch at runtime -LEAF_ENTRY JIT_PatchedCodeLast, _TEXT - ret -LEAF_END JIT_PatchedCodeLast, _TEXT - ; JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp ; ; Entry: diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 7578f46ce0c1d..3a2d803a1460f 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -5,245 +5,6 @@ #include "unixasmmacros.inc" #include "asmconstants.h" -// Mark start of the code region that we patch at runtime -LEAF_ENTRY JIT_PatchedCodeStart, _TEXT - ret -LEAF_END JIT_PatchedCodeStart, _TEXT - - -// There is an even more optimized version of these helpers possible which takes -// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 -// that check (this is more significant in the JIT_WriteBarrier case). -// -// Additionally we can look into providing helpers which will take the src/dest from -// specific registers (like x86) which _could_ (??) make for easier register allocation -// for the JIT64, however it might lead to having to have some nasty code that treats -// these guys really special like... :(. -// -// Version that does the move, checks whether or not it's in the GC and whether or not -// it needs to have it's card updated -// -// void JIT_CheckedWriteBarrier(Object** dst, Object* src) -LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - - // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference - // but if it isn't then it will just return. - // - // See if this is in GCHeap - PREPARE_EXTERNAL_VAR g_lowest_address, rax - cmp rdi, [rax] - // jb LOCAL_LABEL(NotInHeap) - .byte 0x72, 0x12 - PREPARE_EXTERNAL_VAR g_highest_address, rax - cmp rdi, [rax] - - // jnb LOCAL_LABEL(NotInHeap) - .byte 0x73, 0x06 - jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] - - LOCAL_LABEL(NotInHeap): - // See comment above about possible AV - mov [rdi], rsi - ret -LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT - - -// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow -// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ -// change at runtime as the GC changes. Initially it should simply be a copy of the -// larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created -// enough space to copy that code in. 
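The holder scheme is also why JIT_PatchedCodeStart and JIT_PatchedCodeLast exist: the VM treats everything between those two symbols as one contiguous patchable region and copies whichever barrier variant the GC currently needs over JIT_WriteBarrier inside it. A hedged C sketch of that copy, assuming the region was already remapped writable; the helper is illustrative, not the VM's actual entry point (the _End symbol is the one LEAF_END_MARKED emits):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    extern uint8_t JIT_WriteBarrier, JIT_WriteBarrier_End;

    // Stomp a replacement barrier body over the resident holder. The holder was
    // sized from the largest variant (PostGrow), so any replacement fits.
    static void stomp_write_barrier(const void *replacement, size_t len)
    {
        assert(len <= (size_t)(&JIT_WriteBarrier_End - &JIT_WriteBarrier));
        memcpy(&JIT_WriteBarrier, replacement, len);
        // GCC/Clang builtin; required on arm64, harmless on x64
        __builtin___clear_cache((char *)&JIT_WriteBarrier,
                                (char *)&JIT_WriteBarrier + len);
    }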
-.balign 16 -LEAF_ENTRY JIT_WriteBarrier, _TEXT -#ifdef _DEBUG - // In debug builds, this just contains jump to the debug version of the write barrier by default - jmp C_FUNC(JIT_WriteBarrier_Debug) -#endif - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // JIT_WriteBarrier_WriteWatch_PostGrow64 - - // Regarding patchable constants: - // - 64-bit constants have to be loaded into a register - // - The constants have to be aligned to 8 bytes so that they can be patched easily - // - The constant loads have been located to minimize NOP padding required to align the constants - // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special - // non-volatile calling convention, this should be changed to use just one register. - - // Do the move into the GC . It is correct to take an AV here, the EH code - // figures out that this came from a WriteBarrier and correctly maps it back - // to the managed method which called the WriteBarrier (see setup in - // InitializeExceptionHandling, vm\exceptionhandling.cpp). - mov [rdi], rsi - - // Update the write watch table if necessary - mov rax, rdi - movabs r10, 0xF0F0F0F0F0F0F0F0 - shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - NOP_2_BYTE // padding for alignment of constant - movabs r11, 0xF0F0F0F0F0F0F0F0 - add rax, r10 - cmp byte ptr [rax], 0x0 - .byte 0x75, 0x06 - // jne LOCAL_LABEL(CheckCardTable) - mov byte ptr [rax], 0xFF - - NOP_3_BYTE // padding for alignment of constant - - // Check the lower and upper ephemeral region bounds - LOCAL_LABEL(CheckCardTable): - cmp rsi, r11 - .byte 0x72,0x3D - // jb LOCAL_LABEL(Exit) - - NOP_3_BYTE // padding for alignment of constant - - movabs r10, 0xF0F0F0F0F0F0F0F0 - - cmp rsi, r10 - .byte 0x73,0x2B - // jae LOCAL_LABEL(Exit) - - nop // padding for alignment of constant - - movabs rax, 0xF0F0F0F0F0F0F0F0 - - // Touch the card table entry, if not already dirty. - shr rdi, 0x0B - cmp byte ptr [rdi + rax], 0xFF - .byte 0x75, 0x02 - // jne LOCAL_LABEL(UpdateCardTable) - REPRET - - LOCAL_LABEL(UpdateCardTable): - mov byte ptr [rdi + rax], 0xFF - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - NOP_2_BYTE // padding for alignment of constant - shr rdi, 0x0A - - movabs rax, 0xF0F0F0F0F0F0F0F0 - cmp byte ptr [rdi + rax], 0xFF - - .byte 0x75, 0x02 - // jne LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64) - REPRET - - LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64): - mov byte ptr [rdi + rax], 0xFF -#endif - - ret - - .balign 16 - LOCAL_LABEL(Exit): - REPRET - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - NOP_3_BYTE - -#else - // JIT_WriteBarrier_PostGrow64 - - // Do the move into the GC . It is correct to take an AV here, the EH code - // figures out that this came from a WriteBarrier and correctly maps it back - // to the managed method which called the WriteBarrier (see setup in - // InitializeExceptionHandling, vm\exceptionhandling.cpp). - mov [rdi], rsi - - NOP_3_BYTE // padding for alignment of constant - - // Can't compare a 64 bit immediate, so we have to move them into a - // register. Values of these immediates will be patched at runtime. - // By using two registers we can pipeline better. Should we decide to use - // a special non-volatile calling convention, this should be changed to - // just one. 
- - movabs rax, 0xF0F0F0F0F0F0F0F0 - - // Check the lower and upper ephemeral region bounds - cmp rsi, rax - // jb LOCAL_LABEL(Exit) - .byte 0x72, 0x36 - - nop // padding for alignment of constant - - movabs r8, 0xF0F0F0F0F0F0F0F0 - - cmp rsi, r8 - // jae LOCAL_LABEL(Exit) - .byte 0x73, 0x26 - - nop // padding for alignment of constant - - movabs rax, 0xF0F0F0F0F0F0F0F0 - - // Touch the card table entry, if not already dirty. - shr rdi, 0Bh - cmp byte ptr [rdi + rax], 0FFh - .byte 0x75, 0x02 - // jne LOCAL_LABEL(UpdateCardTable) - REPRET - - LOCAL_LABEL(UpdateCardTable): - mov byte ptr [rdi + rax], 0FFh - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - NOP_6_BYTE // padding for alignment of constant - - movabs rax, 0xF0F0F0F0F0F0F0F0 - - // Touch the card bundle, if not already dirty. - // rdi is already shifted by 0xB, so shift by 0xA more - shr rdi, 0x0A - cmp byte ptr [rdi + rax], 0FFh - - .byte 0x75, 0x02 - // jne LOCAL_LABEL(UpdateCardBundle) - REPRET - - LOCAL_LABEL(UpdateCardBundle): - mov byte ptr [rdi + rax], 0FFh -#endif - - ret - - .balign 16 - LOCAL_LABEL(Exit): - REPRET -#endif - - // make sure this is bigger than any of the others - .balign 16 - nop -LEAF_END_MARKED JIT_WriteBarrier, _TEXT - -// Mark start of the code region that we patch at runtime -LEAF_ENTRY JIT_PatchedCodeLast, _TEXT - ret -LEAF_END JIT_PatchedCodeLast, _TEXT - // JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp // // Entry: diff --git a/src/coreclr/vm/amd64/patchedcode.S b/src/coreclr/vm/amd64/patchedcode.S new file mode 100644 index 0000000000000..9af4e3ce855b7 --- /dev/null +++ b/src/coreclr/vm/amd64/patchedcode.S @@ -0,0 +1,245 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +.intel_syntax noprefix +#include "unixasmmacros.inc" +#include "asmconstants.h" + +// Mark start of the code region that we patch at runtime +LEAF_ENTRY JIT_PatchedCodeStart, _TEXT + ret +LEAF_END JIT_PatchedCodeStart, _TEXT + + +// There is an even more optimized version of these helpers possible which takes +// advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +// that check (this is more significant in the JIT_WriteBarrier case). +// +// Additionally we can look into providing helpers which will take the src/dest from +// specific registers (like x86) which _could_ (??) make for easier register allocation +// for the JIT64, however it might lead to having to have some nasty code that treats +// these guys really special like... :(. +// +// Version that does the move, checks whether or not it's in the GC and whether or not +// it needs to have it's card updated +// +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + // but if it isn't then it will just return. 
+ // + // See if this is in GCHeap + PREPARE_EXTERNAL_VAR g_lowest_address, rax + cmp rdi, [rax] + // jb LOCAL_LABEL(NotInHeap) + .byte 0x72, 0x12 + PREPARE_EXTERNAL_VAR g_highest_address, rax + cmp rdi, [rax] + + // jnb LOCAL_LABEL(NotInHeap) + .byte 0x73, 0x06 + jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] + + LOCAL_LABEL(NotInHeap): + // See comment above about possible AV + mov [rdi], rsi + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + + +// This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow +// or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ +// change at runtime as the GC changes. Initially it should simply be a copy of the +// larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created +// enough space to copy that code in. +.balign 16 +LEAF_ENTRY JIT_WriteBarrier, _TEXT +#ifdef _DEBUG + // In debug builds, this just contains jump to the debug version of the write barrier by default + jmp C_FUNC(JIT_WriteBarrier_Debug) +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // JIT_WriteBarrier_WriteWatch_PostGrow64 + + // Regarding patchable constants: + // - 64-bit constants have to be loaded into a register + // - The constants have to be aligned to 8 bytes so that they can be patched easily + // - The constant loads have been located to minimize NOP padding required to align the constants + // - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + // non-volatile calling convention, this should be changed to use just one register. + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi + + // Update the write watch table if necessary + mov rax, rdi + movabs r10, 0xF0F0F0F0F0F0F0F0 + shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE // padding for alignment of constant + movabs r11, 0xF0F0F0F0F0F0F0F0 + add rax, r10 + cmp byte ptr [rax], 0x0 + .byte 0x75, 0x06 + // jne LOCAL_LABEL(CheckCardTable) + mov byte ptr [rax], 0xFF + + NOP_3_BYTE // padding for alignment of constant + + // Check the lower and upper ephemeral region bounds + LOCAL_LABEL(CheckCardTable): + cmp rsi, r11 + .byte 0x72,0x3D + // jb LOCAL_LABEL(Exit) + + NOP_3_BYTE // padding for alignment of constant + + movabs r10, 0xF0F0F0F0F0F0F0F0 + + cmp rsi, r10 + .byte 0x73,0x2B + // jae LOCAL_LABEL(Exit) + + nop // padding for alignment of constant + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card table entry, if not already dirty. 
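The shr/cmp sequence below, and the card-bundle variant after it, encode the following check-before-write logic; this sketch uses named table pointers where the assembly embeds patchable 0xF0F0... constants. One card byte covers 2^11 = 2 KB of heap, and the extra 0x0A shift puts bundles at 2^21 = 2 MB:

    #include <stdint.h>

    static void mark_card(uint8_t *card_table, uint8_t *card_bundle_table,
                          uintptr_t dst)
    {
        uint8_t *card = &card_table[dst >> 11];
        if (*card != 0xFF)
            *card = 0xFF;            // dirty only when needed, sparing the cache line
        uint8_t *bundle = &card_bundle_table[dst >> 21];
        if (*bundle != 0xFF)
            *bundle = 0xFF;          // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES path
    }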
+ shr rdi, 0x0B + cmp byte ptr [rdi + rax], 0xFF + .byte 0x75, 0x02 + // jne LOCAL_LABEL(UpdateCardTable) + REPRET + + LOCAL_LABEL(UpdateCardTable): + mov byte ptr [rdi + rax], 0xFF + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + NOP_2_BYTE // padding for alignment of constant + shr rdi, 0x0A + + movabs rax, 0xF0F0F0F0F0F0F0F0 + cmp byte ptr [rdi + rax], 0xFF + + .byte 0x75, 0x02 + // jne LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64) + REPRET + + LOCAL_LABEL(UpdateCardBundle_WriteWatch_PostGrow64): + mov byte ptr [rdi + rax], 0xFF +#endif + + ret + + .balign 16 + LOCAL_LABEL(Exit): + REPRET + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + +#else + // JIT_WriteBarrier_PostGrow64 + + // Do the move into the GC . It is correct to take an AV here, the EH code + // figures out that this came from a WriteBarrier and correctly maps it back + // to the managed method which called the WriteBarrier (see setup in + // InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rdi], rsi + + NOP_3_BYTE // padding for alignment of constant + + // Can't compare a 64 bit immediate, so we have to move them into a + // register. Values of these immediates will be patched at runtime. + // By using two registers we can pipeline better. Should we decide to use + // a special non-volatile calling convention, this should be changed to + // just one. + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Check the lower and upper ephemeral region bounds + cmp rsi, rax + // jb LOCAL_LABEL(Exit) + .byte 0x72, 0x36 + + nop // padding for alignment of constant + + movabs r8, 0xF0F0F0F0F0F0F0F0 + + cmp rsi, r8 + // jae LOCAL_LABEL(Exit) + .byte 0x73, 0x26 + + nop // padding for alignment of constant + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card table entry, if not already dirty. + shr rdi, 0Bh + cmp byte ptr [rdi + rax], 0FFh + .byte 0x75, 0x02 + // jne LOCAL_LABEL(UpdateCardTable) + REPRET + + LOCAL_LABEL(UpdateCardTable): + mov byte ptr [rdi + rax], 0FFh + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + NOP_6_BYTE // padding for alignment of constant + + movabs rax, 0xF0F0F0F0F0F0F0F0 + + // Touch the card bundle, if not already dirty. + // rdi is already shifted by 0xB, so shift by 0xA more + shr rdi, 0x0A + cmp byte ptr [rdi + rax], 0FFh + + .byte 0x75, 0x02 + // jne LOCAL_LABEL(UpdateCardBundle) + REPRET + + LOCAL_LABEL(UpdateCardBundle): + mov byte ptr [rdi + rax], 0FFh +#endif + + ret + + .balign 16 + LOCAL_LABEL(Exit): + REPRET +#endif + + // make sure this is bigger than any of the others + .balign 16 + nop +LEAF_END_MARKED JIT_WriteBarrier, _TEXT + +// Mark start of the code region that we patch at runtime +LEAF_ENTRY JIT_PatchedCodeLast, _TEXT + ret +LEAF_END JIT_PatchedCodeLast, _TEXT diff --git a/src/coreclr/vm/amd64/patchedcode.asm b/src/coreclr/vm/amd64/patchedcode.asm new file mode 100644 index 0000000000000..56d325979f69c --- /dev/null +++ b/src/coreclr/vm/amd64/patchedcode.asm @@ -0,0 +1,202 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. + +; *********************************************************************** +; File: patchedcode.asm +; +; Notes: routinues which are patched at runtime and need to be linked in +; their declared order. 
+; *********************************************************************** + + +include AsmMacros.inc +include asmconstants.inc + +ifdef _DEBUG +extern JIT_WriteBarrier_Debug:proc +endif + + +; Mark start of the code region that we patch at runtime +LEAF_ENTRY JIT_PatchedCodeStart, _TEXT + ret +LEAF_END JIT_PatchedCodeStart, _TEXT + + +; This is used by the mechanism to hold either the JIT_WriteBarrier_PreGrow +; or JIT_WriteBarrier_PostGrow code (depending on the state of the GC). It _WILL_ +; change at runtime as the GC changes. Initially it should simply be a copy of the +; larger of the two functions (JIT_WriteBarrier_PostGrow) to ensure we have created +; enough space to copy that code in. +LEAF_ENTRY JIT_WriteBarrier, _TEXT + align 16 + +ifdef _DEBUG + ; In debug builds, this just contains jump to the debug version of the write barrier by default + mov rax, JIT_WriteBarrier_Debug + jmp rax +endif + +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ; JIT_WriteBarrier_WriteWatch_PostGrow64 + + ; Regarding patchable constants: + ; - 64-bit constants have to be loaded into a register + ; - The constants have to be aligned to 8 bytes so that they can be patched easily + ; - The constant loads have been located to minimize NOP padding required to align the constants + ; - Using different registers for successive constant loads helps pipeline better. Should we decide to use a special + ; non-volatile calling convention, this should be changed to use just one register. + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). + mov [rcx], rdx + + ; Update the write watch table if necessary + mov rax, rcx + mov r8, 0F0F0F0F0F0F0F0F0h + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + NOP_2_BYTE ; padding for alignment of constant + mov r9, 0F0F0F0F0F0F0F0F0h + add rax, r8 + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh + + NOP_3_BYTE ; padding for alignment of constant + + ; Check the lower and upper ephemeral region bounds + CheckCardTable: + cmp rdx, r9 + jb Exit + + NOP_3_BYTE ; padding for alignment of constant + + mov r8, 0F0F0F0F0F0F0F0F0h + + cmp rdx, r8 + jae Exit + + nop ; padding for alignment of constant + + mov rax, 0F0F0F0F0F0F0F0F0h + + ; Touch the card table entry, if not already dirty. + shr rcx, 0Bh + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [rcx + rax], 0FFh +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + mov rax, 0F0F0F0F0F0F0F0F0h + shr rcx, 0Ah + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardBundleTable + REPRET + + UpdateCardBundleTable: + mov byte ptr [rcx + rax], 0FFh +endif + ret + + align 16 + Exit: + REPRET + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + NOP_3_BYTE + +else + ; JIT_WriteBarrier_PostGrow64 + + ; Do the move into the GC . It is correct to take an AV here, the EH code + ; figures out that this came from a WriteBarrier and correctly maps it back + ; to the managed method which called the WriteBarrier (see setup in + ; InitializeExceptionHandling, vm\exceptionhandling.cpp). 
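Taking the possible AV on the store is the barrier's contract with the EH code; everything after it is an ephemeral-range filter in front of card marking. The PostGrow64 sequence below is equivalent to this C sketch, where g_ephemeral_low/high and card_table stand in for the patched immediates:

    #include <stdint.h>

    static void post_grow_barrier(uintptr_t *dst, uintptr_t ref,
                                  uintptr_t g_ephemeral_low,
                                  uintptr_t g_ephemeral_high,
                                  uint8_t *card_table)
    {
        *dst = ref;                              // may AV; EH maps it to the caller
        if (ref < g_ephemeral_low || ref >= g_ephemeral_high)
            return;                              // only ephemeral refs dirty cards
        uint8_t *card = &card_table[(uintptr_t)dst >> 11];
        if (*card != 0xFF)
            *card = 0xFF;
    }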
+ mov [rcx], rdx + + NOP_3_BYTE ; padding for alignment of constant + + ; Can't compare a 64 bit immediate, so we have to move them into a + ; register. Values of these immediates will be patched at runtime. + ; By using two registers we can pipeline better. Should we decide to use + ; a special non-volatile calling convention, this should be changed to + ; just one. + + mov rax, 0F0F0F0F0F0F0F0F0h + + ; Check the lower and upper ephemeral region bounds + cmp rdx, rax + jb Exit + + nop ; padding for alignment of constant + + mov r8, 0F0F0F0F0F0F0F0F0h + + cmp rdx, r8 + jae Exit + + nop ; padding for alignment of constant + + mov rax, 0F0F0F0F0F0F0F0F0h + + ; Touch the card table entry, if not already dirty. + shr rcx, 0Bh + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardTable + REPRET + + UpdateCardTable: + mov byte ptr [rcx + rax], 0FFh +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + mov rax, 0F0F0F0F0F0F0F0F0h + shr rcx, 0Ah + cmp byte ptr [rcx + rax], 0FFh + jne UpdateCardBundleTable + REPRET + + UpdateCardBundleTable: + mov byte ptr [rcx + rax], 0FFh +endif + ret + + align 16 + Exit: + REPRET +endif + + ; make sure this is bigger than any of the others + align 16 + nop +LEAF_END_MARKED JIT_WriteBarrier, _TEXT + +; Mark start of the code region that we patch at runtime +LEAF_ENTRY JIT_PatchedCodeLast, _TEXT + ret +LEAF_END JIT_PatchedCodeLast, _TEXT + + end diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index a7c65bb713c5c..0edbb3fdf92fc 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -174,26 +174,6 @@ C_FUNC(ThePreStubPatchLabel): ret lr LEAF_END ThePreStubPatch, _TEXT - -//----------------------------------------------------------------------------- -// The following Macros help in WRITE_BARRIER Implementations -// WRITE_BARRIER_ENTRY -// -// Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way -// to declare a write barrier function. -// -.macro WRITE_BARRIER_ENTRY name - LEAF_ENTRY \name, _TEXT -.endm - -// WRITE_BARRIER_END -// -// The partner to WRITE_BARRIER_ENTRY, used like NESTED_END. 
-// -.macro WRITE_BARRIER_END name - LEAF_END_MARKED \name, _TEXT -.endm - // void JIT_UpdateWriteBarrierState(bool skipEphemeralCheck, size_t writeableOffset) // // Update shadow copies of the various state info required for barrier @@ -205,7 +185,7 @@ LEAF_END ThePreStubPatch, _TEXT // Align and group state info together so it fits in a single cache line // and each entry can be written atomically // -WRITE_BARRIER_ENTRY JIT_UpdateWriteBarrierState +LEAF_ENTRY JIT_UpdateWriteBarrierState, _TEXT PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -16 // x0-x7, x10 will contain intended new state @@ -269,7 +249,7 @@ LOCAL_LABEL(EphemeralCheckEnabled): EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 EPILOG_RETURN -WRITE_BARRIER_END JIT_UpdateWriteBarrierState +LEAF_END JIT_UpdateWriteBarrierState // ------------------------// ------------------------------------------------------------------ // __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) @@ -285,214 +265,6 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT br x17 LEAF_END JIT_WriteBarrier_Callable, _TEXT -.balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line -//------------------------------------------ -// Start of the writeable code region -LEAF_ENTRY JIT_PatchedCodeStart, _TEXT - ret lr -LEAF_END JIT_PatchedCodeStart, _TEXT - -// void JIT_ByRefWriteBarrier -// On entry: -// x13 : the source address (points to object reference to write) -// x14 : the destination address (object reference written here) -// -// On exit: -// x12 : trashed -// x13 : incremented by 8 -// x14 : incremented by 8 -// x15 : trashed -// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -// -WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier - - ldr x15, [x13], 8 - b C_FUNC(JIT_CheckedWriteBarrier) - -WRITE_BARRIER_END JIT_ByRefWriteBarrier - -//----------------------------------------------------------------------------- -// Simple WriteBarriers -// void JIT_CheckedWriteBarrier(Object** dst, Object* src) -// On entry: -// x14 : the destination address (LHS of the assignment) -// x15 : the object reference (RHS of the assignment) -// -// On exit: -// x12 : trashed -// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) -// x15 : trashed -// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -// -WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier - ldr x12, LOCAL_LABEL(wbs_lowest_address) - cmp x14, x12 - - ldr x12, LOCAL_LABEL(wbs_highest_address) - - // Compare against the upper bound if the previous comparison indicated - // that the destination address is greater than or equal to the lower - // bound. Otherwise, set the C flag (specified by the 0x2) so that the - // branch below is not taken. 
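A note on the ccmp idiom used in the barriers below: the #0x2 immediate is the NZCV pattern (just the carry flag) that ccmp installs when its hs condition fails, i.e. when the destination is already below the lower bound, so a single flag-consuming branch afterwards resolves both halves of the range test. The predicate it computes, as a C sketch:

    #include <stdint.h>

    // What the cmp / ccmp #0x2, hs pair decides, using unsigned (lo/hs) compares.
    static int outside_heap(uintptr_t dst, uintptr_t lowest, uintptr_t highest)
    {
        return dst < lowest || dst >= highest;
    }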
- ccmp x14, x12, #0x2, hs - - bhs LOCAL_LABEL(NotInHeap) - - b C_FUNC(JIT_WriteBarrier) - -LOCAL_LABEL(NotInHeap): - str x15, [x14], 8 - ret lr -WRITE_BARRIER_END JIT_CheckedWriteBarrier - -// void JIT_WriteBarrier(Object** dst, Object* src) -// On entry: -// x14 : the destination address (LHS of the assignment) -// x15 : the object reference (RHS of the assignment) -// -// On exit: -// x12 : trashed -// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) -// x15 : trashed -// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -// -WRITE_BARRIER_ENTRY JIT_WriteBarrier - stlr x15, [x14] - -#ifdef WRITE_BARRIER_CHECK - // Update GC Shadow Heap - - // Do not perform the work if g_GCShadow is 0 - ldr x12, LOCAL_LABEL(wbs_GCShadow) - cbz x12, LOCAL_LABEL(ShadowUpdateDisabled) - - // need temporary register. Save before using. - str x13, [sp, #-16]! - - // Compute address of shadow heap location: - // pShadow = g_GCShadow + (x14 - g_lowest_address) - ldr x13, LOCAL_LABEL(wbs_lowest_address) - sub x13, x14, x13 - add x12, x13, x12 - - // if (pShadow >= g_GCShadowEnd) goto end - ldr x13, LOCAL_LABEL(wbs_GCShadowEnd) - cmp x12, x13 - bhs LOCAL_LABEL(ShadowUpdateEnd) - - // *pShadow = x15 - str x15, [x12] - - // Ensure that the write to the shadow heap occurs before the read from the GC heap so that race - // conditions are caught by INVALIDGCVALUE. - dmb ish - - // if ([x14] == x15) goto end - ldr x13, [x14] - cmp x13, x15 - beq LOCAL_LABEL(ShadowUpdateEnd) - - // *pShadow = INVALIDGCVALUE (0xcccccccd) - movz x13, #0xcccd - movk x13, #0xcccc, LSL #16 - str x13, [x12] - -LOCAL_LABEL(ShadowUpdateEnd): - ldr x13, [sp], #16 -LOCAL_LABEL(ShadowUpdateDisabled): -#endif - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // Update the write watch table if necessary - ldr x12, LOCAL_LABEL(wbs_sw_ww_table) - cbz x12, LOCAL_LABEL(CheckCardTable) - add x12, x12, x14, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift - ldrb w17, [x12] - cbnz x17, LOCAL_LABEL(CheckCardTable) - mov w17, #0xFF - strb w17, [x12] -#endif - -LOCAL_LABEL(CheckCardTable): - // Branch to Exit if the reference is not in the Gen0 heap - // - ldr x12, LOCAL_LABEL(wbs_ephemeral_low) - cbz x12, LOCAL_LABEL(SkipEphemeralCheck) - cmp x15, x12 - - ldr x12, LOCAL_LABEL(wbs_ephemeral_high) - - // Compare against the upper bound if the previous comparison indicated - // that the destination address is greater than or equal to the lower - // bound. Otherwise, set the C flag (specified by the 0x2) so that the - // branch to exit is taken. 
- ccmp x15, x12, #0x2, hs - - bhs LOCAL_LABEL(Exit) - -LOCAL_LABEL(SkipEphemeralCheck): - // Check if we need to update the card table - ldr x12, LOCAL_LABEL(wbs_card_table) - add x15, x12, x14, lsr #11 - ldrb w12, [x15] - cmp x12, 0xFF - beq LOCAL_LABEL(Exit) - -LOCAL_LABEL(UpdateCardTable): - mov x12, 0xFF - strb w12, [x15] - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Check if we need to update the card bundle table - ldr x12, LOCAL_LABEL(wbs_card_bundle_table) - add x15, x12, x14, lsr #21 - ldrb w12, [x15] - cmp x12, 0xFF - beq LOCAL_LABEL(Exit) - -LOCAL_LABEL(UpdateCardBundle): - mov x12, 0xFF - strb w12, [x15] -#endif - -LOCAL_LABEL(Exit): - add x14, x14, 8 - ret lr -WRITE_BARRIER_END JIT_WriteBarrier - - // Begin patchable literal pool - .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line -WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table -LOCAL_LABEL(wbs_begin): -LOCAL_LABEL(wbs_card_table): - .quad 0 -LOCAL_LABEL(wbs_card_bundle_table): - .quad 0 -LOCAL_LABEL(wbs_sw_ww_table): - .quad 0 -LOCAL_LABEL(wbs_ephemeral_low): - .quad 0 -LOCAL_LABEL(wbs_ephemeral_high): - .quad 0 -LOCAL_LABEL(wbs_lowest_address): - .quad 0 -LOCAL_LABEL(wbs_highest_address): - .quad 0 -#ifdef WRITE_BARRIER_CHECK -LOCAL_LABEL(wbs_GCShadow): - .quad 0 -LOCAL_LABEL(wbs_GCShadowEnd): - .quad 0 -#endif -WRITE_BARRIER_END JIT_WriteBarrier_Table - - -// ------------------------------------------------------------------ -// End of the writeable code region -LEAF_ENTRY JIT_PatchedCodeLast, _TEXT - ret lr -LEAF_END JIT_PatchedCodeLast, _TEXT - // void SinglecastDelegateInvokeStub(Delegate *pThis) LEAF_ENTRY SinglecastDelegateInvokeStub, _TEXT cmp x0, #0 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index d72d3ad7863e1..bc88d15ee330f 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -233,36 +233,6 @@ ThePreStubPatchLabel ret lr LEAF_END -;----------------------------------------------------------------------------- -; The following Macros help in WRITE_BARRIER Implementations - ; WRITE_BARRIER_ENTRY - ; - ; Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way - ; to declare a write barrier function. - ; - MACRO - WRITE_BARRIER_ENTRY $name - - LEAF_ENTRY $name - MEND - - ; WRITE_BARRIER_END - ; - ; The partner to WRITE_BARRIER_ENTRY, used like NESTED_END. - ; - MACRO - WRITE_BARRIER_END $__write_barrier_name - - LEAF_END_MARKED $__write_barrier_name - - MEND - -; ------------------------------------------------------------------ -; Start of the writeable code region - LEAF_ENTRY JIT_PatchedCodeStart - ret lr - LEAF_END - ;----------------------------------------------------------------------------- ; void JIT_UpdateWriteBarrierState(bool skipEphemeralCheck, size_t writeableOffset) ; @@ -275,7 +245,7 @@ ThePreStubPatchLabel ; Align and group state info together so it fits in a single cache line ; and each entry can be written atomically ; - WRITE_BARRIER_ENTRY JIT_UpdateWriteBarrierState + LEAF_ENTRY JIT_UpdateWriteBarrierState PROLOG_SAVE_REG_PAIR fp, lr, #-16! ; x0-x7, x10 will contain intended new state @@ -339,204 +309,7 @@ EphemeralCheckEnabled EPILOG_RESTORE_REG_PAIR fp, lr, #16! 
EPILOG_RETURN - WRITE_BARRIER_END JIT_UpdateWriteBarrierState - - ; Begin patchable literal pool - ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line - WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table -wbs_begin -wbs_card_table - DCQ 0 -wbs_card_bundle_table - DCQ 0 -wbs_sw_ww_table - DCQ 0 -wbs_ephemeral_low - DCQ 0 -wbs_ephemeral_high - DCQ 0 -wbs_lowest_address - DCQ 0 -wbs_highest_address - DCQ 0 -#ifdef WRITE_BARRIER_CHECK -wbs_GCShadow - DCQ 0 -wbs_GCShadowEnd - DCQ 0 -#endif - WRITE_BARRIER_END JIT_WriteBarrier_Table - -; void JIT_ByRefWriteBarrier -; On entry: -; x13 : the source address (points to object reference to write) -; x14 : the destination address (object reference written here) -; -; On exit: -; x12 : trashed -; x13 : incremented by 8 -; x14 : incremented by 8 -; x15 : trashed -; x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -; - WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier - - ldr x15, [x13], 8 - b JIT_CheckedWriteBarrier - - WRITE_BARRIER_END JIT_ByRefWriteBarrier - -;----------------------------------------------------------------------------- -; Simple WriteBarriers -; void JIT_CheckedWriteBarrier(Object** dst, Object* src) -; On entry: -; x14 : the destination address (LHS of the assignment) -; x15 : the object reference (RHS of the assignment) -; -; On exit: -; x12 : trashed -; x14 : incremented by 8 -; x15 : trashed -; x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -; - WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier - ldr x12, wbs_lowest_address - cmp x14, x12 - - ldr x12, wbs_highest_address - ccmphs x14, x12, #0x2 - blo JIT_WriteBarrier - -NotInHeap - str x15, [x14], 8 - ret lr - WRITE_BARRIER_END JIT_CheckedWriteBarrier - -; void JIT_WriteBarrier(Object** dst, Object* src) -; On entry: -; x14 : the destination address (LHS of the assignment) -; x15 : the object reference (RHS of the assignment) -; -; On exit: -; x12 : trashed -; x14 : incremented by 8 -; x15 : trashed -; x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -; - WRITE_BARRIER_ENTRY JIT_WriteBarrier - stlr x15, [x14] - -#ifdef WRITE_BARRIER_CHECK - ; Update GC Shadow Heap - - ; Do not perform the work if g_GCShadow is 0 - ldr x12, wbs_GCShadow - cbz x12, ShadowUpdateDisabled - - ; need temporary register. Save before using. - str x13, [sp, #-16]! - - ; Compute address of shadow heap location: - ; pShadow = $g_GCShadow + (x14 - g_lowest_address) - ldr x13, wbs_lowest_address - sub x13, x14, x13 - add x12, x13, x12 - - ; if (pShadow >= $g_GCShadowEnd) goto end - ldr x13, wbs_GCShadowEnd - cmp x12, x13 - bhs ShadowUpdateEnd - - ; *pShadow = x15 - str x15, [x12] - - ; Ensure that the write to the shadow heap occurs before the read from the GC heap so that race - ; conditions are caught by INVALIDGCVALUE. 
- dmb ish - - ; if ([x14] == x15) goto end - ldr x13, [x14] - cmp x13, x15 - beq ShadowUpdateEnd - - ; *pShadow = INVALIDGCVALUE (0xcccccccd) - movz x13, #0xcccd - movk x13, #0xcccc, LSL #16 - str x13, [x12] - -ShadowUpdateEnd - ldr x13, [sp], #16 -ShadowUpdateDisabled -#endif - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - ; Update the write watch table if necessary - ldr x12, wbs_sw_ww_table - cbz x12, CheckCardTable - add x12, x12, x14, LSR #0xC // SoftwareWriteWatch::AddressToTableByteIndexShift - ldrb w17, [x12] - cbnz x17, CheckCardTable - mov w17, 0xFF - strb w17, [x12] -#endif - -CheckCardTable - ; Branch to Exit if the reference is not in the Gen0 heap - ; - ldr x12, wbs_ephemeral_low - cbz x12, SkipEphemeralCheck - cmp x15, x12 - - ldr x12, wbs_ephemeral_high - - ; Compare against the upper bound if the previous comparison indicated - ; that the destination address is greater than or equal to the lower - ; bound. Otherwise, set the C flag (specified by the 0x2) so that the - ; branch to exit is taken. - ccmp x15, x12, #0x2, hs - - bhs Exit - -SkipEphemeralCheck - ; Check if we need to update the card table - ldr x12, wbs_card_table - - ; x15 := pointer into card table - add x15, x12, x14, lsr #11 - - ldrb w12, [x15] - cmp x12, 0xFF - beq Exit - -UpdateCardTable - mov x12, 0xFF - strb w12, [x15] - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - ; Check if we need to update the card bundle table - ldr x12, wbs_card_bundle_table - - ; x15 := pointer into card bundle table - add x15, x12, x14, lsr #21 - - ldrb w12, [x15] - cmp x12, 0xFF - beq Exit - - mov x12, 0xFF - strb w12, [x15] -#endif - -Exit - add x14, x14, 8 - ret lr - WRITE_BARRIER_END JIT_WriteBarrier - -; ------------------------------------------------------------------ -; End of the writeable code region - LEAF_ENTRY JIT_PatchedCodeLast - ret lr - LEAF_END + LEAF_END JIT_UpdateWriteBarrierState ; void SinglecastDelegateInvokeStub(Delegate *pThis) LEAF_ENTRY SinglecastDelegateInvokeStub diff --git a/src/coreclr/vm/arm64/patchedcode.S b/src/coreclr/vm/arm64/patchedcode.S new file mode 100644 index 0000000000000..2c1199be69a78 --- /dev/null +++ b/src/coreclr/vm/arm64/patchedcode.S @@ -0,0 +1,232 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "asmconstants.h" +#include "unixasmmacros.inc" + +//----------------------------------------------------------------------------- +// The following Macros help in WRITE_BARRIER Implementations +// WRITE_BARRIER_ENTRY +// +// Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way +// to declare a write barrier function. +// +.macro WRITE_BARRIER_ENTRY name + LEAF_ENTRY \name, _TEXT +.endm + +// WRITE_BARRIER_END +// +// The partner to WRITE_BARRIER_ENTRY, used like NESTED_END. 
+// +.macro WRITE_BARRIER_END name + LEAF_END_MARKED \name, _TEXT +.endm + +.balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line +//------------------------------------------ +// Start of the writeable code region +LEAF_ENTRY JIT_PatchedCodeStart, _TEXT + ret lr +LEAF_END JIT_PatchedCodeStart, _TEXT + +// void JIT_ByRefWriteBarrier +// On entry: +// x13 : the source address (points to object reference to write) +// x14 : the destination address (object reference written here) +// +// On exit: +// x12 : trashed +// x13 : incremented by 8 +// x14 : incremented by 8 +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +// +WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier + + ldr x15, [x13], 8 + b C_FUNC(JIT_CheckedWriteBarrier) + +WRITE_BARRIER_END JIT_ByRefWriteBarrier + +//----------------------------------------------------------------------------- +// Simple WriteBarriers +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +// On entry: +// x14 : the destination address (LHS of the assignment) +// x15 : the object reference (RHS of the assignment) +// +// On exit: +// x12 : trashed +// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +// +WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier + ldr x12, LOCAL_LABEL(wbs_lowest_address) + cmp x14, x12 + + ldr x12, LOCAL_LABEL(wbs_highest_address) + + // Compare against the upper bound if the previous comparison indicated + // that the destination address is greater than or equal to the lower + // bound. Otherwise, set the C flag (specified by the 0x2) so that the + // branch below is not taken. + ccmp x14, x12, #0x2, hs + + bhs LOCAL_LABEL(NotInHeap) + + b C_FUNC(JIT_WriteBarrier) + +LOCAL_LABEL(NotInHeap): + str x15, [x14], 8 + ret lr +WRITE_BARRIER_END JIT_CheckedWriteBarrier + +// void JIT_WriteBarrier(Object** dst, Object* src) +// On entry: +// x14 : the destination address (LHS of the assignment) +// x15 : the object reference (RHS of the assignment) +// +// On exit: +// x12 : trashed +// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +// +WRITE_BARRIER_ENTRY JIT_WriteBarrier + stlr x15, [x14] + +#ifdef WRITE_BARRIER_CHECK + // Update GC Shadow Heap + + // Do not perform the work if g_GCShadow is 0 + ldr x12, LOCAL_LABEL(wbs_GCShadow) + cbz x12, LOCAL_LABEL(ShadowUpdateDisabled) + + // need temporary register. Save before using. + str x13, [sp, #-16]! + + // Compute address of shadow heap location: + // pShadow = g_GCShadow + (x14 - g_lowest_address) + ldr x13, LOCAL_LABEL(wbs_lowest_address) + sub x13, x14, x13 + add x12, x13, x12 + + // if (pShadow >= g_GCShadowEnd) goto end + ldr x13, LOCAL_LABEL(wbs_GCShadowEnd) + cmp x12, x13 + bhs LOCAL_LABEL(ShadowUpdateEnd) + + // *pShadow = x15 + str x15, [x12] + + // Ensure that the write to the shadow heap occurs before the read from the GC heap so that race + // conditions are caught by INVALIDGCVALUE. 
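The ordering requirement called out above is the heart of WRITE_BARRIER_CHECK mode: the shadow write must be visible before the heap slot is re-read, or a racing store could go undetected. In C, the shadow-heap block around the dmb ish below is roughly the following sketch; g_GCShadow, g_GCShadowEnd, and g_lowest_address are the globals behind the wbs_ labels:

    #include <stddef.h>
    #include <stdint.h>

    #define INVALIDGCVALUE 0xcccccccdu

    static void shadow_check(uintptr_t *dst, uintptr_t ref, uint8_t *g_GCShadow,
                             uint8_t *g_GCShadowEnd, uint8_t *g_lowest_address)
    {
        if (g_GCShadow == NULL)
            return;                              // shadow heap disabled
        uintptr_t *shadow =
            (uintptr_t *)(g_GCShadow + ((uint8_t *)dst - g_lowest_address));
        if ((uint8_t *)shadow >= g_GCShadowEnd)
            return;
        *shadow = ref;
        __atomic_thread_fence(__ATOMIC_SEQ_CST); // the dmb ish below
        if (*dst != ref)
            *shadow = INVALIDGCVALUE;            // mark racy slot; GC asserts on it
    }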
+ dmb ish + + // if ([x14] == x15) goto end + ldr x13, [x14] + cmp x13, x15 + beq LOCAL_LABEL(ShadowUpdateEnd) + + // *pShadow = INVALIDGCVALUE (0xcccccccd) + movz x13, #0xcccd + movk x13, #0xcccc, LSL #16 + str x13, [x12] + +LOCAL_LABEL(ShadowUpdateEnd): + ldr x13, [sp], #16 +LOCAL_LABEL(ShadowUpdateDisabled): +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + ldr x12, LOCAL_LABEL(wbs_sw_ww_table) + cbz x12, LOCAL_LABEL(CheckCardTable) + add x12, x12, x14, lsr #0xc // SoftwareWriteWatch::AddressToTableByteIndexShift + ldrb w17, [x12] + cbnz x17, LOCAL_LABEL(CheckCardTable) + mov w17, #0xFF + strb w17, [x12] +#endif + +LOCAL_LABEL(CheckCardTable): + // Branch to Exit if the reference is not in the Gen0 heap + // + ldr x12, LOCAL_LABEL(wbs_ephemeral_low) + cbz x12, LOCAL_LABEL(SkipEphemeralCheck) + cmp x15, x12 + + ldr x12, LOCAL_LABEL(wbs_ephemeral_high) + + // Compare against the upper bound if the previous comparison indicated + // that the destination address is greater than or equal to the lower + // bound. Otherwise, set the C flag (specified by the 0x2) so that the + // branch to exit is taken. + ccmp x15, x12, #0x2, hs + + bhs LOCAL_LABEL(Exit) + +LOCAL_LABEL(SkipEphemeralCheck): + // Check if we need to update the card table + ldr x12, LOCAL_LABEL(wbs_card_table) + add x15, x12, x14, lsr #11 + ldrb w12, [x15] + cmp x12, 0xFF + beq LOCAL_LABEL(Exit) + +LOCAL_LABEL(UpdateCardTable): + mov x12, 0xFF + strb w12, [x15] + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Check if we need to update the card bundle table + ldr x12, LOCAL_LABEL(wbs_card_bundle_table) + add x15, x12, x14, lsr #21 + ldrb w12, [x15] + cmp x12, 0xFF + beq LOCAL_LABEL(Exit) + +LOCAL_LABEL(UpdateCardBundle): + mov x12, 0xFF + strb w12, [x15] +#endif + +LOCAL_LABEL(Exit): + add x14, x14, 8 + ret lr +WRITE_BARRIER_END JIT_WriteBarrier + + // Begin patchable literal pool + .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line +WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table +LOCAL_LABEL(wbs_begin): +LOCAL_LABEL(wbs_card_table): + .quad 0 +LOCAL_LABEL(wbs_card_bundle_table): + .quad 0 +LOCAL_LABEL(wbs_sw_ww_table): + .quad 0 +LOCAL_LABEL(wbs_ephemeral_low): + .quad 0 +LOCAL_LABEL(wbs_ephemeral_high): + .quad 0 +LOCAL_LABEL(wbs_lowest_address): + .quad 0 +LOCAL_LABEL(wbs_highest_address): + .quad 0 +#ifdef WRITE_BARRIER_CHECK +LOCAL_LABEL(wbs_GCShadow): + .quad 0 +LOCAL_LABEL(wbs_GCShadowEnd): + .quad 0 +#endif +WRITE_BARRIER_END JIT_WriteBarrier_Table + + +// ------------------------------------------------------------------ +// End of the writeable code region +LEAF_ENTRY JIT_PatchedCodeLast, _TEXT + ret lr +LEAF_END JIT_PatchedCodeLast, _TEXT diff --git a/src/coreclr/vm/arm64/patchedcode.asm b/src/coreclr/vm/arm64/patchedcode.asm new file mode 100644 index 0000000000000..bd4f57cc6810c --- /dev/null +++ b/src/coreclr/vm/arm64/patchedcode.asm @@ -0,0 +1,239 @@ +; Licensed to the .NET Foundation under one or more agreements. +; The .NET Foundation licenses this file to you under the MIT license. 
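The wbs_ literal pool declared above (and re-declared in MASM syntax in the file that follows) is the data half of the patching story: the barriers read it with pc-relative ldr, and JIT_UpdateWriteBarrierState, which this change leaves outside the patched region, rewrites it through its writeableOffset parameter. A sketch of that update; the struct mirrors the wbs_ labels (WRITE_BARRIER_CHECK entries omitted), and the helper name is an assumption:

    #include <stddef.h>
    #include <stdint.h>

    struct wbs_state {               // 64-byte aligned in the real pool: one cache line
        uint64_t card_table, card_bundle_table, sw_ww_table;
        uint64_t ephemeral_low, ephemeral_high;
        uint64_t lowest_address, highest_address;
    };

    static void update_barrier_state(struct wbs_state *pool,
                                     ptrdiff_t writeable_offset,
                                     const struct wbs_state *fresh)
    {
        volatile uint64_t *dst =
            (volatile uint64_t *)((uint8_t *)pool + writeable_offset);
        const uint64_t *src = (const uint64_t *)fresh;
        for (size_t i = 0; i < sizeof(*fresh) / sizeof(uint64_t); i++)
            dst[i] = src[i];         // aligned 8-byte stores: each entry flips atomically
    }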
+ +#include "ksarm64.h" +#include "asmconstants.h" +#include "asmmacros.h" + + ;;like TEXTAREA, but with 64 byte alignment so that we can align the patchable pool below to 64 without warning + AREA |.text|,ALIGN=6,CODE,READONLY + +;----------------------------------------------------------------------------- +; The following Macros help in WRITE_BARRIER Implementations + ; WRITE_BARRIER_ENTRY + ; + ; Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way + ; to declare a write barrier function. + ; + MACRO + WRITE_BARRIER_ENTRY $name + + LEAF_ENTRY $name + MEND + + ; WRITE_BARRIER_END + ; + ; The partner to WRITE_BARRIER_ENTRY, used like NESTED_END. + ; + MACRO + WRITE_BARRIER_END $__write_barrier_name + + LEAF_END_MARKED $__write_barrier_name + + MEND + +; ------------------------------------------------------------------ +; Start of the writeable code region + LEAF_ENTRY JIT_PatchedCodeStart + ret lr + LEAF_END + + ; Begin patchable literal pool + ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table +wbs_begin +wbs_card_table + DCQ 0 +wbs_card_bundle_table + DCQ 0 +wbs_sw_ww_table + DCQ 0 +wbs_ephemeral_low + DCQ 0 +wbs_ephemeral_high + DCQ 0 +wbs_lowest_address + DCQ 0 +wbs_highest_address + DCQ 0 +#ifdef WRITE_BARRIER_CHECK +wbs_GCShadow + DCQ 0 +wbs_GCShadowEnd + DCQ 0 +#endif + WRITE_BARRIER_END JIT_WriteBarrier_Table + +; void JIT_ByRefWriteBarrier +; On entry: +; x13 : the source address (points to object reference to write) +; x14 : the destination address (object reference written here) +; +; On exit: +; x12 : trashed +; x13 : incremented by 8 +; x14 : incremented by 8 +; x15 : trashed +; x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +; + WRITE_BARRIER_ENTRY JIT_ByRefWriteBarrier + + ldr x15, [x13], 8 + b JIT_CheckedWriteBarrier + + WRITE_BARRIER_END JIT_ByRefWriteBarrier + +;----------------------------------------------------------------------------- +; Simple WriteBarriers +; void JIT_CheckedWriteBarrier(Object** dst, Object* src) +; On entry: +; x14 : the destination address (LHS of the assignment) +; x15 : the object reference (RHS of the assignment) +; +; On exit: +; x12 : trashed +; x14 : incremented by 8 +; x15 : trashed +; x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +; + WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier + ldr x12, wbs_lowest_address + cmp x14, x12 + + ldr x12, wbs_highest_address + ccmphs x14, x12, #0x2 + blo JIT_WriteBarrier + +NotInHeap + str x15, [x14], 8 + ret lr + WRITE_BARRIER_END JIT_CheckedWriteBarrier + +; void JIT_WriteBarrier(Object** dst, Object* src) +; On entry: +; x14 : the destination address (LHS of the assignment) +; x15 : the object reference (RHS of the assignment) +; +; On exit: +; x12 : trashed +; x14 : incremented by 8 +; x15 : trashed +; x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +; + WRITE_BARRIER_ENTRY JIT_WriteBarrier + stlr x15, [x14] + +#ifdef WRITE_BARRIER_CHECK + ; Update GC Shadow Heap + + ; Do not perform the work if g_GCShadow is 0 + ldr x12, wbs_GCShadow + cbz x12, ShadowUpdateDisabled + + ; need temporary register. Save before using. + str x13, [sp, #-16]! 
+ + ; Compute address of shadow heap location: + ; pShadow = $g_GCShadow + (x14 - g_lowest_address) + ldr x13, wbs_lowest_address + sub x13, x14, x13 + add x12, x13, x12 + + ; if (pShadow >= $g_GCShadowEnd) goto end + ldr x13, wbs_GCShadowEnd + cmp x12, x13 + bhs ShadowUpdateEnd + + ; *pShadow = x15 + str x15, [x12] + + ; Ensure that the write to the shadow heap occurs before the read from the GC heap so that race + ; conditions are caught by INVALIDGCVALUE. + dmb ish + + ; if ([x14] == x15) goto end + ldr x13, [x14] + cmp x13, x15 + beq ShadowUpdateEnd + + ; *pShadow = INVALIDGCVALUE (0xcccccccd) + movz x13, #0xcccd + movk x13, #0xcccc, LSL #16 + str x13, [x12] + +ShadowUpdateEnd + ldr x13, [sp], #16 +ShadowUpdateDisabled +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ; Update the write watch table if necessary + ldr x12, wbs_sw_ww_table + cbz x12, CheckCardTable + add x12, x12, x14, LSR #0xC // SoftwareWriteWatch::AddressToTableByteIndexShift + ldrb w17, [x12] + cbnz x17, CheckCardTable + mov w17, 0xFF + strb w17, [x12] +#endif + +CheckCardTable + ; Branch to Exit if the reference is not in the Gen0 heap + ; + ldr x12, wbs_ephemeral_low + cbz x12, SkipEphemeralCheck + cmp x15, x12 + + ldr x12, wbs_ephemeral_high + + ; Compare against the upper bound if the previous comparison indicated + ; that the destination address is greater than or equal to the lower + ; bound. Otherwise, set the C flag (specified by the 0x2) so that the + ; branch to exit is taken. + ccmp x15, x12, #0x2, hs + + bhs Exit + +SkipEphemeralCheck + ; Check if we need to update the card table + ldr x12, wbs_card_table + + ; x15 := pointer into card table + add x15, x12, x14, lsr #11 + + ldrb w12, [x15] + cmp x12, 0xFF + beq Exit + +UpdateCardTable + mov x12, 0xFF + strb w12, [x15] + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + ; Check if we need to update the card bundle table + ldr x12, wbs_card_bundle_table + + ; x15 := pointer into card bundle table + add x15, x12, x14, lsr #21 + + ldrb w12, [x15] + cmp x12, 0xFF + beq Exit + + mov x12, 0xFF + strb w12, [x15] +#endif + +Exit + add x14, x14, 8 + ret lr + WRITE_BARRIER_END JIT_WriteBarrier + +; ------------------------------------------------------------------ +; End of the writeable code region + LEAF_ENTRY JIT_PatchedCodeLast + ret lr + LEAF_END + +; Must be at very end of file + END
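Why the split into dedicated files matters at all: the VM derives the patchable region purely from the addresses of the JIT_PatchedCodeStart and JIT_PatchedCodeLast markers, so everything between them must stay contiguous and in declared order. Mach-O's subsections-via-symbols lets the linker reorder or dead-strip individual symbols, which would silently break that bracketing; isolating the code in its own translation unit with that behavior disabled (and, for MASM, relying on the declared order noted in the patchedcode.asm header) keeps the bracket valid. A sketch of how such bounds are typically consumed, with the mprotect use illustrative rather than the VM's actual sequence:

    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    extern uint8_t JIT_PatchedCodeStart, JIT_PatchedCodeLast; // the marker functions

    // Make the runtime-patched region writable. Only meaningful if the linker
    // kept everything between the two markers contiguous.
    static int make_patched_region_writable(void)
    {
        uintptr_t page  = (uintptr_t)sysconf(_SC_PAGESIZE);
        uintptr_t begin = (uintptr_t)&JIT_PatchedCodeStart & ~(page - 1);
        uintptr_t end   = ((uintptr_t)&JIT_PatchedCodeLast + page - 1) & ~(page - 1);
        return mprotect((void *)begin, (size_t)(end - begin),
                        PROT_READ | PROT_WRITE | PROT_EXEC);
    }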