[RISC-V] Fix errors in crosgen2 for risc-v #97368

Merged Jan 30, 2024 (22 commits; changes shown from 17 commits)
18 changes: 13 additions & 5 deletions src/coreclr/jit/codegenlinear.cpp
@@ -1258,7 +1258,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
unspillType = lcl->TypeGet();
}

#if defined(TARGET_LOONGARCH64)
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if (varTypeIsFloating(unspillType) && emitter::isGeneralRegister(tree->GetRegNum()))
{
unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
@@ -1310,15 +1310,23 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
// The spill temp allocated for it is associated with the original tree that defined the
// register that it was spilled from.
// So we use 'unspillTree' to recover that spill temp.
TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->GetRegNum());
emitAttr emitType = emitActualTypeSize(unspillTree->TypeGet());
TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->GetRegNum());
var_types unspillType = unspillTree->TypeGet();

// Reload into the register specified by 'tree' which may be a GT_RELOAD.
regNumber dstReg = tree->GetRegNum();
GetEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitType, dstReg, t->tdTempNum(), 0);
#ifdef TARGET_RISCV64
if (varTypeIsFloating(unspillType) && emitter::isGeneralRegister(dstReg))
{
unspillType = (unspillType == TYP_FLOAT) ? TYP_INT : TYP_LONG;
}
#endif // TARGET_RISCV64
GetEmitter()->emitIns_R_S(ins_Load(unspillType), emitActualTypeSize(unspillType), dstReg, t->tdTempNum(),
0);
regSet.tmpRlsTemp(t);

unspillTree->gtFlags &= ~GTF_SPILLED;
gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
gcInfo.gcMarkRegPtrVal(dstReg, unspillType);
}
}
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenriscv64.cpp
@@ -5169,7 +5169,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
{
if (compiler->opts.compReloc)
{
emit->emitIns_R_AI(INS_jalr, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
emit->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
}
else
{
11 changes: 6 additions & 5 deletions src/coreclr/jit/ee_il_dll.cpp
@@ -1133,9 +1133,10 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

// For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
// For arm64/LoongArch64/RISCV64, we want to allocate JIT data always adjacent to code similar to what native
// compiler does.
// This way allows us to use a single `ldr` to access such data like float constant/jmp table.
// For LoongArch64 using `pcaddi + ld` to access such data.

@@ -1149,7 +1150,7 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
args->hotCodeSize = roDataOffset + args->roDataSize;
args->roDataSize = 0;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

info.compCompHnd->allocMem(args);

@@ -1166,15 +1167,15 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

// Fix up data section pointers.
assert(args->roDataBlock == nullptr);
assert(args->roDataBlockRW == nullptr);
args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset;
args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
}

void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
10 changes: 8 additions & 2 deletions src/coreclr/jit/emitriscv64.cpp
@@ -1423,7 +1423,7 @@ void emitter::emitIns_Call(EmitCallType callType,
assert(callType == EC_FUNC_TOKEN);
assert(addr != NULL);

addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0
addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jalr ra/r0,rd,0
id->idAddr()->iiaAddr = (BYTE*)addr;

if (emitComp->opts.compReloc)
@@ -1546,7 +1546,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t
#endif
emitOutput_Instr(dst, 0x00000067 | (REG_DEFAULT_HELPER_CALL_TARGET << 15) | reg2 << 7);

emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_JALR);
emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_PC);
}
else
{
@@ -4049,6 +4049,12 @@ void emitter::emitDispIns(
instrSize = sizeof(code_t);
code_t instruction;
memcpy(&instruction, instr, instrSize);
#ifdef DEBUG
if (emitComp->verbose && i != 0)
{
printf(" ");
}
#endif
emitDispInsName(instruction, instr, doffs, offset, id, ig);
}
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
@@ -5570,7 +5570,7 @@ struct GenTreeCall final : public GenTree
return WellKnownArg::VirtualStubCell;
}

#if defined(TARGET_ARMARCH)
#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
// For ARM architectures, we always use an indirection cell for R2R calls.
if (IsR2RRelativeIndir() && !IsDelegateInvoke())
{
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsra.cpp
@@ -7045,6 +7045,12 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition)
if (!treeNode->IsMultiRegNode())
{
type = getDefType(treeNode);
#ifdef TARGET_RISCV64
if (isFloatRegType(type) && genIsValidIntReg(treeNode->GetRegNum()))
{
type = (type == TYP_FLOAT) ? TYP_INT : TYP_LONG;
}
#endif // TARGET_RISCV64
Member
In what cases do you have these mismatches in the RISC-V backend? Why can you not get rid of them? (For example by inserting GT_BITCAST)

Member
This PR introduces many RISCV ifdefs in general code because of the mismatch. It would be preferable to avoid it.

Member Author
When there are more float arguments than the eight float argument registers can hold, RISC-V passes the remaining ones in whatever integer argument registers are still available. That is where the mismatches between float-typed nodes and integer registers come from. Could you please suggest a better solution?

Sorry that the code is not good enough. When I see errors, I tend to focus on fixing them only for RISC-V and to look for easy, simple fixes, because of my limited understanding of the .NET runtime and my limited development experience. I am sorry, and thank you for your help.
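To make the ABI behavior concrete, here is a small illustrative C++ example (not taken from the PR; the function is made up). Under the RISC-V LP64D calling convention the first eight floating-point scalars go in fa0-fa7; once those are exhausted, further floating-point arguments are passed in the remaining integer argument registers, so the JIT ends up with float-typed nodes assigned to integer registers:

// Hypothetical snippet illustrating the mismatch described above.
// f0..f7 are passed in fa0..fa7; f8 no longer fits in a floating-point
// argument register, so the ABI passes it in the integer register a0 even
// though its type is still double.
extern "C" double ManyFloatArgs(double f0, double f1, double f2, double f3,
                                double f4, double f5, double f6, double f7,
                                double f8 /* passed in a0 */)
{
    return f0 + f8;
}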

Member
@jakobbotsch, Jan 25, 2024
No worries, let's see if we can avoid them. The idea would be to have lowering insert GT_BITCAST when these mismatches happen. In fact, Lowering::LowerFloatArg already has this logic to handle some other cases. Can you generalize Lowering::LowerArg so that it calls LowerFloatArg for these new cases too? In the end you would have PUTARG_REG<integer register>(BITCAST<TYP_INT>(TYP_FLOAT node)) instead of the PUTARG_REG<integer register>(TYP_FLOAT node) that I assume you currently end up with.
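As a rough sketch of that suggestion (modeled on the existing ARM32 Lowering::LowerFloatArgReg; the exact helper name, signature, and call site in the final change may differ), lowering could wrap such an argument in a bitcast to the matching integer type:

// Illustrative sketch only, not the code committed in this PR.
// Given a float-typed argument node that the ABI assigns to an integer
// register, wrap it in a GT_BITCAST of the corresponding integer type so the
// node type and the register type agree downstream (LSRA, codegen).
GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum)
{
    assert(varTypeIsFloating(arg));
    var_types floatType = arg->TypeGet();
    var_types intType   = (floatType == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
    GenTree*  intArg    = comp->gtNewBitCastNode(intType, arg);
    intArg->SetRegNum(regNum);
    return intArg; // the caller replaces the PUTARG_REG operand with this node
}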

Member Author
Thanks for your comment. I will try what you suggested.

Member Author
I updated the mismatch cases based on your comment. I hope it is better now. If I misunderstood your comment or there is any problem, please let me know and I will fix it. Thank you!

Member
Looks great to me.

@shushanhf can you please check if LA64 is able to use the same fix so we can remove LA64 specific handling for unspilling as well?

Contributor
@shushanhf, Jan 29, 2024
Thanks, I will check it.
We have some patches in our local runtime for releasing LoongArch64's SDK 8.0, and we will push them after the Chinese Spring Festival.
R2R support in SDK 6.0 and SDK 8.0 is OK for LA64; the intrinsic feature is finished and will be ready to release once the LoongArch64 intrinsic API is merged.

BTW, I think we will push a lot of code during 2024, including improving R2R, supporting intrinsics and Native AOT, pushing Mono, and some other optimizations.
Mono for Unity3D is finished, and a Unity3D game is running on LoongArch64 Linux.

Contributor
> @shushanhf can you please check if LA64 is able to use the same fix so we can remove LA64 specific handling for unspilling as well?

Thanks very much, I think this is OK for LA64.

Member Author
@shushanhf Thank you for checking. I have included a commit for LA64. Thank you.

}
else
{
18 changes: 14 additions & 4 deletions src/coreclr/jit/regset.cpp
@@ -330,21 +330,31 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */)
treeType = tree->TypeGet();
}

var_types tempType = RegSet::tmpNormalizeType(treeType);
regMaskTP mask;
bool floatSpill = false;

if (isFloatRegType(treeType))
{
floatSpill = true;
mask = genRegMaskFloat(reg ARM_ARG(treeType));
#ifdef TARGET_RISCV64
if (genIsValidIntReg(reg))
{
treeType = (treeType == TYP_FLOAT) ? TYP_INT : TYP_LONG;
mask = genRegMask(reg);
}
else
#endif // TARGET_RISCV64
{
floatSpill = true;
mask = genRegMaskFloat(reg ARM_ARG(treeType));
}
}
else
{
mask = genRegMask(reg);
}

rsNeededSpillReg = true;
var_types tempType = RegSet::tmpNormalizeType(treeType);
rsNeededSpillReg = true;

// We should only be spilling nodes marked for spill,
// vars should be handled elsewhere, and to prevent
1 change: 0 additions & 1 deletion src/coreclr/pal/inc/rt/ntimage.h
@@ -1025,7 +1025,6 @@ typedef IMAGE_RELOCATION UNALIGNED *PIMAGE_RELOCATION;
// RISCV64 relocation types
//
#define IMAGE_REL_RISCV64_PC 0x0003
#define IMAGE_REL_RISCV64_JALR 0x0004

//
// CEF relocation types.
10 changes: 8 additions & 2 deletions src/coreclr/pal/inc/unixasmmacrosriscv64.inc
@@ -293,8 +293,14 @@ C_FUNC(\Name\()_End):
.endm

.macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
// TODO RISCV NYI
sw ra, 0(zero)

RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters

EPILOG_RESTORE_REG_PAIR fp, ra, __PWTB_CalleeSavedRegisters

EPILOG_STACK_FREE __PWTB_StackAlloc

ret
.endm


@@ -321,7 +321,6 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0)
case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR:

//TODO: consider removal of IMAGE_REL_RISCV64_JALR from runtime too
case RelocType.IMAGE_REL_BASED_RISCV64_PC:
Debug.Assert(delta == 0);
// Do not vacate space for this kind of relocation, because
@@ -420,9 +420,11 @@ private static unsafe int GetRiscV64PC(uint* pCode)
// first get the high 20 bits,
int imm = (int)((auipcInstr & 0xfffff000));
// then get the low 12 bits,
uint addiInstr = *(pCode + 1);
Debug.Assert((addiInstr & 0x707f) == 0x00000013);
imm += ((int)(addiInstr)) >> 20;
uint nextInstr = *(pCode + 1);
Debug.Assert((nextInstr & 0x707f) == 0x00000013 ||
(nextInstr & 0x707f) == 0x00000067 ||
(nextInstr & 0x707f) == 0x00003003);
imm += ((int)(nextInstr)) >> 20;

return imm;
}
@@ -434,6 +436,10 @@ private static unsafe int GetRiscV64PC(uint* pCode)
// case:EA_PTR_DSP_RELOC
// auipc reg, off-hi-20bits
// ld reg, reg, off-lo-12bits
// case:
// INS_OPTS_C
// auipc reg, off-hi-20bits
// jalr reg, reg, off-lo-12bits
private static unsafe void PutRiscV64PC(uint* pCode, long imm32)
{
// Verify that we got a valid offset
Expand All @@ -446,10 +452,12 @@ private static unsafe void PutRiscV64PC(uint* pCode, long imm32)
auipcInstr |= (uint)((imm32 + 0x800) & 0xfffff000);
*pCode = auipcInstr;

uint addiInstr = *(pCode + 1);
Debug.Assert((addiInstr & 0x707f) == 0x00000013);
addiInstr |= (uint)((doff & 0xfff) << 20);
*(pCode + 1) = addiInstr;
uint nextInstr = *(pCode + 1);
Debug.Assert((nextInstr & 0x707f) == 0x00000013 ||
(nextInstr & 0x707f) == 0x00000067 ||
(nextInstr & 0x707f) == 0x00003003);
nextInstr |= (uint)((doff & 0xfff) << 20);
*(pCode + 1) = nextInstr;

Debug.Assert(GetRiscV64PC(pCode) == imm32);
}
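For reference, here is a minimal C++ sketch (not part of the PR) of the hi-20/lo-12 split that these auipc-based sequences rely on; adding 0x800 before masking compensates for the second instruction sign-extending its 12-bit immediate:

#include <cassert>
#include <cstdint>

// Illustrative only: split a 32-bit PC-relative offset into the auipc
// immediate and the immediate consumed by the following addi/ld/jalr.
static void SplitPcRelOffset(int32_t offset, int32_t& hi20, int32_t& lo12)
{
    hi20 = (int32_t)(((uint32_t)offset + 0x800u) & 0xfffff000u); // upper 20 bits, rounded
    lo12 = offset - hi20;                                        // always in [-2048, 2047]
    assert(hi20 + lo12 == offset);
}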
4 changes: 4 additions & 0 deletions src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
@@ -89,6 +89,10 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ
if (potentialType.Namespace != "System.Runtime.Intrinsics.Arm")
return "";
}
else if (architecture == TargetArchitecture.RiscV64)
{
return "";
}
else
{
throw new InternalCompilerErrorException("Unknown architecture");
@@ -52,7 +52,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
// to the same alignment as __m128, which is supported by the ABI.
alignment = new LayoutInt(8);
}
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
{
// The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
// 16-byte alignment for __m256.
Expand All @@ -73,7 +73,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
// to the same alignment as __m128, which is supported by the ABI.
alignment = new LayoutInt(8);
}
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
{
// The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
// 16-byte alignment for __m256.
8 changes: 6 additions & 2 deletions src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
@@ -412,9 +412,10 @@ private CompilationResult CompileMethodInternal(IMethodNode methodCodeNodeNeedin

if (codeSize < _code.Length)
{
if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.ARM64)
if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.ARM64
&& _compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.RiscV64)
{
// For xarch/arm32, the generated code is sometimes smaller than the memory allocated.
// For xarch/arm32/RiscV64, the generated code is sometimes smaller than the memory allocated.
// In that case, trim the codeBlock to the actual value.
//
// For arm64, the allocation request of `hotCodeSize` also includes the roData size
@@ -4084,6 +4085,9 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
if (targetArchitecture == TargetArchitecture.ARM && !_compilation.TypeSystemContext.Target.IsWindows)
flags.Set(CorJitFlag.CORJIT_FLAG_RELATIVE_CODE_RELOCS);

if (targetArchitecture == TargetArchitecture.RiscV64)
flags.Set(CorJitFlag.CORJIT_FLAG_FRAMED);

if (this.MethodBeingCompiled.IsUnmanagedCallersOnly)
{
// Validate UnmanagedCallersOnlyAttribute usage
@@ -29,12 +29,6 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
return (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
}

//// The SIMD Intrinsic types are meant to be handled specially and should not be passed as struct registers
if (typeDesc.IsIntrinsic)
{
throw new NotImplementedException("For RISCV64, SIMD would be implemented later");
}

MetadataType mdType = typeDesc as MetadataType;
Debug.Assert(mdType != null);

@@ -85,6 +79,16 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
{
floatFieldFlags |= (uint)StructFloatFieldInfoFlags.STRUCT_SECOND_FIELD_DOUBLE;
}

// Pass with two integer registers in `struct {int a, int b, float/double c}` cases
if (fieldIndex == 1 &&
(floatFieldFlags |
(uint)StructFloatFieldInfoFlags.STRUCT_FIRST_FIELD_SIZE_IS8 |
(uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND) ==
floatFieldFlags)
{
floatFieldFlags = (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
}
}
break;

@@ -106,6 +110,16 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
{
floatFieldFlags |= (uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND;
}

// Pass with two integer registers in `struct {int a, int b, float/double c}` cases
if (fieldIndex == 1 &&
(floatFieldFlags |
(uint)StructFloatFieldInfoFlags.STRUCT_FIRST_FIELD_SIZE_IS8 |
(uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND) ==
floatFieldFlags)
{
floatFieldFlags = (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
}
}
break;
