[RISC-V] Fix errors in crosgen2 for risc-v #97368

Merged Jan 30, 2024 (22 commits; changes shown from 17 commits)
18 changes: 13 additions & 5 deletions src/coreclr/jit/codegenlinear.cpp
@@ -1258,7 +1258,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
unspillType = lcl->TypeGet();
}

#if defined(TARGET_LOONGARCH64)
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
if (varTypeIsFloating(unspillType) && emitter::isGeneralRegister(tree->GetRegNum()))
{
unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
@@ -1310,15 +1310,23 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
// The spill temp allocated for it is associated with the original tree that defined the
// register that it was spilled from.
// So we use 'unspillTree' to recover that spill temp.
TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->GetRegNum());
emitAttr emitType = emitActualTypeSize(unspillTree->TypeGet());
TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->GetRegNum());
var_types unspillType = unspillTree->TypeGet();

// Reload into the register specified by 'tree' which may be a GT_RELOAD.
regNumber dstReg = tree->GetRegNum();
GetEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitType, dstReg, t->tdTempNum(), 0);
#ifdef TARGET_RISCV64
if (varTypeIsFloating(unspillType) && emitter::isGeneralRegister(dstReg))
{
unspillType = (unspillType == TYP_FLOAT) ? TYP_INT : TYP_LONG;
}
#endif // TARGET_RISCV64
GetEmitter()->emitIns_R_S(ins_Load(unspillType), emitActualTypeSize(unspillType), dstReg, t->tdTempNum(),
0);
regSet.tmpRlsTemp(t);

unspillTree->gtFlags &= ~GTF_SPILLED;
gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
gcInfo.gcMarkRegPtrVal(dstReg, unspillType);
}
}
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenriscv64.cpp
@@ -5169,7 +5169,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
{
if (compiler->opts.compReloc)
{
emit->emitIns_R_AI(INS_jalr, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
emit->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
}
else
{
11 changes: 6 additions & 5 deletions src/coreclr/jit/ee_il_dll.cpp
@@ -1133,9 +1133,10 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

// For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
// For arm64/LoongArch64/RISCV64, we want to allocate JIT data always adjacent to code similar to what native
// compiler does.
// This way allows us to use a single `ldr` to access such data like float constant/jmp table.
// For LoongArch64 using `pcaddi + ld` to access such data.

@@ -1149,7 +1150,7 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
args->hotCodeSize = roDataOffset + args->roDataSize;
args->roDataSize = 0;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

info.compCompHnd->allocMem(args);

@@ -1166,15 +1167,15 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

// Fix up data section pointers.
assert(args->roDataBlock == nullptr);
assert(args->roDataBlockRW == nullptr);
args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset;
args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
}

void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
10 changes: 8 additions & 2 deletions src/coreclr/jit/emitriscv64.cpp
@@ -1423,7 +1423,7 @@ void emitter::emitIns_Call(EmitCallType callType,
assert(callType == EC_FUNC_TOKEN);
assert(addr != NULL);

addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0
addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jalr ra/r0,rd,0
id->idAddr()->iiaAddr = (BYTE*)addr;

if (emitComp->opts.compReloc)
@@ -1546,7 +1546,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t
#endif
emitOutput_Instr(dst, 0x00000067 | (REG_DEFAULT_HELPER_CALL_TARGET << 15) | reg2 << 7);

emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_JALR);
emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_PC);
}
else
{
@@ -4049,6 +4049,12 @@ void emitter::emitDispIns(
instrSize = sizeof(code_t);
code_t instruction;
memcpy(&instruction, instr, instrSize);
#ifdef DEBUG
if (emitComp->verbose && i != 0)
{
printf(" ");
}
#endif
emitDispInsName(instruction, instr, doffs, offset, id, ig);
}
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
@@ -5570,7 +5570,7 @@ struct GenTreeCall final : public GenTree
return WellKnownArg::VirtualStubCell;
}

#if defined(TARGET_ARMARCH)
#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
// For ARM architectures, we always use an indirection cell for R2R calls.
if (IsR2RRelativeIndir() && !IsDelegateInvoke())
{
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsra.cpp
@@ -7045,6 +7045,12 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition)
if (!treeNode->IsMultiRegNode())
{
type = getDefType(treeNode);
#ifdef TARGET_RISCV64
if (isFloatRegType(type) && genIsValidIntReg(treeNode->GetRegNum()))
{
type = (type == TYP_FLOAT) ? TYP_INT : TYP_LONG;
}
#endif // TARGET_RISCV64
Member
In what cases do you have these mismatches in the RISC-V backend? Why can you not get rid of them? (For example by inserting GT_BITCAST)

Member
This PR introduces many RISCV ifdefs in general code because of the mismatch. It would be preferable to avoid it.

Member Author
When there are more float arguments than the eight float argument registers can hold, RISC-V passes the remaining ones in whatever integer argument registers are still available. That is where the mismatches between float-typed nodes and integer registers come from. Could you please suggest a better solution?

Sorry that the code is not good enough. When I see errors, I tend to focus on fixing them only for RISC-V and to look for easy, simple fixes, because of my limited understanding of the .NET runtime and my limited development experience. I am sorry, and thank you for your help.
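To make the ABI behavior concrete, here is a small illustrative C++ example (not taken from the PR; the function is made up). Under the RISC-V LP64D calling convention the first eight floating-point scalars go in fa0-fa7; once those are exhausted, further floating-point arguments are passed in the remaining integer argument registers, so the JIT ends up with float-typed nodes assigned to integer registers:

// Hypothetical snippet illustrating the mismatch described above.
// f0..f7 are passed in fa0..fa7; f8 no longer fits in a floating-point
// argument register, so the ABI passes it in the integer register a0 even
// though its type is still double.
extern "C" double ManyFloatArgs(double f0, double f1, double f2, double f3,
                                double f4, double f5, double f6, double f7,
                                double f8 /* passed in a0 */)
{
    return f0 + f8;
}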

Member
@jakobbotsch, Jan 25, 2024
No worries, let's see if we can avoid them. The idea would be to have lowering insert GT_BITCAST when these mismatches happen. In fact, Lowering::LowerFloatArg already has this logic to handle some other cases. Can you generalize Lowering::LowerArg so that it calls LowerFloatArg for these new cases too? In the end you would have PUTARG_REG<integer register>(BITCAST<TYP_INT>(TYP_FLOAT node)) instead of the PUTARG_REG<integer register>(TYP_FLOAT node) that I assume you currently end up with.
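As a rough sketch of that suggestion (modeled on the existing ARM32 Lowering::LowerFloatArgReg; the exact helper name, signature, and call site in the final change may differ), lowering could wrap such an argument in a bitcast to the matching integer type:

// Illustrative sketch only, not the code committed in this PR.
// Given a float-typed argument node that the ABI assigns to an integer
// register, wrap it in a GT_BITCAST of the corresponding integer type so the
// node type and the register type agree downstream (LSRA, codegen).
GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum)
{
    assert(varTypeIsFloating(arg));
    var_types floatType = arg->TypeGet();
    var_types intType   = (floatType == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
    GenTree*  intArg    = comp->gtNewBitCastNode(intType, arg);
    intArg->SetRegNum(regNum);
    return intArg; // the caller replaces the PUTARG_REG operand with this node
}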

Member Author
Thanks for your comment. I will try what you suggested.

Member Author
I updated the mismatch cases based on your comment. I hope it is better now. If I misunderstood your comment or there is any problem, please let me know and I will fix it. Thank you!

Member
Looks great to me.

@shushanhf can you please check if LA64 is able to use the same fix so we can remove LA64 specific handling for unspilling as well?

Contributor
@shushanhf, Jan 29, 2024
Thanks, I will check it.
We have some patches in our local runtime for releasing LoongArch64's SDK 8.0, and we will push them after the Chinese Spring Festival.
R2R support in SDK 6.0 and SDK 8.0 is OK for LA64; the intrinsic feature is finished and will be ready to release once the LoongArch64 intrinsic API is merged.

BTW, I think we will push a lot of code during 2024, including improving R2R, supporting intrinsics and Native AOT, pushing Mono, and some other optimizations.
Mono for Unity3D is finished, and a Unity3D game is running on LoongArch64 Linux.

Contributor
> @shushanhf can you please check if LA64 is able to use the same fix so we can remove LA64 specific handling for unspilling as well?

Thanks very much, I think this is OK for LA64.

Member Author
@shushanhf Thank you for checking. I have included a commit for LA64. Thank you.

}
else
{
18 changes: 14 additions & 4 deletions src/coreclr/jit/regset.cpp
@@ -330,21 +330,31 @@ void RegSet::rsSpillTree(regNumber reg, GenTree* tree, unsigned regIdx /* =0 */)
treeType = tree->TypeGet();
}

var_types tempType = RegSet::tmpNormalizeType(treeType);
regMaskTP mask;
bool floatSpill = false;

if (isFloatRegType(treeType))
{
floatSpill = true;
mask = genRegMaskFloat(reg ARM_ARG(treeType));
#ifdef TARGET_RISCV64
if (genIsValidIntReg(reg))
{
treeType = (treeType == TYP_FLOAT) ? TYP_INT : TYP_LONG;
mask = genRegMask(reg);
}
else
#endif // TARGET_RISCV64
{
floatSpill = true;
mask = genRegMaskFloat(reg ARM_ARG(treeType));
}
}
else
{
mask = genRegMask(reg);
}

rsNeededSpillReg = true;
var_types tempType = RegSet::tmpNormalizeType(treeType);
rsNeededSpillReg = true;

// We should only be spilling nodes marked for spill,
// vars should be handled elsewhere, and to prevent
1 change: 0 additions & 1 deletion src/coreclr/pal/inc/rt/ntimage.h
@@ -1025,7 +1025,6 @@ typedef IMAGE_RELOCATION UNALIGNED *PIMAGE_RELOCATION;
// RISCV64 relocation types
//
#define IMAGE_REL_RISCV64_PC 0x0003
#define IMAGE_REL_RISCV64_JALR 0x0004

//
// CEF relocation types.
10 changes: 8 additions & 2 deletions src/coreclr/pal/inc/unixasmmacrosriscv64.inc
@@ -293,8 +293,14 @@ C_FUNC(\Name\()_End):
.endm

.macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
// TODO RISCV NYI
sw ra, 0(zero)

RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters

EPILOG_RESTORE_REG_PAIR fp, ra, __PWTB_CalleeSavedRegisters

EPILOG_STACK_FREE __PWTB_StackAlloc

ret
.endm


@@ -321,7 +321,6 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0)
case RelocType.IMAGE_REL_BASED_LOONGARCH64_PC:
case RelocType.IMAGE_REL_BASED_LOONGARCH64_JIR:

//TODO: consider removal of IMAGE_REL_RISCV64_JALR from runtime too
case RelocType.IMAGE_REL_BASED_RISCV64_PC:
Debug.Assert(delta == 0);
// Do not vacate space for this kind of relocation, because
@@ -420,9 +420,11 @@ private static unsafe int GetRiscV64PC(uint* pCode)
// first get the high 20 bits,
int imm = (int)((auipcInstr & 0xfffff000));
// then get the low 12 bits,
uint addiInstr = *(pCode + 1);
Debug.Assert((addiInstr & 0x707f) == 0x00000013);
imm += ((int)(addiInstr)) >> 20;
uint nextInstr = *(pCode + 1);
Debug.Assert((nextInstr & 0x707f) == 0x00000013 ||
(nextInstr & 0x707f) == 0x00000067 ||
(nextInstr & 0x707f) == 0x00003003);
imm += ((int)(nextInstr)) >> 20;

return imm;
}
@@ -434,6 +436,10 @@ private static unsafe int GetRiscV64PC(uint* pCode)
// case:EA_PTR_DSP_RELOC
// auipc reg, off-hi-20bits
// ld reg, reg, off-lo-12bits
// case:
// INS_OPTS_C
// auipc reg, off-hi-20bits
// jalr reg, reg, off-lo-12bits
private static unsafe void PutRiscV64PC(uint* pCode, long imm32)
{
// Verify that we got a valid offset
Expand All @@ -446,10 +452,12 @@ private static unsafe void PutRiscV64PC(uint* pCode, long imm32)
auipcInstr |= (uint)((imm32 + 0x800) & 0xfffff000);
*pCode = auipcInstr;

uint addiInstr = *(pCode + 1);
Debug.Assert((addiInstr & 0x707f) == 0x00000013);
addiInstr |= (uint)((doff & 0xfff) << 20);
*(pCode + 1) = addiInstr;
uint nextInstr = *(pCode + 1);
Debug.Assert((nextInstr & 0x707f) == 0x00000013 ||
(nextInstr & 0x707f) == 0x00000067 ||
(nextInstr & 0x707f) == 0x00003003);
nextInstr |= (uint)((doff & 0xfff) << 20);
*(pCode + 1) = nextInstr;

Debug.Assert(GetRiscV64PC(pCode) == imm32);
}
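For reference, here is a minimal C++ sketch (not part of the PR) of the hi-20/lo-12 split that these auipc-based sequences rely on; adding 0x800 before masking compensates for the second instruction sign-extending its 12-bit immediate:

#include <cassert>
#include <cstdint>

// Illustrative only: split a 32-bit PC-relative offset into the auipc
// immediate and the immediate consumed by the following addi/ld/jalr.
static void SplitPcRelOffset(int32_t offset, int32_t& hi20, int32_t& lo12)
{
    hi20 = (int32_t)(((uint32_t)offset + 0x800u) & 0xfffff000u); // upper 20 bits, rounded
    lo12 = offset - hi20;                                        // always in [-2048, 2047]
    assert(hi20 + lo12 == offset);
}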
4 changes: 4 additions & 0 deletions src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs
@@ -89,6 +89,10 @@ public static string GetHardwareIntrinsicId(TargetArchitecture architecture, Typ
if (potentialType.Namespace != "System.Runtime.Intrinsics.Arm")
return "";
}
else if (architecture == TargetArchitecture.RiscV64)
{
return "";
}
else
{
throw new InternalCompilerErrorException("Unknown architecture");
@@ -52,7 +52,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
// to the same alignment as __m128, which is supported by the ABI.
alignment = new LayoutInt(8);
}
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
{
// The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
// 16-byte alignment for __m256.
Expand All @@ -73,7 +73,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp
// to the same alignment as __m128, which is supported by the ABI.
alignment = new LayoutInt(8);
}
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64)
else if (defType.Context.Target.Architecture == TargetArchitecture.ARM64 || defType.Context.Target.Architecture == TargetArchitecture.RiscV64)
{
// The Procedure Call Standard for ARM 64-bit (with SVE support) defaults to
// 16-byte alignment for __m256.
8 changes: 6 additions & 2 deletions src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
@@ -412,9 +412,10 @@ private CompilationResult CompileMethodInternal(IMethodNode methodCodeNodeNeedin

if (codeSize < _code.Length)
{
if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.ARM64)
if (_compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.ARM64
&& _compilation.TypeSystemContext.Target.Architecture != TargetArchitecture.RiscV64)
{
// For xarch/arm32, the generated code is sometimes smaller than the memory allocated.
// For xarch/arm32/RiscV64, the generated code is sometimes smaller than the memory allocated.
// In that case, trim the codeBlock to the actual value.
//
// For arm64, the allocation request of `hotCodeSize` also includes the roData size
@@ -4084,6 +4085,9 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
if (targetArchitecture == TargetArchitecture.ARM && !_compilation.TypeSystemContext.Target.IsWindows)
flags.Set(CorJitFlag.CORJIT_FLAG_RELATIVE_CODE_RELOCS);

if (targetArchitecture == TargetArchitecture.RiscV64)
flags.Set(CorJitFlag.CORJIT_FLAG_FRAMED);

if (this.MethodBeingCompiled.IsUnmanagedCallersOnly)
{
// Validate UnmanagedCallersOnlyAttribute usage
@@ -29,12 +29,6 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
return (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
}

//// The SIMD Intrinsic types are meant to be handled specially and should not be passed as struct registers
if (typeDesc.IsIntrinsic)
{
throw new NotImplementedException("For RISCV64, SIMD would be implemented later");
}

MetadataType mdType = typeDesc as MetadataType;
Debug.Assert(mdType != null);

@@ -85,6 +79,16 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
{
floatFieldFlags |= (uint)StructFloatFieldInfoFlags.STRUCT_SECOND_FIELD_DOUBLE;
}

// Pass with two integer registers in `struct {int a, int b, float/double c}` cases
if (fieldIndex == 1 &&
(floatFieldFlags |
(uint)StructFloatFieldInfoFlags.STRUCT_FIRST_FIELD_SIZE_IS8 |
(uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND) ==
floatFieldFlags)
{
floatFieldFlags = (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
}
}
break;

@@ -106,6 +110,16 @@ public static uint GetRISCV64PassStructInRegisterFlags(TypeDesc typeDesc)
{
floatFieldFlags |= (uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND;
}

// Pass with two integer registers in `struct {int a, int b, float/double c}` cases
if (fieldIndex == 1 &&
(floatFieldFlags |
(uint)StructFloatFieldInfoFlags.STRUCT_FIRST_FIELD_SIZE_IS8 |
(uint)StructFloatFieldInfoFlags.STRUCT_FLOAT_FIELD_SECOND) ==
floatFieldFlags)
{
floatFieldFlags = (uint)StructFloatFieldInfoFlags.STRUCT_NO_FLOAT_FIELD;
}
}
break;
