From 9da4de9e3b6d8cda6fa9be1e128d642af4858d92 Mon Sep 17 00:00:00 2001 From: jiangfeilong Date: Tue, 9 May 2023 23:11:21 +0800 Subject: [PATCH] RISC-V: Implementation: JEP 450: Compact Object Headers (Experimental) --- src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 8 +++-- .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 34 ++++++------------ .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 13 ++++++- .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 24 ++++++++++--- src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp | 11 ++++++ .../cpu/riscv/c2_MacroAssembler_riscv.cpp | 20 +++++++++++ .../cpu/riscv/c2_MacroAssembler_riscv.hpp | 1 + .../cpu/riscv/macroAssembler_riscv.cpp | 36 +++++++++++++++++-- .../cpu/riscv/macroAssembler_riscv.hpp | 1 + src/hotspot/cpu/riscv/riscv.ad | 18 ++++++++++ src/hotspot/cpu/riscv/templateTable_riscv.cpp | 29 +++++++++++---- 11 files changed, 155 insertions(+), 40 deletions(-) diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp index bdd713bf8cd12..45f9c519251c1 100644 --- a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp @@ -33,6 +33,7 @@ #include "c1/c1_Runtime1.hpp" #include "classfile/javaClasses.hpp" #include "nativeInst_riscv.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "vmreg_riscv.inline.hpp" @@ -226,8 +227,11 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) { } void LoadKlassStub::emit_code(LIR_Assembler* ce) { - // Currently not needed. 
- Unimplemented(); + assert(UseCompactObjectHeaders, "Only use with compact object headers"); + __ bind(_entry); + Register d = _result->as_register(); + __ ld(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header))); + __ j(_continuation); } // Implementation of patching: diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp index 4f407764f2028..0d516f626797f 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp @@ -194,7 +194,10 @@ void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Registe // We don't know the array types are compatible if (basic_type != T_OBJECT) { // Simple test for basic type arrays - if (UseCompressedClassPointers) { + if (UseCompactObjectHeaders) { + __ load_nklass_compact(tmp, src, t1 /* tmp */); + __ load_nklass_compact(t0, dst, t1 /* tmp */); + } else if (UseCompressedClassPointers) { __ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes())); __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); } else { @@ -254,32 +257,17 @@ void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, c // dst type is exactly the expected type and the src type is a // subtype which we can't check or src is the same array as dst // but not necessarily exactly of type default_type. 
- Label known_ok, halt; + Label known_ok, cont, halt; __ mov_metadata(tmp, default_type->constant_encoding()); - if (UseCompressedClassPointers) { - __ encode_klass_not_null(tmp); - } if (basic_type != T_OBJECT) { - if (UseCompressedClassPointers) { - __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); - } else { - __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); - } - __ bne(tmp, t0, halt); - if (UseCompressedClassPointers) { - __ lwu(t0, Address(src, oopDesc::klass_offset_in_bytes())); - } else { - __ ld(t0, Address(src, oopDesc::klass_offset_in_bytes())); - } - __ beq(tmp, t0, known_ok); + __ cmp_klass(dst, tmp, t0, t1, cont); + __ j(halt); + + __ bind(cont); + __ cmp_klass(src, tmp, t0, t1, known_ok); } else { - if (UseCompressedClassPointers) { - __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); - } else { - __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); - } - __ beq(tmp, t0, known_ok); + __ cmp_klass(dst, tmp, t0, t1, known_ok); __ beq(src, dst, known_ok); } __ bind(halt); diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index e3ec023aef260..2ba29e423fc80 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -1524,7 +1524,18 @@ void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { } if (UseCompressedClassPointers) { - __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); + if (UseCompactObjectHeaders) { + // Check if we can take the (common) fast path, if obj is unlocked + __ ld(result, Address(obj, oopDesc::mark_offset_in_bytes())); + __ test_bit(t0, result, exact_log2(markWord::monitor_value)); + __ bnez(t0, *op->stub()->entry(), /* is_far */ true); + __ bind(*op->stub()->continuation()); + + // Shift to get proper narrow Klass*. 
+ __ srli(result, result, markWord::klass_shift); + } else { + __ lwu(result, Address(obj, oopDesc::klass_offset_in_bytes())); + } __ decode_klass_not_null(result); } else { __ ld(result, Address(obj, oopDesc::klass_offset_in_bytes())); diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 2961b1a91ceab..a3fa0e86cc138 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -172,16 +172,30 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); - if (UseCompressedClassPointers) { // Take care not to kill klass - encode_klass_not_null(tmp1, klass, tmp2); - sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes())); + if (UseCompactObjectHeaders) { + ld(tmp1, Address(klass, Klass::prototype_header_offset())); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); } else { - sd(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + // This assumes that all prototype bits fit in an int32_t + mv(tmp1, (int32_t)(intptr_t)markWord::prototype().value()); + sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(tmp1, klass, tmp2); + sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + sd(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } } if (len->is_valid()) { sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); - } else if (UseCompressedClassPointers) { + if (UseCompactObjectHeaders) { + // With compact headers, arrays have a 32bit alignment gap after the length. 
+ assert(arrayOopDesc::length_offset_in_bytes() == 8, "check length offset"); + sw(zr, Address(obj, arrayOopDesc::length_offset_in_bytes() + sizeof(jint))); + } + } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) { store_klass_gap(obj, zr); } } diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp index 7995750aba96b..b774fa869a2c7 100644 --- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp @@ -99,4 +99,15 @@ void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { __ j(continuation()); } +int C2LoadNKlassStub::max_size() const { + return 8; +} + +void C2LoadNKlassStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register d = dst(); + __ ld(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header))); + __ j(continuation()); +} + #undef __ diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp index 9670bc987a304..aaa8c4ab92259 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp @@ -1958,6 +1958,26 @@ void C2_MacroAssembler::expand_bits_l_v(Register dst, Register src, Register mas expand_bits_v(dst, src, mask, /* is_long */ true); } +void C2_MacroAssembler::load_nklass_compact(Register dst, Register obj, int disp) { + C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst); + Compile::current()->output()->add_stub(stub); + + // Note: Don't clobber obj anywhere in that method! + + // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract + // obj-start, so that we can load from the object's mark-word instead. Usually the address + // comes as obj-start in obj and klass_offset_in_bytes in disp. 
However, sometimes C2 + // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and + // then passes that register as obj and 0 in disp. The following code extracts the base + // and offset to load the mark-word. + int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes(); + ld(dst, Address(obj, offset)); + test_bit(t0, dst, exact_log2(markWord::monitor_value)); + bnez(t0, stub->entry(), /* is_far */ true); + bind(stub->continuation()); + srli(dst, dst, markWord::klass_shift); +} + void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { Label loop; diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp index 9fe4dc002c992..d5b52ae953ae8 100644 --- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp @@ -176,6 +176,7 @@ void signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen); + void load_nklass_compact(Register dst, Register src, int disp); // intrinsic methods implemented by rvv instructions diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 96e07319e843f..240633e0e0b7d 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -2055,10 +2055,15 @@ void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, R sd(tmp1, adr); } +// Compare object klass with 'trial_klass' and if equal branch to label 'L'. Fall through otherwise. 
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L) { assert_different_registers(oop, trial_klass, tmp1, tmp2); if (UseCompressedClassPointers) { - lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); + if (UseCompactObjectHeaders) { + load_nklass_compact(tmp1, oop, tmp2); + } else { + lwu(tmp1, Address(oop, oopDesc::klass_offset_in_bytes())); + } if (CompressedKlassPointers::base() == nullptr) { slli(tmp1, tmp1, CompressedKlassPointers::shift()); beq(trial_klass, tmp1, L); @@ -2249,10 +2254,35 @@ void MacroAssembler::encode_heap_oop(Register d, Register s) { } } +// Loads the obj's narrow Klass* (still encoded; callers decode it if needed) from the mark word into dst. +// Preserves src when it is a different register from dst; clobbers tmp. +void MacroAssembler::load_nklass_compact(Register dst, Register src, Register tmp) { + assert(UseCompressedClassPointers, "expects UseCompressedClassPointers"); + assert_different_registers(dst, tmp); + assert_different_registers(src, tmp); + + Label fast; + + // Check if we can take the (common) fast path, if obj is unlocked. + ld(dst, Address(src, oopDesc::mark_offset_in_bytes())); + test_bit(tmp, dst, exact_log2(markWord::monitor_value)); + beqz(tmp, fast); + + // Fetch displaced header + ld(dst, Address(dst, OM_OFFSET_NO_MONITOR_VALUE_TAG(header))); + + // Fast-path: shift to get the narrow Klass* (no decoding is done here). 
+ bind(fast); + srli(dst, dst, markWord::klass_shift); +} + void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { assert_different_registers(dst, tmp); assert_different_registers(src, tmp); - if (UseCompressedClassPointers) { + if (UseCompactObjectHeaders) { + load_nklass_compact(dst, src, tmp); + decode_klass_not_null(dst, tmp); + } else if (UseCompressedClassPointers) { lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); decode_klass_not_null(dst, tmp); } else { @@ -2263,6 +2293,7 @@ void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { // FIXME: Should this be a store release? concurrent gcs assumes // klass length is valid if klass field is not null. + assert(!UseCompactObjectHeaders, "not with compact headers"); if (UseCompressedClassPointers) { encode_klass_not_null(src, tmp); sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); @@ -2272,6 +2303,7 @@ void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { } void MacroAssembler::store_klass_gap(Register dst, Register src) { + assert(!UseCompactObjectHeaders, "not with compact headers"); if (UseCompressedClassPointers) { // Store to klass gap in destination sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index 63cfb22855180..5a369cc5185ca 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -194,6 +194,7 @@ class MacroAssembler: public Assembler { Address src, Register tmp1, Register tmp2); void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); + void load_nklass_compact(Register dst, Register src, Register tmp = t0); void load_klass(Register dst, Register src, Register tmp = t0); void 
store_klass(Register dst, Register src, Register tmp = t0); void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L); diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 10a80cd094024..065efca55a6aa 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -4801,6 +4801,7 @@ instruct loadKlass(iRegPNoSp dst, memory mem) // Load Narrow Klass Pointer instruct loadNKlass(iRegNNoSp dst, memory mem) %{ + predicate(!UseCompactObjectHeaders); match(Set dst (LoadNKlass mem)); ins_cost(LOAD_COST); @@ -4813,6 +4814,23 @@ instruct loadNKlass(iRegNNoSp dst, memory mem) ins_pipe(iload_reg_mem); %} +instruct loadNKlassCompactHeaders(iRegNNoSp dst, memory mem) +%{ + predicate(UseCompactObjectHeaders); + match(Set dst (LoadNKlass mem)); + effect(TEMP_DEF dst); + + ins_cost(LOAD_COST + ALU_COST * 3 + BRANCH_COST); + format %{ "lwu $dst, $mem\t# compressed class ptr, #@loadNKlassCompactHeaders" %} + + ins_encode %{ + assert($mem$$index$$Register == noreg, "expect no index"); + __ load_nklass_compact($dst$$Register, $mem$$base$$Register, $$mem$$disp); + %} + + ins_pipe(pipe_slow); +%} + // Load Float instruct loadF(fRegF dst, memory mem) %{ diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp index 58f57f32b2f65..8221802ef1856 100644 --- a/src/hotspot/cpu/riscv/templateTable_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp @@ -3576,12 +3576,22 @@ void TemplateTable::_new() { // The object is initialized before the header. If the object size is // zero, go directly to the header initialization. 
- __ sub(x13, x13, sizeof(oopDesc)); + if (UseCompactObjectHeaders) { // compact headers: field area starts at base_offset_in_bytes() instead of sizeof(oopDesc) + assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned"); + __ sub(x13, x13, oopDesc::base_offset_in_bytes()); + } else { + __ sub(x13, x13, sizeof(oopDesc)); + } __ beqz(x13, initialize_header); // Initialize object fields { - __ add(x12, x10, sizeof(oopDesc)); + if (UseCompactObjectHeaders) { + assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned"); + __ add(x12, x10, oopDesc::base_offset_in_bytes()); + } else { + __ add(x12, x10, sizeof(oopDesc)); + } Label loop; __ bind(loop); __ sd(zr, Address(x12)); @@ -3590,12 +3600,17 @@ void TemplateTable::_new() { __ bnez(x13, loop); } - // initialize object hader only. + // initialize object header only. __ bind(initialize_header); - __ mv(t0, (intptr_t)markWord::prototype().value()); - __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(x10, zr); // zero klass gap for compressed oops - __ store_klass(x10, x14); // store klass last + if (UseCompactObjectHeaders) { // compact headers: mark word is copied from the klass (x14) prototype header; no klass/gap stores + __ ld(t0, Address(x14, Klass::prototype_header_offset())); + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + } else { + __ mv(t0, (intptr_t)markWord::prototype().value()); + __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); + __ store_klass_gap(x10, zr); // zero klass gap for compressed oops + __ store_klass(x10, x14); // store klass last + } { SkipIfEqual skip(_masm, &DTraceAllocProbes, false);