Skip to content

Commit

Permalink
Merge branch 'upstream-master'
Browse files: browse the repository at this point in the history
  • Loading branch information
Datadog Syncup Service committed Oct 2, 2024
2 parents 3e39a4f + 5e98007 commit a02df76
Show file tree
Hide file tree
Showing 116 changed files with 2,362 additions and 694 deletions.
62 changes: 27 additions & 35 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,10 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
Register table0, Register table1, Register table2, Register table3,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
Label L_by16_loop, L_vector_entry, L_unroll_loop, L_unroll_loop_entry, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit;
Label L_vector_entry,
L_unroll_loop,
L_by4_loop_entry, L_by4_loop,
L_by1_loop, L_exit;

const int64_t single_table_size = 256;
const int64_t unroll = 16;
Expand All @@ -1585,21 +1588,17 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
bge(len, tmp1, L_vector_entry);
}
#endif // COMPILER2
subw(len, len, unroll_words);
bge(len, zr, L_unroll_loop_entry);

addiw(len, len, unroll_words-4);
bge(len, zr, L_by4_loop);
addiw(len, len, 4);
bgt(len, zr, L_by1_loop);
j(L_exit);
mv(tmp1, unroll_words);
blt(len, tmp1, L_by4_loop_entry);

const Register loop_buf_end = tmp3;

align(CodeEntryAlignment);
bind(L_unroll_loop_entry);
const Register buf_end = tmp3;
add(buf_end, buf, len); // buf_end will be used as endpoint for loop below
// Entry for L_unroll_loop
add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below
andi(len, len, unroll_words-1); // len = (len % unroll_words)
sub(len, len, unroll_words); // Length after all iterations
sub(loop_buf_end, loop_buf_end, len);
bind(L_unroll_loop);
for (int i = 0; i < unroll; i++) {
ld(tmp1, Address(buf, i*wordSize));
Expand All @@ -1608,57 +1607,50 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
}

addi(buf, buf, unroll_words);
ble(buf, buf_end, L_unroll_loop);
addiw(len, len, unroll_words-4);
bge(len, zr, L_by4_loop);
addiw(len, len, 4);
bgt(len, zr, L_by1_loop);
j(L_exit);

blt(buf, loop_buf_end, L_unroll_loop);

bind(L_by4_loop_entry);
mv(tmp1, 4);
blt(len, tmp1, L_by1_loop);
add(loop_buf_end, buf, len); // loop_buf_end will be used as endpoint for loop below
andi(len, len, 3);
sub(loop_buf_end, loop_buf_end, len);
bind(L_by4_loop);
lwu(tmp1, Address(buf));
update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
subw(len, len, 4);
addi(buf, buf, 4);
bge(len, zr, L_by4_loop);
addiw(len, len, 4);
ble(len, zr, L_exit);
blt(buf, loop_buf_end, L_by4_loop);

bind(L_by1_loop);
beqz(len, L_exit);

subw(len, len, 1);
lwu(tmp1, Address(buf));
andi(tmp2, tmp1, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
ble(len, zr, L_exit);
beqz(len, L_exit);

subw(len, len, 1);
srli(tmp2, tmp1, 8);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
ble(len, zr, L_exit);
beqz(len, L_exit);

subw(len, len, 1);
srli(tmp2, tmp1, 16);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);
ble(len, zr, L_exit);

srli(tmp2, tmp1, 24);
andi(tmp2, tmp2, right_8_bits);
update_byte_crc32(crc, tmp2, table0);

#ifdef COMPILER2
// put vector code here, otherwise "offset is too large" error occurs.
if (UseRVV) {
j(L_exit); // only need to jump exit when UseRVV == true, it's a jump from end of block `L_by1_loop`.
// only need to jump exit when UseRVV == true, it's a jump from end of block `L_by1_loop`.
j(L_exit);

bind(L_vector_entry);
vector_update_crc32(crc, buf, len, tmp1, tmp2, tmp3, tmp4, tmp6, table0, table3);

addiw(len, len, -4);
bge(len, zr, L_by4_loop);
addiw(len, len, 4);
bgt(len, zr, L_by1_loop);
bgtz(len, L_by4_loop_entry);
}
#endif // COMPILER2

Expand Down
17 changes: 4 additions & 13 deletions src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6092,26 +6092,17 @@ static const int64_t right_3_bits = right_n_bits(3);

address start = __ pc();

// input parameters
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
const Register len = c_rarg2; // length
const Register table0 = c_rarg3; // crc_table address
const Register table1 = c_rarg4;
const Register table2 = c_rarg5;
const Register table3 = c_rarg6;

const Register tmp1 = c_rarg7;
const Register tmp2 = t2;
const Register tmp3 = x28; // t3
const Register tmp4 = x29; // t4
const Register tmp5 = x30; // t5
const Register tmp6 = x31; // t6

BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame

__ kernel_crc32(crc, buf, len, table0, table1, table2,
table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
__ kernel_crc32(crc, buf, len,
c_rarg3, c_rarg4, c_rarg5, c_rarg6, // tmp's for tables
c_rarg7, t2, x28, x29, x30, x31); // misc tmps

__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret();
Expand Down
9 changes: 0 additions & 9 deletions src/hotspot/cpu/riscv/templateTable_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
__ la(temp_reg, Address(temp_reg, in_bytes(ResolvedFieldEntry::put_code_offset())));
}
// Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in()
__ membar(MacroAssembler::AnyAny);
__ lbu(temp_reg, Address(temp_reg, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(bc_reg, bc);
Expand Down Expand Up @@ -320,7 +319,6 @@ void TemplateTable::ldc(LdcType type) {
// get type
__ addi(x13, x11, tags_offset);
__ add(x13, x10, x13);
__ membar(MacroAssembler::AnyAny);
__ lbu(x13, Address(x13, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);

Expand Down Expand Up @@ -2189,7 +2187,6 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no,
break;
}
// Load-acquire the bytecode to match store-release in InterpreterRuntime
__ membar(MacroAssembler::AnyAny);
__ lbu(temp, Address(temp, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);

Expand Down Expand Up @@ -2241,7 +2238,6 @@ void TemplateTable::resolve_cache_and_index_for_field(int byte_no,
__ la(temp, Address(Rcache, in_bytes(ResolvedFieldEntry::put_code_offset())));
}
// Load-acquire the bytecode to match store-release in ResolvedFieldEntry::fill_in()
__ membar(MacroAssembler::AnyAny);
__ lbu(temp, Address(temp, 0));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ mv(t0, (int) code); // have we resolved this bytecode?
Expand Down Expand Up @@ -2403,7 +2399,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) {
Label resolved;

__ load_resolved_indy_entry(cache, index);
__ membar(MacroAssembler::AnyAny);
__ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset())));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);

Expand All @@ -2418,7 +2413,6 @@ void TemplateTable::load_invokedynamic_entry(Register method) {
__ call_VM(noreg, entry, method);
// Update registers with resolved info
__ load_resolved_indy_entry(cache, index);
__ membar(MacroAssembler::AnyAny);
__ ld(method, Address(cache, in_bytes(ResolvedIndyEntry::method_offset())));
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);

Expand Down Expand Up @@ -3533,7 +3527,6 @@ void TemplateTable::_new() {
const int tags_offset = Array<u1>::base_offset_in_bytes();
__ add(t0, x10, x13);
__ la(t0, Address(t0, tags_offset));
__ membar(MacroAssembler::AnyAny);
__ lbu(t0, t0);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t1, t0, (u1)JVM_CONSTANT_Class);
Expand Down Expand Up @@ -3651,7 +3644,6 @@ void TemplateTable::checkcast() {
// See if bytecode has already been quicked
__ add(t0, x13, Array<u1>::base_offset_in_bytes());
__ add(x11, t0, x9);
__ membar(MacroAssembler::AnyAny);
__ lbu(x11, x11);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t0, x11, (u1)JVM_CONSTANT_Class);
Expand Down Expand Up @@ -3707,7 +3699,6 @@ void TemplateTable::instanceof() {
// See if bytecode has already been quicked
__ add(t0, x13, Array<u1>::base_offset_in_bytes());
__ add(x11, t0, x9);
__ membar(MacroAssembler::AnyAny);
__ lbu(x11, x11);
__ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
__ sub(t0, x11, (u1)JVM_CONSTANT_Class);
Expand Down
5 changes: 0 additions & 5 deletions src/hotspot/cpu/zero/vm_version_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}

if ((LockingMode != LM_LEGACY) && (LockingMode != LM_MONITOR)) {
warning("Unsupported locking mode for this CPU.");
FLAG_SET_DEFAULT(LockingMode, LM_LEGACY);
}

// Enable error context decoding on known platforms
#if defined(IA32) || defined(AMD64) || defined(ARM) || \
defined(AARCH64) || defined(PPC) || defined(RISCV) || \
Expand Down
34 changes: 19 additions & 15 deletions src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,26 +485,30 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {

// Unlock if necessary
if (monitor) {
BasicLock *lock = monitor->lock();
markWord header = lock->displaced_header();
oop rcvr = monitor->obj();
monitor->set_obj(nullptr);

bool dec_monitor_count = true;
if (header.to_pointer() != nullptr) {
markWord old_header = markWord::encode(lock);
if (rcvr->cas_set_mark(header, old_header) != old_header) {
monitor->set_obj(rcvr);
dec_monitor_count = false;
InterpreterRuntime::monitorexit(monitor);
bool success = false;
if (LockingMode == LM_LEGACY) {
BasicLock* lock = monitor->lock();
oop rcvr = monitor->obj();
monitor->set_obj(nullptr);
success = true;
markWord header = lock->displaced_header();
if (header.to_pointer() != nullptr) { // Check for recursive lock
markWord old_header = markWord::encode(lock);
if (rcvr->cas_set_mark(header, old_header) != old_header) {
monitor->set_obj(rcvr);
success = false;
}
}
if (success) {
THREAD->dec_held_monitor_count();
}
}
if (dec_monitor_count) {
THREAD->dec_held_monitor_count();
if (!success) {
InterpreterRuntime::monitorexit(monitor);
}
}

unwind_and_return:
unwind_and_return:

// Unwind the current activation
thread->pop_zero_frame();
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/adlc/formssel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4357,7 +4357,7 @@ bool MatchRule::is_vector() const {
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
"LoadVectorGather", "StoreVectorScatter", "LoadVectorGatherMasked", "StoreVectorScatterMasked",
"VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
"VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
"VectorRearrange", "VectorLoadShuffle", "VectorLoadConst",
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorCastF2HF", "VectorCastHF2F",
"VectorUCastB2X", "VectorUCastS2X", "VectorUCastI2X",
Expand Down
5 changes: 4 additions & 1 deletion src/hotspot/share/ci/ciEnv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1616,7 +1616,10 @@ void ciEnv::dump_replay_data_helper(outputStream* out) {
for (int i = 0; i < objects->length(); i++) {
objects->at(i)->dump_replay_data(out);
}
dump_compile_data(out);

if (this->task() != nullptr) {
dump_compile_data(out);
}
out->flush();
}

Expand Down
21 changes: 21 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,15 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_shuffle_to_vector_name, "shuffleToVector") \
\
do_intrinsic(_VectorWrapShuffleIndexes, jdk_internal_vm_vector_VectorSupport, vector_wrap_shuffle_indexes_name, \
vector_wrap_shuffle_indexes_sig, F_S) \
do_signature(vector_wrap_shuffle_indexes_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;" \
"ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)" \
"Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;") \
do_name(vector_wrap_shuffle_indexes_name, "wrapShuffleIndexes") \
\
do_intrinsic(_VectorLoadOp, jdk_internal_vm_vector_VectorSupport, vector_load_op_name, vector_load_op_sig, F_S) \
do_signature(vector_load_op_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
Expand Down Expand Up @@ -1129,6 +1138,18 @@ class methodHandle;
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_rearrange_name, "rearrangeOp") \
\
do_intrinsic(_VectorSelectFrom, jdk_internal_vm_vector_VectorSupport, vector_select_from_name, vector_select_from_sig, F_S) \
do_signature(vector_select_from_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"Ljava/lang/Class;" \
"I" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorMask;" \
"Ljdk/internal/vm/vector/VectorSupport$VectorSelectFromOp;)" \
"Ljdk/internal/vm/vector/VectorSupport$Vector;") \
do_name(vector_select_from_name, "selectFromOp") \
\
do_intrinsic(_VectorExtract, jdk_internal_vm_vector_VectorSupport, vector_extract_name, vector_extract_sig, F_S) \
do_signature(vector_extract_sig, "(Ljava/lang/Class;" \
"Ljava/lang/Class;" \
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_VectorFromBitsCoerced:
case vmIntrinsics::_VectorShuffleIota:
case vmIntrinsics::_VectorShuffleToVector:
case vmIntrinsics::_VectorWrapShuffleIndexes:
case vmIntrinsics::_VectorLoadOp:
case vmIntrinsics::_VectorLoadMaskedOp:
case vmIntrinsics::_VectorStoreOp:
Expand All @@ -821,6 +822,7 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_VectorTest:
case vmIntrinsics::_VectorBlend:
case vmIntrinsics::_VectorRearrange:
case vmIntrinsics::_VectorSelectFrom:
case vmIntrinsics::_VectorCompare:
case vmIntrinsics::_VectorBroadcastInt:
case vmIntrinsics::_VectorConvert:
Expand Down
9 changes: 7 additions & 2 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_mask_operation();
case vmIntrinsics::_VectorShuffleToVector:
return inline_vector_shuffle_to_vector();
case vmIntrinsics::_VectorWrapShuffleIndexes:
return inline_vector_wrap_shuffle_indexes();
case vmIntrinsics::_VectorLoadOp:
return inline_vector_mem_operation(/*is_store=*/false);
case vmIntrinsics::_VectorLoadMaskedOp:
Expand All @@ -737,6 +739,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
return inline_vector_blend();
case vmIntrinsics::_VectorRearrange:
return inline_vector_rearrange();
case vmIntrinsics::_VectorSelectFrom:
return inline_vector_select_from();
case vmIntrinsics::_VectorCompare:
return inline_vector_compare();
case vmIntrinsics::_VectorBroadcastInt:
Expand Down Expand Up @@ -2048,7 +2052,7 @@ LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset, BasicType type)
if (base_type == nullptr) {
// Unknown type.
return Type::AnyPtr;
} else if (base_type == TypePtr::NULL_PTR) {
} else if (_gvn.type(base->uncast()) == TypePtr::NULL_PTR) {
// Since this is a null+long form, we have to switch to a rawptr.
base = _gvn.transform(new CastX2PNode(offset));
offset = MakeConX(0);
Expand Down Expand Up @@ -2366,8 +2370,9 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
SafePointNode* old_map = clone_map();

Node* adr = make_unsafe_address(base, offset, type, kind == Relaxed);
assert(!stopped(), "Inlining of unsafe access failed: address construction stopped unexpectedly");

if (_gvn.type(base)->isa_ptr() == TypePtr::NULL_PTR) {
if (_gvn.type(base->uncast())->isa_ptr() == TypePtr::NULL_PTR) {
if (type != T_OBJECT) {
decorators |= IN_NATIVE; // off-heap primitive access
} else {
Expand Down
Loading

0 comments on commit a02df76

Please sign in to comment.