Skip to content

Commit

Permalink
Merge branch 'upstream-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
Datadog Syncup Service committed Nov 10, 2023
2 parents ec6a1df + c9657ca commit a4723ad
Show file tree
Hide file tree
Showing 53 changed files with 3,587 additions and 2,362 deletions.
19 changes: 7 additions & 12 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1658,25 +1658,20 @@ void C2_MacroAssembler::round_double_mode(FloatRegister dst, FloatRegister src,
// otherwise return +/- 1.0 using sign of input.
// one - gives us a floating-point 1.0 (got from matching rule)
// bool is_double - specifies single or double precision operations will be used.
void C2_MacroAssembler::signum_fp(FloatRegister dst, FloatRegister src, FloatRegister one, bool is_double) {
Register tmp1 = t0;

void C2_MacroAssembler::signum_fp(FloatRegister dst, FloatRegister one, bool is_double) {
Label done;

is_double ? fclass_d(tmp1, src)
: fclass_s(tmp1, src);

is_double ? fmv_d(dst, src)
: fmv_s(dst, src);
is_double ? fclass_d(t0, dst)
: fclass_s(t0, dst);

// check if input is -0, +0, signaling NaN or quiet NaN
andi(tmp1, tmp1, fclass_mask::zero | fclass_mask::nan);
andi(t0, t0, fclass_mask::zero | fclass_mask::nan);

bnez(tmp1, done);
bnez(t0, done);

// use floating-point 1.0 with a sign of input
is_double ? fsgnj_d(dst, one, src)
: fsgnj_s(dst, one, src);
is_double ? fsgnj_d(dst, one, dst)
: fsgnj_s(dst, one, dst);

bind(done);
}
Expand Down
3 changes: 1 addition & 2 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@
void round_double_mode(FloatRegister dst, FloatRegister src, int round_mode,
Register tmp1, Register tmp2, Register tmp3);

void signum_fp(FloatRegister dst, FloatRegister src, FloatRegister one,
bool is_double);
void signum_fp(FloatRegister dst, FloatRegister one, bool is_double);

// intrinsic methods implemented by rvv instructions
void string_equals_v(Register r1, Register r2,
Expand Down
18 changes: 8 additions & 10 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -7601,22 +7601,20 @@ instruct copySignF_reg(fRegF dst, fRegF src1, fRegF src2) %{
ins_pipe(fp_dop_reg_reg_s);
%}

instruct signumD_reg(fRegD dst, fRegD src, immD zero, fRegD one) %{
match(Set dst (SignumD src (Binary zero one)));
format %{ "signumD $dst, $src" %}
instruct signumD_reg(fRegD dst, immD zero, fRegD one) %{
match(Set dst (SignumD dst (Binary zero one)));
format %{ "signumD $dst, $dst" %}
ins_encode %{
__ signum_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($one$$reg), true /* is_double */);
__ signum_fp(as_FloatRegister($dst$$reg), as_FloatRegister($one$$reg), true /* is_double */);
%}
ins_pipe(pipe_class_default);
%}

instruct signumF_reg(fRegF dst, fRegF src, immF zero, fRegF one) %{
match(Set dst (SignumF src (Binary zero one)));
format %{ "signumF $dst, $src" %}
instruct signumF_reg(fRegF dst, immF zero, fRegF one) %{
match(Set dst (SignumF dst (Binary zero one)));
format %{ "signumF $dst, $dst" %}
ins_encode %{
__ signum_fp(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($one$$reg), false /* is_double */);
__ signum_fp(as_FloatRegister($dst$$reg), as_FloatRegister($one$$reg), false /* is_double */);
%}
ins_pipe(pipe_class_default);
%}
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/cpu/x86/globals_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, UseLibmIntrinsic, true, DIAGNOSTIC, \
"Use Libm Intrinsics") \
\
/* Autodetected, see vm_version_x86.cpp */ \
product(bool, EnableX86ECoreOpts, false, DIAGNOSTIC, \
"Perform Ecore Optimization") \
\
/* Minimum array size in bytes to use AVX512 intrinsics */ \
/* for copy, inflate and fill which don't bail out early based on any */ \
/* condition. When this value is set to zero compare operations like */ \
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3965,8 +3965,8 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0x0D7F;
// Round to nearest, 24-bit mode, exceptions masked
StubRoutines::x86::_fpu_cntrl_wrd_24 = 0x007F;
// Round to nearest, 64-bit mode, exceptions masked
StubRoutines::x86::_mxcsr_std = 0x1F80;
// Round to nearest, 64-bit mode, exceptions masked, flags specialized
StubRoutines::x86::_mxcsr_std = EnableX86ECoreOpts ? 0x1FBF : 0x1F80;
// Note: the following two constants are 80-bit values
// layout is critical for correct loading by FPU.
// Bias for strict fp multiply/divide
Expand Down
8 changes: 4 additions & 4 deletions src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3907,10 +3907,10 @@ address StubGenerator::generate_upcall_stub_exception_handler() {
}

void StubGenerator::create_control_words() {
// Round to nearest, 64-bit mode, exceptions masked
StubRoutines::x86::_mxcsr_std = 0x1F80;
// Round to zero, 64-bit mode, exceptions masked
StubRoutines::x86::_mxcsr_rz = 0x7F80;
// Round to nearest, 64-bit mode, exceptions masked, flags specialized
StubRoutines::x86::_mxcsr_std = EnableX86ECoreOpts ? 0x1FBF : 0x1F80;
// Round to zero, 64-bit mode, exceptions masked, flags specialized
StubRoutines::x86::_mxcsr_rz = EnableX86ECoreOpts ? 0x7FBF : 0x7F80;
}

// Initialization
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,12 @@ void VM_Version::get_processor_features() {
}
#endif

// Check if processor has Intel Ecore
if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
(_model == 0x97 || _model == 0xAC || _model == 0xAF)) {
FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
}

if (UseSSE < 4) {
_features &= ~CPU_SSE4_1;
_features &= ~CPU_SSE4_2;
Expand Down
6 changes: 3 additions & 3 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -7423,7 +7423,7 @@ instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec
format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
__ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
$tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
Expand All @@ -7440,7 +7440,7 @@ instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kR
format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
__ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
$tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
Expand All @@ -7455,7 +7455,7 @@ instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg
format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
ins_encode %{
int vlen_enc = vector_length_encoding(this);
InternalAddress new_mxcsr = $constantaddress((jint)0x3F80);
InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
__ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
$tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/share/logging/logConfiguration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,12 @@ bool LogConfiguration::parse_log_arguments(const char* outputstr,
errstream->print_cr("Invalid output index '%s'", outputstr);
return false;
}
} else if (strcmp(outputstr, StdoutLog->name()) == 0) { // stdout
idx = 0;
assert(find_output(outputstr) == idx, "sanity check");
} else if (strcmp(outputstr, StderrLog->name()) == 0) { // stderr
idx = 1;
assert(find_output(outputstr) == idx, "sanity check");
} else { // Output specified using name
// Normalize the name, stripping quotes and ensures it includes type prefix
size_t len = strlen(outputstr) + strlen(implicit_output_prefix) + 1;
Expand Down
5 changes: 4 additions & 1 deletion src/hotspot/share/opto/c2_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,10 @@
"Try to simplify allocation merges before Scalar Replacement") \
\
notproduct(bool, TraceReduceAllocationMerges, false, \
"Trace decision for simplifying allocation merges.") \
"Trace decision for simplifying allocation merges.") \
\
develop(bool, VerifyReduceAllocationMerges, true, \
"Verify reduce allocation merges in escape analysis") \
\
product(bool, DoEscapeAnalysis, true, \
"Perform escape analysis") \
Expand Down
3 changes: 0 additions & 3 deletions src/hotspot/share/opto/compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,8 +812,6 @@ Compile::Compile( ciEnv* ci_env, ciMethod* target, int osr_bci,

if (failing()) return;

print_method(PHASE_BEFORE_REMOVEUSELESS, 3);

// Remove clutter produced by parsing.
if (!failing()) {
ResourceMark rm;
Expand Down Expand Up @@ -2001,7 +1999,6 @@ void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
if (_boxing_late_inlines.length() > 0) {
assert(has_boxed_value(), "inconsistent");

PhaseGVN* gvn = initial_gvn();
set_inlining_incrementally(true);

igvn_worklist()->ensure_empty(); // should be done with igvn
Expand Down
132 changes: 89 additions & 43 deletions src/hotspot/share/opto/escape.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,18 @@ bool ConnectionGraph::compute_escape() {
assert(ptn->escape_state() == PointsToNode::NoEscape && ptn->scalar_replaceable(), "sanity");
}
}

if (VerifyReduceAllocationMerges) {
for (uint i = 0; i < reducible_merges.size(); i++ ) {
Node* n = reducible_merges.at(i);
if (!can_reduce_phi(n->as_Phi())) {
TraceReduceAllocationMerges = true;
n->dump(2);
n->dump(-2);
assert(can_reduce_phi(n->as_Phi()), "Sanity: previous reducible Phi is no longer reducible before SUT.");
}
}
}
#endif

// 5. Separate memory graph for scalar replaceable allocations.
Expand Down Expand Up @@ -530,14 +542,21 @@ void ConnectionGraph::reduce_phi_on_field_access(PhiNode* ophi, GrowableArray<No
// though the load doesn't have an unique instance type.
bool ignore_missing_instance_id = true;

#ifdef ASSERT
if (VerifyReduceAllocationMerges && !can_reduce_phi(ophi)) {
TraceReduceAllocationMerges = true;
ophi->dump(2);
ophi->dump(-2);
assert(can_reduce_phi(ophi), "Sanity: previous reducible Phi is no longer reducible inside reduce_phi_on_field_access.");
}
#endif

// Iterate over Phi outputs looking for an AddP
for (int j = ophi->outcnt()-1; j >= 0;) {
Node* previous_addp = ophi->raw_out(j);
uint num_edges = 1;
if (previous_addp->is_AddP()) {
// All AddPs are present in the connection graph
FieldNode* fn = ptnode_adr(previous_addp->_idx)->as_Field();
num_edges = previous_addp->in(AddPNode::Address) == previous_addp->in(AddPNode::Base) ? 2 : 1;

// Iterate over AddP looking for a Load
for (int k = previous_addp->outcnt()-1; k >= 0;) {
Expand All @@ -547,55 +566,66 @@ void ConnectionGraph::reduce_phi_on_field_access(PhiNode* ophi, GrowableArray<No
_igvn->replace_node(previous_load, data_phi);
assert(data_phi != nullptr, "Output of split_through_phi is null.");
assert(data_phi != previous_load, "Output of split_through_phi is same as input.");
assert(data_phi->is_Phi(), "Return of split_through_phi should be a Phi.");

// Push the newly created AddP on alloc_worklist and patch
// the connection graph. Note that the changes in the CG below
// won't affect the ES of objects since the new nodes have the
// same status as the old ones.
if (data_phi != nullptr && data_phi->is_Phi()) {
for (uint i = 1; i < data_phi->req(); i++) {
Node* new_load = data_phi->in(i);
if (new_load->is_Load()) {
Node* new_addp = new_load->in(MemNode::Address);
Node* base = get_addp_base(new_addp);

// The base might not be something that we can create an unique
// type for. If that's the case we are done with that input.
PointsToNode* jobj_ptn = unique_java_object(base);
if (jobj_ptn == nullptr || !jobj_ptn->scalar_replaceable()) {
continue;
}
for (uint i = 1; i < data_phi->req(); i++) {
Node* new_load = data_phi->in(i);
if (new_load->is_Load()) {
Node* new_addp = new_load->in(MemNode::Address);
Node* base = get_addp_base(new_addp);

// The base might not be something that we can create an unique
// type for. If that's the case we are done with that input.
PointsToNode* jobj_ptn = unique_java_object(base);
if (jobj_ptn == nullptr || !jobj_ptn->scalar_replaceable()) {
continue;
}

// Push to alloc_worklist since the base has an unique_type
alloc_worklist.append_if_missing(new_addp);

// Now let's add the node to the connection graph
_nodes.at_grow(new_addp->_idx, nullptr);
add_field(new_addp, fn->escape_state(), fn->offset());
add_base(ptnode_adr(new_addp->_idx)->as_Field(), ptnode_adr(base->_idx));

// If the load doesn't load an object then it won't be
// part of the connection graph
PointsToNode* curr_load_ptn = ptnode_adr(previous_load->_idx);
if (curr_load_ptn != nullptr) {
_nodes.at_grow(new_load->_idx, nullptr);
add_local_var(new_load, curr_load_ptn->escape_state());
add_edge(ptnode_adr(new_load->_idx), ptnode_adr(new_addp->_idx)->as_Field());
}
// Push to alloc_worklist since the base has an unique_type
alloc_worklist.append_if_missing(new_addp);

// Now let's add the node to the connection graph
_nodes.at_grow(new_addp->_idx, nullptr);
add_field(new_addp, fn->escape_state(), fn->offset());
add_base(ptnode_adr(new_addp->_idx)->as_Field(), ptnode_adr(base->_idx));

// If the load doesn't load an object then it won't be
// part of the connection graph
PointsToNode* curr_load_ptn = ptnode_adr(previous_load->_idx);
if (curr_load_ptn != nullptr) {
_nodes.at_grow(new_load->_idx, nullptr);
add_local_var(new_load, curr_load_ptn->escape_state());
add_edge(ptnode_adr(new_load->_idx), ptnode_adr(new_addp->_idx)->as_Field());
}
}
}
}
--k;
k = MIN2(k, (int)previous_addp->outcnt()-1);
k = MIN2(--k, (int)previous_addp->outcnt()-1);
}

// Remove the old AddP from the processing list because it's dead now
alloc_worklist.remove_if_existing(previous_addp);
_igvn->remove_globally_dead_node(previous_addp);
}
j -= num_edges;
j = MIN2(j, (int)ophi->outcnt()-1);
j = MIN2(--j, (int)ophi->outcnt()-1);
}

#ifdef ASSERT
if (VerifyReduceAllocationMerges) {
for (uint j = 0; j < ophi->outcnt(); j++) {
Node* use = ophi->raw_out(j);
if (!use->is_SafePoint()) {
ophi->dump(2);
ophi->dump(-2);
assert(false, "Should be a SafePoint.");
}
}
}
#endif
}

// This method will create a SafePointScalarObjectNode for each combination of
Expand Down Expand Up @@ -3607,6 +3637,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
GrowableArray<ArrayCopyNode*> &arraycopy_worklist,
GrowableArray<MergeMemNode*> &mergemem_worklist,
Unique_Node_List &reducible_merges) {
DEBUG_ONLY(Unique_Node_List reduced_merges;)
GrowableArray<Node *> memnode_worklist;
GrowableArray<PhiNode *> orig_phis;
PhaseIterGVN *igvn = _igvn;
Expand Down Expand Up @@ -3783,6 +3814,11 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
if (reducible_merges.member(n)) {
// Split loads through phi
reduce_phi_on_field_access(n->as_Phi(), alloc_worklist);
#ifdef ASSERT
if (VerifyReduceAllocationMerges) {
reduced_merges.push(n);
}
#endif
continue;
}
JavaObjectNode* jobj = unique_java_object(n);
Expand Down Expand Up @@ -3895,14 +3931,24 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
}

#ifdef ASSERT
// At this point reducible Phis shouldn't have AddP users anymore; only SafePoints.
for (uint i = 0; i < reducible_merges.size(); i++) {
Node* phi = reducible_merges.at(i);
for (DUIterator_Fast jmax, j = phi->fast_outs(jmax); j < jmax; j++) {
Node* use = phi->fast_out(j);
if (!use->is_SafePoint()) {
phi->dump(-3);
assert(false, "Unexpected user of reducible Phi -> %s", use->Name());
if (VerifyReduceAllocationMerges) {
// At this point reducible Phis shouldn't have AddP users anymore; only SafePoints.
for (uint i = 0; i < reducible_merges.size(); i++) {
Node* phi = reducible_merges.at(i);

if (!reduced_merges.member(phi)) {
phi->dump(2);
phi->dump(-2);
assert(false, "This reducible merge wasn't reduced.");
}

for (DUIterator_Fast jmax, j = phi->fast_outs(jmax); j < jmax; j++) {
Node* use = phi->fast_out(j);
if (!use->is_SafePoint()) {
phi->dump(2);
phi->dump(-2);
assert(false, "Unexpected user of reducible Phi -> %d:%s:%d", use->_idx, use->Name(), use->outcnt());
}
}
}
}
Expand Down
Loading

0 comments on commit a4723ad

Please sign in to comment.