Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/hotspot/share/opto/c2_globals.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,9 @@
product(bool, IncrementalInlineForceCleanup, false, DIAGNOSTIC, \
"do cleanup after every iteration of incremental inlining") \
\
product(bool, IncrementalInlineVector, true, DIAGNOSTIC, \
"Inline fallback implementation of failed vector intrinsics") \
\
product(intx, LiveNodeCountInliningCutoff, 40000, \
"max number of live nodes in a method") \
range(0, max_juint / 8) \
Expand Down
26 changes: 26 additions & 0 deletions src/hotspot/share/opto/callGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,32 @@ CallGenerator* CallGenerator::for_mh_late_inline(ciMethod* caller, ciMethod* cal
return cg;
}

// Late-inline call generator for vector intrinsics that additionally carries a
// generator for the fallback (non-intrinsic) implementation. generate() first
// delegates to LateInlineCallGenerator::generate() and then, if a fallback was
// supplied, queues a regular late-inline generator for that fallback via
// Compile::add_vector_late_inline().
class LateInlineVectorCallGenerator : public LateInlineCallGenerator {
private:
// Generator for the fallback implementation; may be nullptr, in which case
// generate() registers nothing.
CallGenerator* _fallback_cg;

public:
// method       - passed through to the LateInlineCallGenerator base.
// intrinsic_cg - passed through to the base as its inline generator.
// fallback_cg  - stored as-is; no validation is performed here.
LateInlineVectorCallGenerator(ciMethod* method, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) :
LateInlineCallGenerator(method, intrinsic_cg), _fallback_cg(fallback_cg) {}

// Identifies this generator as a vector late-inline generator.
virtual bool is_vector_late_inline() const { return true; }

// Runs the base-class generate() and returns its JVMState unchanged. As a side
// effect, when a fallback generator is present, a late-inline generator wrapping
// it is bound to this generator's call node and handed to the current Compile.
// The registration does not depend on the value of new_jvms.
virtual JVMState* generate(JVMState* jvms) {
JVMState* new_jvms = LateInlineCallGenerator::generate(jvms);
if (_fallback_cg != nullptr) {
Copy link
Copy Markdown
Contributor

@iwanowww iwanowww May 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: you can require _fallback_cg now. Assert _fallback_cg != nullptr in ctor and drop null check.

// The fallback is expected to be a parse generator (checked in debug builds).
assert(_fallback_cg->is_parse(), "");
// Wrap the fallback in a plain late-inline generator that targets the same
// call node, then queue it on the compilation's vector late-inline list.
CallGenerator* fallback = CallGenerator::for_late_inline(method(), _fallback_cg)->with_call_node(call_node());
Compile::current()->add_vector_late_inline(fallback);
}
return new_jvms;
}
};

// Factory for the vector late-inline generator: pairs the intrinsic generator
// with the generator for its fallback implementation.
CallGenerator* CallGenerator::for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg) {
  LateInlineVectorCallGenerator* vector_cg =
      new LateInlineVectorCallGenerator(m, intrinsic_cg, fallback_cg);
  return vector_cg;
}


// Allow inlining decisions to be delayed
class LateInlineVirtualCallGenerator : public VirtualCallGenerator {
private:
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/opto/callGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ class CallGenerator : public ArenaObj {
// same but for method handle calls
virtual bool is_mh_late_inline() const { return false; }
virtual bool is_string_late_inline() const { return false; }
virtual bool is_vector_late_inline() const { return false; }
virtual bool is_boxing_late_inline() const { return false; }
virtual bool is_vector_reboxing_late_inline() const { return false; }
virtual bool is_virtual_late_inline() const { return false; }
Expand Down Expand Up @@ -142,6 +143,7 @@ class CallGenerator : public ArenaObj {
static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_mh_late_inline(ciMethod* caller, ciMethod* callee, bool input_not_const);
static CallGenerator* for_string_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_late_inline(ciMethod* m, CallGenerator* intrinsic_cg, CallGenerator* fallback_cg = nullptr);
static CallGenerator* for_boxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_vector_reboxing_late_inline(ciMethod* m, CallGenerator* inline_cg);
static CallGenerator* for_late_inline_virtual(ciMethod* m, int vtable_index, float expected_uses);
Expand Down
26 changes: 26 additions & 0 deletions src/hotspot/share/opto/compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ void Compile::remove_useless_node(Node* dead) {
remove_useless_late_inlines( &_late_inlines, dead);
remove_useless_late_inlines( &_string_late_inlines, dead);
remove_useless_late_inlines( &_boxing_late_inlines, dead);
remove_useless_late_inlines( &_vector_late_inlines, dead);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, dead);

if (dead->is_CallStaticJava()) {
Expand Down Expand Up @@ -480,6 +481,7 @@ void Compile::disconnect_useless_nodes(Unique_Node_List& useful, Unique_Node_Lis
remove_useless_late_inlines( &_late_inlines, useful);
remove_useless_late_inlines( &_string_late_inlines, useful);
remove_useless_late_inlines( &_boxing_late_inlines, useful);
remove_useless_late_inlines( &_vector_late_inlines, useful);
remove_useless_late_inlines(&_vector_reboxing_late_inlines, useful);
DEBUG_ONLY(verify_graph_edges(true /*check for no_dead_code*/, root_and_safepoints);)
}
Expand Down Expand Up @@ -693,6 +695,7 @@ Compile::Compile(ciEnv* ci_env, ciMethod* target, int osr_bci,
_string_late_inlines(comp_arena(), 2, 0, nullptr),
_boxing_late_inlines(comp_arena(), 2, 0, nullptr),
_vector_reboxing_late_inlines(comp_arena(), 2, 0, nullptr),
_vector_late_inlines(comp_arena(), 2, 0, nullptr),
_late_inlines_pos(0),
_has_mh_late_inlines(false),
_oom(false),
Expand Down Expand Up @@ -2158,6 +2161,25 @@ void Compile::shuffle_late_inlines() {
shuffle_array(*C, _late_inlines);
}

void Compile::process_vector_late_inlines() {
for (int i = 0; i < _vector_late_inlines.length(); i++) {
CallGenerator* cg = _vector_late_inlines.at(i);

// When a vector intrinsic fails, set_generator(cg) caches the
// LateInlineVectorCallGenerator on the call node to allow retries
// if IGVN optimizes the call node's inputs. If the call node is not
// on the IGVN worklist when cleanup runs, CallStaticJavaNode::Ideal
// does not fire and the cached generator persists. Once _late_inlines
// drains and we commit to the fallback here, clear the stale generator
// to prevent a subsequent IGVN pass from re-registering the intrinsic
// attempt into _late_inlines alongside the fallback, which would create
// duplicate call_node entries.
cg->call_node()->as_CallJava()->set_generator(nullptr);
add_late_inline(cg);
}
_vector_late_inlines.clear();
}

// Perform incremental inlining until bound on number of live nodes is reached
void Compile::inline_incrementally(PhaseIterGVN& igvn) {
TracePhase tp(_t_incrInline);
Expand Down Expand Up @@ -2215,6 +2237,10 @@ void Compile::inline_incrementally(PhaseIterGVN& igvn) {
print_method(PHASE_INCREMENTAL_INLINE_STEP, 3);

if (failing()) return;

if (_late_inlines.length() == 0) {
process_vector_late_inlines();
}
}

igvn_worklist()->ensure_empty(); // should be done with igvn
Expand Down
7 changes: 7 additions & 0 deletions src/hotspot/share/opto/compile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ class Compile : public Phase {
GrowableArray<CallGenerator*> _boxing_late_inlines; // same but for boxing operations

GrowableArray<CallGenerator*> _vector_reboxing_late_inlines; // same but for vector reboxing operations
GrowableArray<CallGenerator*> _vector_late_inlines; // inline fallback implementation for failed intrinsics

int _late_inlines_pos; // Where in the queue should the next late inlining candidate go (emulate depth first inlining)
bool _has_mh_late_inlines; // Can there still be a method handle late inlining pending?
Expand Down Expand Up @@ -508,6 +509,12 @@ class Compile : public Phase {
InlinePrinter _inline_printer;

public:

// Appends cg to the list of pending vector fallback late-inline generators.
void add_vector_late_inline(CallGenerator* cg) { _vector_late_inlines.append(cg); }
void process_vector_late_inlines();

void* barrier_set_state() const { return _barrier_set_state; }

InlinePrinter* inline_printer() { return &_inline_printer; }
Expand Down
11 changes: 11 additions & 0 deletions src/hotspot/share/opto/doCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,17 @@ CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool
cg_intrinsic = cg;
cg = nullptr;
} else if (IncrementalInline && should_delay_vector_inlining(callee, jvms)) {
if (IncrementalInlineVector && allow_inline) {
// Try to late inline fallback implementation if intrinsification attempt fails.
CallGenerator* fallback_cg = call_generator(callee, vtable_index, call_does_dispatch, jvms,
true /*allow_inline*/, prof_factor,
speculative_receiver_type, false /*allow_intrinsics*/);
if (fallback_cg != nullptr && fallback_cg->is_parse()) {
return CallGenerator::for_vector_late_inline(callee, cg, fallback_cg);
}
// Fallback not inlineable by regular heuristics; fall through.
}
// Don't try to inline fallback implementation.
return CallGenerator::for_late_inline(callee, cg);
} else {
return cg;
Expand Down
3 changes: 2 additions & 1 deletion test/hotspot/jtreg/compiler/vectorapi/TestVectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
*/
public class TestVectorTest {
// Test entry point: passes the JVM flags below to TestFramework.runWithFlags.
// NOTE(review): the hunk header reports "2 additions & 1 deletion" for this file,
// so the single-flag call is presumably the removed line and the two-flag call
// its replacement — confirm against the raw diff.
public static void main(String[] args) {
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
TestFramework.runWithFlags("--add-modules=jdk.incubator.vector",
"-XX:-IncrementalInlineVector");
}

@DontInline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1294,7 +1294,8 @@ public static void testCompareMaskNotDoubleNegative() {
// Test entry point: creates a TestFramework, sets the default warmup to 5000,
// adds the JVM flags below and starts the run.
// NOTE(review): the hunk header (-1294,7 +1294,8) indicates one line was replaced
// by two, so the single-argument addFlags call is presumably the removed line and
// the two-argument call its replacement — confirm against the raw diff.
public static void main(String[] args) {
TestFramework testFramework = new TestFramework();
testFramework.setDefaultWarmup(5000)
.addFlags("--add-modules=jdk.incubator.vector")
.addFlags("--add-modules=jdk.incubator.vector",
"-XX:InlineSmallCode=1000000")
.start();
}
}