diff --git a/exist-core/src/main/java/org/exist/xquery/LetExpr.java b/exist-core/src/main/java/org/exist/xquery/LetExpr.java index 809d98362bb..091fbda6868 100644 --- a/exist-core/src/main/java/org/exist/xquery/LetExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/LetExpr.java @@ -52,6 +52,10 @@ public void setScoreBinding(final boolean scoreBinding) { this.scoreBinding = scoreBinding; } + public boolean isScoreBinding() { + return scoreBinding; + } + @Override public ClauseType getType() { return ClauseType.LET; @@ -113,6 +117,13 @@ && getPreviousClause() == null result = cc.replaceWith(this, returnExpr, "unused let-binding $" + varName); } + // Inline a node-typed path binding referenced exactly once at the + // source of a FilteredExpression with an Optimizable predicate + // (closes GH-873). Skip if the literal-drop above already fired. + if (result == this) { + result = LetInliner.tryInline(this, cc); + } + if (enteredScope) { result = cc.applyHoistsAndExitChain(result); } diff --git a/exist-core/src/main/java/org/exist/xquery/LetInliner.java b/exist-core/src/main/java/org/exist/xquery/LetInliner.java new file mode 100644 index 00000000000..35e8f60a9cd --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/LetInliner.java @@ -0,0 +1,238 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.value.Type; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Implements the inline-for-index-pre-select rewrite invoked from + * {@link LetExpr#optimize(CompileContext)}. + * + *

Rewrites + *

+ *   let $v := <persistent path> return $v[Optimizable-pred]
+ * 
+ * into + *
+ *   <persistent path>[Optimizable-pred]
+ * 
+ * by attaching the predicate to the last LocationStep of the input path. The + * resulting tree is identical in shape to the direct form, so the legacy + * {@link Optimizer} pass that runs immediately after the {@code optimize(cc)} + * pass wraps it in the {@code (#exist:optimize#)} pragma and routes the + * predicate through the index pre-select machinery — closing GH-873. + * + *

Without this rewrite, the indirect form runs about 167x slower than the + * direct form because {@code Optimize.eval} computes a pre-selected node set + * and then {@code FilteredExpression.eval} re-evaluates {@code $v} from the + * variable stack (ignoring the pre-selected context), so the index hit-set is + * thrown away and the predicate runs once per node in the full input. + * + *

The gate predicates are intentionally narrow in v1: each one corresponds + * to a soundness or scope concern documented in the design doc accompanying + * the change. See the comments on {@link #tryInline(LetExpr, CompileContext)} + * for the precise list. + */ +final class LetInliner { + + private LetInliner() { + } + + /** + * Attempt to inline the binding. Returns the LetExpr's replacement if all + * gates pass, or {@code let} unchanged if any gate fails. + * + *

Gates (all must hold): + *

    + *
  1. {@code !scoreBinding} and a non-null variable name -- score + * bindings (XQFT 3.0 §2.3) bind a synthesised double, not the + * input value, so inlining changes semantics;
  2. + *
  3. {@code getPreviousClause() == null} and the body is not itself + * another FLWOR clause -- limits v1 to standalone lets;
  4. + *
  5. no static type declared on the binding -- typed declarations + * impose a runtime check on the variable's value that inlining + * would silently bypass;
  6. + *
  7. the input is a node-typed expression -- only node sequences + * benefit from a downstream index pre-select;
  8. + *
  9. the input contains at least one non-wildcard LocationStep -- + * this is what an index can attach to;
  10. + *
  11. the body is exactly {@code FilteredExpression} (or a + * length-1 PathExpr wrapping one) whose source is the bound + * variable, with exactly one predicate that contains an + * {@link Optimizable}, and the variable is not referenced + * anywhere else in the body.
  12. + *
+ */ + static Expression tryInline(final LetExpr let, final CompileContext cc) { + // Gate 1 + final QName varName = let.getVariable(); + if (varName == null || let.isScoreBinding()) { + return let; + } + // Gate 2 + if (let.getPreviousClause() != null) { + return let; + } + final Expression returnExpr = let.getReturnExpression(); + if (returnExpr == null || returnExpr instanceof FLWORClause) { + return let; + } + // Gate 3 -- BindingExpression.sequenceType is protected; same-package access. + if (let.sequenceType != null) { + return let; + } + // Gate 4 + final Expression inputSequence = let.getInputSequence(); + if (inputSequence == null + || !Type.subTypeOf(inputSequence.returnsType(), Type.NODE)) { + return let; + } + // Gate 5: at least one non-wildcard LocationStep in the input. Pick + // the last one as the predicate-attachment site -- semantically the + // predicate filters the OUTPUT of the path, which is what the last + // step yields. + final LocationStep lastStep = findLastNamedStep(inputSequence); + if (lastStep == null) { + return let; + } + + // Gate 6: body must be a FilteredExpression (or length-1 PathExpr + // wrapping one) whose source is $varName, with exactly one + // Optimizable predicate, and $varName must not appear anywhere + // else in the body. + final FilteredExpression fe = unwrapFilteredExpression(returnExpr); + if (fe == null) { + return let; + } + final Expression feSrc = fe.getExpression(); + if (!(feSrc instanceof final VariableReference vr) + || !varName.equals(vr.getName())) { + return let; + } + final List preds = fe.getPredicates(); + if (preds.size() != 1) { + return let; + } + final Predicate pred = preds.get(0); + if (!hasOptimizable(pred)) { + return let; + } + final RefCounter counter = new RefCounter(varName); + returnExpr.accept(counter); + if (counter.count != 1) { + // The variable appears outside the FilteredExpression source -- + // a literal substitution would leave dangling references. + return let; + } + + // Substitute: attach the predicate to the input's last named step. + // The legacy Optimizer pass that runs next will detect the + // Optimizable predicate, wrap the rewritten path in the + // (#exist:optimize#) pragma, and route through the index pre-select + // (Optimizer.visitLocationStep / Optimize.before). + lastStep.addPredicate(pred); + return cc.replaceWith(let, inputSequence, + "inline let $" + varName.getLocalPart() + " for index pre-select"); + } + + /** + * Return the last non-wildcard LocationStep in {@code expr}, or null if + * none. Order matches document-order traversal of the path. + */ + private static @Nullable LocationStep findLastNamedStep(final Expression expr) { + final List steps = BasicExpressionVisitor.findLocationSteps(expr); + LocationStep last = null; + for (final LocationStep s : steps) { + if (s != null && !s.getTest().isWildcardTest()) { + last = s; + } + } + return last; + } + + /** + * Unwrap {@link DebuggableExpression} and length-1 {@link PathExpr} + * containers to expose a {@link FilteredExpression}, or return null if + * the underlying shape isn't one. Mirrors the unwrap rule used + * elsewhere in the engine to look past parser-introduced wrappers. + */ + private static @Nullable FilteredExpression unwrapFilteredExpression(final Expression expr) { + Expression current = expr; + while (true) { + if (current instanceof final FilteredExpression filtered) { + return filtered; + } else if (current instanceof final DebuggableExpression debug) { + current = debug.getFirst(); + } else if (current instanceof final PathExpr pathExpr && pathExpr.getLength() == 1) { + current = pathExpr.getExpression(0); + } else { + return null; + } + } + } + + /** + * Reuses the engine's existing Optimizable-detection visitor so the + * "is this predicate index-eligible?" question gets the same answer + * the legacy Optimizer pass would give. + */ + private static boolean hasOptimizable(final Predicate predicate) { + final Optimizer.FindOptimizable visitor = new Optimizer.FindOptimizable(); + predicate.accept(visitor); + final Optimizable[] optimizables = visitor.getOptimizables(); + return optimizables != null && optimizables.length > 0; + } + + /** + * Counts {@link VariableReference} nodes referring to a target name + * across an expression tree. Descends explicitly into + * {@link FilteredExpression} (BasicExpressionVisitor's default does + * not), so a $v that sits as the source of a FE is still counted. + */ + private static final class RefCounter extends DefaultExpressionVisitor { + private final QName target; + int count = 0; + + RefCounter(final QName target) { + this.target = target; + } + + @Override + public void visitVariableReference(final VariableReference ref) { + if (target.equals(ref.getName())) { + count++; + } + } + + @Override + public void visitFilteredExpr(final FilteredExpression filtered) { + filtered.getExpression().accept(this); + for (final Predicate p : filtered.getPredicates()) { + p.accept(this); + } + } + } +} diff --git a/extensions/indexes/lucene/src/test/java/org/exist/indexing/lucene/LetInliningRegressionTest.java b/extensions/indexes/lucene/src/test/java/org/exist/indexing/lucene/LetInliningRegressionTest.java new file mode 100644 index 00000000000..8a5b81043e4 --- /dev/null +++ b/extensions/indexes/lucene/src/test/java/org/exist/indexing/lucene/LetInliningRegressionTest.java @@ -0,0 +1,245 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.indexing.lucene; + +import org.exist.EXistException; +import org.exist.collections.Collection; +import org.exist.collections.CollectionConfigurationException; +import org.exist.collections.CollectionConfigurationManager; +import org.exist.collections.triggers.TriggerException; +import org.exist.security.PermissionDeniedException; +import org.exist.storage.BrokerPool; +import org.exist.storage.DBBroker; +import org.exist.storage.txn.TransactionManager; +import org.exist.storage.txn.Txn; +import org.exist.test.ExistEmbeddedServer; +import org.exist.test.TestConstants; +import org.exist.util.LockException; +import org.exist.util.MimeType; +import org.exist.util.StringInputSource; +import org.exist.xmldb.XmldbURI; +import org.exist.xquery.CompileContext; +import org.exist.xquery.CompiledXQuery; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQuery; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.Sequence; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Regression test for issue + * #873: a let-bound persistent path used as the source of a FilteredExpression + * with an Optimizable predicate should not be ~167x slower than the direct form. + * + *

Validates the {@code LetInliner} rewrite that turns + * {@code let $a := //X return $a[Optimizable-pred]} into + * {@code //X[Optimizable-pred]} so the legacy Optimizer pass can attach the + * {@code (#exist:optimize#)} pragma to the LocationStep, routing the predicate + * through the lucene pre-select. + */ +public class LetInliningRegressionTest { + + private static final String COLLECTION_CONFIG = """ + + + + + + + + + """; + + /** ~200 LINE elements, one of which contains the sentinel "Denmark". */ + private static final String CORPUS = buildCorpus(); + + private static String buildCorpus() { + final StringBuilder sb = new StringBuilder(""); + for (int i = 0; i < 199; i++) { + sb.append("line number ").append(i).append(""); + } + sb.append("something is rotten in the state of Denmark"); + sb.append(""); + return sb.toString(); + } + + /** Enables the optimizer per-query so the test does not require conf.xml flips. */ + /** Enables the optimizer per-query so the test does not require conf.xml flips. */ + private static final String OPTIMIZE = "declare option exist:optimize 'enable=yes';"; + + private static final String QUERY_DIRECT = """ + declare namespace ft="http://exist-db.org/xquery/lucene"; + %s + collection('%s')//LINE[ft:query(., 'Denmark')] + """.formatted(OPTIMIZE, TestConstants.TEST_COLLECTION_URI.toString()); + + private static final String QUERY_INDIRECT = """ + declare namespace ft="http://exist-db.org/xquery/lucene"; + %s + let $a := collection('%s')//LINE + return $a[ft:query(., 'Denmark')] + """.formatted(OPTIMIZE, TestConstants.TEST_COLLECTION_URI.toString()); + + private static final String QUERY_LET_REFERENCED_TWICE = """ + declare namespace ft="http://exist-db.org/xquery/lucene"; + %s + let $a := collection('%s')//LINE + return ($a[ft:query(., 'Denmark')], $a[1]) + """.formatted(OPTIMIZE, TestConstants.TEST_COLLECTION_URI.toString()); + + private static final String QUERY_LET_BOUND_TO_COUNT = """ + %s + let $a := collection('%s')//LINE + return count($a) + """.formatted(OPTIMIZE, TestConstants.TEST_COLLECTION_URI.toString()); + + private static final String QUERY_LET_IS_TYPED = """ + declare namespace ft="http://exist-db.org/xquery/lucene"; + %s + let $a as element(LINE)+ := collection('%s')//LINE + return $a[ft:query(., 'Denmark')] + """.formatted(OPTIMIZE, TestConstants.TEST_COLLECTION_URI.toString()); + + private static Collection root; + + @ClassRule + public static final ExistEmbeddedServer existEmbeddedServer = new ExistEmbeddedServer(true, true); + + @Before + public void setupAndStore() throws EXistException, PermissionDeniedException, IOException, + TriggerException, CollectionConfigurationException, SAXException, LockException { + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + final TransactionManager transact = pool.getTransactionManager(); + try (final DBBroker broker = pool.get(Optional.of(pool.getSecurityManager().getSystemSubject())); + final Txn transaction = transact.beginTransaction()) { + root = broker.getOrCreateCollection(transaction, TestConstants.TEST_COLLECTION_URI); + assertNotNull(root); + broker.saveCollection(transaction, root); + final CollectionConfigurationManager mgr = pool.getConfigurationManager(); + mgr.addConfiguration(transaction, broker, root, COLLECTION_CONFIG); + broker.storeDocument(transaction, XmldbURI.create("corpus.xml"), + new StringInputSource(CORPUS), MimeType.XML_TYPE, root); + transact.commit(transaction); + } + } + + @AfterClass + public static void cleanup() throws Exception { + org.exist.TestUtils.cleanupDB(); + } + + @Test + public void issue873_indirectQueryReturnsSameNodes() throws Exception { + final long directCount = countOf(QUERY_DIRECT); + final long indirectCount = countOf(QUERY_INDIRECT); + assertEquals("indirect form must return same hit count as direct form", + directCount, indirectCount); + assertEquals("expected exactly one Denmark hit", 1L, directCount); + } + + @Test + public void issue873_inlineRewriteLogged() throws Exception { + final List rewrites = compileAndCaptureLog(QUERY_INDIRECT); + assertTrue("expected an 'inline let' rewrite in the optimizer log; got: " + rewrites, + rewrites.stream().anyMatch(s -> s.contains("inline let $a"))); + } + + @Test + public void inline_doesNotFireWhen_letReferencedTwice() throws Exception { + final List rewrites = compileAndCaptureLog(QUERY_LET_REFERENCED_TWICE); + assertTrue("inline must not fire when $v is used more than once; got: " + rewrites, + rewrites.stream().noneMatch(s -> s.contains("inline let $a"))); + } + + @Test + public void inline_doesNotFireWhen_letBoundToCount() throws Exception { + final List rewrites = compileAndCaptureLog(QUERY_LET_BOUND_TO_COUNT); + assertTrue("inline must not fire when body is not a FilteredExpression; got: " + rewrites, + rewrites.stream().noneMatch(s -> s.contains("inline let $a"))); + } + + @Test + public void inline_doesNotFireWhen_letIsTyped() throws Exception { + final List rewrites = compileAndCaptureLog(QUERY_LET_IS_TYPED); + assertTrue("inline must not fire when binding has a static type; got: " + rewrites, + rewrites.stream().noneMatch(s -> s.contains("inline let $a"))); + } + + @Test + public void issue873_indirectQueryUnderLoosePerfBound() throws Exception { + // Warmup the JIT and the cached pragma machinery on both forms. + for (int i = 0; i < 3; i++) { + countOf(QUERY_DIRECT); + countOf(QUERY_INDIRECT); + } + final long directNs = timeOf(QUERY_DIRECT); + final long indirectNs = timeOf(QUERY_INDIRECT); + // Pre-fix this ratio is ~167x. The fix brings it to ~1x. A 20x ceiling + // catches the regression without inviting CI flakiness from JIT noise + // on tiny absolute timings. + final long ceilingNs = directNs * 20L + 500_000_000L; // +500ms slack + assertTrue("indirect form took " + (indirectNs / 1_000_000) + "ms; direct took " + + (directNs / 1_000_000) + "ms; ceiling " + + (ceilingNs / 1_000_000) + "ms", + indirectNs < ceilingNs); + } + + private long countOf(final String query) throws EXistException, PermissionDeniedException, XPathException { + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try (final DBBroker broker = pool.get(Optional.of(pool.getSecurityManager().getSystemSubject()))) { + final XQuery xquery = pool.getXQueryService(); + final Sequence seq = xquery.execute(broker, query, null); + return seq.getItemCount(); + } + } + + private long timeOf(final String query) throws EXistException, PermissionDeniedException, XPathException { + final long start = System.nanoTime(); + countOf(query); + return System.nanoTime() - start; + } + + private List compileAndCaptureLog(final String query) throws Exception { + final BrokerPool pool = existEmbeddedServer.getBrokerPool(); + try (final DBBroker broker = pool.get(Optional.of(pool.getSecurityManager().getSystemSubject()))) { + final XQuery xquery = pool.getXQueryService(); + final XQueryContext context = new XQueryContext(pool); + final CompiledXQuery compiled = xquery.compile(broker, context, query); + assertNotNull(compiled); + final CompileContext cc = context.getLastCompileContext(); + assertNotNull("optimize() pass should have run", cc); + return cc.log(); + } + } +}