org.openjdk.jmh
jmh-core
diff --git a/exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java b/exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java
new file mode 100644
index 00000000000..9c867ac869e
--- /dev/null
+++ b/exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java
@@ -0,0 +1,156 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery;
+
+import org.exist.test.ExistEmbeddedServer;
+import org.exist.xmldb.XmldbURI;
+import org.openjdk.jmh.annotations.*;
+import org.xmldb.api.DatabaseManager;
+import org.xmldb.api.base.Collection;
+import org.xmldb.api.base.Database;
+import org.xmldb.api.modules.XQueryService;
+
+import java.util.concurrent.TimeUnit;
+
+/**
+ * JMH benchmark for the {@code preceding::*} half of issue #2129.
+ *
+ * <p>craigberry's #2129 follow-up reproduced position-dependence for the
+ * wildcard preceding axis on a 50,000-element flat document: a query at
+ * {@code @xml:id='45000'} took roughly twice as long as the same query at
+ * {@code @xml:id='25000'}. The K-bounded sliding window in
+ * {@code LocationStep.PrecedingFilter} caps the retained match set at K,
+ * eliminating the unbounded accumulation that produced the late-position
+ * tax.
+ *
+ * <p>This benchmark exercises the wildcard-vs-sibling and early-vs-late
+ * comparisons that the original mixed-purpose JUnit class measured with
+ * {@code System.nanoTime} and median-of-N. JMH handles statistical
+ * aggregation natively; the correctness assertions live in the
+ * companion XQSuite test {@code exist-core/src/test/xquery/preceding-axis.xql}.
+ */
+@State(Scope.Benchmark)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 2)
+@Fork(1)
+public class PrecedingAxisBenchmark {
+
+    /** Database path of the corpus document that {@link #setUp()} stores and both benchmarks query. */
+    private static final String LARGE_DOC = "/db/words-large.xml";
+
+    /** {@code xml:id} of the reference {@code w} element: early, middle and late in the 50,000-element corpus. */
+    @Param({"5000", "25000", "45000"})
+    private int refPosition;
+
+    private ExistEmbeddedServer existServer;
+    private Database database;
+    private Collection root;
+    private XQueryService xqs;
+
+    /** Default constructor for JMH harness. */
+    public PrecedingAxisBenchmark() {
+    }
+
+    /**
+     * Boots an embedded eXist server, registers the XML:DB driver, and stores
+     * a 50,000-element flat words document used by the benchmark queries.
+     *
+     * @throws Exception if the embedded server fails to start, the database
+     *     driver cannot be registered, or the corpus document cannot be stored
+     */
+    @Setup(Level.Trial)
+    public void setUp() throws Exception {
+        existServer = new ExistEmbeddedServer(true, true);
+        existServer.startDb();
+
+        final Class<?> cl = Class.forName("org.exist.xmldb.DatabaseImpl");
+        database = (Database) cl.getDeclaredConstructor().newInstance();
+        database.setProperty("create-database", "true");
+        DatabaseManager.registerDatabase(database);
+        root = DatabaseManager.getCollection(XmldbURI.LOCAL_DB, "admin", "");
+        xqs = root.getService(XQueryService.class);
+
+        // The benchmark queries select //w[@xml:id='N'] and read text(), so the corpus is a flat
+        // run of <w xml:id="N">N</w> elements under one wrapper element. No query references the
+        // wrapper by name.
+        xqs.query(
+                """
+                let $words := for $i in (1 to 50000) return <w xml:id="{$i}">{$i}</w>
+                return xmldb:store('/db', 'words-large.xml', document {<root>{$words}</root>})
+                """);
+    }
+
+    /**
+     * Removes the corpus document, closes the test collection, and shuts down
+     * the embedded server. Each step runs even when an earlier one throws, so
+     * a failed removal or close cannot leave the embedded server running.
+     *
+     * @throws Exception if removing the corpus document, closing the
+     *     collection, or stopping the embedded server fails
+     */
+    @TearDown(Level.Trial)
+    public void tearDown() throws Exception {
+        try {
+            xqs.query("xmldb:remove('/db', 'words-large.xml')");
+        } finally {
+            try {
+                root.close();
+            } finally {
+                try {
+                    DatabaseManager.deregisterDatabase(database);
+                } finally {
+                    existServer.stopDb(true);
+                }
+            }
+        }
+    }
+
+    /**
+     * Wildcard preceding axis with a positional predicate gated by a self::w
+     * filter. Pre-fix this accumulated every preceding match from doc start;
+     * post-fix the sliding window caps retention at K=5.
+     *
+     * @return the result-set size, returned so JMH's blackhole prevents the
+     *     call being optimized away
+     * @throws Exception if the embedded query fails
+     */
+    @Benchmark
+    public long wildcardPrecedingWithPositionalPredicate() throws Exception {
+        return xqs.query(
+                ("""
+                xquery version "3.1";
+                let $w := doc('%s')//w[@xml:id='%d']
+                return for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text()
+                """).formatted(LARGE_DOC, refPosition)
+        ).getSize();
+    }
+
+    /**
+     * preceding-sibling::w[K] baseline: walks the persistent sibling chain
+     * directly rather than the full preceding axis. Used as a relative
+     * lower-bound to interpret the wildcard preceding number.
+     *
+     * @return the result-set size, returned so JMH's blackhole prevents the
+     *     call being optimized away
+     * @throws Exception if the embedded query fails
+     */
+    @Benchmark
+    public long precedingSiblingBaseline() throws Exception {
+        return xqs.query(
+                ("""
+                xquery version "3.1";
+                let $w := doc('%s')//w[@xml:id='%d']
+                return for $i in (1 to 5) return $w/preceding-sibling::w[$i]/text()
+                """).formatted(LARGE_DOC, refPosition)
+        ).getSize();
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/LocationStep.java b/exist-core/src/main/java/org/exist/xquery/LocationStep.java
index b8936cbd1c2..ceaa2699cb4 100644
--- a/exist-core/src/main/java/org/exist/xquery/LocationStep.java
+++ b/exist-core/src/main/java/org/exist/xquery/LocationStep.java
@@ -38,6 +38,8 @@
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.Deque;
/**
* Processes all location path steps (like descendant::*, ancestor::XXX).
@@ -920,7 +922,7 @@ private Sequence getPrecedingOrFollowing(final XQueryContext context, final Sequ
final NodeProxy root = new NodeProxy(this, node);
final StreamFilter filter;
if (axis == Constants.PRECEDING_AXIS) {
- filter = new PrecedingFilter(test, root, next, result, contextId);
+ filter = new PrecedingFilter(test, root, next, result, contextId, position);
} else {
filter = new FollowingFilter(test, root, next, result, contextId, position);
}
@@ -1417,12 +1419,19 @@ public boolean accept(final XMLStreamReader reader) {
private class PrecedingFilter extends AbstractFilterBase {
final NodeProxy root;
final NodeProxy referenceNode;
+        // Sliding window of the most recent {@code limit} matches. Non-null only
+        // when limit > 0 (positional predicate {@code [K]} present). The K-th
+        // preceding element in axis order is the (K-th-from-end) match in doc
+        // order, so any match earlier than the K most recent cannot be selected
+        // and may be discarded as new ones are found.
+        final Deque<NodeProxy> window;
         PrecedingFilter(final NodeTest test, final NodeProxy root, final NodeProxy referenceNode, final NodeSet result,
-                        final int contextId) {
-            super(test, result, contextId, -1);
+                        final int contextId, final int limit) {
+            super(test, result, contextId, limit);
             this.root = root;
             this.referenceNode = referenceNode;
+            // limit <= 0 means no positional bound: leave the window unset so matches go straight to result
+            this.window = limit > 0 ? new ArrayDeque<>(limit) : null;
         }
@Override
@@ -1431,11 +1440,16 @@ public boolean accept(final XMLStreamReader reader) {
if (reader.getEventType() == XMLStreamReader.END_ELEMENT) {
// exited the root element, so stop filtering
- return currentId.getTreeLevel() != root.getNodeId().getTreeLevel();
+ if (currentId.getTreeLevel() == root.getNodeId().getTreeLevel()) {
+ flushWindow();
+ return false;
+ }
+ return true;
}
final NodeId refId = referenceNode.getNodeId();
if (currentId.compareTo(refId) >= 0) {
+ flushWindow();
return false;
}
@@ -1449,10 +1463,26 @@ public boolean accept(final XMLStreamReader reader) {
proxy.addContextNode(contextId, referenceNode);
}
}
- result.add(proxy);
+ if (window != null) {
+ if (window.size() == limit) {
+ window.pollFirst();
+ }
+ window.addLast(proxy);
+ } else {
+ result.add(proxy);
+ }
}
return true;
}
+
+        /**
+         * Moves the buffered matches into {@code result}, oldest first, and
+         * empties the buffer. Does nothing when no window was allocated.
+         */
+        private void flushWindow() {
+            if (window == null) {
+                return;
+            }
+            for (final NodeProxy retained : window) {
+                result.add(retained);
+            }
+            window.clear();
+        }
}
}
diff --git a/exist-core/src/test/xquery/optimizer/positional.xqm b/exist-core/src/test/xquery/optimizer/positional.xqm
index cdbbcd37fb0..fb76e9aa24b 100644
--- a/exist-core/src/test/xquery/optimizer/positional.xqm
+++ b/exist-core/src/test/xquery/optimizer/positional.xqm
@@ -176,7 +176,7 @@ function ot:optimize-simple-following-nested() {
declare
%test:stats
- %test:assertXPath("not($result//stats:optimization[@type eq 'POSITIONAL_PREDICATE'])")
+ %test:assertXPath("$result//stats:optimization[@type eq 'POSITIONAL_PREDICATE']")
function ot:optimize-simple-preceding() {
let $w := doc($ot:DOC)//w[@xml:id='25000']
return
diff --git a/exist-core/src/test/xquery/preceding-axis.xql b/exist-core/src/test/xquery/preceding-axis.xql
new file mode 100644
index 00000000000..36f9e178999
--- /dev/null
+++ b/exist-core/src/test/xquery/preceding-axis.xql
@@ -0,0 +1,132 @@
+(:
+ : eXist-db Open Source Native XML Database
+ : Copyright (C) 2001 The eXist-db Authors
+ :
+ : info@exist-db.org
+ : http://www.exist-db.org
+ :
+ : This library is free software; you can redistribute it and/or
+ : modify it under the terms of the GNU Lesser General Public
+ : License as published by the Free Software Foundation; either
+ : version 2.1 of the License, or (at your option) any later version.
+ :
+ : This library is distributed in the hope that it will be useful,
+ : but WITHOUT ANY WARRANTY; without even the implied warranty of
+ : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ : Lesser General Public License for more details.
+ :
+ : You should have received a copy of the GNU Lesser General Public
+ : License along with this library; if not, write to the Free Software
+ : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ :)
+xquery version "3.1";
+
+(:~
+ : Correctness regression tests for the {@code preceding::*} half of issue #2129.
+ : The K-bounded sliding window in {@code LocationStep.PrecedingFilter} must
+ : preserve the original axis semantics: ancestor exclusion, axis-order
+ : positional predicates, and identical result counts/values regardless of
+ : where the reference node sits in the document.
+ :
+ : The companion performance benchmark lives in
+ : {@code exist-core-jmh/.../PrecedingAxisBenchmark.java}.
+ :)
+module namespace pa = "http://exist-db.org/xquery/test/preceding-axis";
+
+declare namespace test = "http://exist-db.org/xquery/xqsuite";
+
+(: Collection holding the fixture documents; created in pa:setup and removed in pa:cleanup. :)
+declare variable $pa:collection := "/db/preceding-axis-test";
+(: Flat document of 50 w elements, used by the three reproducer tests. :)
+declare variable $pa:small-doc := $pa:collection || "/words-small.xml";
+(: Flat document of 10 w elements, used by the k-th closest test. :)
+declare variable $pa:tiny-doc := $pa:collection || "/tiny.xml";
+(: Document queried by the ancestor-exclusion test. :)
+declare variable $pa:ancestors-doc := $pa:collection || "/ancestors.xml";
+
+(:~
+ : Creates the test collection and stores the three fixture documents:
+ : a flat run of 50 w elements, a flat run of 10 w elements, and a small
+ : nested document for the ancestor-exclusion test. Every w element carries
+ : its sequence number both as its xml:id and as its text, which is what the
+ : tests select on and compare against.
+ :)
+declare
+    %test:setUp
+function pa:setup() {
+    let $col := xmldb:create-collection("/db", "preceding-axis-test")
+    let $small :=
+        document {
+            <root>{
+                for $i in (1 to 50) return <w xml:id="{$i}">{$i}</w>
+            }</root>
+        }
+    let $tiny :=
+        document {
+            <root>{
+                for $i in (1 to 10) return <w xml:id="{$i}">{$i}</w>
+            }</root>
+        }
+    (: NOTE(review): the tests only reference the elements a and target; the names
+       root and parent are placeholders. Requirement: a is the only element that
+       precedes target without being one of its ancestors. :)
+    let $ancestors :=
+        document {
+            <root><parent><a/><target/></parent></root>
+        }
+    return (
+        xmldb:store($col, "words-small.xml", $small),
+        xmldb:store($col, "tiny.xml", $tiny),
+        xmldb:store($col, "ancestors.xml", $ancestors)
+    )
+};
+
+(:~
+ : Removes the test collection, and with it every fixture document stored in it.
+ :)
+declare
+ %test:tearDown
+function pa:cleanup() {
+ xmldb:remove($pa:collection)
+};
+
+(:~
+ : Reproducer query with the reference node w[@xml:id = "10"] of the
+ : 50-element document. For $i = 1..5 the predicate [5 + 1 - $i] selects the
+ : 5th, 4th, ... 1st preceding element, so the joined texts read 5 6 7 8 9.
+ :)
+declare
+ %test:assertEquals("5 6 7 8 9")
+function pa:reproducer-output-at-early-position() {
+ let $w := doc($pa:small-doc)//w[@xml:id = "10"]
+ return
+ string-join(
+ for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
+ " "
+ )
+};
+
+(:~
+ : Same reproducer query with the reference node w[@xml:id = "25"], the middle
+ : of the 50-element document. The five closest preceding elements are
+ : returned in document order: 20 21 22 23 24.
+ :)
+declare
+ %test:assertEquals("20 21 22 23 24")
+function pa:reproducer-output-at-midpoint() {
+ let $w := doc($pa:small-doc)//w[@xml:id = "25"]
+ return
+ string-join(
+ for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
+ " "
+ )
+};
+
+(:~
+ : Same reproducer query with the reference node w[@xml:id = "45"], near the
+ : end of the 50-element document. The result must still be exactly the five
+ : closest preceding elements: 40 41 42 43 44.
+ :)
+declare
+ %test:assertEquals("40 41 42 43 44")
+function pa:reproducer-output-at-late-position() {
+ let $w := doc($pa:small-doc)//w[@xml:id = "45"]
+ return
+ string-join(
+ for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
+ " "
+ )
+};
+
+(:~
+ : {@code preceding::} excludes ancestors of the reference node even though
+ : they precede the reference in document order. Only the sibling {@code a}
+ : precedes {@code target} on the preceding axis.
+ :
+ : The query returns the name of every element on the axis, so an ancestor
+ : leaking into the result would change the asserted value.
+ :)
+declare
+ %test:assertEquals("a")
+function pa:preceding-excludes-ancestors() {
+ for $n in doc($pa:ancestors-doc)//target/preceding::*
+ return name($n)
+};
+
+(:~
+ : From {@code w[5]} on a flat doc of 10 elements, the k-th preceding element
+ : in axis order (reverse document order) is {@code w[5-k]}.
+ :
+ : Each k in 1 to 4 is evaluated as its own positional step, so the four
+ : results are asserted as separate items: 4, 3, 2, 1.
+ :)
+declare
+ %test:assertEquals("4", "3", "2", "1")
+function pa:preceding-positional-predicate-returns-kth-closest() {
+ let $w := doc($pa:tiny-doc)//w[@xml:id = "5"]
+ return
+ for $k in 1 to 4
+ return $w/preceding::*[$k]/text()
+};