diff --git a/exist-core-jmh/pom.xml b/exist-core-jmh/pom.xml index 3ef48a4dd93..b8270c99c2c 100644 --- a/exist-core-jmh/pom.xml +++ b/exist-core-jmh/pom.xml @@ -59,6 +59,10 @@ lucene-core ${lucene.version} + + net.sf.xmldb-org + xmldb-api + org.openjdk.jmh jmh-core diff --git a/exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java b/exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java new file mode 100644 index 00000000000..9c867ac869e --- /dev/null +++ b/exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java @@ -0,0 +1,156 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.test.ExistEmbeddedServer; +import org.exist.xmldb.XmldbURI; +import org.openjdk.jmh.annotations.*; +import org.xmldb.api.DatabaseManager; +import org.xmldb.api.base.Collection; +import org.xmldb.api.base.Database; +import org.xmldb.api.modules.XQueryService; + +import java.util.concurrent.TimeUnit; + +/** + * JMH benchmark for the {@code preceding::*} half of issue #2129. + * + *

<p>craigberry's #2129 follow-up reproduced position-dependence for the + * wildcard preceding axis on a 50,000-element flat document: a query at + * {@code @xml:id='45000'} took roughly twice as long as the same query at + * {@code @xml:id='25000'}. The K-bounded sliding window in + * {@code LocationStep.PrecedingFilter} caps the retained match set at K, + * eliminating the unbounded accumulation that produced the late-position + * tax.</p>

+ * + *

<p>This benchmark exercises the wildcard-vs-sibling and early-vs-late + * comparisons that the original mixed-purpose JUnit class measured with + * {@code System.nanoTime} and median-of-N. JMH handles statistical + * aggregation natively; the correctness assertions live in the + * companion XQSuite test {@code exist-core/src/test/xquery/preceding-axis.xql}.</p>

+ */
+@State(Scope.Benchmark)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 3, time = 1)
+@Measurement(iterations = 5, time = 2)
+@Fork(1)
+public class PrecedingAxisBenchmark {
+
+    private static final String LARGE_DOC = "/db/words-large.xml";
+
+    @Param({"5000", "25000", "45000"})
+    private int refPosition;
+
+    private ExistEmbeddedServer existServer;
+    private Database database;
+    private Collection root;
+    private XQueryService xqs;
+
+    /** Default constructor for JMH harness. */
+    public PrecedingAxisBenchmark() {
+    }
+
+    /**
+     * Boots an embedded eXist server, registers the XML:DB driver, and stores
+     * a 50,000-element flat words document used by the benchmark queries.
+     *
+     * @throws Exception if the embedded server fails to start, the database
+     *     driver cannot be registered, or the corpus document cannot be stored
+     */
+    @Setup(Level.Trial)
+    public void setUp() throws Exception {
+        existServer = new ExistEmbeddedServer(true, true);
+        existServer.startDb();
+
+        final Class<?> cl = Class.forName("org.exist.xmldb.DatabaseImpl");
+        database = (Database) cl.getDeclaredConstructor().newInstance();
+        database.setProperty("create-database", "true");
+        DatabaseManager.registerDatabase(database);
+        root = DatabaseManager.getCollection(XmldbURI.LOCAL_DB, "admin", "");
+        xqs = root.getService(XQueryService.class);
+
+        xqs.query(
+            """
+            let $words := for $i in (1 to 50000) return <w xml:id="{$i}">{$i}</w>
+            return xmldb:store('/db', 'words-large.xml', document {<words>{$words}</words>})
+            """);
+    }
+
+    /**
+     * Removes the corpus document, closes the test collection, and shuts down
+     * the embedded server.
+     *
+     * @throws Exception if removing the corpus document, closing the
+     *     collection, or stopping the embedded server fails
+     */
+    @TearDown(Level.Trial)
+    public void tearDown() throws Exception {
+        try {
+            xqs.query("xmldb:remove('/db', 'words-large.xml')");
+        } finally {
+            root.close();
+            DatabaseManager.deregisterDatabase(database);
+            existServer.stopDb(true);
+        }
+    }
+
+    /**
+     * Wildcard preceding axis with a positional predicate gated by a self::w
+     * filter. Pre-fix this accumulated every preceding match from doc start;
+     * post-fix the sliding window caps retention at K=5.
+     *
+     * @return the result-set size, returned so JMH's blackhole prevents the
+     *     call being optimized away
+     * @throws Exception if the embedded query fails
+     */
+    @Benchmark
+    public long wildcardPrecedingWithPositionalPredicate() throws Exception {
+        return xqs.query(
+            ("""
+            xquery version "3.1";
+            let $w := doc('%s')//w[@xml:id='%d']
+            return for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text()
+            """).formatted(LARGE_DOC, refPosition)
+        ).getSize();
+    }
+
+    /**
+     * preceding-sibling::w[K] baseline: walks the persistent sibling chain
+     * directly rather than the full preceding axis. Used as a relative
+     * lower-bound to interpret the wildcard preceding number.
+     *
+     * @return the result-set size, returned so JMH's blackhole prevents the
+     *     call being optimized away
+     * @throws Exception if the embedded query fails
+     */
+    @Benchmark
+    public long precedingSiblingBaseline() throws Exception {
+        return xqs.query(
+            ("""
+            xquery version "3.1";
+            let $w := doc('%s')//w[@xml:id='%d']
+            return for $i in (1 to 5) return $w/preceding-sibling::w[$i]/text()
+            """).formatted(LARGE_DOC, refPosition)
+        ).getSize();
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/LocationStep.java b/exist-core/src/main/java/org/exist/xquery/LocationStep.java
index b8936cbd1c2..ceaa2699cb4 100644
--- a/exist-core/src/main/java/org/exist/xquery/LocationStep.java
+++ b/exist-core/src/main/java/org/exist/xquery/LocationStep.java
@@ -38,6 +38,8 @@
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.Deque;
 
 /**
  * Processes all location path steps (like descendant::*, ancestor::XXX).
@@ -920,7 +922,7 @@ private Sequence getPrecedingOrFollowing(final XQueryContext context, final Sequ
                 final NodeProxy root = new NodeProxy(this, node);
                 final StreamFilter filter;
                 if (axis == Constants.PRECEDING_AXIS) {
-                    filter = new PrecedingFilter(test, root, next, result, contextId);
+                    filter = new PrecedingFilter(test, root, next, result, contextId, position);
                 } else {
                     filter = new FollowingFilter(test, root, next, result, contextId, position);
                 }
@@ -1417,12 +1419,19 @@ public boolean accept(final XMLStreamReader reader) {
     private class PrecedingFilter extends AbstractFilterBase {
         final NodeProxy root;
         final NodeProxy referenceNode;
+        // Sliding window of the most recent {@code limit} matches. Non-null only
+        // when limit > 0 (positional predicate {@code [K]} present). 
The K-th
+        // preceding element in axis order is the (K-th-from-end) match in doc
+        // order, so any match earlier than the K most recent cannot be selected
+        // and may be discarded as new ones are found.
+        final Deque<NodeProxy> window;
 
         PrecedingFilter(final NodeTest test, final NodeProxy root, final NodeProxy referenceNode, final NodeSet result,
-                final int contextId) {
-            super(test, result, contextId, -1);
+                final int contextId, final int limit) {
+            super(test, result, contextId, limit);
             this.root = root;
             this.referenceNode = referenceNode;
+            this.window = limit > 0 ? new ArrayDeque<>(Math.min(limit, 16)) : null; // K is query-supplied: never pre-size by it, grow on demand
         }
 
         @Override
@@ -1431,11 +1440,16 @@ public boolean accept(final XMLStreamReader reader) {
 
             if (reader.getEventType() == XMLStreamReader.END_ELEMENT) {
                 // exited the root element, so stop filtering
-                return currentId.getTreeLevel() != root.getNodeId().getTreeLevel();
+                if (currentId.getTreeLevel() == root.getNodeId().getTreeLevel()) {
+                    flushWindow();
+                    return false;
+                }
+                return true;
             }
 
             final NodeId refId = referenceNode.getNodeId();
             if (currentId.compareTo(refId) >= 0) {
+                flushWindow();
                 return false;
             }
 
@@ -1449,10 +1463,31 @@ public boolean accept(final XMLStreamReader reader) {
                         proxy.addContextNode(contextId, referenceNode);
                     }
                 }
-                result.add(proxy);
+                if (window != null) {
+                    // retain only the `limit` most recent matches; anything older can no longer be the K-th preceding node
+                    if (window.size() == limit) {
+                        window.pollFirst();
+                    }
+                    window.addLast(proxy);
+                } else {
+                    result.add(proxy);
+                }
             }
             return true;
         }
+
+        /**
+         * Copies the retained matches into {@code result}, oldest first, and empties the window.
+         * Does nothing when no window is in use ({@code limit <= 0}) or the window is already empty.
+         */
+        private void flushWindow() {
+            if (window != null) {
+                for (final NodeProxy proxy : window) {
+                    result.add(proxy);
+                }
+                window.clear();
+            }
+        }
     }
 
 }
diff --git a/exist-core/src/test/xquery/optimizer/positional.xqm b/exist-core/src/test/xquery/optimizer/positional.xqm
index cdbbcd37fb0..fb76e9aa24b 100644
--- a/exist-core/src/test/xquery/optimizer/positional.xqm
+++ b/exist-core/src/test/xquery/optimizer/positional.xqm
@@ -176,7 +176,7 @@ function ot:optimize-simple-following-nested() {
 
 declare
     %test:stats
-    %test:assertXPath("not($result//stats:optimization[@type eq 
'POSITIONAL_PREDICATE'])")
+    %test:assertXPath("$result//stats:optimization[@type eq 'POSITIONAL_PREDICATE']")
 function ot:optimize-simple-preceding() {
     let $w := doc($ot:DOC)//w[@xml:id='25000']
     return
diff --git a/exist-core/src/test/xquery/preceding-axis.xql b/exist-core/src/test/xquery/preceding-axis.xql
new file mode 100644
index 00000000000..36f9e178999
--- /dev/null
+++ b/exist-core/src/test/xquery/preceding-axis.xql
@@ -0,0 +1,146 @@
+(:
+ : eXist-db Open Source Native XML Database
+ : Copyright (C) 2001 The eXist-db Authors
+ :
+ : info@exist-db.org
+ : http://www.exist-db.org
+ :
+ : This library is free software; you can redistribute it and/or
+ : modify it under the terms of the GNU Lesser General Public
+ : License as published by the Free Software Foundation; either
+ : version 2.1 of the License, or (at your option) any later version.
+ :
+ : This library is distributed in the hope that it will be useful,
+ : but WITHOUT ANY WARRANTY; without even the implied warranty of
+ : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ : Lesser General Public License for more details.
+ :
+ : You should have received a copy of the GNU Lesser General Public
+ : License along with this library; if not, write to the Free Software
+ : Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ :)
+xquery version "3.1";
+
+(:~
+ : Correctness regression tests for the {@code preceding::*} half of issue #2129.
+ : The K-bounded sliding window in {@code LocationStep.PrecedingFilter} must
+ : preserve the original axis semantics: ancestor exclusion, axis-order
+ : positional predicates, and identical result counts/values regardless of
+ : where the reference node sits in the document.
+ :
+ : The companion performance benchmark lives in
+ : {@code exist-core-jmh/.../PrecedingAxisBenchmark.java}.
+ :)
+module namespace pa = "http://exist-db.org/xquery/test/preceding-axis";
+
+declare namespace test = "http://exist-db.org/xquery/xqsuite";
+
+declare variable $pa:collection := "/db/preceding-axis-test";
+declare variable $pa:small-doc := $pa:collection || "/words-small.xml";
+declare variable $pa:tiny-doc := $pa:collection || "/tiny.xml";
+declare variable $pa:ancestors-doc := $pa:collection || "/ancestors.xml";
+
+(:~
+ : Creates the test collection and stores the three fixture documents:
+ : a 50-element and a 10-element flat list of {@code w} elements, and a
+ : small nested tree used to check ancestor exclusion.
+ :)
+declare
+    %test:setUp
+function pa:setup() {
+    let $col := xmldb:create-collection("/db", "preceding-axis-test")
+    let $small :=
+        document {
+            <words>{
+                for $i in (1 to 50) return <w xml:id="{$i}">{$i}</w>
+            }</words>
+        }
+    let $tiny :=
+        document {
+            <words>{
+                for $i in (1 to 10) return <w xml:id="{$i}">{$i}</w>
+            }</words>
+        }
+    let $ancestors :=
+        document {
+            <root><parent><a/><target/></parent></root>
+        }
+    return (
+        xmldb:store($col, "words-small.xml", $small),
+        xmldb:store($col, "tiny.xml", $tiny),
+        xmldb:store($col, "ancestors.xml", $ancestors)
+    )
+};
+
+(:~
+ : Removes the test collection created by {@code pa:setup}.
+ :)
+declare
+    %test:tearDown
+function pa:cleanup() {
+    xmldb:remove($pa:collection)
+};
+
+(:~
+ : The three {@code reproducer-output-*} tests run the same reproducer query
+ : from an early, a middle and a late reference position in the 50-element
+ : document; each must return the five {@code w} values immediately preceding
+ : the reference node, in document order.
+ :)
+declare
+    %test:assertEquals("5 6 7 8 9")
+function pa:reproducer-output-at-early-position() {
+    let $w := doc($pa:small-doc)//w[@xml:id = "10"]
+    return
+        string-join(
+            for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
+            " "
+        )
+};
+
+declare
+    %test:assertEquals("20 21 22 23 24")
+function pa:reproducer-output-at-midpoint() {
+    let $w := doc($pa:small-doc)//w[@xml:id = "25"]
+    return
+        string-join(
+            for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
+            " "
+        )
+};
+
+declare
+    %test:assertEquals("40 41 42 43 44")
+function pa:reproducer-output-at-late-position() {
+    let $w := doc($pa:small-doc)//w[@xml:id = "45"]
+    return
+        string-join(
+            for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
+            " "
+        )
+};
+
+(:~
+ : {@code preceding::} excludes ancestors of the reference node even though
+ : they precede the reference in document order. Only the sibling {@code a}
+ : precedes {@code target} on the preceding axis.
+ :)
+declare
+    %test:assertEquals("a")
+function pa:preceding-excludes-ancestors() {
+    for $n in doc($pa:ancestors-doc)//target/preceding::*
+    return name($n)
+};
+
+(:~
+ : From {@code w[5]} on a flat doc of 10 elements, the k-th preceding element
+ : in axis order (reverse document order) is {@code w[5-k]}.
+ :)
+declare
+    %test:assertEquals("4", "3", "2", "1")
+function pa:preceding-positional-predicate-returns-kth-closest() {
+    let $w := doc($pa:tiny-doc)//w[@xml:id = "5"]
+    return
+        for $k in 1 to 4
+        return $w/preceding::*[$k]/text()
+};