Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions exist-core-jmh/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>net.sf.xmldb-org</groupId>
<artifactId>xmldb-api</artifactId>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* info@exist-db.org
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.xquery;

import org.exist.test.ExistEmbeddedServer;
import org.exist.xmldb.XmldbURI;
import org.openjdk.jmh.annotations.*;
import org.xmldb.api.DatabaseManager;
import org.xmldb.api.base.Collection;
import org.xmldb.api.base.Database;
import org.xmldb.api.modules.XQueryService;

import java.util.concurrent.TimeUnit;

/**
* JMH benchmark for the {@code preceding::*} half of issue #2129.
*
* <p>craigberry's #2129 follow-up reproduced position-dependence for the
* wildcard preceding axis on a 50,000-element flat document: a query at
* {@code @xml:id='45000'} took roughly twice as long as the same query at
* {@code @xml:id='25000'}. The K-bounded sliding window in
* {@code LocationStep.PrecedingFilter} caps the retained match set at K,
* eliminating the unbounded accumulation that produced the late-position
* tax.</p>
*
* <p>This benchmark exercises the wildcard-vs-sibling and early-vs-late
* comparisons that the original mixed-purpose JUnit class measured with
* {@code System.nanoTime} and median-of-N. JMH handles statistical
* aggregation natively; the correctness assertions live in the
* companion XQSuite test {@code exist-core/src/test/xquery/preceding-axis.xql}.</p>
*/
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 2)
@Fork(1)
public class PrecedingAxisBenchmark {

private static final String LARGE_DOC = "/db/words-large.xml";

@Param({"5000", "25000", "45000"})
private int refPosition;

private ExistEmbeddedServer existServer;
private Database database;
private Collection root;
private XQueryService xqs;

/** Default constructor for JMH harness. */
public PrecedingAxisBenchmark() {

Check notice on line 70 in exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

exist-core-jmh/src/main/java/org/exist/xquery/PrecedingAxisBenchmark.java#L70

Avoid unnecessary constructors - the compiler will generate these for you
}

/**
* Boots an embedded eXist server, registers the XML:DB driver, and stores
* a 50,000-element flat words document used by the benchmark queries.
*
* @throws Exception if the embedded server fails to start, the database
* driver cannot be registered, or the corpus document cannot be stored
*/
@Setup(Level.Trial)
public void setUp() throws Exception {
existServer = new ExistEmbeddedServer(true, true);
existServer.startDb();

final Class<?> cl = Class.forName("org.exist.xmldb.DatabaseImpl");
database = (Database) cl.getDeclaredConstructor().newInstance();
database.setProperty("create-database", "true");
DatabaseManager.registerDatabase(database);
root = DatabaseManager.getCollection(XmldbURI.LOCAL_DB, "admin", "");
xqs = root.getService(XQueryService.class);

xqs.query(
"""
let $words := for $i in (1 to 50000) return <w xml:id="{$i}">{$i}</w>
return xmldb:store('/db', 'words-large.xml', document {<words>{$words}</words>})
""");
}

/**
* Removes the corpus document, closes the test collection, and shuts down
* the embedded server.
*
* @throws Exception if removing the corpus document, closing the
* collection, or stopping the embedded server fails
*/
@TearDown(Level.Trial)
public void tearDown() throws Exception {
try {
xqs.query("xmldb:remove('/db', 'words-large.xml')");
} finally {
root.close();
DatabaseManager.deregisterDatabase(database);
existServer.stopDb(true);
}
}

/**
* Wildcard preceding axis with a positional predicate gated by a self::w
* filter. Pre-fix this accumulated every preceding match from doc start;
* post-fix the sliding window caps retention at K=5.
*
* @return the result-set size, returned so JMH's blackhole prevents the
* call being optimized away
* @throws Exception if the embedded query fails
*/
@Benchmark
public long wildcardPrecedingWithPositionalPredicate() throws Exception {
return xqs.query(
("""
xquery version "3.1";
let $w := doc('%s')//w[@xml:id='%d']
return for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text()
""").formatted(LARGE_DOC, refPosition)
).getSize();
}

/**
* preceding-sibling::w[K] baseline: walks the persistent sibling chain
* directly rather than the full preceding axis. Used as a relative
* lower-bound to interpret the wildcard preceding number.
*
* @return the result-set size, returned so JMH's blackhole prevents the
* call being optimized away
* @throws Exception if the embedded query fails
*/
@Benchmark
public long precedingSiblingBaseline() throws Exception {
return xqs.query(
("""
xquery version "3.1";
let $w := doc('%s')//w[@xml:id='%d']
return for $i in (1 to 5) return $w/preceding-sibling::w[$i]/text()
""").formatted(LARGE_DOC, refPosition)
).getSize();
}
}
40 changes: 35 additions & 5 deletions exist-core/src/main/java/org/exist/xquery/LocationStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;

/**
* Processes all location path steps (like descendant::*, ancestor::XXX).
Expand Down Expand Up @@ -920,7 +922,7 @@ private Sequence getPrecedingOrFollowing(final XQueryContext context, final Sequ
final NodeProxy root = new NodeProxy(this, node);
final StreamFilter filter;
if (axis == Constants.PRECEDING_AXIS) {
filter = new PrecedingFilter(test, root, next, result, contextId);
filter = new PrecedingFilter(test, root, next, result, contextId, position);
} else {
filter = new FollowingFilter(test, root, next, result, contextId, position);
}
Expand Down Expand Up @@ -1417,12 +1419,19 @@ public boolean accept(final XMLStreamReader reader) {
private class PrecedingFilter extends AbstractFilterBase {
final NodeProxy root;
final NodeProxy referenceNode;
// Sliding window of the most recent {@code limit} matches. Non-null only
// when limit > 0 (positional predicate {@code [K]} present). The K-th
// preceding element in axis order is the (K-th-from-end) match in doc
// order, so any match earlier than the K most recent cannot be selected
// and may be discarded as new ones are found.
final Deque<NodeProxy> window;

PrecedingFilter(final NodeTest test, final NodeProxy root, final NodeProxy referenceNode, final NodeSet result,
final int contextId) {
super(test, result, contextId, -1);
final int contextId, final int limit) {
super(test, result, contextId, limit);
this.root = root;
this.referenceNode = referenceNode;
this.window = limit > 0 ? new ArrayDeque<>(limit) : null;
}

@Override
Expand All @@ -1431,11 +1440,16 @@ public boolean accept(final XMLStreamReader reader) {

if (reader.getEventType() == XMLStreamReader.END_ELEMENT) {
// exited the root element, so stop filtering
return currentId.getTreeLevel() != root.getNodeId().getTreeLevel();
if (currentId.getTreeLevel() == root.getNodeId().getTreeLevel()) {
flushWindow();
return false;
}
return true;
}

final NodeId refId = referenceNode.getNodeId();
if (currentId.compareTo(refId) >= 0) {
flushWindow();
return false;
}

Expand All @@ -1449,10 +1463,26 @@ public boolean accept(final XMLStreamReader reader) {
proxy.addContextNode(contextId, referenceNode);
}
}
result.add(proxy);
if (window != null) {
if (window.size() == limit) {
window.pollFirst();
}
window.addLast(proxy);
} else {
result.add(proxy);
}
}
return true;
}

/**
 * Moves every proxy currently buffered in the sliding window into
 * {@code result}, in the window's iteration order (first to last), and
 * then empties the window. Does nothing when no window is in use, i.e.
 * when {@code window} is {@code null}.
 */
private void flushWindow() {
    if (window == null) {
        return;
    }
    for (final NodeProxy buffered : window) {
        result.add(buffered);
    }
    window.clear();
}
}

}
2 changes: 1 addition & 1 deletion exist-core/src/test/xquery/optimizer/positional.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ function ot:optimize-simple-following-nested() {

declare
%test:stats
%test:assertXPath("not($result//stats:optimization[@type eq 'POSITIONAL_PREDICATE'])")
%test:assertXPath("$result//stats:optimization[@type eq 'POSITIONAL_PREDICATE']")
function ot:optimize-simple-preceding() {
let $w := doc($ot:DOC)//w[@xml:id='25000']
return
Expand Down
132 changes: 132 additions & 0 deletions exist-core/src/test/xquery/preceding-axis.xql
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
(:
: eXist-db Open Source Native XML Database
: Copyright (C) 2001 The eXist-db Authors
:
: info@exist-db.org
: http://www.exist-db.org
:
: This library is free software; you can redistribute it and/or
: modify it under the terms of the GNU Lesser General Public
: License as published by the Free Software Foundation; either
: version 2.1 of the License, or (at your option) any later version.
:
: This library is distributed in the hope that it will be useful,
: but WITHOUT ANY WARRANTY; without even the implied warranty of
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
: Lesser General Public License for more details.
:
: You should have received a copy of the GNU Lesser General Public
: License along with this library; if not, write to the Free Software
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
:)
xquery version "3.1";

(:~
: Correctness regression tests for the {@code preceding::*} half of issue #2129.
: The K-bounded sliding window in {@code LocationStep.PrecedingFilter} must
: preserve the original axis semantics: ancestor exclusion, axis-order
: positional predicates, and identical result counts/values regardless of
: where the reference node sits in the document.
:
: The companion performance benchmark lives in
: {@code exist-core-jmh/.../PrecedingAxisBenchmark.java}.
:)
module namespace pa = "http://exist-db.org/xquery/test/preceding-axis";

declare namespace test = "http://exist-db.org/xquery/xqsuite";

declare variable $pa:collection := "/db/preceding-axis-test";
declare variable $pa:small-doc := $pa:collection || "/words-small.xml";
declare variable $pa:tiny-doc := $pa:collection || "/tiny.xml";
declare variable $pa:ancestors-doc := $pa:collection || "/ancestors.xml";

(:~
 : Creates the {@code /db/preceding-axis-test} collection and stores the three
 : fixture documents used by this module: a flat 50-element words document,
 : a flat 10-element words document, and a small nested document whose
 : {@code target} element has three ancestors below the root.
 :
 : @return the paths returned by the three {@code xmldb:store} calls
 :)
declare
    %test:setUp
function pa:setup() {
    let $target-collection := xmldb:create-collection("/db", "preceding-axis-test")
    let $fifty-words :=
        document {
            <words>{
                for $n in (1 to 50) return <w xml:id="{$n}">{$n}</w>
            }</words>
        }
    let $ten-words :=
        document {
            <words>{
                for $n in (1 to 10) return <w xml:id="{$n}">{$n}</w>
            }</words>
        }
    let $nested :=
        document {
            <root><a/><b><c><d><target/></d></c></b></root>
        }
    return (
        xmldb:store($target-collection, "words-small.xml", $fifty-words),
        xmldb:store($target-collection, "tiny.xml", $ten-words),
        xmldb:store($target-collection, "ancestors.xml", $nested)
    )
};

(:~
 : Removes the fixture collection named by {@code $pa:collection}, and with it
 : every document stored by the set-up function.
 :)
declare
    %test:tearDown
function pa:cleanup() {
    xmldb:remove($pa:collection)
};

(:~
 : Issue #2129 reproducer with the reference node near the start of the
 : 50-element document ({@code xml:id = "10"}). For {@code $i} in 1 to 5 the
 : predicate position is 5, 4, 3, 2, 1, i.e. the 5th-closest down to the
 : closest preceding element, so the joined text values are expected in
 : ascending document order: 5 6 7 8 9.
 :)
declare
%test:assertEquals("5 6 7 8 9")
function pa:reproducer-output-at-early-position() {
let $w := doc($pa:small-doc)//w[@xml:id = "10"]
return
string-join(
(: keep this path expression exactly as in the reproducer :)
for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
" "
)
};

(:~
 : Issue #2129 reproducer with the reference node in the middle of the
 : 50-element document ({@code xml:id = "25"}). The five closest preceding
 : elements are selected from 5th-closest to closest, so the joined text
 : values are expected to be 20 21 22 23 24.
 :)
declare
%test:assertEquals("20 21 22 23 24")
function pa:reproducer-output-at-midpoint() {
let $w := doc($pa:small-doc)//w[@xml:id = "25"]
return
string-join(
(: keep this path expression exactly as in the reproducer :)
for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
" "
)
};

(:~
 : Issue #2129 reproducer with the reference node near the end of the
 : 50-element document ({@code xml:id = "45"}). The five closest preceding
 : elements are selected from 5th-closest to closest, so the joined text
 : values are expected to be 40 41 42 43 44.
 :)
declare
%test:assertEquals("40 41 42 43 44")
function pa:reproducer-output-at-late-position() {
let $w := doc($pa:small-doc)//w[@xml:id = "45"]
return
string-join(
(: keep this path expression exactly as in the reproducer :)
for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
" "
)
};

(:~
 : {@code preceding::} excludes ancestors of the reference node even though
 : they precede the reference in document order. In the fixture
 : {@code <root><a/><b><c><d><target/></d></c></b></root>} the elements
 : {@code root}, {@code b}, {@code c} and {@code d} are all ancestors of
 : {@code target}, so only {@code a} is on its preceding axis.
 :
 : This query has no positional predicate on the preceding step.
 :)
declare
%test:assertEquals("a")
function pa:preceding-excludes-ancestors() {
for $n in doc($pa:ancestors-doc)//target/preceding::*
return name($n)
};

(:~
 : From {@code w[5]} on a flat doc of 10 elements, the k-th preceding element
 : in axis order (reverse document order) is {@code w[5-k]}. Iterating
 : {@code $k} from 1 to 4 therefore yields the text values 4, 3, 2, 1, one
 : item per iteration.
 :)
declare
%test:assertEquals("4", "3", "2", "1")
function pa:preceding-positional-predicate-returns-kth-closest() {
let $w := doc($pa:tiny-doc)//w[@xml:id = "5"]
return
for $k in 1 to 4
(: $k is the position on the reverse axis: 1 = closest preceding element :)
return $w/preceding::*[$k]/text()
};