Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions exist-core-jmh/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>net.sf.xmldb-org</groupId>
<artifactId>xmldb-api</artifactId>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* info@exist-db.org
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.xquery;

import org.exist.test.ExistEmbeddedServer;
import org.exist.xmldb.XmldbURI;
import org.openjdk.jmh.annotations.*;
import org.xmldb.api.DatabaseManager;
import org.xmldb.api.base.Collection;
import org.xmldb.api.base.Database;
import org.xmldb.api.modules.XQueryService;

import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark for the {@code preceding::*} half of issue #2129.
 *
 * <p>craigberry's #2129 follow-up reproduced position-dependence for the
 * wildcard preceding axis on a 50,000-element flat document: a query at
 * {@code @xml:id='45000'} took roughly twice as long as the same query at
 * {@code @xml:id='25000'}. The K-bounded sliding window in
 * {@code LocationStep.PrecedingFilter} caps the retained match set at K,
 * eliminating the unbounded accumulation that produced the late-position
 * tax.</p>
 *
 * <p>This benchmark exercises the wildcard-vs-sibling and early-vs-late
 * comparisons that the original mixed-purpose JUnit class measured with
 * {@code System.nanoTime} and median-of-N. JMH handles statistical
 * aggregation natively; the correctness assertions live in the
 * companion XQSuite test {@code exist-core/src/test/xquery/preceding-axis.xql}.</p>
 *
 * <p>Every benchmark method runs once per {@link #refPosition} value, so the
 * early (5000), middle (25000) and late (45000) reference positions are
 * reported side by side.</p>
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 2)
@Fork(1)
public class PrecedingAxisBenchmark {

    /** Collection the fixture document is stored in. */
    private static final String COLLECTION = "/db";

    /** Resource name of the fixture document inside {@link #COLLECTION}. */
    private static final String DOC_NAME = "words-large.xml";

    /** Full database path of the fixture document, as passed to {@code doc()}. */
    private static final String LARGE_DOC = COLLECTION + "/" + DOC_NAME;

    /**
     * {@code xml:id} of the reference {@code <w>} element. The fixture holds
     * ids 1 to 50000, so these values sample early, middle and late positions.
     */
    @Param({"5000", "25000", "45000"})
    private int refPosition;

    private ExistEmbeddedServer existServer;
    private Database database;
    private Collection root;
    private XQueryService xqs;

    /**
     * Starts the embedded database, registers the XML:DB driver and stores a
     * flat document of 50,000 {@code <w>} elements once per trial.
     *
     * @throws Exception if the database cannot be started or the fixture cannot be stored
     */
    @Setup(Level.Trial)
    public void setUp() throws Exception {
        existServer = new ExistEmbeddedServer(true, true);
        existServer.startDb();

        final Class<?> cl = Class.forName("org.exist.xmldb.DatabaseImpl");
        database = (Database) cl.getDeclaredConstructor().newInstance();
        database.setProperty("create-database", "true");
        DatabaseManager.registerDatabase(database);
        root = DatabaseManager.getCollection(XmldbURI.LOCAL_DB, "admin", "");
        xqs = root.getService(XQueryService.class);

        xqs.query(
                """
                let $words := for $i in (1 to 50000) return <w xml:id="{$i}">{$i}</w>
                return xmldb:store('%s', '%s', document {<words>{$words}</words>})
                """.formatted(COLLECTION, DOC_NAME));
    }

    /**
     * Removes the fixture document and releases everything acquired in
     * {@link #setUp()}.
     *
     * <p>The release steps are nested in {@code finally} blocks so that a
     * failure in one step (removing the document, closing the collection,
     * deregistering the driver) cannot prevent the later steps from running;
     * in particular the embedded database is always stopped.</p>
     *
     * @throws Exception the first failure raised by any of the cleanup steps
     */
    @TearDown(Level.Trial)
    public void tearDown() throws Exception {
        try {
            xqs.query("xmldb:remove('%s', '%s')".formatted(COLLECTION, DOC_NAME));
        } finally {
            try {
                root.close();
            } finally {
                try {
                    DatabaseManager.deregisterDatabase(database);
                } finally {
                    existServer.stopDb(true);
                }
            }
        }
    }

    /**
     * Wildcard preceding axis with a positional predicate gated by a self::w
     * filter. Pre-fix this accumulated every preceding match from doc start;
     * post-fix the sliding window caps retention at the predicate position,
     * which ranges from 5 down to 1 across the five iterations.
     *
     * @return the number of items in the query result
     * @throws Exception if the query fails
     */
    @Benchmark
    public long wildcardPrecedingWithPositionalPredicate() throws Exception {
        return resultSize(
                """
                xquery version "3.1";
                let $w := doc('%s')//w[@xml:id='%d']
                return for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text()
                """);
    }

    /**
     * preceding-sibling::w[K] baseline: walks the persistent sibling chain
     * directly rather than the full preceding axis. Used as a relative
     * lower-bound to interpret the wildcard preceding number.
     *
     * @return the number of items in the query result
     * @throws Exception if the query fails
     */
    @Benchmark
    public long precedingSiblingBaseline() throws Exception {
        return resultSize(
                """
                xquery version "3.1";
                let $w := doc('%s')//w[@xml:id='%d']
                return for $i in (1 to 5) return $w/preceding-sibling::w[$i]/text()
                """);
    }

    /**
     * Fills the document path and the current reference position into the
     * query template, runs it, and returns the result size so that JMH
     * consumes a value derived from the query.
     *
     * @param queryTemplate XQuery text with one {@code %s} (document path)
     *                      followed by one {@code %d} (reference {@code xml:id})
     * @return the number of items in the query result
     * @throws Exception if the query fails
     */
    private long resultSize(final String queryTemplate) throws Exception {
        return xqs.query(queryTemplate.formatted(LARGE_DOC, refPosition)).getSize();
    }
}
40 changes: 35 additions & 5 deletions exist-core/src/main/java/org/exist/xquery/LocationStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Deque;

/**
* Processes all location path steps (like descendant::*, ancestor::XXX).
Expand Down Expand Up @@ -920,7 +922,7 @@ private Sequence getPrecedingOrFollowing(final XQueryContext context, final Sequ
final NodeProxy root = new NodeProxy(this, node);
final StreamFilter filter;
if (axis == Constants.PRECEDING_AXIS) {
filter = new PrecedingFilter(test, root, next, result, contextId);
filter = new PrecedingFilter(test, root, next, result, contextId, position);
} else {
filter = new FollowingFilter(test, root, next, result, contextId, position);
}
Expand Down Expand Up @@ -1417,12 +1419,19 @@ public boolean accept(final XMLStreamReader reader) {
private class PrecedingFilter extends AbstractFilterBase {
final NodeProxy root;
final NodeProxy referenceNode;
// Sliding window of the most recent {@code limit} matches. Non-null only
// when limit > 0 (positional predicate {@code [K]} present). The K-th
// preceding element in axis order is the (K-th-from-end) match in doc
// order, so any match earlier than the K most recent cannot be selected
// and may be discarded as new ones are found.
final Deque<NodeProxy> window;

PrecedingFilter(final NodeTest test, final NodeProxy root, final NodeProxy referenceNode, final NodeSet result,
final int contextId) {
super(test, result, contextId, -1);
final int contextId, final int limit) {
super(test, result, contextId, limit);
this.root = root;
this.referenceNode = referenceNode;
this.window = limit > 0 ? new ArrayDeque<>(limit) : null;
}

@Override
Expand All @@ -1431,11 +1440,16 @@ public boolean accept(final XMLStreamReader reader) {

if (reader.getEventType() == XMLStreamReader.END_ELEMENT) {
// exited the root element, so stop filtering
return currentId.getTreeLevel() != root.getNodeId().getTreeLevel();
if (currentId.getTreeLevel() == root.getNodeId().getTreeLevel()) {
flushWindow();
return false;
}
return true;
}

final NodeId refId = referenceNode.getNodeId();
if (currentId.compareTo(refId) >= 0) {
flushWindow();
return false;
}

Expand All @@ -1449,10 +1463,26 @@ public boolean accept(final XMLStreamReader reader) {
proxy.addContextNode(contextId, referenceNode);
}
}
result.add(proxy);
if (window != null) {
if (window.size() == limit) {
window.pollFirst();
}
window.addLast(proxy);
} else {
result.add(proxy);
}
}
return true;
}

private void flushWindow() {
if (window != null) {
for (final NodeProxy proxy : window) {
result.add(proxy);
}
window.clear();
}
}
}

}
2 changes: 1 addition & 1 deletion exist-core/src/test/xquery/optimizer/positional.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ function ot:optimize-simple-following-nested() {

declare
%test:stats
%test:assertXPath("not($result//stats:optimization[@type eq 'POSITIONAL_PREDICATE'])")
%test:assertXPath("$result//stats:optimization[@type eq 'POSITIONAL_PREDICATE']")
function ot:optimize-simple-preceding() {
let $w := doc($ot:DOC)//w[@xml:id='25000']
return
Expand Down
132 changes: 132 additions & 0 deletions exist-core/src/test/xquery/preceding-axis.xql
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
(:
: eXist-db Open Source Native XML Database
: Copyright (C) 2001 The eXist-db Authors
:
: info@exist-db.org
: http://www.exist-db.org
:
: This library is free software; you can redistribute it and/or
: modify it under the terms of the GNU Lesser General Public
: License as published by the Free Software Foundation; either
: version 2.1 of the License, or (at your option) any later version.
:
: This library is distributed in the hope that it will be useful,
: but WITHOUT ANY WARRANTY; without even the implied warranty of
: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
: Lesser General Public License for more details.
:
: You should have received a copy of the GNU Lesser General Public
: License along with this library; if not, write to the Free Software
: Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
:)
xquery version "3.1";

(:~
: Correctness regression tests for the {@code preceding::*} half of issue #2129.
: The K-bounded sliding window in {@code LocationStep.PrecedingFilter} must
: preserve the original axis semantics: ancestor exclusion, axis-order
: positional predicates, and identical result counts/values regardless of
: where the reference node sits in the document.
:
: The companion performance benchmark lives in
: {@code exist-core-jmh/.../PrecedingAxisBenchmark.java}.
:)
module namespace pa = "http://exist-db.org/xquery/test/preceding-axis";

declare namespace test = "http://exist-db.org/xquery/xqsuite";

declare variable $pa:collection := "/db/preceding-axis-test";
declare variable $pa:small-doc := $pa:collection || "/words-small.xml";
declare variable $pa:tiny-doc := $pa:collection || "/tiny.xml";
declare variable $pa:ancestors-doc := $pa:collection || "/ancestors.xml";

(:~
 : Creates the {@code /db/preceding-axis-test} collection and stores the three
 : fixture documents used by the tests in this module:
 : {@code words-small.xml} (50 flat {@code w} elements), {@code tiny.xml}
 : (10 flat {@code w} elements) and {@code ancestors.xml} (a nested chain
 : ending in {@code target}, with a single empty {@code a} element before it).
 :)
declare
    %test:setUp
function pa:setup() {
    (: builds <words> holding <w xml:id="n">n</w> for n = 1 .. $count :)
    let $flat-words := function($count as xs:integer) as document-node() {
        document {
            <words>{
                for $n in (1 to $count)
                return <w xml:id="{$n}">{$n}</w>
            }</words>
        }
    }
    let $nested :=
        document {
            <root><a/><b><c><d><target/></d></c></b></root>
        }
    let $target := xmldb:create-collection("/db", "preceding-axis-test")
    return (
        xmldb:store($target, "words-small.xml", $flat-words(50)),
        xmldb:store($target, "tiny.xml", $flat-words(10)),
        xmldb:store($target, "ancestors.xml", $nested)
    )
};

(:~
 : Removes the test collection {@code $pa:collection}, which holds the
 : fixture documents stored by {@code pa:setup}.
 :)
declare
%test:tearDown
function pa:cleanup() {
xmldb:remove($pa:collection)
};

(:~
 : Issue #2129 reproducer query with the reference node early in the
 : 50-element document ({@code w[@xml:id = "10"]}).
 :
 : For {@code $i} from 1 to 5 the positional predicate evaluates to
 : 5, 4, 3, 2, 1, i.e. the 5th-closest down to the closest preceding element,
 : so the joined text values are expected in ascending order: 5 6 7 8 9.
 : The path expression is kept in exactly this shape because it is the
 : query the regression covers.
 :)
declare
%test:assertEquals("5 6 7 8 9")
function pa:reproducer-output-at-early-position() {
let $w := doc($pa:small-doc)//w[@xml:id = "10"]
return
string-join(
for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
" "
)
};

(:~
 : Issue #2129 reproducer query with the reference node in the middle of the
 : 50-element document ({@code w[@xml:id = "25"]}).
 :
 : The predicate positions run 5, 4, 3, 2, 1, so the expected joined text
 : values are the five elements directly before the reference node:
 : 20 21 22 23 24.
 :)
declare
%test:assertEquals("20 21 22 23 24")
function pa:reproducer-output-at-midpoint() {
let $w := doc($pa:small-doc)//w[@xml:id = "25"]
return
string-join(
for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
" "
)
};

(:~
 : Issue #2129 reproducer query with the reference node late in the
 : 50-element document ({@code w[@xml:id = "45"]}).
 :
 : The predicate positions run 5, 4, 3, 2, 1, so the expected joined text
 : values are the five elements directly before the reference node:
 : 40 41 42 43 44. Together with the early and midpoint variants this checks
 : that the result does not depend on where the reference node sits.
 :)
declare
%test:assertEquals("40 41 42 43 44")
function pa:reproducer-output-at-late-position() {
let $w := doc($pa:small-doc)//w[@xml:id = "45"]
return
string-join(
for $i in (1 to 5) return $w/preceding::*[5 + 1 - $i][self::w]/text(),
" "
)
};

(:~
 : {@code preceding::} excludes ancestors of the reference node even though
 : they precede the reference in document order.
 :
 : In the fixture {@code <root><a/><b><c><d><target/></d></c></b></root>}
 : the elements {@code root}, {@code b}, {@code c} and {@code d} are all
 : ancestors of {@code target}. The only element that ends before
 : {@code target} starts is {@code a} (a sibling of the ancestor {@code b}),
 : so it is the single expected name.
 :)
declare
%test:assertEquals("a")
function pa:preceding-excludes-ancestors() {
for $n in doc($pa:ancestors-doc)//target/preceding::*
return name($n)
};

(:~
 : From {@code w[5]} on a flat doc of 10 elements, the k-th preceding element
 : in axis order (reverse document order) is {@code w[5-k]}.
 :
 : {@code $k} runs from 1 to 4, covering every preceding {@code w}; the
 : enclosing {@code words} element is an ancestor and is not on the axis.
 : The result is a sequence of four separate text nodes, asserted item by
 : item as "4", "3", "2", "1".
 :)
declare
%test:assertEquals("4", "3", "2", "1")
function pa:preceding-positional-predicate-returns-kth-closest() {
let $w := doc($pa:tiny-doc)//w[@xml:id = "5"]
return
for $k in 1 to 4
return $w/preceding::*[$k]/text()
};