diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7571644..23ee452 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,4 +30,4 @@ jobs:
         run: sbt -v assembly
       - name: Test
         shell: bash
-        run: target/scala-2.13/exist-xqts-runner-assembly-*-SNAPSHOT.jar --xqts-version HEAD --test-set fn-current-date
+        run: java -jar target/scala-2.13/exist-xqts-runner-assembly-*-SNAPSHOT.jar --xqts-version HEAD --test-set fn-current-date
diff --git a/build.sbt b/build.sbt
index abe9c5f..0831fc7 100644
--- a/build.sbt
+++ b/build.sbt
@@ -65,9 +65,11 @@ libraryDependencies ++= {
     "org.parboiled" %% "parboiled" % "2.5.1",
     "org.apache.ant" % "ant-junit" % "1.10.15", // used for formatting junit style report
 
-    "net.sf.saxon" % "Saxon-HE" % "9.9.1-8",
+    "net.sf.saxon" % "Saxon-HE" % "12.5",
     "org.exist-db" % "exist-core" % existV changing(),
     "org.exist-db" % "exist-expath" % existV changing(),
+    "org.exist-db" % "exist-expath-file" % existV changing(),
+    "org.exist-db" % "exist-expath-binary" % existV changing(),
     "org.xmlunit" % "xmlunit-core" % "2.11.0",
 
     "org.slf4j" % "slf4j-api" % "2.0.17",
@@ -77,10 +79,16 @@ libraryDependencies ++= {
 
 autoAPIMappings := true
 
-// we prefer Saxon over Xalan
+// Exclude transitive dependencies the runner doesn't need.
+// Jetty exclusions allow building against both Jetty 11 (develop) and Jetty 12 (next) —
+// Ivy can't resolve Jetty 12 Maven POM constructs, and the runner doesn't use Jetty anyway.
 excludeDependencies ++= Seq(
   ExclusionRule("xalan", "xalan"),
-  ExclusionRule("org.eclipse.jetty.toolchain", "jetty-jakarta-servlet-api"),
+  ExclusionRule("org.eclipse.jetty"),
+  ExclusionRule("org.eclipse.jetty.toolchain"),
+  ExclusionRule("org.eclipse.jetty.websocket"),
+  ExclusionRule("org.eclipse.jetty.ee10"),
+  ExclusionRule("org.eclipse.jetty.ee10.websocket"),
 
   ExclusionRule("org.hamcrest", "hamcrest-core"),
   ExclusionRule("org.hamcrest", "hamcrest-library")
@@ -149,7 +157,10 @@ assembly / assemblyMergeStrategy := {
 // make the assembly executable with basic shell scripts
 import sbtassembly.AssemblyPlugin.defaultUniversalScript
 
-assemblyPrependShellScript := Some(defaultUniversalScript(shebang = false))
+// Skip prepend script in CI — the prepended shell script can corrupt the ZIP
+// central directory offsets on certain platforms, causing "An unexpected error
+// occurred while trying to open file" from the Java launcher.
+assemblyPrependShellScript := (if (sys.env.contains("CI")) None else Some(defaultUniversalScript(shebang = false)))
 
 
 // Add assembly to publish step
diff --git a/run-batched.sh b/run-batched.sh
new file mode 100755
index 0000000..3faf1a2
--- /dev/null
+++ b/run-batched.sh
@@ -0,0 +1,448 @@
+#!/usr/bin/env bash
+#
+# Batch XQTS Runner — runs the exist-xqts-runner JAR in batches to avoid OOM.
+#
+# Each batch runs in a fresh JVM, so thread pool / BrokerPool leaks are
+# cleaned up between batches. JUnit XML results accumulate in a single
+# output directory across batches.
+#
+# Usage:
+#   ./run-batched.sh [OPTIONS]
+#
+# Options:
+#   --xqts-version VERSION   3.1, HEAD, QT4, or FTTS (default: QT4)
+#   --batch-size N           test sets per batch (default: 50)
+#   --heap SIZE              JVM heap size (default: 4g)
+#   --timeout SECS           per-batch timeout in seconds (default: 300)
+#   --output-dir DIR         output directory (default: target)
+#   --test-set-pattern PAT   regex filter for test set names
+#   --exclude-test-set SETS  comma-separated test sets to exclude
+#   --exclude-test-case CASES  comma-separated test cases to exclude (QT4 defaults to
+#                              known OOM-prone op-to cases; pass '' to disable)
+#   --enable-feature FEATS   comma-separated features to enable
+#   --parser NAME            parser passed to the JVM as -Dexist.parser (default: antlr2)
+#   --parallel N             run N batch streams in parallel (default: 1)
+#   --resume                 skip test sets that already have result XML
+#   --dry-run                print batches without running
+#   --                       remaining args passed through to runner JAR
+#
+# Examples:
+#   ./run-batched.sh --xqts-version QT4 --batch-size 40 --heap 6g
+#   ./run-batched.sh --xqts-version 3.1 --resume
+#   ./run-batched.sh --xqts-version QT4 --test-set-pattern 'fn-.*' --batch-size 30
+
+set -euo pipefail
+
+# === Defaults ===
+XQTS_VERSION="QT4"
+BATCH_SIZE=50
+HEAP="4g"
+BATCH_TIMEOUT=300
+OUTPUT_DIR="target"
+TEST_SET_PATTERN=""
+EXCLUDE_TEST_SETS=""
+EXCLUDE_TEST_CASES="__DEFAULT__"
+ENABLE_FEATURES=""
+PARALLEL=1
+RESUME=false
+DRY_RUN=false
+EXTRA_ARGS=()
+
+# QT4 op-to test cases that allocate >100M-item integer sequences (typically
+# via reverse() of a 100B-item range, which materializes all items, or via
+# `=`/`<` against a huge range where the matching value is at the far end so
+# no short-circuit helps). They reliably OOM the JVM at any reasonable heap
+# size and abort the entire batch, losing the other ~49 test sets in batch-9.
+# Excluding them costs ~14 individual test cases out of QT4's ~36k total.
+# See: op/to.xml RangeExpr-408f-k, 409c-d, 410f-k.
+QT4_OOM_PRONE_TEST_CASES="RangeExpr-408f,RangeExpr-408g,RangeExpr-408h,RangeExpr-408i,RangeExpr-408j,RangeExpr-408k,RangeExpr-409c,RangeExpr-409d,RangeExpr-410f,RangeExpr-410g,RangeExpr-410h,RangeExpr-410i,RangeExpr-410j,RangeExpr-410k"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+JAR="$SCRIPT_DIR/exist-xqts-runner-assembly-2.0.0-SNAPSHOT.jar"
+# JAVA_HOME: honor the environment; otherwise ask the `java` on PATH for its java.home (empty if java is not found).
+JAVA_HOME="${JAVA_HOME:-$(java -XshowSettings:properties -version 2>&1 | sed -n 's/^ *java\.home = //p' || true)}"
+# === Parse args ===
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --xqts-version) XQTS_VERSION="$2"; shift 2 ;;
+    --batch-size)   BATCH_SIZE="$2"; shift 2 ;;
+    --heap)         HEAP="$2"; shift 2 ;;
+    --timeout)      BATCH_TIMEOUT="$2"; shift 2 ;;
+    --output-dir)   OUTPUT_DIR="$2"; shift 2 ;;
+    --test-set-pattern) TEST_SET_PATTERN="$2"; shift 2 ;;
+    --exclude-test-set) EXCLUDE_TEST_SETS="$2"; shift 2 ;;
+    --exclude-test-case) EXCLUDE_TEST_CASES="$2"; shift 2 ;;
+    --enable-feature)   ENABLE_FEATURES="$2"; shift 2 ;;
+    --parser)       PARSER="$2"; shift 2 ;;
+    --parallel)     PARALLEL="$2"; shift 2 ;;
+    --resume)       RESUME=true; shift ;;
+    --dry-run)      DRY_RUN=true; shift ;;
+    --)             shift; EXTRA_ARGS+=("$@"); break ;;
+    *)              EXTRA_ARGS+=("$1"); shift ;;
+  esac
+done
+
+# === Resolve catalog ===
+case "$XQTS_VERSION" in
+  3.1)   CATALOG="$SCRIPT_DIR/work/QT3_1_0/catalog.xml" ;;
+  HEAD)  CATALOG="$SCRIPT_DIR/work/qt3tests-master/catalog.xml" ;;
+  QT4)   CATALOG="$SCRIPT_DIR/work/qt4tests-master/catalog.xml" ;;
+  FTTS)  CATALOG="$SCRIPT_DIR/work/XQFTTS_1_0_4/XQFTTSCatalog.xml" ;;
+  *)     echo "ERROR: Unknown XQTS version: $XQTS_VERSION"; exit 1 ;;
+esac
+
+if [[ ! -f "$CATALOG" ]]; then
+  echo "ERROR: Catalog not found: $CATALOG"
+  echo "Run the JAR once with no test sets to trigger download, or check work/ dir."
+  exit 1
+fi
+
+if [[ ! -f "$JAR" ]]; then
+  echo "ERROR: Runner JAR not found: $JAR"
+  exit 1
+fi
+
+# Apply default OOM-prone test-case exclusions for QT4 if the user didn't
+# override them via --exclude-test-case. Pass `--exclude-test-case ''` to
+# disable the defaults explicitly (e.g. when running op-to in isolation
+# with a large heap to investigate the underlying eXist behavior).
+if [[ "$EXCLUDE_TEST_CASES" == "__DEFAULT__" ]]; then
+  if [[ "$XQTS_VERSION" == "QT4" ]]; then
+    EXCLUDE_TEST_CASES="$QT4_OOM_PRONE_TEST_CASES"
+  else
+    EXCLUDE_TEST_CASES=""
+  fi
+fi
+
+# === Extract test set names from catalog ===
+if [[ "$XQTS_VERSION" == "FTTS" ]]; then
+  # XQFTTS uses a different catalog format
+  ALL_SETS=$(grep '<test-group ' "$CATALOG" | sed 's/.*name="\([^"]*\)".*/\1/' | sort)
+else
+  ALL_SETS=$(grep '<test-set ' "$CATALOG" | sed 's/.*name="\([^"]*\)".*/\1/' | sort)
+fi
+
+# Apply pattern filter
+if [[ -n "$TEST_SET_PATTERN" ]]; then
+  ALL_SETS=$(echo "$ALL_SETS" | grep -E "$TEST_SET_PATTERN" || true)
+fi
+
+# Apply exclusions (each entry is matched as a literal, whole-line test-set name, not a regex)
+if [[ -n "$EXCLUDE_TEST_SETS" ]]; then
+  IFS=',' read -ra EXCL <<< "$EXCLUDE_TEST_SETS"
+  for ex in "${EXCL[@]}"; do
+    ALL_SETS=$(echo "$ALL_SETS" | grep -vxF -- "$ex" || true)
+  done
+fi
+
+# If --resume, skip test sets that already have XML results (counts use non-empty lines so an empty list is 0, not 1)
+if [[ "$RESUME" == true ]]; then
+  DATA_DIR="$OUTPUT_DIR/junit/data"
+  if [[ -d "$DATA_DIR" ]]; then
+    BEFORE=$(echo "$ALL_SETS" | grep -c . || true)
+    FILTERED=""
+    while IFS= read -r ts; do
+      if [[ ! -f "$DATA_DIR/TEST-${ts}.xml" ]]; then
+        FILTERED+="$ts"$'\n'
+      fi
+    done <<< "$ALL_SETS"
+    ALL_SETS=$(echo "$FILTERED" | sed '/^$/d')
+    AFTER=$(echo "$ALL_SETS" | grep -c . || true)
+    echo "Resume mode: skipping $((BEFORE - AFTER)) already-completed test sets ($AFTER remaining)"
+  fi
+fi
+
+# Convert to array (portable — no mapfile)
+SET_ARRAY=()
+while IFS= read -r line; do
+  line="${line// /}"
+  [[ -n "$line" ]] && SET_ARRAY+=("$line")
+done <<< "$ALL_SETS"
+
+TOTAL=${#SET_ARRAY[@]}
+BATCHES=$(( (TOTAL + BATCH_SIZE - 1) / BATCH_SIZE ))
+
+echo "=== XQTS Batch Runner ==="
+echo "Parser:     ${PARSER:-antlr2}"
+echo "Version:    $XQTS_VERSION"
+echo "Test sets:  $TOTAL"
+echo "Batch size: $BATCH_SIZE"
+echo "Batches:    $BATCHES"
+echo "Parallel:   $PARALLEL"
+echo "Heap:       $HEAP"
+echo "Timeout:    ${BATCH_TIMEOUT}s"
+echo "Output:     $OUTPUT_DIR"
+echo "JAR:        $JAR"
+echo ""
+
+# === Run a single batch ===
+# Args: batch_num total_batches start_idx end_idx stream_id   (start_idx inclusive, end_idx exclusive; 0-based into SET_ARRAY)
+run_batch() {
+  local batch_num=$1 total_batches=$2 start_idx=$3 end_idx=$4 stream_id=$5
+  # Returns 1 if the batch timed out or crashed; 0 otherwise (dry run, exit 0, exit 1 = test failures, exit 255 = runner error).
+  # Build the comma-separated test set list that is passed to --test-set
+  local batch_sets=""
+  for (( j=start_idx; j<end_idx; j++ )); do
+    if [[ -n "$batch_sets" ]]; then batch_sets+=","; fi
+    batch_sets+="${SET_ARRAY[$j]}"
+  done
+
+  echo "=== Batch $batch_num/$total_batches (sets $((start_idx+1))-$end_idx of $TOTAL) [stream $stream_id] ==="
+
+  if [[ "$DRY_RUN" == true ]]; then
+    echo "  Sets: $batch_sets"
+    echo "  [dry run, skipping]"
+    return 0
+  fi
+
+  # Each batch gets its own temporary eXist-db home directory (passed as -Dexist.home) to avoid
+  # BrokerPool data directory lock conflicts between JVMs; it is removed again after the batch.
+  local exist_home
+  exist_home=$(mktemp -d /tmp/xqts-stream.XXXXXX)
+
+  # Build runner command
+  # For QT4 tests, pass -Dexist.xqts.default-version=4.0; the runner reads this property and
+  # prepends `xquery version "4.0";` to queries that carry no version declaration.
+  local version_prop=""
+  if [[ "$XQTS_VERSION" == "QT4" ]]; then
+    version_prop="-Dexist.xqts.default-version=4.0"
+  fi
+  local cmd=("$JAVA_HOME/bin/java" "-Xmx${HEAP}" "-XX:+ExitOnOutOfMemoryError"
+    "-Dexist.home=$exist_home"
+    "-Dexist.parser=${PARSER:-antlr2}"
+    ${version_prop:+"$version_prop"}
+    "-jar" "$JAR"
+    "--xqts-version" "$XQTS_VERSION"
+    "--test-set" "$batch_sets"
+    "--local-dir" "$SCRIPT_DIR/work"
+    "--output-dir" "$OUTPUT_DIR"
+  )
+
+  if [[ -n "$EXCLUDE_TEST_CASES" ]]; then
+    cmd+=("--exclude-test-case" "$EXCLUDE_TEST_CASES")
+  fi
+
+  if [[ -n "$ENABLE_FEATURES" ]]; then
+    cmd+=("--enable-feature" "$ENABLE_FEATURES")
+  fi
+
+  if [[ ${#EXTRA_ARGS[@]} -gt 0 ]]; then
+    cmd+=("${EXTRA_ARGS[@]}")
+  fi
+
+  local batch_start batch_end batch_elapsed exit_code
+  batch_start=$(date +%s)
+
+  local batch_log jstack_file
+  batch_log=$(mktemp /tmp/xqts-batch.XXXXXX)
+  jstack_file="$OUTPUT_DIR/jstack-batch-${batch_num}.txt"
+
+  # Run the batch in the background so we can monitor for timeouts and capture a thread dump before the hard kill.
+  # The dump is taken jstack_buffer seconds before the timeout, or at half the timeout when that would leave < 10s.
+  local jstack_buffer=15
+  local jstack_delay=$((BATCH_TIMEOUT - jstack_buffer))
+  if (( jstack_delay < 10 )); then jstack_delay=$((BATCH_TIMEOUT / 2)); fi
+  # timeout(1) signals the JVM after BATCH_TIMEOUT seconds and sends SIGKILL 30s later if it is still running.
+  timeout --kill-after=30 "$BATCH_TIMEOUT" "${cmd[@]}" > "$batch_log" 2>&1 &
+  local batch_pid=$!
+
+  # Monitor: batch_pid is the timeout wrapper; if it is still running near the timeout, jstack its java child
+  (
+    sleep "$jstack_delay"
+    if kill -0 $batch_pid 2>/dev/null; then
+      echo "  Batch $batch_num approaching timeout — capturing thread dump..."
+      local java_pid
+      java_pid=$(pgrep -P $batch_pid java 2>/dev/null | head -1 || true)
+      if [[ -n "$java_pid" ]]; then
+        "$JAVA_HOME/bin/jstack" "$java_pid" > "$jstack_file" 2>&1 || true
+        echo "  Thread dump saved to $jstack_file"
+      fi
+    fi
+  ) &
+  local monitor_pid=$!
+
+  # Wait for the batch to complete (or timeout); `|| exit_code=$?` records the status without tripping `set -e`
+  exit_code=0
+  wait $batch_pid 2>/dev/null || exit_code=$?
+
+  # Stop the monitor subshell if it is still sleeping, and reap it
+  kill $monitor_pid 2>/dev/null || true
+  wait $monitor_pid 2>/dev/null || true
+  # Only the last 20 lines of runner output are shown; the full log file is then discarded
+  tail -20 "$batch_log" 2>/dev/null || true
+  rm -f "$batch_log" 2>/dev/null || true
+
+  # Kill any lingering Java processes from this batch (BrokerPool shutdown hangs); matched via the unique exist.home path
+  pkill -9 -f "exist.home=$exist_home" 2>/dev/null || true
+  sleep 1
+  rm -rf "$exist_home" 2>/dev/null || true
+
+  batch_end=$(date +%s)
+  batch_elapsed=$((batch_end - batch_start))
+  # 124 is timeout(1)'s "command timed out" status; 137 is 128+9, i.e. the process was ended by SIGKILL
+  if [[ $exit_code -eq 124 || $exit_code -eq 137 ]]; then
+    echo "  WARNING: Batch $batch_num TIMED OUT after ${BATCH_TIMEOUT}s (exit $exit_code) [stream $stream_id]"
+    if [[ -f "$jstack_file" ]]; then
+      echo "  Thread dump: $jstack_file"
+    fi
+    return 1
+  elif [[ $exit_code -gt 1 && $exit_code -ne 255 ]]; then
+    echo "  WARNING: Batch $batch_num crashed with code $exit_code (${batch_elapsed}s) [stream $stream_id]"
+    return 1
+  else
+    # exit 0 = all tests passed, exit 1 = some test failures (normal), exit 255 = runner error (non-fatal)
+    echo "  Batch $batch_num completed in ${batch_elapsed}s (exit $exit_code) [stream $stream_id]"
+  fi
+  return 0
+}
+
+# === Run one stream: a sequential series of batches ===
+# Args: stream_id followed by the 0-based batch indices assigned to this stream.
+# Records the number of failed batches in /tmp/xqts-stream-failures-$stream_id.
+run_stream() {
+  local stream_id=$1 failed=0 idx first last
+  shift
+  local -a batch_list=("$@")
+
+  for idx in "${batch_list[@]}"; do
+    first=$((idx * BATCH_SIZE))
+    # Clamp the exclusive end index to TOTAL so the final batch may be short.
+    last=$((first + BATCH_SIZE > TOTAL ? TOTAL : first + BATCH_SIZE))
+
+    if ! run_batch "$((idx + 1))" "$BATCHES" "$first" "$last" "$stream_id"; then
+      failed=$((failed + 1))
+    fi
+    echo ""
+  done
+  printf '%s\n' "$failed" > "/tmp/xqts-stream-failures-$stream_id"
+}
+
+# === Dispatch batches ===
+mkdir -p "$OUTPUT_DIR/junit/data"
+START_TIME=$(date +%s)
+FAILURES=0
+
+if [[ "$PARALLEL" -le 1 ]]; then
+  # Sequential mode (original behavior)
+  for (( batch_idx=0; batch_idx<BATCHES; batch_idx++ )); do
+    local_start=$((batch_idx * BATCH_SIZE))
+    local_end=$((local_start + BATCH_SIZE))
+    if (( local_end > TOTAL )); then local_end=$TOTAL; fi
+
+    run_batch "$((batch_idx + 1))" "$BATCHES" "$local_start" "$local_end" "1" || FAILURES=$((FAILURES + 1))
+    echo ""
+  done
+else
+  # Parallel mode: distribute batches round-robin across streams
+  echo "Starting $PARALLEL parallel streams..."
+  echo ""
+
+  # Build batch index arrays for each stream
+  declare -a STREAM_PIDS
+  for (( s=0; s<PARALLEL; s++ )); do
+    STREAM_INDICES=()
+    for (( batch_idx=s; batch_idx<BATCHES; batch_idx+=PARALLEL )); do
+      STREAM_INDICES+=("$batch_idx")
+    done
+
+    if [[ ${#STREAM_INDICES[@]} -gt 0 ]]; then
+      run_stream "$((s + 1))" "${STREAM_INDICES[@]}" &
+      STREAM_PIDS+=($!)
+    fi
+  done
+
+  # Wait for all streams
+  for pid in "${STREAM_PIDS[@]}"; do
+    wait "$pid" || true
+  done
+
+  # Collect failure counts
+  for (( s=0; s<PARALLEL; s++ )); do
+    local_file="/tmp/xqts-stream-failures-$((s + 1))"
+    if [[ -f "$local_file" ]]; then
+      FAILURES=$((FAILURES + $(cat "$local_file")))
+      rm -f "$local_file"
+    fi
+  done
+fi
+
+END_TIME=$(date +%s)
+TOTAL_ELAPSED=$((END_TIME - START_TIME))
+
+echo "=== Summary ==="
+echo "Total time: ${TOTAL_ELAPSED}s ($((TOTAL_ELAPSED / 60))m $((TOTAL_ELAPSED % 60))s)"
+echo "Batches:    $BATCHES ($FAILURES failed)"
+
+# Write timing log for trend analysis (one appended line per run; OUTPUT_DIR is quoted so paths with spaces work)
+TIMING_LOG="$OUTPUT_DIR/timing.log"
+echo "run=$(basename "$OUTPUT_DIR") version=$XQTS_VERSION total_time=${TOTAL_ELAPSED}s batches=$BATCHES failures=$FAILURES date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$TIMING_LOG"
+
+# Count result files
+if [[ -d "$OUTPUT_DIR/junit/data" ]]; then
+  RESULT_COUNT=$(ls "$OUTPUT_DIR/junit/data"/TEST-*.xml 2>/dev/null | wc -l | tr -d ' ')
+  echo "Results:    $RESULT_COUNT XML files in $OUTPUT_DIR/junit/data/"
+
+  # Quick aggregate: sum tests/failures/errors/skipped over all XML files; an absent attribute (empty xmllint output) counts as 0
+  if command -v xmllint &>/dev/null && [[ $RESULT_COUNT -gt 0 ]]; then
+    TOTAL_TESTS=0
+    TOTAL_FAILURES=0
+    TOTAL_ERRORS=0
+    TOTAL_SKIPPED=0
+    for f in "$OUTPUT_DIR/junit/data"/TEST-*.xml; do
+      T=$(xmllint --xpath 'string(//testsuite/@tests)' "$f" 2>/dev/null || echo 0)
+      F=$(xmllint --xpath 'string(//testsuite/@failures)' "$f" 2>/dev/null || echo 0)
+      E=$(xmllint --xpath 'string(//testsuite/@errors)' "$f" 2>/dev/null || echo 0)
+      S=$(xmllint --xpath 'string(//testsuite/@skipped)' "$f" 2>/dev/null || echo 0)
+      TOTAL_TESTS=$((TOTAL_TESTS + ${T:-0}))
+      TOTAL_FAILURES=$((TOTAL_FAILURES + ${F:-0}))
+      TOTAL_ERRORS=$((TOTAL_ERRORS + ${E:-0}))
+      TOTAL_SKIPPED=$((TOTAL_SKIPPED + ${S:-0}))
+    done
+    PASSED=$((TOTAL_TESTS - TOTAL_FAILURES - TOTAL_ERRORS - TOTAL_SKIPPED))
+    echo ""
+    echo "Aggregate:  $TOTAL_TESTS tests, $PASSED passed, $TOTAL_FAILURES failed, $TOTAL_ERRORS errors, $TOTAL_SKIPPED skipped"
+    if [[ $TOTAL_TESTS -gt 0 ]]; then
+      PCT=$(echo "scale=1; $PASSED * 100 / $TOTAL_TESTS" | bc)
+      echo "Pass rate:  ${PCT}% ($PASSED / $TOTAL_TESTS)"
+    fi
+  fi
+fi
+
+# Per-test-set timing report (sorted by time, descending)
+if [[ -d "$OUTPUT_DIR/junit/data" ]] && command -v python3 &>/dev/null; then
+  TIMING_REPORT="$OUTPUT_DIR/timing-report.txt"
+  python3 -c "
+import xml.etree.ElementTree as ET, glob, sys
+results = []
+for f in sorted(glob.glob('$OUTPUT_DIR/junit/data/TEST-*.xml')):
+    root = ET.parse(f).getroot()
+    name = root.get('name','').replace('XQTS_QT4.','').replace('XQTS_3_1.','').replace('XQTS_FTTS_1_0.','')
+    t = float(root.get('time','0'))
+    tests = int(root.get('tests','0'))
+    fails = int(root.get('failures','0'))
+    errs = int(root.get('errors','0'))
+    passed = tests - fails - errs - int(root.get('skipped','0'))
+    results.append((t, name, tests, passed, fails, errs))
+results.sort(reverse=True)
+total_time = sum(r[0] for r in results)
+print(f'Per-test-set timing report ({len(results)} sets, {total_time:.0f}s total)')
+print(f'{\"Time\":>8} {\"Tests\":>6} {\"Pass\":>6} {\"Fail\":>5} {\"Err\":>4}  Set')
+for t, name, tests, p, f, e in results:
+    if t >= 1.0:
+        flag = ' !!!' if t > 60 else ' !' if t > 10 else ''
+        print(f'{t:>7.1f}s {tests:>6} {p:>6} {f:>5} {e:>4}  {name}{flag}')
+slow = [r for r in results if r[0] > 60]
+if slow:
+    print(f'\n{len(slow)} test sets >60s — investigate for performance issues')
+" 2>/dev/null | tee "$TIMING_REPORT"
+  echo ""
+  echo "Timing report saved to: $TIMING_REPORT"
+fi
+
+# List test sets that were expected but produced no results (killed by timeout)
+if [[ $FAILURES -gt 0 ]]; then
+  echo ""
+  echo "WARNING: $FAILURES batch(es) timed out or failed. Some test sets may have no results."
+fi
+
+echo ""
+echo "Done."
diff --git a/src/main/resources/conf.xml b/src/main/resources/conf.xml
index 4f44d2a..25ec97c 100644
--- a/src/main/resources/conf.xml
+++ b/src/main/resources/conf.xml
@@ -888,7 +888,8 @@
             <module uri="http://www.w3.org/2005/xpath-functions/map"  class="org.exist.xquery.functions.map.MapModule" />
             <module uri="http://www.w3.org/2005/xpath-functions/math" class="org.exist.xquery.functions.math.MathModule" />
             <module uri="http://www.w3.org/2005/xpath-functions/array" class="org.exist.xquery.functions.array.ArrayModule" />
-            <module uri="http://expath.org/ns/file" class="org.expath.exist.file.ExpathFileModule" />
+            <module uri="http://expath.org/ns/file" class="org.exist.xquery.modules.file.expath.ExpathFileModule"/>
+            <module uri="http://expath.org/ns/binary" class="org.exist.xquery.modules.binary.BinaryModule"/>
         </builtin-modules>
     </xquery>
 
diff --git a/src/main/scala/org/exist/xqts/runner/ExistServer.scala b/src/main/scala/org/exist/xqts/runner/ExistServer.scala
index 91edf4d..fe9baa2 100644
--- a/src/main/scala/org/exist/xqts/runner/ExistServer.scala
+++ b/src/main/scala/org/exist/xqts/runner/ExistServer.scala
@@ -372,10 +372,11 @@ class ExistConnection(brokerRes: Resource[IO, DBBroker], contextAttributesSuppli
                  ): IO[Either[ExistServerException, Result]] = {
         IO.delay {
           try {
-            val resultSequence = xqueryService.execute(broker, compiledQuery.compiledXquery, contextSequence.orNull)
-            // Extract serialization properties from the query context (e.g. declare option output:method "json")
+            // Pass outputProperties to execute() so eXist extracts serialization
+            // options (e.g., declare option output:method "html") BEFORE calling
+            // context.reset(), which clears them.
             val serializationProps = new Properties()
-            compiledQuery.xqueryContext.checkOptions(serializationProps)
+            val resultSequence = xqueryService.execute(broker, compiledQuery.compiledXquery, contextSequence.orNull, serializationProps)
             val result = Result(resultSequence, compiledQuery.compilationTime, System.currentTimeMillis() - executionStartTime)
             result.serializationProperties = serializationProps
             Right(result)
@@ -494,15 +495,36 @@ class ExistConnection(brokerRes: Resource[IO, DBBroker], contextAttributesSuppli
       }
 
       for (module <- modules) {
-        val fileUri: XmldbURI = XmldbURI.createInternal(module.file.toAbsolutePath.toUri.toString)
-        val locations: Array[AnyURIValue] = Array(new AnyURIValue(fileUri))
-        context.importModule(module.uri.getStringValue, null, locations)
+        val fileUri: String = module.file.toAbsolutePath.toUri.toString
+        // Register the location hint so sub-modules can find it during compilation
+        context.addModuleLocationHint(module.uri.getStringValue, fileUri)
+        // Try to eagerly import the module; ignore XQST0059 namespace mismatches
+        // (the XQTS catalog may map a namespace to a file declaring a different namespace)
+        try {
+          val locations: Array[AnyURIValue] = Array(new AnyURIValue(fileUri))
+          context.importModule(module.uri.getStringValue, null, locations)
+        } catch {
+          case _: org.exist.xquery.XPathException => // ignore namespace mismatch or load errors
+        }
       }
 
       context
     }
 
-    val source = new StringSource(query)
+    // If the query has no version declaration and the JVM was started with
+    // -Dexist.xqts.default-version=4.0 (run-batched.sh sets this for QT4),
+    // prepend `xquery version "4.0";` so XQ4 syntax (=!>, ->, etc.) is accepted.
+    // Detection is a plain substring test for "xquery version" or
+    // "module namespace" anywhere in the text, so a declaration after leading
+    // comments is found too, and library modules are left untouched.
+    val hasVersionDecl = query.contains("xquery version") || query.contains("module namespace")
+    val defaultVersion = System.getProperty("exist.xqts.default-version", "")
+    val effectiveQuery = if (!hasVersionDecl && defaultVersion == "4.0") {
+      "xquery version \"4.0\";\n" + query
+    } else {
+      query
+    }
+    val source = new StringSource(effectiveQuery)
     val fnConfigureContext: XQueryContext => XQueryContext = { ctx =>
       val configured = setupContext(ctx)(staticBaseUri, availableDocuments, availableCollections, availableTextResources, namespaces, externalVariables, decimalFormats, modules, xpath1Compatibility)
       // Set global context attributes (e.g., ft.stopWordURIMap, ft.thesaurusURIMap from XQFTTS catalog)
diff --git a/src/main/scala/org/exist/xqts/runner/TestCaseRunnerActor.scala b/src/main/scala/org/exist/xqts/runner/TestCaseRunnerActor.scala
index 5e52772..b8f3ccf 100644
--- a/src/main/scala/org/exist/xqts/runner/TestCaseRunnerActor.scala
+++ b/src/main/scala/org/exist/xqts/runner/TestCaseRunnerActor.scala
@@ -66,6 +66,12 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
   private var awaitingQueryStr: Map[Path, Seq[TestCaseId]] = Map.empty
   private var pendingTestCases: Map[TestCaseId, PendingTestCase] = Map.empty
 
+  // Namespaces declared by the current test case's <environment>. These need
+  // to be visible to assertion XPath queries (e.g. `j:` prefix in
+  // fn-json-to-xml tests). Set when an assertion is dispatched and consulted
+  // by executeQueryWith$Result. Actors are single-threaded so this is safe.
+  private var assertionNamespaces: Seq[XQTSParserActor.Namespace] = Seq.empty
+
   override def receive: Receive = {
 
     case rtc@RunTestCase(testSetRef, testCase, manager) =>
@@ -305,6 +311,45 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
     }
   }
 
+  /**
+   * Prefix the test query with an `xquery version "..."` declaration chosen
+   * from the test case's satisfied spec dependencies.
+   *
+   * Rationale: tests whose spec dependency lists only strict versions (e.g.
+   * `XQ10 XQ30 XQ31`, no "+" form) predate XQuery 4.0; running them as 4.0
+   * trips changed rules (reserved function names, unprefixed default
+   * namespace, default parameter values, etc.), so they get an explicit
+   * older version here.
+   * NOTE(review): confirm how this interacts with any default-version prepend done in ExistServer.
+   *
+   * Selection rules:
+   *   - A query containing "xquery version" or "module namespace" is returned as-is.
+   *   - With no satisfied spec dependency, the query is returned as-is.
+   *   - If a dependency value contains "+" (e.g. XQ31+) or lists XQ40, use "4.0".
+   *   - Otherwise the highest strict spec listed wins: XQ31 > XQ30 > XQ10.
+   *   - If none of those tokens is present, the query is returned as-is.
+   */
+  private def applyVersionHint(query: String, deps: Seq[Dependency]): String = {
+    val alreadyDeclared = query.contains("xquery version") || query.contains("module namespace")
+    lazy val satisfiedSpecs = deps.filter(dep => dep.`type` == DependencyType.Spec && dep.satisfied)
+    if (alreadyDeclared || satisfiedSpecs.isEmpty) {
+      query
+    } else {
+      val tokens = satisfiedSpecs.flatMap(_.value.split(' ').toSeq).filter(_.nonEmpty).toSet
+      val openEnded = satisfiedSpecs.exists(_.value.contains("+"))
+      // Ordered from newest to oldest; the first rule whose condition holds decides the version.
+      val candidates = Seq(
+        (openEnded || tokens.contains("XQ40")) -> "4.0",
+        tokens.contains("XQ31") -> "3.1",
+        tokens.contains("XQ30") -> "3.0",
+        tokens.contains("XQ10") -> "1.0"
+      )
+      candidates.collectFirst { case (true, v) => v }
+        .map(v => "xquery version \"" + v + "\";\n" + query)
+        .getOrElse(query)
+    }
+  }
+
   /**
    * Run a non-update (standard) XQTS test-case.
    */
@@ -313,8 +358,10 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
     testCase.test match {
       case Some(test) =>
 
-        // get the XQuery to execute
-        val queryString: String = test.map(_ => resolvedEnvironment.resolvedQuery.get).merge
+        // get the XQuery to execute, applying a version prepend hint when the test's
+        // strict spec dependencies indicate a version older than the runner default.
+        val rawQuery: String = test.map(_ => resolvedEnvironment.resolvedQuery.get).merge
+        val queryString = applyVersionHint(rawQuery, testCase.dependencies)
 
         // get the static baseURI for the XQuery
         val baseUri = testCase.environment
@@ -382,7 +429,12 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
                     FailureResult(testSetName, testCase.name, compilationTime, executionTime, failureMessage(connection)(expectedError, queryResult))
 
                   case (Some(expectedResult)) =>
-                    processAssertion(connection, testSetName, testCase.name, compilationTime, executionTime, queryResultObj.serializationProperties, baseUri)(expectedResult, queryResult)
+                    assertionNamespaces = testCase.environment.map(_.namespaces).getOrElse(List.empty)
+                    try {
+                      processAssertion(connection, testSetName, testCase.name, compilationTime, executionTime, queryResultObj.serializationProperties, baseUri)(expectedResult, queryResult)
+                    } finally {
+                      assertionNamespaces = Seq.empty
+                    }
 
                   case None =>
                     ErrorResult(testSetName, testCase.name, compilationTime, executionTime, new IllegalStateException("No defined expected result"))
@@ -561,7 +613,12 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
                           case Some(expectedError: Error) =>
                             FailureResult(testSetName, testCase.name, compilationTime, executionTime, failureMessage(connection)(expectedError, queryResult))
                           case Some(expectedResult) =>
-                            processAssertion(connection, testSetName, testCase.name, compilationTime, executionTime, assertionBaseUri = baseUri)(expectedResult, queryResult)
+                            assertionNamespaces = testCase.environment.map(_.namespaces).getOrElse(List.empty)
+                            try {
+                              processAssertion(connection, testSetName, testCase.name, compilationTime, executionTime, assertionBaseUri = baseUri)(expectedResult, queryResult)
+                            } finally {
+                              assertionNamespaces = Seq.empty
+                            }
                           case None =>
                             ErrorResult(testSetName, testCase.name, compilationTime, executionTime, new IllegalStateException("No defined expected result"))
                         }
@@ -576,7 +633,12 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
                     FailureResult(testSetName, testCase.name, updateCompTime, updateExecTime, failureMessage(connection)(expectedError, new org.exist.xquery.value.EmptySequence()))
                   case Some(expectedResult) if lastUpdateResult.isDefined =>
                     // Copy-modify-return: use the update expression's return value for assertion
-                    processAssertion(connection, testSetName, testCase.name, updateCompTime, updateExecTime, assertionBaseUri = baseUri)(expectedResult, lastUpdateResult.get)
+                    assertionNamespaces = testCase.environment.map(_.namespaces).getOrElse(List.empty)
+                    try {
+                      processAssertion(connection, testSetName, testCase.name, updateCompTime, updateExecTime, assertionBaseUri = baseUri)(expectedResult, lastUpdateResult.get)
+                    } finally {
+                      assertionNamespaces = Seq.empty
+                    }
                   case Some(_) =>
                     // Expected a non-error result with no verification query and no update result
                     FailureResult(testSetName, testCase.name, updateCompTime, updateExecTime, s"Expected a result but no verification query defined")
@@ -1269,22 +1331,31 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
     val serializationQuery = if (serializationProperties.isEmpty || !serializationProperties.containsKey(OutputKeys.METHOD)) {
       QUERY_ASSERT_XML_SERIALIZATION
     } else {
-      val method = serializationProperties.getProperty(OutputKeys.METHOD, "xml")
-      val indent = serializationProperties.getProperty(OutputKeys.INDENT, "no")
+      // Build an XQuery map literal from all serialization properties of the query context.
+      // Boolean-valued properties need xs:boolean via true()/false(); a bare `true` is a path step in XQuery.
+      val booleanProps = Set("indent", "omit-xml-declaration", "include-content-type",
+        "escape-uri-attributes", "undeclare-prefixes", "byte-order-mark", "allow-duplicate-names")
+      val mapEntries = new StringBuilder()
+      val propNames = serializationProperties.propertyNames()
+      while (propNames.hasMoreElements) {
+        val key = propNames.nextElement().asInstanceOf[String]
+        val value = serializationProperties.getProperty(key)
+        if (mapEntries.nonEmpty) mapEntries.append(", ")
+        if (booleanProps.contains(key) && (value == "yes" || value == "no")) {
+          mapEntries.append(s"'$key': ${value == "yes"}()")
+        } else { // escape `&` (starts an entity reference) and `'` (the delimiter) for the XQuery string literal
+          mapEntries.append(s"'$key': '${value.replace("&", "&amp;").replace("'", "''")}'")
+        }
+      }
+      // Always include omit-xml-declaration unless already set
+      if (!serializationProperties.containsKey("omit-xml-declaration")) {
+        if (mapEntries.nonEmpty) mapEntries.append(", ")
+        mapEntries.append("'omit-xml-declaration': true()")
+      }
       s"""
-         |xquery version "3.1";
-         |declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
-         |
-         |declare variable $$local:serialization :=
-         |  <output:serialization-parameters>
-         |    <output:method value="$method"/>
-         |    <output:indent value="$indent"/>
-         |    <output:omit-xml-declaration value="yes"/>
-         |  </output:serialization-parameters>;
-         |
          |declare variable $$result external;
          |
-         |fn:serialize($$result, $$local:serialization)
+         |fn:serialize($$result, map { $mapEntries })
          |""".stripMargin
     }
     executeQueryWith$Result(connection, serializationQuery, true, None, actual, assertionBaseUri) match {
@@ -1644,14 +1715,19 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
         ErrorResult(testSetName, testCaseName, compilationTime, executionTime, t)
 
       case Right(expectedRegexStr) =>
+        // Pass regex and flags as external variables to avoid eXist parser issues
+        // with special characters in backtick string constructors (e.g., <?xml
+        // triggers processing instruction parsing inside ``[...]``)
         val expectedQuery =
           s"""
              | declare variable $$result external;
+             | declare variable $$regex external;
+             | declare variable $$flags external;
              |
-             | fn:matches($$result, ``[$expectedRegexStr]``, "${flags.getOrElse("")}")
+             | fn:matches($$result, $$regex, $$flags)
              |""".stripMargin
         val actualStr = connection.sequenceToStringRaw(actual, serializationProperties)
-        executeQueryWith$Result(connection, expectedQuery, true, None, new StringValue(actualStr), assertionBaseUri) match {
+        connection.executeQuery(expectedQuery, true, assertionBaseUri, None, Seq.empty, Seq.empty, Seq.empty, Seq.empty, Seq(RESULT_VARIABLE_NAME -> new StringValue(actualStr), "regex" -> new StringValue(expectedRegexStr), "flags" -> new StringValue(flags.getOrElse("")))) match {
           case Left(existServerException) =>
             ErrorResult(testSetName, testCaseName, compilationTime + existServerException.compilationTime, executionTime + existServerException.executionTime, existServerException)
 
@@ -1774,7 +1850,7 @@ class TestCaseRunnerActor(existServer: ExistServer, commonResourceCacheActor: Ac
    * @return the result or executing the query, or an exception.
    */
   private def executeQueryWith$Result(connection: ExistConnection, query: String, cacheCompiled: Boolean, contextSequence: Option[Sequence], $result: Sequence, staticBaseUri: Option[String] = None) = {
-    connection.executeQuery(query, cacheCompiled, staticBaseUri, contextSequence, Seq.empty, Seq.empty, Seq.empty, Seq.empty, Seq(RESULT_VARIABLE_NAME -> $result))
+    connection.executeQuery(query, cacheCompiled, staticBaseUri, contextSequence, Seq.empty, Seq.empty, Seq.empty, assertionNamespaces, Seq(RESULT_VARIABLE_NAME -> $result))
   }
 
   /**
diff --git a/src/main/scala/org/exist/xqts/runner/XQTSParserActor.scala b/src/main/scala/org/exist/xqts/runner/XQTSParserActor.scala
index 7aafda1..3d51908 100644
--- a/src/main/scala/org/exist/xqts/runner/XQTSParserActor.scala
+++ b/src/main/scala/org/exist/xqts/runner/XQTSParserActor.scala
@@ -294,6 +294,7 @@ object XQTSParserActor {
     val TransformXSLT = FeatureVal("fn-transform-XSLT")
     val TransformXSLT_30 = FeatureVal("fn-transform-XSLT30")
     val XQUpdate = FeatureVal("XQUpdate")
+    val Binary = FeatureVal("binary")
   }
 
   /**
diff --git a/src/main/scala/org/exist/xqts/runner/XQTSRunner.scala b/src/main/scala/org/exist/xqts/runner/XQTSRunner.scala
index 6a66df4..8617bf1 100644
--- a/src/main/scala/org/exist/xqts/runner/XQTSRunner.scala
+++ b/src/main/scala/org/exist/xqts/runner/XQTSRunner.scala
@@ -92,7 +92,8 @@ object XQTSRunner {
     XPath_1_0_Compatibility,
     TransformXSLT,
     TransformXSLT_30,
-    XQUpdate
+    XQUpdate,
+    Binary
   )
 
   /**
@@ -333,7 +334,7 @@ private class XQTSRunner {
             val parserActorClass = getParserActorClass(cmdConfig.xqtsVersion)
             val serializerActorClass = getSerializerActorClass()
             val xqtsRunner = system.actorOf(Props(classOf[XQTSRunnerActor], settings.xmlParserBufferSize, server, parserActorClass, serializerActorClass, styleDir, cmdConfig.outputDir.getOrElse(Paths.get(settings.outputDir))), name = "XQTSRunner")
-            xqtsRunner ! RunXQTS(cmdConfig.xqtsVersion, localXqtsDir, getEnabled(DEFAULT_FEATURES)(cmdConfig.enableFeatures, cmdConfig.disableFeatures).toSet, getEnabled(DEFAULT_SPECS)(cmdConfig.enableSpecs, cmdConfig.disableSpecs).toSet, getEnabled(DEFAULT_XML_VERSIONS)(cmdConfig.enableXmlVersions, cmdConfig.disableXmlVersions).toSet, getEnabled(DEFAULT_XSD_VERSIONS)(cmdConfig.enableXsdVersions, cmdConfig.disableXsdVersions).toSet, settings.commonResourceCacheMaxSize, cmdConfig.testSetPattern.map(Right(_)).getOrElse(Left(cmdConfig.testSets.toSet)), cmdConfig.testCasePattern.map(Right(_)).getOrElse(Left(cmdConfig.testCases.toSet)), cmdConfig.excludeTestSets.toSet, cmdConfig.excludeTestCases.toSet)
+            xqtsRunner ! RunXQTS(cmdConfig.xqtsVersion, localXqtsDir, getEnabled(DEFAULT_FEATURES)(cmdConfig.enableFeatures, cmdConfig.disableFeatures).toSet, getEnabled(defaultSpecsFor(cmdConfig.xqtsVersion))(cmdConfig.enableSpecs, cmdConfig.disableSpecs).toSet, getEnabled(DEFAULT_XML_VERSIONS)(cmdConfig.enableXmlVersions, cmdConfig.disableXmlVersions).toSet, getEnabled(DEFAULT_XSD_VERSIONS)(cmdConfig.enableXsdVersions, cmdConfig.disableXsdVersions).toSet, settings.commonResourceCacheMaxSize, cmdConfig.testSetPattern.map(Right(_)).getOrElse(Left(cmdConfig.testSets.toSet)), cmdConfig.testCasePattern.map(Right(_)).getOrElse(Left(cmdConfig.testCases.toSet)), cmdConfig.excludeTestSets.toSet, cmdConfig.excludeTestCases.toSet)
 
           case Left(throwable) =>
             logger.error("Unable to start eXist-db Server", throwable)
@@ -354,6 +355,20 @@ private class XQTSRunner {
     (defaultEnabled ++ enable).filterNot(disable.contains(_)).toSet.toSeq
   }
 
+  /**
+   * Selects the specs that are enabled by default for an XQTS version.
+   *
+   * QT4 runs target only XP40 and XQ40: a test with a strict (non-"+") spec
+   * dependency is filtered out unless the runner's target spec is in the
+   * dependency's value list, so tests declaring e.g. `XQ10 XQ30 XQ31` (no XQ40)
+   * are skipped -- the runner prepends `xquery version "4.0"` and so cannot
+   * reproduce their pre-XQ40 semantics. Every other XQTS version keeps the
+   * historical "all specs enabled" default (DEFAULT_SPECS).
+   */
+  private def defaultSpecsFor(xqtsVersion: XQTSVersion): Seq[Spec] = {
+    if (xqtsVersion == XQTS_QT4) Seq(XP40, XQ40) else DEFAULT_SPECS
+  }
+
   /**
    * Gets the parser for the XQTS version.
    *
diff --git a/src/main/scala/org/exist/xqts/runner/XQTSRunnerActor.scala b/src/main/scala/org/exist/xqts/runner/XQTSRunnerActor.scala
index 6256328..8108bd6 100644
--- a/src/main/scala/org/exist/xqts/runner/XQTSRunnerActor.scala
+++ b/src/main/scala/org/exist/xqts/runner/XQTSRunnerActor.scala
@@ -71,10 +71,22 @@ class XQTSRunnerActor(xmlParserBufferSize: Int, existServer: ExistServer, parser
   private var previousStats: Stats = Stats(0, (0, 0), (0, 0), 0)
   private var unchangedStatsTicks = 0;
 
-  /** Number of consecutive watchdog ticks with no progress before forcing shutdown. 10s tick x 60 = 600s stall timeout. */
-  private val STALL_TIMEOUT_TICKS = 60
+  /** Number of consecutive watchdog ticks with no progress before forcing shutdown. 10s tick x 6 = 60s stall timeout. */
+  private val STALL_TIMEOUT_TICKS = 6
   private var watchdogPreviousCompletedCount = 0
   private var watchdogStalledTicks = 0
+  private var startedTestCases: Map[TestSetRef, Set[String]] = Map.empty
+
+  // Forced-shutdown drain state. Once `forceSerializeAndShutdown` has been
+  // called, we send any pending TestSetResults and then wait for their
+  // SerializedTestSetResults acks before triggering actor-system termination —
+  // otherwise the children get killed mid-write and the in-flight results land
+  // in deadLetters. The deadline thread is the hard backstop in case the
+  // serializer itself is wedged.
+  private var forcedShutdown = false
+  private var finalizeSent = false
+  /** Hard deadline (ms) for the forced-shutdown drain before we give up and terminate anyway. */
+  private val FORCED_DRAIN_DEADLINE_MS = 60000L
 
   override def receive: Receive = {
 
@@ -141,19 +153,17 @@ class XQTSRunnerActor(xmlParserBufferSize: Int, existServer: ExistServer, parser
 
       if (watchdogStalledTicks >= STALL_TIMEOUT_TICKS) {
         val totalCases = this.testCases.values.foldLeft(0)(_ + _.size)
+        // Identify which test cases started but never completed (hung tests)
+        val hungTests = for {
+          (testSetRef, started) <- startedTestCases
+          completed = completedTestCases.getOrElse(testSetRef, Map.empty).keySet
+          testCase <- started -- completed
+        } yield s"${testSetRef.name}/$testCase"
         logger.warn(s"Watchdog: no progress for ${STALL_TIMEOUT_TICKS * 10}s ($currentCompletedCount/$totalCases cases completed, ${unserializedTestSets.size} unserialized). Forcing shutdown.")
-
-        // Serialize any completed but unsent test sets before shutting down
-        for {
-          (testSetRef, _) <- this.testCases
-          if isTestSetCompleted(testSetRef) && !unserializedTestSets.contains(testSetRef)
-        } {
-          completedTestCases.get(testSetRef).foreach { results =>
-            resultsSerializerRouter ! TestSetResults(testSetRef, results.values.toSeq)
-          }
+        if (hungTests.nonEmpty) {
+          logger.warn(s"Hung test cases (started but never completed): ${hungTests.mkString(", ")}")
         }
-
-        shutdown()
+        forceSerializeAndShutdown()
       }
 
     case ParseComplete(xqtsVersion, _, matchedTestSets) =>
@@ -171,7 +181,7 @@ class XQTSRunnerActor(xmlParserBufferSize: Int, existServer: ExistServer, parser
       unparsedTestSets -= testSetRef
 
       // have we completed testing an entire TestSet? NOTE: tests could have finished executing before parse complete message arrives!
-      if (isTestSetCompleted(testSetRef)) {
+      if (!unserializedTestSets.contains(testSetRef) && isTestSetCompleted(testSetRef)) {
         // serialize the TestSet results
         resultsSerializerRouter ! TestSetResults(testSetRef, completedTestCases(testSetRef).values.toSeq)
         unserializedTestSets += testSetRef
@@ -180,22 +190,37 @@ class XQTSRunnerActor(xmlParserBufferSize: Int, existServer: ExistServer, parser
     case RunningTestCase(testSetRef, testCase) =>
       logger.info(s"Starting execution of Test Case: ${testSetRef.name}/${testCase}...")
       testCases = addTestCase(testCases, testSetRef, testCase)
+      startedTestCases = addTestCase(startedTestCases, testSetRef, testCase)
 
     case RanTestCase(testSetRef, testResult) =>
       logger.info(s"Finished execution of Test Case: ${testSetRef.name}/${testResult.testCase}.")
       completedTestCases = mergeTestCases(completedTestCases, testSetRef, testResult)
 
       // have we completed testing an entire TestSet?
-      if (isTestSetCompleted(testSetRef)) {
+      if (!unserializedTestSets.contains(testSetRef) && isTestSetCompleted(testSetRef)) {
         // serialize the TestSet results
         resultsSerializerRouter ! TestSetResults(testSetRef, completedTestCases(testSetRef).values.toSeq)
         unserializedTestSets += testSetRef
+      } else if (!unserializedTestSets.contains(testSetRef) && isTestSetCompletedByStarted(testSetRef)) {
+        // All started test cases completed, but ParsedTestSet hasn't been processed
+        // yet (still in unparsedTestSets). This happens when BrokerPool threads block
+        // the Pekko dispatcher, preventing the ParsedTestSet message from being delivered.
+        logger.info(s"Test set ${testSetRef.name} completed (all started cases finished, ParsedTestSet pending). Serializing results.")
+        resultsSerializerRouter ! TestSetResults(testSetRef, completedTestCases(testSetRef).values.toSeq)
+        unserializedTestSets += testSetRef
       }
 
     case SerializedTestSetResults(testSetRef) =>
       unserializedTestSets -= testSetRef
-      if (allTestSetsCompleted()) {
-        // all TestSet results have been sent to the serializer
+      // Under a forced shutdown, hung-but-never-completed test cases mean
+      // `allTestSetsCompleted()` will never be true; relax to "all serialization
+      // acks received" so the drain can finalize. Also guard against sending
+      // FinalizeSerialization more than once.
+      val readyToFinalize =
+        !finalizeSent &&
+          (allTestSetsCompleted() || (forcedShutdown && unserializedTestSets.isEmpty))
+      if (readyToFinalize) {
+        finalizeSent = true
         resultsSerializerRouter ! FinalizeSerialization
       }
 
@@ -205,26 +230,106 @@ class XQTSRunnerActor(xmlParserBufferSize: Int, existServer: ExistServer, parser
       shutdown()
   }
 
+  private def forceSerializeAndShutdown(): Unit = {
+    // Idempotent: a second watchdog tick (or a re-entry from another path)
+    // must not start a parallel drain.
+    if (forcedShutdown) {
+      return
+    }
+    forcedShutdown = true
+
+    // Stop the watchdog now that we're committed to draining; we don't want
+    // another stall tick to log "Forcing shutdown" while serialization is
+    // already in progress.
+    timers.cancel(TimerWatchdogKey)
+
+    // Send the results of every test set that has any and is not awaiting an ack -- no completeness check, so partial sets are included.
+    for {
+      (testSetRef, _) <- this.testCases
+      if !unserializedTestSets.contains(testSetRef)
+      results <- completedTestCases.get(testSetRef)
+    } {
+      resultsSerializerRouter ! TestSetResults(testSetRef, results.values.toSeq)
+      unserializedTestSets += testSetRef
+    }
+
+    if (unserializedTestSets.isEmpty) {
+      // Nothing in flight — fall straight through the normal finalize/finish
+      // handshake so the serializer router gets a chance to flush its own state.
+      if (!finalizeSent) {
+        finalizeSent = true
+        resultsSerializerRouter ! FinalizeSerialization
+      }
+    } else {
+      logger.info(s"Draining ${unserializedTestSets.size} in-flight TestSetResults before shutdown (deadline ${FORCED_DRAIN_DEADLINE_MS / 1000}s)")
+    }
+
+    // Hard backstop: if the serializer never acks (e.g. wedged write), give up
+    // on the drain after FORCED_DRAIN_DEADLINE_MS and shut down anyway (the 30s
+    // deadline thread inside shutdown() separately guards actor-system termination).
+    // NOTE(review): the log line below reads `unserializedTestSets` off the actor thread.
+    val backstop = new Thread(() => {
+      try {
+        Thread.sleep(FORCED_DRAIN_DEADLINE_MS)
+        logger.warn(s"Forced-shutdown drain did not complete within ${FORCED_DRAIN_DEADLINE_MS / 1000}s; terminating anyway (${unserializedTestSets.size} TestSetResults still unacked)")
+        // Re-enter via a self-message so shutdown() runs on the actor thread.
+        self ! FinishedSerialization
+      } catch {
+        case _: InterruptedException =>
+      }
+    }, "xqts-forced-drain-backstop")
+    backstop.setDaemon(true)
+    backstop.start()
+  }
+
+  private var shutdownCalled = false
   private def shutdown(): Unit = {
+    if (shutdownCalled) {
+      return
+    }
+    shutdownCalled = true
     timers.cancel(TimerWatchdogKey)
     if (logger.isDebugEnabled()) {
       timers.cancel(TimerStatsKey)
     }
+    // Hard deadline: force exit if actor system termination hangs.
+    // BrokerPool threads can block the Pekko dispatcher, preventing
+    // CoordinatedShutdown from completing. This standalone thread
+    // runs outside Pekko and forces JVM exit after 30 seconds.
+    logger.info("Starting 30-second shutdown deadline thread")
+    val deadline = new Thread(() => {
+      try {
+        Thread.sleep(30000)
+        logger.warn("Actor system shutdown did not complete within 30 seconds, forcing exit")
+        Runtime.getRuntime.halt(0)
+      } catch {
+        case _: InterruptedException =>
+          logger.info("Shutdown deadline thread interrupted (clean exit)")
+      }
+    }, "xqts-shutdown-deadline")
+    deadline.setDaemon(true)
+    deadline.start()
     context.stop(self)
     context.system.terminate()
   }
 
   private def isTestSetCompleted(testSetRef: TestSetRef): Boolean = {
     unparsedTestSets.contains(testSetRef) == false &&
-      completedTestCases.get(testSetRef).map(_.keySet)
-        .flatMap(completed => testCases.get(testSetRef).map(_ == completed))
-        .getOrElse(false)
+      isTestSetCompletedByStarted(testSetRef)
+  }
+
+  /** True when the set of STARTED test cases is non-empty and equals the set of completed ones; ParsedTestSet status is ignored. */
+  private def isTestSetCompletedByStarted(testSetRef: TestSetRef): Boolean = {
+    val finished = completedTestCases.get(testSetRef).map(_.keySet)
+    val begun = startedTestCases.get(testSetRef).filter(_.nonEmpty)
+    begun.exists(started => finished.exists(started == _))
   }
 
   private def allTestSetsCompleted(): Boolean = {
-    unserializedTestSets.isEmpty &&
-      unparsedTestSets.isEmpty &&
-      !testCases.keySet.map(isTestSetCompleted(_)).contains(false)
+    unserializedTestSets.isEmpty && {
+      val testSetRefs = if (startedTestCases.nonEmpty) startedTestCases.keySet else testCases.keySet
+      testSetRefs.forall(ref => isTestSetCompleted(ref) || isTestSetCompletedByStarted(ref))
+    }
   }
 
   @unused
diff --git a/src/main/scala/org/exist/xqts/runner/qt3/XQTS3TestSetParserActor.scala b/src/main/scala/org/exist/xqts/runner/qt3/XQTS3TestSetParserActor.scala
index 83d0547..8974f11 100644
--- a/src/main/scala/org/exist/xqts/runner/qt3/XQTS3TestSetParserActor.scala
+++ b/src/main/scala/org/exist/xqts/runner/qt3/XQTS3TestSetParserActor.scala
@@ -580,10 +580,7 @@ class XQTS3TestSetParserActor(xmlParserBufferSize: Int, testCaseRunnerActor: Act
         case END_ELEMENT if (asyncReader.getLocalName == ELEM_ASSERT_TRUE) =>
           currentResult = currentResult.map(addAssertion(_)(AssertTrue))
 
-        case END_ELEMENT if (asyncReader.getLocalName == ELEM_ALL_OF || asyncReader.getLocalName == ELEM_ANY_OF) =>
-          currentResult = currentResult.map(stepOutAssertions)
-
-        case END_ELEMENT if (asyncReader.getLocalName == ELEM_ALL_OF || asyncReader.getLocalName == ELEM_NOT) =>
+        case END_ELEMENT if (asyncReader.getLocalName == ELEM_ALL_OF || asyncReader.getLocalName == ELEM_ANY_OF || asyncReader.getLocalName == ELEM_NOT) =>
           currentResult = currentResult.map(stepOutAssertions)
 
         case START_ELEMENT if (currentResult.nonEmpty && asyncReader.getLocalName == ELEM_ERROR) =>
@@ -664,9 +661,21 @@ class XQTS3TestSetParserActor(xmlParserBufferSize: Int, testCaseRunnerActor: Act
 
     def addAssertion(currentAssertions: Stack[Result])(assertion: Result): Stack[Result] = {
       currentAssertions.peekOption match {
-        case Some(head) if (head.isInstanceOf[Assertions] && !assertion.isInstanceOf[Assertions]) =>
+        case Some(head: Assertions) if (!assertion.isInstanceOf[Assertions]) =>
           // head of the stack is itself a list of assertions, and the assertion to add is not a list of assertions
-          currentAssertions.replace(head.asInstanceOf[Assertions] :+ assertion)
+          // Check if the last element in the list is a Not(None) that needs filling
+          head.assertions.lastOption match {
+            case Some(Not(None)) =>
+              // Fill the empty Not with this assertion
+              val updatedAssertions = head.assertions.init :+ Not(Some(assertion))
+              val updatedHead = head match {
+                case AllOf(_) => AllOf(updatedAssertions)
+                case AnyOf(_) => AnyOf(updatedAssertions)
+              }
+              currentAssertions.replace(updatedHead)
+            case _ =>
+              currentAssertions.replace(head :+ assertion)
+          }
 
         case Some(Not(None)) =>
           // head of the stack is a Not assertion which is empty, so wrap this assertion in the Not assertion
@@ -682,7 +691,8 @@ class XQTS3TestSetParserActor(xmlParserBufferSize: Int, testCaseRunnerActor: Act
 
     def stepOutAssertions(currentAssertions: Stack[Result]): Stack[Result] = {
       if (currentAssertions.size >= 2) {
-        if (currentAssertions.peek.isInstanceOf[Assertions]) {
+        val top = currentAssertions.peek
+        if (top.isInstanceOf[Assertions] || top.isInstanceOf[Not]) {
           val (prevHead, stack) = currentAssertions.pop()
           val head = stack.peek
           if (head.isInstanceOf[Assertions]) {