diff --git a/.github/workflows/_build.yml b/.github/workflows/_build.yml
index b77e505a7..18c6ac314 100644
--- a/.github/workflows/_build.yml
+++ b/.github/workflows/_build.yml
@@ -80,9 +80,6 @@ jobs:
git reset --hard HEAD
git clean -fd
# For sdist, ensure local runtime binaries are not packaged even if present
- rm -rf openviking/bin openviking/lib third_party/agfs/bin || true
- rm -f openviking/storage/vectordb/*.so openviking/storage/vectordb/*.dylib openviking/storage/vectordb/*.dll openviking/storage/vectordb/*.exe || true
- rm -rf openviking/_version.py openviking.egg-info
# Ignore uv.lock changes to avoid dirty state in setuptools_scm
git update-index --assume-unchanged uv.lock || true
@@ -193,11 +190,6 @@ jobs:
echo "LD_LIBRARY_PATH=${PYTHON_PREFIX}/lib:${LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
export LD_LIBRARY_PATH="${PYTHON_PREFIX}/lib:${LD_LIBRARY_PATH}"
"$PYTHON_BIN" -V
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: '1.25.1'
-
- name: Set up Rust
uses: dtolnay/rust-toolchain@v1
with:
@@ -237,12 +229,17 @@ jobs:
mkdir -p openviking/bin
cp target/${{ matrix.arch == 'aarch64' && 'aarch64-unknown-linux-gnu' || 'x86_64-unknown-linux-gnu' }}/release/ov openviking/bin/
chmod +x openviking/bin/ov
+
- name: Clean workspace (force ignore dirty)
shell: bash
run: |
+ # Back up pre-built artifacts before cleaning
+ cp -a openviking/bin /tmp/_ov_bin || true
git reset --hard HEAD
git clean -fd
rm -rf openviking/_version.py openviking.egg-info
+ # Restore pre-built artifacts
+ cp -a /tmp/_ov_bin openviking/bin || true
# Ignore uv.lock changes to avoid dirty state in setuptools_scm
git update-index --assume-unchanged uv.lock || true
@@ -257,6 +254,8 @@ jobs:
git status --ignored
echo "=== Check openviking/_version.py ==="
if [ -f openviking/_version.py ]; then cat openviking/_version.py; else echo "Not found"; fi
+ echo "=== Verify pre-built artifacts survived clean ==="
+ ls -la openviking/bin/ || true
- name: Build package (Wheel Only)
run: uv build --wheel
@@ -276,11 +275,8 @@ jobs:
- name: Repair wheels (Linux)
run: |
uv pip install auditwheel
- # Repair wheels and output to a temporary directory
uv run auditwheel repair dist/*.whl -w dist_fixed
- # Remove original non-compliant wheels
rm dist/*.whl
- # Move repaired wheels back to dist
mv dist_fixed/*.whl dist/
rmdir dist_fixed
@@ -347,11 +343,6 @@ jobs:
echo "_PYTHON_HOST_PLATFORM=macosx-${MACOS_VERSION}-${TARGET_ARCH}" >> "$GITHUB_ENV"
echo "Configured macOS wheel platform: macosx-${MACOS_VERSION}-${TARGET_ARCH}"
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: '1.25.1'
-
- name: Set up Rust
uses: dtolnay/rust-toolchain@v1
with:
@@ -405,12 +396,17 @@ jobs:
cp target/release/ov openviking/bin/
chmod +x openviking/bin/ov
fi
+
- name: Clean workspace (force ignore dirty)
shell: bash
run: |
+ # Back up pre-built artifacts before cleaning
+ cp -a openviking/bin /tmp/_ov_bin || true
git reset --hard HEAD
git clean -fd
rm -rf openviking/_version.py openviking.egg-info
+ # Restore pre-built artifacts
+ cp -a /tmp/_ov_bin openviking/bin || true
# Ignore uv.lock changes to avoid dirty state in setuptools_scm
git update-index --assume-unchanged uv.lock || true
@@ -425,6 +421,8 @@ jobs:
git status --ignored
echo "=== Check openviking/_version.py ==="
if [ -f openviking/_version.py ]; then cat openviking/_version.py; else echo "Not found"; fi
+ echo "=== Verify pre-built artifacts survived clean ==="
+ ls -la openviking/bin/ || true
- name: Build package (Wheel Only)
run: uv build --wheel
diff --git a/.github/workflows/_codeql.yml b/.github/workflows/_codeql.yml
index ca007e316..646c97aa1 100644
--- a/.github/workflows/_codeql.yml
+++ b/.github/workflows/_codeql.yml
@@ -29,11 +29,6 @@ jobs:
with:
python-version: '3.11'
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: 'stable'
-
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
diff --git a/.github/workflows/_lint.yml b/.github/workflows/_lint.yml
index 3cbeec148..9bbde04dd 100644
--- a/.github/workflows/_lint.yml
+++ b/.github/workflows/_lint.yml
@@ -19,11 +19,6 @@ jobs:
with:
python-version: '3.11'
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: 'stable'
-
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
diff --git a/.github/workflows/_test_full.yml b/.github/workflows/_test_full.yml
index 30a58c9ec..4ea21488d 100644
--- a/.github/workflows/_test_full.yml
+++ b/.github/workflows/_test_full.yml
@@ -44,11 +44,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: '1.25.1'
-
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
diff --git a/.github/workflows/_test_lite.yml b/.github/workflows/_test_lite.yml
index 52e6a7097..2374f35f3 100644
--- a/.github/workflows/_test_lite.yml
+++ b/.github/workflows/_test_lite.yml
@@ -44,11 +44,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: '1.25.1'
-
- name: Install uv
uses: astral-sh/setup-uv@v7
with:
diff --git a/.github/workflows/api_test.yml b/.github/workflows/api_test.yml
index f82e562e1..2611cd003 100644
--- a/.github/workflows/api_test.yml
+++ b/.github/workflows/api_test.yml
@@ -1,4 +1,4 @@
-name: 03. API Integration Tests
+name: 06. API Integration Tests
on:
workflow_dispatch:
@@ -42,10 +42,12 @@ jobs:
api-tests:
name: API Integration Tests (${{ matrix.os }})
runs-on: ${{ matrix.os }}
+ timeout-minutes: 50
strategy:
fail-fast: false
+ max-parallel: 1
matrix:
- os: [ubuntu-24.04, ubuntu-24.04-arm, macos-14, macos-15-intel, windows-latest]
+ os: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/main') && fromJSON('["ubuntu-24.04", "macos-14", "windows-latest"]') || fromJSON('["ubuntu-24.04"]') }}
steps:
- uses: actions/checkout@v6
@@ -57,22 +59,6 @@ jobs:
with:
python-version: '3.10'
- - name: Cache Go modules
- uses: actions/cache@v5
- with:
- path: ~/go/pkg/mod
- key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
- restore-keys: |
- ${{ runner.os }}-go-
-
- - name: Cache C++ extensions
- uses: actions/cache@v5
- with:
- path: openviking/pyagfs
- key: ${{ runner.os }}-cpp-${{ hashFiles('**/CMakeLists.txt', '**/*.cpp', '**/*.h') }}
- restore-keys: |
- ${{ runner.os }}-cpp-
-
- name: Cache Python dependencies (Unix)
if: runner.os != 'Windows'
uses: actions/cache@v5
@@ -91,11 +77,6 @@ jobs:
restore-keys: |
${{ runner.os }}-pip-
- - name: Set up Go
- uses: actions/setup-go@v6
- with:
- go-version: '1.22'
-
- name: Install system dependencies (Ubuntu)
if: runner.os == 'Linux'
run: |
@@ -415,7 +396,8 @@ jobs:
echo "Running basic tests only (no VLM/Embedding)"
uv run python -m pytest . -v --html=api-test-report.html --self-contained-html \
--ignore=retrieval/ --ignore=resources/test_pack.py --ignore=resources/test_wait_processed.py \
- --ignore=admin/ --ignore=skills/ --ignore=system/test_system_status.py --ignore=system/test_is_healthy.py --ignore=system/test_system_wait.py -k "not test_observer"
+ --ignore=admin/ --ignore=skills/ --ignore=system/test_system_status.py --ignore=system/test_is_healthy.py --ignore=system/test_system_wait.py \
+ --ignore=scenarios/ -k "not test_observer"
fi
continue-on-error: true
@@ -433,7 +415,7 @@ jobs:
uv run python -m pytest . -v --html=api-test-report.html --self-contained-html --ignore=filesystem/
} else {
Write-Host "Running basic tests only (no VLM/Embedding, Windows: skipping filesystem tests)"
- uv run python -m pytest . -v --html=api-test-report.html --self-contained-html --ignore=retrieval/ --ignore=resources/test_pack.py --ignore=resources/test_wait_processed.py --ignore=admin/ --ignore=skills/ --ignore=system/test_system_status.py --ignore=system/test_is_healthy.py --ignore=system/test_system_wait.py --ignore=filesystem/ -k "not test_observer"
+ uv run python -m pytest . -v --html=api-test-report.html --self-contained-html --ignore=retrieval/ --ignore=resources/test_pack.py --ignore=resources/test_wait_processed.py --ignore=admin/ --ignore=skills/ --ignore=system/test_system_status.py --ignore=system/test_is_healthy.py --ignore=system/test_system_wait.py --ignore=filesystem/ --ignore=scenarios/ -k "not test_observer"
}
continue-on-error: true
diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml
index 4845ddce7..de86e1cf2 100644
--- a/.github/workflows/build-docker-image.yml
+++ b/.github/workflows/build-docker-image.yml
@@ -8,6 +8,7 @@ on:
required: true
type: string
push:
+ branches: [ main ]
tags: [ "v*.*.*" ]
env:
@@ -52,41 +53,79 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v4
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v6
with:
- images: ${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }}
+ images: |
+ ${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }}
+ docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking
tags: |
type=raw,value=${{ github.event.inputs.version }},enable=${{ github.event_name == 'workflow_dispatch' }}
type=ref,event=tag,enable=${{ github.ref_type == 'tag' }}
+ type=raw,value=latest,enable=${{ github.ref_type == 'tag' }}
+ type=raw,value=main,enable=${{ github.ref == 'refs/heads/main' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
- - name: Build and push Docker image
- id: push
+ - name: Build and push Docker image to GHCR
+ id: push-ghcr
+ uses: docker/build-push-action@v7
+ with:
+ context: .
+ platforms: ${{ matrix.platform }}
+ outputs: |
+ type=image,name=${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }},push-by-digest=true,name-canonical=true,push=true
+ labels: ${{ steps.meta.outputs.labels }}
+ build-args: |
+ # fallback to 0.0.0 if no version is provided
+ OPENVIKING_VERSION=${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.version) || (github.ref_type == 'tag' && github.ref_name) || '0.0.0' }}
+
+ - name: Build and push Docker image to Docker Hub
+ id: push-dockerhub
uses: docker/build-push-action@v7
with:
context: .
platforms: ${{ matrix.platform }}
- outputs: type=image,name=${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }},push-by-digest=true,name-canonical=true,push=true
+ outputs: |
+ type=image,name=docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking,push-by-digest=true,name-canonical=true,push=true
labels: ${{ steps.meta.outputs.labels }}
build-args: |
# fallback to 0.0.0 if no version is provided
OPENVIKING_VERSION=${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.version) || (github.ref_type == 'tag' && github.ref_name) || '0.0.0' }}
- - name: Export image digest
+ - name: Export GHCR image digest
run: |
- mkdir -p /tmp/digests
- digest="${{ steps.push.outputs.digest }}"
- touch "/tmp/digests/${digest#sha256:}"
+ mkdir -p /tmp/digests-ghcr
+ ghcr_digest="${{ steps.push-ghcr.outputs.digest }}"
+ touch "/tmp/digests-ghcr/${ghcr_digest#sha256:}"
- - name: Upload image digest
+ - name: Upload GHCR image digest
uses: actions/upload-artifact@v7
with:
- name: docker-digests-${{ matrix.arch }}
- path: /tmp/digests/*
+ name: docker-digests-ghcr-${{ matrix.arch }}
+ path: /tmp/digests-ghcr/*
+ if-no-files-found: error
+ retention-days: 1
+
+ - name: Export Docker Hub image digest
+ run: |
+ mkdir -p /tmp/digests-dockerhub
+ dockerhub_digest="${{ steps.push-dockerhub.outputs.digest }}"
+ touch "/tmp/digests-dockerhub/${dockerhub_digest#sha256:}"
+
+ - name: Upload Docker Hub image digest
+ uses: actions/upload-artifact@v7
+ with:
+ name: docker-digests-dockerhub-${{ matrix.arch }}
+ path: /tmp/digests-dockerhub/*
if-no-files-found: error
retention-days: 1
@@ -117,43 +156,81 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v4
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v6
with:
- images: ${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }}
+ images: |
+ ${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }}
+ docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking
tags: |
type=raw,value=${{ github.event.inputs.version }},enable=${{ github.event_name == 'workflow_dispatch' }}
type=ref,event=tag,enable=${{ github.ref_type == 'tag' }}
+ type=raw,value=latest,enable=${{ github.ref_type == 'tag' }}
+ type=raw,value=main,enable=${{ github.ref == 'refs/heads/main' }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
- - name: Download image digests
+ - name: Download GHCR image digests
uses: actions/download-artifact@v8
with:
- pattern: docker-digests-*
- path: /tmp/digests
+ pattern: docker-digests-ghcr-*
+ path: /tmp/digests-ghcr
+ merge-multiple: true
+
+ - name: Download Docker Hub image digests
+ uses: actions/download-artifact@v8
+ with:
+ pattern: docker-digests-dockerhub-*
+ path: /tmp/digests-dockerhub
merge-multiple: true
- name: Create multi-arch manifests
env:
SOURCE_TAGS: ${{ steps.meta.outputs.tags }}
run: |
- image_refs=()
- for digest_file in /tmp/digests/*; do
+ # Collect image references for both registries
+ ghcr_image_refs=()
+ dockerhub_image_refs=()
+ for digest_file in /tmp/digests-ghcr/*; do
+ [ -e "$digest_file" ] || continue
+ digest="sha256:$(basename "$digest_file")"
+ ghcr_image_refs+=("${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }}@${digest}")
+ done
+ for digest_file in /tmp/digests-dockerhub/*; do
[ -e "$digest_file" ] || continue
- image_refs+=("${{ env.REGISTRY }}/${{ steps.image-name.outputs.image }}@sha256:$(basename "$digest_file")")
+ digest="sha256:$(basename "$digest_file")"
+ dockerhub_image_refs+=("docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking@${digest}")
done
- [ ${#image_refs[@]} -gt 0 ] || {
- echo "No image digests found" >&2
+ [ ${#ghcr_image_refs[@]} -gt 0 ] || {
+ echo "No GHCR image digests found" >&2
+ exit 1
+ }
+ [ ${#dockerhub_image_refs[@]} -gt 0 ] || {
+ echo "No Docker Hub image digests found" >&2
exit 1
}
+ # Create manifests for all tags
while IFS= read -r tag; do
[ -n "$tag" ] || continue
- docker buildx imagetools create \
- --tag "$tag" \
- "${image_refs[@]}"
+
+ # Determine which registry this tag belongs to
+ if [[ "$tag" == ghcr.io/* ]]; then
+ docker buildx imagetools create \
+ --tag "$tag" \
+ "${ghcr_image_refs[@]}"
+ elif [[ "$tag" == docker.io/* ]]; then
+ docker buildx imagetools create \
+ --tag "$tag" \
+ "${dockerhub_image_refs[@]}"
+ fi
done <<< "$SOURCE_TAGS"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fa78ff602..927f462b6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,8 +23,5 @@ permissions:
security-events: write
jobs:
- test-full:
- uses: ./.github/workflows/_test_full.yml
-
security-scan:
uses: ./.github/workflows/_codeql.yml
diff --git a/.github/workflows/oc2ov_test.yml b/.github/workflows/oc2ov_test.yml
index 70e2e4ca0..01e024876 100644
--- a/.github/workflows/oc2ov_test.yml
+++ b/.github/workflows/oc2ov_test.yml
@@ -30,6 +30,7 @@ jobs:
p0-tests:
name: P0 Memory Tests
runs-on: [self-hosted, linux, x64]
+ timeout-minutes: 50
if: inputs.skip_tests != true
steps:
@@ -184,15 +185,4 @@ jobs:
- name: Test summary
if: success()
run: |
- echo "::notice::P0 tests passed successfully! Ready for release."
-
- release-approval:
- name: Release Approval Gate
- needs: [p0-tests]
- if: github.event_name == 'release' && github.event.action == 'prereleased'
- runs-on: ubuntu-24.04
- steps:
- - name: Approve release
- run: |
- echo "::notice::P0 tests passed. Release can proceed."
- echo "Release ${{ github.event.release.tag_name }} has been validated by P0 tests."
+ echo "::notice::P0 tests passed successfully! Ready for release."
\ No newline at end of file
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
index 1969a0b44..81ebf7ece 100644
--- a/.github/workflows/pr.yml
+++ b/.github/workflows/pr.yml
@@ -21,12 +21,6 @@ jobs:
lint:
uses: ./.github/workflows/_lint.yml
- test-lite:
- uses: ./.github/workflows/_test_lite.yml
- with:
- os_json: '["ubuntu-24.04"]'
- python_json: '["3.10"]'
-
check-deps:
runs-on: ubuntu-24.04
outputs:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ec9c51d12..8f1de676b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -197,39 +197,75 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v4
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v6
with:
- images: ghcr.io/${{ steps.image-name.outputs.image }}
+ images: |
+ ghcr.io/${{ steps.image-name.outputs.image }}
+ docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking
tags: |
type=raw,value=${{ github.event.release.tag_name }}
+ type=raw,value=latest
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
- - name: Build and push Docker image
- id: push
+ - name: Build and push Docker image to GHCR
+ id: push-ghcr
uses: docker/build-push-action@v7
with:
context: .
platforms: ${{ matrix.platform }}
- outputs: type=image,name=ghcr.io/${{ steps.image-name.outputs.image }},push-by-digest=true,name-canonical=true,push=true
+ outputs: |
+ type=image,name=ghcr.io/${{ steps.image-name.outputs.image }},push-by-digest=true,name-canonical=true,push=true
labels: ${{ steps.meta.outputs.labels }}
build-args: |
OPENVIKING_VERSION=${{ github.event.release.tag_name }}
- - name: Export image digest
+ - name: Build and push Docker image to Docker Hub
+ id: push-dockerhub
+ uses: docker/build-push-action@v7
+ with:
+ context: .
+ platforms: ${{ matrix.platform }}
+ outputs: |
+ type=image,name=docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking,push-by-digest=true,name-canonical=true,push=true
+ labels: ${{ steps.meta.outputs.labels }}
+ build-args: |
+ OPENVIKING_VERSION=${{ github.event.release.tag_name }}
+
+ - name: Export GHCR image digest
+ run: |
+ mkdir -p /tmp/digests-ghcr
+ ghcr_digest="${{ steps.push-ghcr.outputs.digest }}"
+ touch "/tmp/digests-ghcr/${ghcr_digest#sha256:}"
+
+ - name: Upload GHCR image digest
+ uses: actions/upload-artifact@v7
+ with:
+ name: docker-digests-ghcr-${{ matrix.arch }}
+ path: /tmp/digests-ghcr/*
+ if-no-files-found: error
+ retention-days: 1
+
+ - name: Export Docker Hub image digest
run: |
- mkdir -p /tmp/digests
- digest="${{ steps.push.outputs.digest }}"
- touch "/tmp/digests/${digest#sha256:}"
+ mkdir -p /tmp/digests-dockerhub
+ dockerhub_digest="${{ steps.push-dockerhub.outputs.digest }}"
+ touch "/tmp/digests-dockerhub/${dockerhub_digest#sha256:}"
- - name: Upload image digest
+ - name: Upload Docker Hub image digest
uses: actions/upload-artifact@v7
with:
- name: docker-digests-${{ matrix.arch }}
- path: /tmp/digests/*
+ name: docker-digests-dockerhub-${{ matrix.arch }}
+ path: /tmp/digests-dockerhub/*
if-no-files-found: error
retention-days: 1
@@ -263,42 +299,79 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v4
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v6
with:
- images: ghcr.io/${{ steps.image-name.outputs.image }}
+ images: |
+ ghcr.io/${{ steps.image-name.outputs.image }}
+ docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking
tags: |
type=raw,value=${{ github.event.release.tag_name }}
+ type=raw,value=latest
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v4
- - name: Download image digests
+ - name: Download GHCR image digests
uses: actions/download-artifact@v8
with:
- pattern: docker-digests-*
- path: /tmp/digests
+ pattern: docker-digests-ghcr-*
+ path: /tmp/digests-ghcr
+ merge-multiple: true
+
+ - name: Download Docker Hub image digests
+ uses: actions/download-artifact@v8
+ with:
+ pattern: docker-digests-dockerhub-*
+ path: /tmp/digests-dockerhub
merge-multiple: true
- name: Create multi-arch manifests
env:
SOURCE_TAGS: ${{ steps.meta.outputs.tags }}
run: |
- image_refs=()
- for digest_file in /tmp/digests/*; do
+ # Collect image references for both registries
+ ghcr_image_refs=()
+ dockerhub_image_refs=()
+ for digest_file in /tmp/digests-ghcr/*; do
+ [ -e "$digest_file" ] || continue
+ digest="sha256:$(basename "$digest_file")"
+ ghcr_image_refs+=("ghcr.io/${{ steps.image-name.outputs.image }}@${digest}")
+ done
+ for digest_file in /tmp/digests-dockerhub/*; do
[ -e "$digest_file" ] || continue
- image_refs+=("ghcr.io/${{ steps.image-name.outputs.image }}@sha256:$(basename "$digest_file")")
+ digest="sha256:$(basename "$digest_file")"
+ dockerhub_image_refs+=("docker.io/${{ secrets.DOCKERHUB_USERNAME }}/openviking@${digest}")
done
- [ ${#image_refs[@]} -gt 0 ] || {
- echo "No image digests found" >&2
+ [ ${#ghcr_image_refs[@]} -gt 0 ] || {
+ echo "No GHCR image digests found" >&2
+ exit 1
+ }
+ [ ${#dockerhub_image_refs[@]} -gt 0 ] || {
+ echo "No Docker Hub image digests found" >&2
exit 1
}
+ # Create manifests for all tags
while IFS= read -r tag; do
[ -n "$tag" ] || continue
- docker buildx imagetools create \
- --tag "$tag" \
- "${image_refs[@]}"
+
+ # Determine which registry this tag belongs to
+ if [[ "$tag" == ghcr.io/* ]]; then
+ docker buildx imagetools create \
+ --tag "$tag" \
+ "${ghcr_image_refs[@]}"
+ elif [[ "$tag" == docker.io/* ]]; then
+ docker buildx imagetools create \
+ --tag "$tag" \
+ "${dockerhub_image_refs[@]}"
+ fi
done <<< "$SOURCE_TAGS"
diff --git a/.gitignore b/.gitignore
index 92b164c30..490160f25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,7 +24,6 @@ share/python-wheels/
*.egg
MANIFEST
openviking.egg-info/
-data/
# Rust
target/
@@ -123,6 +122,7 @@ exports/
tests/api_test/api-test-report.html
tests/api_test/openviking-server.log
tests/api_test/openviking-server.pid
+tests/oc2ov_test/config/settings.py
# Benchmark outputs
examples/benchmark/outputs/
diff --git a/Cargo.lock b/Cargo.lock
index ae50a74b9..3dd5e4775 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -19,6 +19,18 @@ dependencies = [
"cpufeatures",
]
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
[[package]]
name = "aho-corasick"
version = "1.1.4"
@@ -34,6 +46,21 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
[[package]]
name = "anstream"
version = "0.6.21"
@@ -99,23 +126,599 @@ dependencies = [
"derive_arbitrary",
]
+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "atoi"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
+dependencies = [
+ "num-traits",
+]
+
[[package]]
name = "atomic-waker"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+[[package]]
+name = "autocfg"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+
+[[package]]
+name = "aws-config"
+version = "1.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11493b0bad143270fb8ad284a096dd529ba91924c5409adeac856cc1bf047dbc"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-sdk-sso",
+ "aws-sdk-ssooidc",
+ "aws-sdk-sts",
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-json 0.62.5",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "hex",
+ "http 1.4.0",
+ "sha1",
+ "time",
+ "tokio",
+ "tracing",
+ "url",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-credential-types"
+version = "1.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f20799b373a1be121fe3005fba0c2090af9411573878f224df44b42727fcaf7"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-rs"
+version = "1.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc"
+dependencies = [
+ "aws-lc-sys",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-sys"
+version = "0.39.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399"
+dependencies = [
+ "cc",
+ "cmake",
+ "dunce",
+ "fs_extra",
+]
+
+[[package]]
+name = "aws-runtime"
+version = "1.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fc0651c57e384202e47153c1260b84a9936e19803d747615edf199dc3b98d17"
+dependencies = [
+ "aws-credential-types",
+ "aws-sigv4",
+ "aws-smithy-async",
+ "aws-smithy-eventstream",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "bytes-utils",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "percent-encoding",
+ "pin-project-lite",
+ "tracing",
+ "uuid",
+]
+
+[[package]]
+name = "aws-sdk-s3"
+version = "1.119.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-sigv4",
+ "aws-smithy-async",
+ "aws-smithy-checksums",
+ "aws-smithy-eventstream",
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-json 0.61.9",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-smithy-xml",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "hex",
+ "hmac",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "lru",
+ "percent-encoding",
+ "regex-lite",
+ "sha2",
+ "tracing",
+ "url",
+]
+
+[[package]]
+name = "aws-sdk-sso"
+version = "1.97.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9aadc669e184501caaa6beafb28c6267fc1baef0810fb58f9b205485ca3f2567"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-json 0.62.5",
+ "aws-smithy-observability",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.0",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sdk-ssooidc"
+version = "1.99.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1342a7db8f358d3de0aed2007a0b54e875458e39848d54cc1d46700b2bfcb0a8"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-json 0.62.5",
+ "aws-smithy-observability",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.0",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sdk-sts"
+version = "1.101.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab41ad64e4051ecabeea802d6a17845a91e83287e1dd249e6963ea1ba78c428a"
+dependencies = [
+ "aws-credential-types",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-json 0.62.5",
+ "aws-smithy-observability",
+ "aws-smithy-query",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-smithy-xml",
+ "aws-types",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.0",
+ "regex-lite",
+ "tracing",
+]
+
+[[package]]
+name = "aws-sigv4"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0b660013a6683ab23797778e21f1f854744fdf05f68204b4cca4c8c04b5d1f4"
+dependencies = [
+ "aws-credential-types",
+ "aws-smithy-eventstream",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "crypto-bigint 0.5.5",
+ "form_urlencoded",
+ "hex",
+ "hmac",
+ "http 0.2.12",
+ "http 1.4.0",
+ "p256",
+ "percent-encoding",
+ "ring",
+ "sha2",
+ "subtle",
+ "time",
+ "tracing",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-smithy-async"
+version = "1.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ffcaf626bdda484571968400c326a244598634dc75fd451325a54ad1a59acfc"
+dependencies = [
+ "futures-util",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "aws-smithy-checksums"
+version = "0.63.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87294a084b43d649d967efe58aa1f9e0adc260e13a6938eb904c0ae9b45824ae"
+dependencies = [
+ "aws-smithy-http 0.62.6",
+ "aws-smithy-types",
+ "bytes",
+ "crc-fast",
+ "hex",
+ "http 0.2.12",
+ "http-body 0.4.6",
+ "md-5",
+ "pin-project-lite",
+ "sha1",
+ "sha2",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-eventstream"
+version = "0.60.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "faf09d74e5e32f76b8762da505a3cd59303e367a664ca67295387baa8c1d7548"
+dependencies = [
+ "aws-smithy-types",
+ "bytes",
+ "crc32fast",
+]
+
+[[package]]
+name = "aws-smithy-http"
+version = "0.62.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b"
+dependencies = [
+ "aws-smithy-eventstream",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "futures-util",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "percent-encoding",
+ "pin-project-lite",
+ "pin-utils",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-http"
+version = "0.63.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba1ab2dc1c2c3749ead27180d333c42f11be8b0e934058fb4b2258ee8dbe5231"
+dependencies = [
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "percent-encoding",
+ "pin-project-lite",
+ "pin-utils",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-http-client"
+version = "1.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a2f165a7feee6f263028b899d0a181987f4fa7179a6411a32a439fba7c5f769"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "h2 0.3.27",
+ "h2 0.4.13",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "hyper 0.14.32",
+ "hyper 1.8.1",
+ "hyper-rustls 0.24.2",
+ "hyper-rustls 0.27.7",
+ "hyper-util",
+ "pin-project-lite",
+ "rustls 0.21.12",
+ "rustls 0.23.37",
+ "rustls-native-certs",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls 0.26.4",
+ "tower",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-json"
+version = "0.61.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551"
+dependencies = [
+ "aws-smithy-types",
+]
+
+[[package]]
+name = "aws-smithy-json"
+version = "0.62.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9648b0bb82a2eedd844052c6ad2a1a822d1f8e3adee5fbf668366717e428856a"
+dependencies = [
+ "aws-smithy-types",
+]
+
+[[package]]
+name = "aws-smithy-observability"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06c2315d173edbf1920da8ba3a7189695827002e4c0fc961973ab1c54abca9c"
+dependencies = [
+ "aws-smithy-runtime-api",
+]
+
+[[package]]
+name = "aws-smithy-query"
+version = "0.60.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a56d79744fb3edb5d722ef79d86081e121d3b9422cb209eb03aea6aa4f21ebd"
+dependencies = [
+ "aws-smithy-types",
+ "urlencoding",
+]
+
+[[package]]
+name = "aws-smithy-runtime"
+version = "1.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "028999056d2d2fd58a697232f9eec4a643cf73a71cf327690a7edad1d2af2110"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-http 0.63.6",
+ "aws-smithy-http-client",
+ "aws-smithy-observability",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "bytes",
+ "fastrand",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "http-body-util",
+ "pin-project-lite",
+ "pin-utils",
+ "tokio",
+ "tracing",
+]
+
+[[package]]
+name = "aws-smithy-runtime-api"
+version = "1.11.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "876ab3c9c29791ba4ba02b780a3049e21ec63dabda09268b175272c3733a79e6"
+dependencies = [
+ "aws-smithy-async",
+ "aws-smithy-types",
+ "bytes",
+ "http 0.2.12",
+ "http 1.4.0",
+ "pin-project-lite",
+ "tokio",
+ "tracing",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-smithy-types"
+version = "1.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d73dbfbaa8e4bc57b9045137680b958d274823509a360abfd8e1d514d40c95c"
+dependencies = [
+ "base64-simd",
+ "bytes",
+ "bytes-utils",
+ "futures-core",
+ "http 0.2.12",
+ "http 1.4.0",
+ "http-body 0.4.6",
+ "http-body 1.0.1",
+ "http-body-util",
+ "itoa",
+ "num-integer",
+ "pin-project-lite",
+ "pin-utils",
+ "ryu",
+ "serde",
+ "time",
+ "tokio",
+ "tokio-util",
+]
+
+[[package]]
+name = "aws-smithy-xml"
+version = "0.60.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce02add1aa3677d022f8adf81dcbe3046a95f17a1b1e8979c145cd21d3d22b3"
+dependencies = [
+ "xmlparser",
+]
+
+[[package]]
+name = "aws-types"
+version = "1.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47c8323699dd9b3c8d5b3c13051ae9cdef58fd179957c882f8374dd8725962d9"
+dependencies = [
+ "aws-credential-types",
+ "aws-smithy-async",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "rustc_version",
+ "tracing",
+]
+
+[[package]]
+name = "axum"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+dependencies = [
+ "async-trait",
+ "axum-core",
+ "bytes",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper 1.8.1",
+ "hyper-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "base16ct"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce"
+
[[package]]
name = "base64"
version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+[[package]]
+name = "base64-simd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195"
+dependencies = [
+ "outref",
+ "vsimd",
+]
+
+[[package]]
+name = "base64ct"
+version = "1.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
+
[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
+dependencies = [
+ "serde_core",
+]
[[package]]
name = "block-buffer"
@@ -144,6 +747,16 @@ version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+[[package]]
+name = "bytes-utils"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35"
+dependencies = [
+ "bytes",
+ "either",
+]
+
[[package]]
name = "bzip2"
version = "0.5.2"
@@ -169,6 +782,12 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
[[package]]
name = "castaway"
version = "0.2.4"
@@ -208,6 +827,47 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+[[package]]
+name = "chrono"
+version = "0.4.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
+dependencies = [
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "serde",
+ "wasm-bindgen",
+ "windows-link",
+]
+
+[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
[[package]]
name = "cipher"
version = "0.4.4"
@@ -267,6 +927,15 @@ dependencies = [
"error-code",
]
+[[package]]
+name = "cmake"
+version = "0.1.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "colorchoice"
version = "1.0.4"
@@ -297,6 +966,21 @@ dependencies = [
"static_assertions",
]
+[[package]]
+name = "concurrent-queue"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "const-oid"
+version = "0.9.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
+
[[package]]
name = "constant_time_eq"
version = "0.3.1"
@@ -321,6 +1005,22 @@ dependencies = [
"crossterm 0.29.0",
]
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
[[package]]
name = "cpufeatures"
version = "0.2.17"
@@ -346,12 +1046,61 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
-name = "crc32fast"
-version = "1.5.0"
+name = "crc-fast"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ddc2d09feefeee8bd78101665bd8645637828fa9317f9f292496dbbd8c65ff3"
+dependencies = [
+ "crc",
+ "digest",
+ "rand 0.9.2",
+ "regex",
+ "rustversion",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools 0.10.5",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
- "cfg-if",
+ "cast",
+ "itertools 0.10.5",
]
[[package]]
@@ -479,6 +1228,34 @@ dependencies = [
"winapi",
]
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "crypto-bigint"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef"
+dependencies = [
+ "generic-array",
+ "rand_core 0.6.4",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "crypto-bigint"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76"
+dependencies = [
+ "rand_core 0.6.4",
+ "subtle",
+]
+
[[package]]
name = "crypto-common"
version = "0.1.7"
@@ -529,6 +1306,27 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "807800ff3288b621186fe0a8f3392c4652068257302709c24efd918c3dffcdc2"
+[[package]]
+name = "der"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de"
+dependencies = [
+ "const-oid",
+ "zeroize",
+]
+
+[[package]]
+name = "der"
+version = "0.7.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
+dependencies = [
+ "const-oid",
+ "pem-rfc7468",
+ "zeroize",
+]
+
[[package]]
name = "deranged"
version = "0.5.8"
@@ -578,6 +1376,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
+ "const-oid",
"crypto-common",
"subtle",
]
@@ -623,11 +1422,58 @@ dependencies = [
"litrs",
]
+[[package]]
+name = "dotenvy"
+version = "0.15.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
+
+[[package]]
+name = "dunce"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
+
+[[package]]
+name = "ecdsa"
+version = "0.14.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c"
+dependencies = [
+ "der 0.6.1",
+ "elliptic-curve",
+ "rfc6979",
+ "signature 1.6.4",
+]
+
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "elliptic-curve"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3"
+dependencies = [
+ "base16ct",
+ "crypto-bigint 0.4.9",
+ "der 0.6.1",
+ "digest",
+ "ff",
+ "generic-array",
+ "group",
+ "pkcs8 0.9.0",
+ "rand_core 0.6.4",
+ "sec1",
+ "subtle",
+ "zeroize",
+]
[[package]]
name = "endian-type"
@@ -657,6 +1503,40 @@ version = "3.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"
+[[package]]
+name = "etcetera"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943"
+dependencies = [
+ "cfg-if",
+ "home",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "event-listener"
+version = "5.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab"
+dependencies = [
+ "concurrent-queue",
+ "parking",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "fallible-iterator"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
+
+[[package]]
+name = "fallible-streaming-iterator"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
+
[[package]]
name = "fastrand"
version = "2.3.0"
@@ -674,6 +1554,16 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "ff"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160"
+dependencies = [
+ "rand_core 0.6.4",
+ "subtle",
+]
+
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
@@ -690,6 +1580,23 @@ dependencies = [
"miniz_oxide",
]
+[[package]]
+name = "flume"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+ "spin",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
[[package]]
name = "foldhash"
version = "0.1.5"
@@ -705,6 +1612,12 @@ dependencies = [
"percent-encoding",
]
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
[[package]]
name = "futures"
version = "0.3.32"
@@ -747,6 +1660,17 @@ dependencies = [
"futures-util",
]
+[[package]]
+name = "futures-intrusive"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f"
+dependencies = [
+ "futures-core",
+ "lock_api",
+ "parking_lot",
+]
+
[[package]]
name = "futures-io"
version = "0.3.32"
@@ -843,6 +1767,75 @@ dependencies = [
"wasip3",
]
+[[package]]
+name = "group"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7"
+dependencies = [
+ "ff",
+ "rand_core 0.6.4",
+ "subtle",
+]
+
+[[package]]
+name = "h2"
+version = "0.3.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d"
+dependencies = [
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "futures-util",
+ "http 0.2.12",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "h2"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http 1.4.0",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.14.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+dependencies = [
+ "ahash",
+]
+
[[package]]
name = "hashbrown"
version = "0.15.5"
@@ -860,18 +1853,51 @@ version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+[[package]]
+name = "hashlink"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
+dependencies = [
+ "hashbrown 0.14.5",
+]
+
+[[package]]
+name = "hashlink"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
+dependencies = [
+ "hashbrown 0.15.5",
+]
+
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+[[package]]
+name = "hkdf"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7"
+dependencies = [
+ "hmac",
+]
+
[[package]]
name = "hmac"
version = "0.12.1"
@@ -890,6 +1916,17 @@ dependencies = [
"windows-sys 0.61.2",
]
+[[package]]
+name = "http"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
+dependencies = [
+ "bytes",
+ "fnv",
+ "itoa",
+]
+
[[package]]
name = "http"
version = "1.4.0"
@@ -900,6 +1937,17 @@ dependencies = [
"itoa",
]
+[[package]]
+name = "http-body"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
+dependencies = [
+ "bytes",
+ "http 0.2.12",
+ "pin-project-lite",
+]
+
[[package]]
name = "http-body"
version = "1.0.1"
@@ -907,7 +1955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
- "http",
+ "http 1.4.0",
]
[[package]]
@@ -918,8 +1966,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
dependencies = [
"bytes",
"futures-core",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
"pin-project-lite",
]
@@ -929,6 +1977,36 @@ version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
+[[package]]
+name = "hyper"
+version = "0.14.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "h2 0.3.27",
+ "http 0.2.12",
+ "http-body 0.4.6",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "socket2 0.5.10",
+ "tokio",
+ "tower-service",
+ "tracing",
+ "want",
+]
+
[[package]]
name = "hyper"
version = "1.8.1"
@@ -939,9 +2017,11 @@ dependencies = [
"bytes",
"futures-channel",
"futures-core",
- "http",
- "http-body",
+ "h2 0.4.13",
+ "http 1.4.0",
+ "http-body 1.0.1",
"httparse",
+ "httpdate",
"itoa",
"pin-project-lite",
"pin-utils",
@@ -950,19 +2030,35 @@ dependencies = [
"want",
]
+[[package]]
+name = "hyper-rustls"
+version = "0.24.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
+dependencies = [
+ "futures-util",
+ "http 0.2.12",
+ "hyper 0.14.32",
+ "log",
+ "rustls 0.21.12",
+ "tokio",
+ "tokio-rustls 0.24.1",
+]
+
[[package]]
name = "hyper-rustls"
version = "0.27.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
- "http",
- "hyper",
+ "http 1.4.0",
+ "hyper 1.8.1",
"hyper-util",
- "rustls",
+ "rustls 0.23.37",
+ "rustls-native-certs",
"rustls-pki-types",
"tokio",
- "tokio-rustls",
+ "tokio-rustls 0.26.4",
"tower-service",
"webpki-roots",
]
@@ -977,19 +2073,43 @@ dependencies = [
"bytes",
"futures-channel",
"futures-util",
- "http",
- "http-body",
- "hyper",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "hyper 1.8.1",
"ipnet",
"libc",
"percent-encoding",
"pin-project-lite",
- "socket2",
+ "socket2 0.6.3",
"tokio",
"tower-service",
"tracing",
]
+[[package]]
+name = "iana-time-zone"
+version = "0.1.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "icu_collections"
version = "2.1.1"
@@ -1163,11 +2283,31 @@ dependencies = [
"serde",
]
+[[package]]
+name = "is-terminal"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
[[package]]
name = "itertools"
@@ -1232,6 +2372,9 @@ name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+dependencies = [
+ "spin",
+]
[[package]]
name = "leb128fmt"
@@ -1245,13 +2388,33 @@ version = "0.2.183"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
+[[package]]
+name = "libm"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
+
[[package]]
name = "libredox"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a"
dependencies = [
+ "bitflags",
"libc",
+ "plain",
+ "redox_syscall 0.7.3",
+]
+
+[[package]]
+name = "libsqlite3-sys"
+version = "0.30.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149"
+dependencies = [
+ "cc",
+ "pkg-config",
+ "vcpkg",
]
[[package]]
@@ -1340,12 +2503,46 @@ dependencies = [
"windows-sys 0.61.2",
]
+[[package]]
+name = "matchers"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
+dependencies = [
+ "regex-automata",
+]
+
+[[package]]
+name = "matchit"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+
+[[package]]
+name = "md-5"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
+dependencies = [
+ "cfg-if",
+ "digest",
+]
+
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
[[package]]
name = "mime"
version = "0.3.17"
@@ -1414,12 +2611,67 @@ dependencies = [
"libc",
]
+[[package]]
+name = "nu-ansi-term"
+version = "0.50.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "num-bigint-dig"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7"
+dependencies = [
+ "lazy_static",
+ "libm",
+ "num-integer",
+ "num-iter",
+ "num-traits",
+ "rand 0.8.5",
+ "smallvec",
+ "zeroize",
+]
+
[[package]]
name = "num-conv"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+ "libm",
+]
+
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -1432,12 +2684,30 @@ version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
+[[package]]
+name = "openssl-probe"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
+
[[package]]
name = "option-ext"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
+[[package]]
+name = "outref"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e"
+
[[package]]
name = "ov_cli"
version = "0.2.6"
@@ -1468,6 +2738,23 @@ dependencies = [
"zip",
]
+[[package]]
+name = "p256"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594"
+dependencies = [
+ "ecdsa",
+ "elliptic-curve",
+ "sha2",
+]
+
+[[package]]
+name = "parking"
+version = "2.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
+
[[package]]
name = "parking_lot"
version = "0.12.5"
@@ -1486,7 +2773,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
- "redox_syscall",
+ "redox_syscall 0.5.18",
"smallvec",
"windows-link",
]
@@ -1497,6 +2784,12 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+[[package]]
+name = "path-clean"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef"
+
[[package]]
name = "pbkdf2"
version = "0.12.2"
@@ -1507,6 +2800,15 @@ dependencies = [
"hmac",
]
+[[package]]
+name = "pem-rfc7468"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412"
+dependencies = [
+ "base64ct",
+]
+
[[package]]
name = "percent-encoding"
version = "2.3.2"
@@ -1525,12 +2827,83 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+[[package]]
+name = "pkcs1"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f"
+dependencies = [
+ "der 0.7.10",
+ "pkcs8 0.10.2",
+ "spki 0.7.3",
+]
+
+[[package]]
+name = "pkcs8"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba"
+dependencies = [
+ "der 0.6.1",
+ "spki 0.6.0",
+]
+
+[[package]]
+name = "pkcs8"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
+dependencies = [
+ "der 0.7.10",
+ "spki 0.7.3",
+]
+
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+[[package]]
+name = "plain"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
+
+[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
[[package]]
name = "potential_utf"
version = "0.1.4"
@@ -1574,6 +2947,67 @@ dependencies = [
"unicode-ident",
]
+[[package]]
+name = "pyo3"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d"
+dependencies = [
+ "indoc",
+ "libc",
+ "memoffset",
+ "once_cell",
+ "portable-atomic",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6"
+dependencies = [
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "pyo3-build-config",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "quinn"
version = "0.11.9"
@@ -1586,8 +3020,8 @@ dependencies = [
"quinn-proto",
"quinn-udp",
"rustc-hash",
- "rustls",
- "socket2",
+ "rustls 0.23.37",
+ "socket2 0.6.3",
"thiserror 2.0.18",
"tokio",
"tracing",
@@ -1603,10 +3037,10 @@ dependencies = [
"bytes",
"getrandom 0.3.4",
"lru-slab",
- "rand",
+ "rand 0.9.2",
"ring",
"rustc-hash",
- "rustls",
+ "rustls 0.23.37",
"rustls-pki-types",
"slab",
"thiserror 2.0.18",
@@ -1624,7 +3058,7 @@ dependencies = [
"cfg_aliases 0.2.1",
"libc",
"once_cell",
- "socket2",
+ "socket2 0.6.3",
"tracing",
"windows-sys 0.60.2",
]
@@ -1660,14 +3094,78 @@ dependencies = [
"nibble_vec",
]
+[[package]]
+name = "ragfs"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "aws-config",
+ "aws-sdk-s3",
+ "aws-types",
+ "axum",
+ "bytes",
+ "chrono",
+ "clap",
+ "criterion",
+ "hyper 1.8.1",
+ "lru",
+ "path-clean",
+ "radix_trie",
+ "rusqlite",
+ "serde",
+ "serde_json",
+ "serde_yaml",
+ "sqlx",
+ "tempfile",
+ "thiserror 1.0.69",
+ "tokio",
+ "tower",
+ "tower-http 0.5.2",
+ "tracing",
+ "tracing-subscriber",
+ "uuid",
+]
+
+[[package]]
+name = "ragfs-python"
+version = "0.1.0"
+dependencies = [
+ "pyo3",
+ "ragfs",
+ "serde_json",
+ "tokio",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha 0.3.1",
+ "rand_core 0.6.4",
+]
+
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
- "rand_chacha",
- "rand_core",
+ "rand_chacha 0.9.0",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
]
[[package]]
@@ -1677,7 +3175,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
- "rand_core",
+ "rand_core 0.9.5",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.17",
]
[[package]]
@@ -1690,31 +3197,60 @@ dependencies = [
]
[[package]]
-name = "ratatui"
-version = "0.29.0"
+name = "ratatui"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b"
+dependencies = [
+ "bitflags",
+ "cassowary",
+ "compact_str",
+ "crossterm 0.28.1",
+ "indoc",
+ "instability",
+ "itertools 0.13.0",
+ "lru",
+ "paste",
+ "strum",
+ "unicode-segmentation",
+ "unicode-truncate",
+ "unicode-width 0.2.0",
+]
+
+[[package]]
+name = "rayon"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
- "cassowary",
- "compact_str",
- "crossterm 0.28.1",
- "indoc",
- "instability",
- "itertools",
- "lru",
- "paste",
- "strum",
- "unicode-segmentation",
- "unicode-truncate",
- "unicode-width 0.2.0",
]
[[package]]
name = "redox_syscall"
-version = "0.5.18"
+version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+checksum = "6ce70a74e890531977d37e532c34d45e9055d2409ed08ddba14529471ed0be16"
dependencies = [
"bitflags",
]
@@ -1753,6 +3289,12 @@ dependencies = [
"regex-syntax",
]
+[[package]]
+name = "regex-lite"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
+
[[package]]
name = "regex-syntax"
version = "0.8.10"
@@ -1769,11 +3311,11 @@ dependencies = [
"bytes",
"futures-core",
"futures-util",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
"http-body-util",
- "hyper",
- "hyper-rustls",
+ "hyper 1.8.1",
+ "hyper-rustls 0.27.7",
"hyper-util",
"js-sys",
"log",
@@ -1781,16 +3323,16 @@ dependencies = [
"percent-encoding",
"pin-project-lite",
"quinn",
- "rustls",
+ "rustls 0.23.37",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
- "tokio-rustls",
+ "tokio-rustls 0.26.4",
"tower",
- "tower-http",
+ "tower-http 0.6.8",
"tower-service",
"url",
"wasm-bindgen",
@@ -1799,6 +3341,17 @@ dependencies = [
"webpki-roots",
]
+[[package]]
+name = "rfc6979"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb"
+dependencies = [
+ "crypto-bigint 0.4.9",
+ "hmac",
+ "zeroize",
+]
+
[[package]]
name = "ring"
version = "0.17.14"
@@ -1813,6 +3366,40 @@ dependencies = [
"windows-sys 0.52.0",
]
+[[package]]
+name = "rsa"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d"
+dependencies = [
+ "const-oid",
+ "digest",
+ "num-bigint-dig",
+ "num-integer",
+ "num-traits",
+ "pkcs1",
+ "pkcs8 0.10.2",
+ "rand_core 0.6.4",
+ "signature 2.2.0",
+ "spki 0.7.3",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rusqlite"
+version = "0.32.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7753b721174eb8ff87a9a0e799e2d7bc3749323e773db92e0984debb00019d6e"
+dependencies = [
+ "bitflags",
+ "fallible-iterator",
+ "fallible-streaming-iterator",
+ "hashlink 0.9.1",
+ "libsqlite3-sys",
+ "smallvec",
+]
+
[[package]]
name = "rustc-hash"
version = "2.1.1"
@@ -1854,20 +3441,45 @@ dependencies = [
"windows-sys 0.61.2",
]
+[[package]]
+name = "rustls"
+version = "0.21.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
+dependencies = [
+ "log",
+ "ring",
+ "rustls-webpki 0.101.7",
+ "sct",
+]
+
[[package]]
name = "rustls"
version = "0.23.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
dependencies = [
+ "aws-lc-rs",
"once_cell",
"ring",
"rustls-pki-types",
- "rustls-webpki",
+ "rustls-webpki 0.103.9",
"subtle",
"zeroize",
]
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
[[package]]
name = "rustls-pki-types"
version = "1.14.0"
@@ -1878,12 +3490,23 @@ dependencies = [
"zeroize",
]
+[[package]]
+name = "rustls-webpki"
+version = "0.101.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
+dependencies = [
+ "ring",
+ "untrusted",
+]
+
[[package]]
name = "rustls-webpki"
version = "0.103.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
dependencies = [
+ "aws-lc-rs",
"ring",
"rustls-pki-types",
"untrusted",
@@ -1932,12 +3555,68 @@ dependencies = [
"winapi-util",
]
+[[package]]
+name = "schannel"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+[[package]]
+name = "sct"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
+dependencies = [
+ "ring",
+ "untrusted",
+]
+
+[[package]]
+name = "sec1"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928"
+dependencies = [
+ "base16ct",
+ "der 0.6.1",
+ "generic-array",
+ "pkcs8 0.9.0",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "security-framework"
+version = "3.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
+dependencies = [
+ "bitflags",
+ "core-foundation",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
[[package]]
name = "semver"
version = "1.0.27"
@@ -1988,6 +3667,17 @@ dependencies = [
"zmij",
]
+[[package]]
+name = "serde_path_to_error"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
+dependencies = [
+ "itoa",
+ "serde",
+ "serde_core",
+]
+
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
@@ -2000,6 +3690,19 @@ dependencies = [
"serde",
]
+[[package]]
+name = "serde_yaml"
+version = "0.9.34+deprecated"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
+
[[package]]
name = "sha1"
version = "0.10.6"
@@ -2011,6 +3714,26 @@ dependencies = [
"digest",
]
+[[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "sharded-slab"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
+dependencies = [
+ "lazy_static",
+]
+
[[package]]
name = "shlex"
version = "1.3.0"
@@ -2048,6 +3771,26 @@ dependencies = [
"libc",
]
+[[package]]
+name = "signature"
+version = "1.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c"
+dependencies = [
+ "digest",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "signature"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de"
+dependencies = [
+ "digest",
+ "rand_core 0.6.4",
+]
+
[[package]]
name = "simd-adler32"
version = "0.3.8"
@@ -2055,25 +3798,255 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
-name = "slab"
-version = "0.4.12"
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "socket2"
+version = "0.5.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "socket2"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "spin"
+version = "0.9.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "spki"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b"
+dependencies = [
+ "base64ct",
+ "der 0.6.1",
+]
+
+[[package]]
+name = "spki"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
+dependencies = [
+ "base64ct",
+ "der 0.7.10",
+]
+
+[[package]]
+name = "sqlx"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc"
+dependencies = [
+ "sqlx-core",
+ "sqlx-macros",
+ "sqlx-mysql",
+ "sqlx-postgres",
+ "sqlx-sqlite",
+]
+
+[[package]]
+name = "sqlx-core"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6"
+dependencies = [
+ "base64",
+ "bytes",
+ "crc",
+ "crossbeam-queue",
+ "either",
+ "event-listener",
+ "futures-core",
+ "futures-intrusive",
+ "futures-io",
+ "futures-util",
+ "hashbrown 0.15.5",
+ "hashlink 0.10.0",
+ "indexmap",
+ "log",
+ "memchr",
+ "once_cell",
+ "percent-encoding",
+ "serde",
+ "serde_json",
+ "sha2",
+ "smallvec",
+ "thiserror 2.0.18",
+ "tokio",
+ "tokio-stream",
+ "tracing",
+ "url",
+]
+
+[[package]]
+name = "sqlx-macros"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "sqlx-core",
+ "sqlx-macros-core",
+ "syn",
+]
+
+[[package]]
+name = "sqlx-macros-core"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b"
+dependencies = [
+ "dotenvy",
+ "either",
+ "heck",
+ "hex",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "serde",
+ "serde_json",
+ "sha2",
+ "sqlx-core",
+ "sqlx-mysql",
+ "sqlx-postgres",
+ "sqlx-sqlite",
+ "syn",
+ "tokio",
+ "url",
+]
+
+[[package]]
+name = "sqlx-mysql"
+version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526"
+dependencies = [
+ "atoi",
+ "base64",
+ "bitflags",
+ "byteorder",
+ "bytes",
+ "crc",
+ "digest",
+ "dotenvy",
+ "either",
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-util",
+ "generic-array",
+ "hex",
+ "hkdf",
+ "hmac",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "percent-encoding",
+ "rand 0.8.5",
+ "rsa",
+ "serde",
+ "sha1",
+ "sha2",
+ "smallvec",
+ "sqlx-core",
+ "stringprep",
+ "thiserror 2.0.18",
+ "tracing",
+ "whoami",
+]
[[package]]
-name = "smallvec"
-version = "1.15.1"
+name = "sqlx-postgres"
+version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46"
+dependencies = [
+ "atoi",
+ "base64",
+ "bitflags",
+ "byteorder",
+ "crc",
+ "dotenvy",
+ "etcetera",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "hex",
+ "hkdf",
+ "hmac",
+ "home",
+ "itoa",
+ "log",
+ "md-5",
+ "memchr",
+ "once_cell",
+ "rand 0.8.5",
+ "serde",
+ "serde_json",
+ "sha2",
+ "smallvec",
+ "sqlx-core",
+ "stringprep",
+ "thiserror 2.0.18",
+ "tracing",
+ "whoami",
+]
[[package]]
-name = "socket2"
-version = "0.6.3"
+name = "sqlx-sqlite"
+version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea"
dependencies = [
- "libc",
- "windows-sys 0.61.2",
+ "atoi",
+ "flume",
+ "futures-channel",
+ "futures-core",
+ "futures-executor",
+ "futures-intrusive",
+ "futures-util",
+ "libsqlite3-sys",
+ "log",
+ "percent-encoding",
+ "serde",
+ "serde_urlencoded",
+ "sqlx-core",
+ "thiserror 2.0.18",
+ "tracing",
+ "url",
]
[[package]]
@@ -2094,6 +4067,17 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f42444fea5b87a39db4218d9422087e66a85d0e7a0963a439b07bcdf91804006"
+[[package]]
+name = "stringprep"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b4df3d392d81bd458a8a621b8bffbd2302a12ffe288a9d931670948749463b1"
+dependencies = [
+ "unicode-bidi",
+ "unicode-normalization",
+ "unicode-properties",
+]
+
[[package]]
name = "strsim"
version = "0.11.1"
@@ -2159,6 +4143,12 @@ dependencies = [
"syn",
]
+[[package]]
+name = "target-lexicon"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
+
[[package]]
name = "tempfile"
version = "3.26.0"
@@ -2228,6 +4218,15 @@ dependencies = [
"syn",
]
+[[package]]
+name = "thread_local"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
+dependencies = [
+ "cfg-if",
+]
+
[[package]]
name = "time"
version = "0.3.47"
@@ -2239,6 +4238,7 @@ dependencies = [
"powerfmt",
"serde_core",
"time-core",
+ "time-macros",
]
[[package]]
@@ -2247,6 +4247,16 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
+[[package]]
+name = "time-macros"
+version = "0.2.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
+dependencies = [
+ "num-conv",
+ "time-core",
+]
+
[[package]]
name = "tinystr"
version = "0.8.2"
@@ -2257,6 +4267,16 @@ dependencies = [
"zerovec",
]
+[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
[[package]]
name = "tinyvec"
version = "1.10.0"
@@ -2284,7 +4304,7 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
- "socket2",
+ "socket2 0.6.3",
"tokio-macros",
"windows-sys 0.61.2",
]
@@ -2300,13 +4320,47 @@ dependencies = [
"syn",
]
+[[package]]
+name = "tokio-rustls"
+version = "0.24.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
+dependencies = [
+ "rustls 0.21.12",
+ "tokio",
+]
+
[[package]]
name = "tokio-rustls"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
dependencies = [
- "rustls",
+ "rustls 0.23.37",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-stream"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
"tokio",
]
@@ -2323,6 +4377,24 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
+dependencies = [
+ "bitflags",
+ "bytes",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+ "tracing",
]
[[package]]
@@ -2334,8 +4406,8 @@ dependencies = [
"bitflags",
"bytes",
"futures-util",
- "http",
- "http-body",
+ "http 1.4.0",
+ "http-body 1.0.1",
"iri-string",
"pin-project-lite",
"tower",
@@ -2361,10 +4433,23 @@ version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
dependencies = [
+ "log",
"pin-project-lite",
+ "tracing-attributes",
"tracing-core",
]
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "tracing-core"
version = "0.1.36"
@@ -2372,6 +4457,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
dependencies = [
"once_cell",
+ "valuable",
+]
+
+[[package]]
+name = "tracing-log"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
+dependencies = [
+ "log",
+ "once_cell",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-serde"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1"
+dependencies = [
+ "serde",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.3.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb7f578e5945fb242538965c2d0b04418d38ec25c79d160cd279bf0731c8d319"
+dependencies = [
+ "matchers",
+ "nu-ansi-term",
+ "once_cell",
+ "regex-automata",
+ "serde",
+ "serde_json",
+ "sharded-slab",
+ "smallvec",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
+ "tracing-serde",
]
[[package]]
@@ -2392,12 +4520,33 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
+[[package]]
+name = "unicode-bidi"
+version = "0.3.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
+
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+[[package]]
+name = "unicode-normalization"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unicode-properties"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
+
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
@@ -2410,7 +4559,7 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf"
dependencies = [
- "itertools",
+ "itertools 0.13.0",
"unicode-segmentation",
"unicode-width 0.1.14",
]
@@ -2433,6 +4582,18 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+[[package]]
+name = "unindent"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
+
+[[package]]
+name = "unsafe-libyaml"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
+
[[package]]
name = "untrusted"
version = "0.9.0"
@@ -2451,6 +4612,12 @@ dependencies = [
"serde",
]
+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
[[package]]
name = "utf8_iter"
version = "1.0.4"
@@ -2475,12 +4642,30 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "valuable"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+[[package]]
+name = "vsimd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64"
+
[[package]]
name = "walkdir"
version = "2.5.0"
@@ -2524,6 +4709,12 @@ dependencies = [
"wit-bindgen",
]
+[[package]]
+name = "wasite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
+
[[package]]
name = "wasm-bindgen"
version = "0.2.114"
@@ -2646,6 +4837,16 @@ dependencies = [
"rustls-pki-types",
]
+[[package]]
+name = "whoami"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d"
+dependencies = [
+ "libredox",
+ "wasite",
+]
+
[[package]]
name = "winapi"
version = "0.3.9"
@@ -2677,6 +4878,41 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "windows-link"
version = "0.2.1"
@@ -3037,6 +5273,12 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
+[[package]]
+name = "xmlparser"
+version = "0.13.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
+
[[package]]
name = "xz2"
version = "0.1.7"
diff --git a/Cargo.toml b/Cargo.toml
index c09add8cd..ce34f9e19 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
[workspace]
-members = ["crates/ov_cli"]
+members = ["crates/ov_cli", "crates/ragfs", "crates/ragfs-python"]
resolver = "2"
[profile.release]
diff --git a/Dockerfile b/Dockerfile
index 5659a0585..3515dc84b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,22 +1,17 @@
# syntax=docker/dockerfile:1.9
-# Stage 1: provide Go toolchain (required by setup.py -> build_agfs_artifacts -> make build)
-FROM golang:1.26-trixie AS go-toolchain
-
-# Stage 2: provide Rust toolchain (required by setup.py -> build_ov_cli_artifact -> cargo build)
+# Stage 1: provide Rust toolchain (required by setup.py -> build_ov_cli_artifact -> cargo build)
FROM rust:1.88-trixie AS rust-toolchain
-# Stage 3: build Python environment with uv (builds AGFS + Rust CLI + C++ extension from source)
+# Stage 2: build Python environment with uv (builds Rust CLI + C++ extension from source)
FROM ghcr.io/astral-sh/uv:python3.13-trixie-slim AS py-builder
-# Reuse Go toolchain from stage 1 so setup.py can compile agfs-server in-place.
-COPY --from=go-toolchain /usr/local/go /usr/local/go
-# Reuse Rust toolchain from stage 2 so setup.py can compile ov CLI in-place.
+# Reuse Rust toolchain from stage 1 so setup.py can compile ov CLI in-place.
COPY --from=rust-toolchain /usr/local/cargo /usr/local/cargo
COPY --from=rust-toolchain /usr/local/rustup /usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV RUSTUP_HOME=/usr/local/rustup
-ENV PATH="/usr/local/cargo/bin:/usr/local/go/bin:${PATH}"
+ENV PATH="/app/.venv/bin:/usr/local/cargo/bin:${PATH}"
ARG OPENVIKING_VERSION=0.0.0
ARG TARGETPLATFORM
ARG UV_LOCK_STRATEGY=auto
@@ -42,7 +37,6 @@ COPY crates/ crates/
COPY openviking/ openviking/
COPY openviking_cli/ openviking_cli/
COPY src/ src/
-COPY third_party/ third_party/
# Install project and dependencies (triggers setup.py artifact builds + build_extension).
# Default to auto-refreshing uv.lock inside the ephemeral build context when it is
@@ -51,13 +45,13 @@ COPY third_party/ third_party/
RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \
case "${UV_LOCK_STRATEGY}" in \
locked) \
- uv sync --locked --no-editable --extra bot \
+ uv sync --locked --no-editable --extra bot --extra gemini \
;; \
auto) \
if ! uv lock --check; then \
uv lock; \
fi; \
- uv sync --locked --no-editable --extra bot \
+ uv sync --locked --no-editable --extra bot --extra gemini \
;; \
*) \
echo "Unsupported UV_LOCK_STRATEGY: ${UV_LOCK_STRATEGY}" >&2; \
@@ -65,7 +59,44 @@ RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \
;; \
esac
-# Stage 4: runtime
+# Build ragfs-python (Rust RAGFS binding) and extract the native extension
+# into the installed openviking package.
+RUN --mount=type=cache,target=/root/.cache/uv,id=uv-${TARGETPLATFORM} \
+ uv pip install maturin && \
+ export _TMPDIR=$(mktemp -d) && \
+ trap 'rm -rf "$_TMPDIR"' EXIT && \
+ cd crates/ragfs-python && \
+ python -m maturin build --release --out "$_TMPDIR" && \
+ cd ../.. && \
+ export _OV_LIB=$(python -c "import openviking; from pathlib import Path; print(Path(openviking.__file__).resolve().parent / 'lib')") && \
+ mkdir -p "$_OV_LIB" && \
+ python - <<'PY'
+import glob
+import os
+import sys
+import zipfile
+
+tmpdir = os.environ["_TMPDIR"]
+ov_lib = os.environ["_OV_LIB"]
+whls = glob.glob(os.path.join(tmpdir, "ragfs_python-*.whl"))
+assert whls, "maturin produced no wheel"
+
+with zipfile.ZipFile(whls[0]) as zf:
+ for name in zf.namelist():
+ bn = os.path.basename(name)
+ if bn.startswith("ragfs_python") and (bn.endswith(".so") or bn.endswith(".pyd")):
+ dst = os.path.join(ov_lib, bn)
+ with zf.open(name) as src, open(dst, "wb") as f:
+ f.write(src.read())
+ os.chmod(dst, 0o755)
+ print(f"ragfs-python: extracted {bn} -> {dst}")
+ sys.exit(0)
+
+print("WARNING: No ragfs_python .so/.pyd in wheel")
+sys.exit(1)
+PY
+
+# Stage 3: runtime
FROM python:3.13-slim-trixie
RUN apt-get update && apt-get install -y --no-install-recommends \
diff --git a/MANIFEST.in b/MANIFEST.in
index 800d1691d..d93d175a5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,13 +3,14 @@ graft third_party/leveldb-1.23
graft third_party/spdlog-1.14.1
graft third_party/croaring
graft third_party/rapidjson
-recursive-include third_party/agfs/agfs-server *.go go.mod go.sum Makefile
-recursive-include third_party/agfs/agfs-sdk/go *.go go.mod
-include third_party/agfs/bin/agfs-server
include LICENSE
include README.md
include pyproject.toml
include setup.py
+include Cargo.toml
+include Cargo.lock
+graft crates/ragfs
+graft crates/ragfs-python
recursive-include openviking *.yaml
# sdist should be source-only: never ship runtime binaries from working tree
diff --git a/Makefile b/Makefile
index 55db08601..f736e67b5 100644
--- a/Makefile
+++ b/Makefile
@@ -3,12 +3,10 @@
# Variables
PYTHON ?= python3
SETUP_PY := setup.py
-AGFS_SERVER_DIR := third_party/agfs/agfs-server
OV_CLI_DIR := crates/ov_cli
# Dependency Versions
MIN_PYTHON_VERSION := 3.10
-MIN_GO_VERSION := 1.22
MIN_CMAKE_VERSION := 3.12
MIN_RUST_VERSION := 1.88
MIN_GCC_VERSION := 9
@@ -21,7 +19,6 @@ CLEAN_DIRS := \
*.egg-info/ \
openviking/bin/ \
openviking/lib/ \
- $(AGFS_SERVER_DIR)/build/ \
$(OV_CLI_DIR)/target/ \
src/cmake_build/ \
.pytest_cache/ \
@@ -35,9 +32,9 @@ all: build
help:
@echo "Available targets:"
- @echo " build - Build AGFS, ov CLI, and C++ extensions using setup.py"
+ @echo " build - Build ragfs-python and C++ extensions using setup.py"
@echo " clean - Remove build artifacts and temporary files"
- @echo " check-deps - Check if required dependencies (Go, Rust, CMake, etc.) are installed"
+ @echo " check-deps - Check if required dependencies (Rust, CMake, etc.) are installed"
@echo " help - Show this help message"
check-pip:
@@ -59,11 +56,6 @@ check-deps:
@# Python check
@$(PYTHON) -c "import sys; v=sys.version_info; exit(0 if v.major > 3 or (v.major == 3 and v.minor >= 10) else 1)" || (echo "Error: Python >= $(MIN_PYTHON_VERSION) is required."; exit 1)
@echo " [OK] Python $$( $(PYTHON) -V | cut -d' ' -f2 )"
- @# Go check
- @command -v go > /dev/null 2>&1 || (echo "Error: Go is not installed."; exit 1)
- @GO_VER=$$(go version | awk '{print $$3}' | sed 's/go//'); \
- $(PYTHON) -c "v='$$GO_VER'.split('.'); exit(0 if int(v[0]) > 1 or (int(v[0]) == 1 and int(v[1]) >= 22) else 1)" || (echo "Error: Go >= $(MIN_GO_VERSION) is required. Found $$GO_VER"; exit 1); \
- echo " [OK] Go $$GO_VER"
@# CMake check
@command -v cmake > /dev/null 2>&1 || (echo "Error: CMake is not installed."; exit 1)
@CMAKE_VER=$$(cmake --version | head -n1 | awk '{print $$3}'); \
@@ -99,6 +91,39 @@ build: check-deps check-pip
echo " [OK] pip found, use pip to install..."; \
$(PYTHON) -m pip install -e .; \
fi
+ @echo "Building ragfs-python (Rust RAGFS binding) into openviking/lib/..."
+ @MATURIN_CMD=""; \
+ if command -v maturin > /dev/null 2>&1; then \
+ MATURIN_CMD=maturin; \
+ elif command -v uv > /dev/null 2>&1 && uv pip --help > /dev/null 2>&1; then \
+ uv pip install maturin && MATURIN_CMD=maturin; \
+ fi; \
+ if [ -n "$$MATURIN_CMD" ]; then \
+ TMPDIR=$$(mktemp -d); \
+ cd crates/ragfs-python && $$MATURIN_CMD build --release --out "$$TMPDIR" 2>&1; \
+ cd ../..; \
+ mkdir -p openviking/lib; \
+ echo "import zipfile, glob, shutil, os, sys" > /tmp/extract_ragfs.py; \
+ echo "whls = glob.glob(os.path.join('$$TMPDIR', 'ragfs_python-*.whl'))" >> /tmp/extract_ragfs.py; \
+ echo "assert whls, 'maturin produced no wheel'" >> /tmp/extract_ragfs.py; \
+ echo "with zipfile.ZipFile(whls[0]) as zf:" >> /tmp/extract_ragfs.py; \
+ echo " for name in zf.namelist():" >> /tmp/extract_ragfs.py; \
+ echo " bn = os.path.basename(name)" >> /tmp/extract_ragfs.py; \
+ echo " if bn.startswith('ragfs_python') and (bn.endswith('.so') or bn.endswith('.pyd')):" >> /tmp/extract_ragfs.py; \
+ echo " dst = os.path.join('openviking', 'lib', bn)" >> /tmp/extract_ragfs.py; \
+ echo " with zf.open(name) as src, open(dst, 'wb') as f: f.write(src.read())" >> /tmp/extract_ragfs.py; \
+ echo " os.chmod(dst, 0o755)" >> /tmp/extract_ragfs.py; \
+ echo " print(f' [OK] ragfs-python: extracted {bn} -> {dst}')" >> /tmp/extract_ragfs.py; \
+ echo " sys.exit(0)" >> /tmp/extract_ragfs.py; \
+ echo "print('[Warning] No ragfs_python .so/.pyd found in wheel')" >> /tmp/extract_ragfs.py; \
+ echo "sys.exit(1)" >> /tmp/extract_ragfs.py; \
+ $(PYTHON) /tmp/extract_ragfs.py; \
+ rm -f /tmp/extract_ragfs.py; \
+ rm -rf "$$TMPDIR"; \
+ else \
+ echo " [SKIP] maturin not found, ragfs-python (Rust binding) will not be built."; \
+ echo " Install maturin to enable: uv pip install maturin"; \
+ fi
@echo "Build completed successfully."
clean:
@@ -111,4 +136,4 @@ clean:
done
@find . -name "*.pyc" -delete
@find . -name "__pycache__" -type d -exec rm -rf {} +
- @echo "Cleanup completed."
+ @echo "Cleanup completed."
\ No newline at end of file
diff --git a/README.md b/README.md
index 3ea775d60..4dc37240d 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
English / [中文](README_CN.md) / [日本語](README_JA.md)
-Website · GitHub · Issues · Docs
+Website · GitHub · Issues · Docs
[![][release-shield]][release-link]
[![][github-stars-shield]][github-stars-link]
@@ -534,6 +534,8 @@ After integrating OpenViking:
👉 **[View: OpenCode Memory Plugin Example](examples/opencode-memory-plugin/README.md)**
+👉 **[View: Claude Code Memory Plugin Example](examples/claude-code-memory-plugin/README.md)**
+
--
## Core Concepts
diff --git a/README_CN.md b/README_CN.md
index c64168e64..c63d14361 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -559,6 +559,8 @@ ov chat
👉 **[查看:OpenCode 记忆插件示例](examples/opencode-memory-plugin/README_CN.md)**
+👉 **[查看:Claude Code 记忆插件示例](examples/claude-code-memory-plugin/README_CN.md)**
+
## VikingBot 部署详情
OpenViking 有一个类似 nanobot 的机器人用于交互工作,现已可用。
diff --git a/README_JA.md b/README_JA.md
index d867e1531..9ebc971e6 100644
--- a/README_JA.md
+++ b/README_JA.md
@@ -495,6 +495,8 @@ OpenViking統合後:
👉 **[参照: OpenCodeメモリプラグインの例](examples/opencode-memory-plugin/README.md)**
+👉 **[参照: Claude Codeメモリプラグインの例](examples/claude-code-memory-plugin/README.md)**
+
--
## コアコンセプト
diff --git a/benchmark/.gitignore b/benchmark/.gitignore
new file mode 100644
index 000000000..68bcbc960
--- /dev/null
+++ b/benchmark/.gitignore
@@ -0,0 +1 @@
+results/
\ No newline at end of file
diff --git a/benchmark/RAG/ov.conf.example b/benchmark/RAG/ov.conf.example
index e41a79d9a..9ea5f47e2 100644
--- a/benchmark/RAG/ov.conf.example
+++ b/benchmark/RAG/ov.conf.example
@@ -1,7 +1,6 @@
{
"storage": {
"agfs": {
- "port": 1876
}
},
"log": {
diff --git a/benchmark/custom/session_contention_benchmark.py b/benchmark/custom/session_contention_benchmark.py
new file mode 100644
index 000000000..c351952ae
--- /dev/null
+++ b/benchmark/custom/session_contention_benchmark.py
@@ -0,0 +1,1672 @@
+#!/usr/bin/env python3
+"""Daily session mixed-load contention benchmark for OpenViking."""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import csv
+import json
+import math
+import os
+import random
+import sys
+import time
+from dataclasses import asdict, dataclass, field
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional
+
+import httpx
+
+DEFAULT_FIND_QUERIES = [
+ "how to authenticate users",
+ "what is OpenViking",
+ "session commit memory extraction",
+]
+DEFAULT_SLOW_THRESHOLDS_MS = (1000, 3000, 5000)
+MAX_ERROR_MESSAGE_LEN = 500
+
+
+@dataclass
+class BenchmarkConfig:
+ server_url: str
+ api_key: str
+ account: str
+ user: str
+ request_timeout: float
+ session_count: int
+ writer_concurrency: int
+ reader_concurrency: int
+ extract_concurrency: int
+ messages_per_commit: int
+ extract_ratio: float
+ message_size: int
+ baseline_seconds: float
+ mixed_seconds: float
+ recovery_seconds: float
+ window_seconds: float
+ observer_interval: float
+ task_poll_interval: float
+ task_drain_timeout: float
+ output_dir: str
+ cleanup: bool
+ require_extract_load: bool
+ find_queries: List[str]
+ find_limit: int
+ find_target_uri: str
+ find_score_threshold: Optional[float]
+ seed: int
+
+
+@dataclass
+class PhaseMetadata:
+ phase: str
+ started_at: str
+ ended_at: str
+ duration_seconds: float
+
+
+@dataclass
+class RequestEvent:
+ api: str
+ method: str
+ path: str
+ phase: str
+ started_at: str
+ ended_at: str
+ elapsed_ms_since_run_start: float
+ latency_ms: float
+ success: bool
+ status_code: Optional[int]
+ timeout: bool
+ exception_type: Optional[str]
+ error_code: Optional[str]
+ error_message: Optional[str]
+ session_id: Optional[str] = None
+ cycle_index: Optional[int] = None
+ worker_id: Optional[int] = None
+ task_id: Optional[str] = None
+
+ def to_dict(self) -> Dict[str, Any]:
+ return asdict(self)
+
+
+@dataclass
+class CommitTaskEvent:
+ task_id: str
+ session_id: str
+ origin_phase: str
+ completion_phase: str
+ status: str
+ created_at: Optional[float]
+ updated_at: Optional[float]
+ server_duration_ms: Optional[float]
+ local_duration_ms: float
+ active_count_updated: Optional[int]
+ memories_extracted: Optional[Dict[str, int]]
+ error: Optional[str]
+ cycle_index: Optional[int]
+ polled_at: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ return asdict(self)
+
+
+@dataclass
+class ObserverSample:
+ api: str
+ phase: str
+ sampled_at: str
+ elapsed_ms_since_run_start: float
+ latency_ms: float
+ success: bool
+ is_healthy: Optional[bool]
+ has_errors: Optional[bool]
+ payload: Optional[Dict[str, Any]]
+ error_message: Optional[str] = None
+
+ def to_dict(self) -> Dict[str, Any]:
+ return asdict(self)
+
+
+@dataclass
+class PendingCommitTask:
+ task_id: str
+ session_id: str
+ origin_phase: str
+ cycle_index: int
+ local_started_monotonic: float
+
+
+@dataclass
+class Recorder:
+ request_events: List[RequestEvent] = field(default_factory=list)
+ task_events: List[CommitTaskEvent] = field(default_factory=list)
+ observer_samples: List[ObserverSample] = field(default_factory=list)
+ notes: List[str] = field(default_factory=list)
+
+ def add_request(self, event: RequestEvent) -> None:
+ self.request_events.append(event)
+
+ def add_task(self, event: CommitTaskEvent) -> None:
+ self.task_events.append(event)
+
+ def add_sample(self, sample: ObserverSample) -> None:
+ self.observer_samples.append(sample)
+
+ def add_note(self, note: str) -> None:
+ self.notes.append(note)
+
+
+class PhaseState:
+ def __init__(self, initial: str = "setup") -> None:
+ self.current = initial
+
+
+class BenchmarkHTTPClient:
+ def __init__(self, config: BenchmarkConfig, recorder: Recorder) -> None:
+ self._config = config
+ self._recorder = recorder
+ self._run_start_monotonic = time.perf_counter()
+ self._client = httpx.AsyncClient(
+ base_url=config.server_url.rstrip("/"),
+ headers=self._default_headers(),
+ timeout=httpx.Timeout(config.request_timeout),
+ follow_redirects=True,
+ limits=httpx.Limits(
+ max_connections=max(
+ 32,
+ config.writer_concurrency
+ + config.reader_concurrency
+ + config.extract_concurrency
+ + 8,
+ ),
+ max_keepalive_connections=max(
+ 16,
+ config.writer_concurrency + config.reader_concurrency + 4,
+ ),
+ ),
+ )
+
+ @property
+ def run_start_monotonic(self) -> float:
+ return self._run_start_monotonic
+
+ async def aclose(self) -> None:
+ await self._client.aclose()
+
+ def _default_headers(self) -> Dict[str, str]:
+ headers = {
+ "Accept": "*/*",
+ "Content-Type": "application/json",
+ "User-Agent": "OpenViking-Session-Contention-Benchmark/1.0",
+ "X-OpenViking-Account": self._config.account,
+ "X-OpenViking-User": self._config.user,
+ }
+ if self._config.api_key:
+ headers["Authorization"] = f"Bearer {self._config.api_key}"
+ return headers
+
+ async def request_json(
+ self,
+ *,
+ api: str,
+ method: str,
+ path: str,
+ phase: str,
+ session_id: Optional[str] = None,
+ cycle_index: Optional[int] = None,
+ worker_id: Optional[int] = None,
+ task_id: Optional[str] = None,
+ json_payload: Optional[Dict[str, Any]] = None,
+ params: Optional[Dict[str, Any]] = None,
+ ) -> tuple[Optional[httpx.Response], Optional[Dict[str, Any]]]:
+ started_monotonic = time.perf_counter()
+ started_wall = utc_now()
+ response: Optional[httpx.Response] = None
+ body: Optional[Dict[str, Any]] = None
+ status_code: Optional[int] = None
+ success = False
+ timeout = False
+ exception_type: Optional[str] = None
+ error_code: Optional[str] = None
+ error_message: Optional[str] = None
+
+ try:
+ response = await self._client.request(
+ method=method,
+ url=path,
+ json=json_payload,
+ params=params,
+ )
+ status_code = response.status_code
+ body = maybe_json(response)
+ success = self._is_success(status_code, body)
+ if not success:
+ error_code, error_message = extract_error(body, status_code)
+ except httpx.TimeoutException as exc:
+ timeout = True
+ exception_type = type(exc).__name__
+ error_message = truncate_error_message(str(exc))
+ except Exception as exc: # pragma: no cover - exercised in real runs
+ exception_type = type(exc).__name__
+ error_message = truncate_error_message(str(exc))
+
+ ended_wall = utc_now()
+ ended_monotonic = time.perf_counter()
+ latency_ms = (ended_monotonic - started_monotonic) * 1000.0
+ elapsed_ms = (started_monotonic - self._run_start_monotonic) * 1000.0
+ self._recorder.add_request(
+ RequestEvent(
+ api=api,
+ method=method.upper(),
+ path=path,
+ phase=phase,
+ started_at=started_wall,
+ ended_at=ended_wall,
+ elapsed_ms_since_run_start=elapsed_ms,
+ latency_ms=latency_ms,
+ success=success,
+ status_code=status_code,
+ timeout=timeout,
+ exception_type=exception_type,
+ error_code=error_code,
+ error_message=error_message,
+ session_id=session_id,
+ cycle_index=cycle_index,
+ worker_id=worker_id,
+ task_id=task_id,
+ )
+ )
+ return response, body
+
+ @staticmethod
+ def _is_success(status_code: Optional[int], body: Optional[Dict[str, Any]]) -> bool:
+ if status_code is None or status_code >= 400:
+ return False
+ if not isinstance(body, dict):
+ return status_code < 400
+ if "status" in body:
+ return body.get("status") == "ok"
+ return True
+
+
+class CommitTaskPoller:
+ def __init__(
+ self,
+ client: BenchmarkHTTPClient,
+ recorder: Recorder,
+ phase_state: PhaseState,
+ poll_interval: float,
+ ) -> None:
+ self._client = client
+ self._recorder = recorder
+ self._phase_state = phase_state
+ self._poll_interval = poll_interval
+ self._pending: Dict[str, PendingCommitTask] = {}
+ self._closed = False
+ self._wake_event = asyncio.Event()
+ self._lock = asyncio.Lock()
+
+ async def register(self, task: PendingCommitTask) -> None:
+ async with self._lock:
+ self._pending[task.task_id] = task
+ self._wake_event.set()
+
+ async def close(self) -> None:
+ self._closed = True
+ self._wake_event.set()
+
+ async def drain(self, timeout: float) -> None:
+ deadline = time.perf_counter() + timeout
+ while True:
+ async with self._lock:
+ remaining = len(self._pending)
+ if remaining == 0:
+ return
+ if time.perf_counter() >= deadline:
+ return
+ await asyncio.sleep(min(self._poll_interval, 0.5))
+
+ async def finalize_incomplete(self) -> None:
+ async with self._lock:
+ leftovers = list(self._pending.values())
+ self._pending.clear()
+ for item in leftovers:
+ local_duration_ms = (time.perf_counter() - item.local_started_monotonic) * 1000.0
+ self._recorder.add_task(
+ CommitTaskEvent(
+ task_id=item.task_id,
+ session_id=item.session_id,
+ origin_phase=item.origin_phase,
+ completion_phase=self._phase_state.current,
+ status="incomplete",
+ created_at=None,
+ updated_at=None,
+ server_duration_ms=None,
+ local_duration_ms=local_duration_ms,
+ active_count_updated=None,
+ memories_extracted=None,
+ error="task not completed before benchmark end",
+ cycle_index=item.cycle_index,
+ polled_at=utc_now(),
+ )
+ )
+
+ async def run(self) -> None:
+ while True:
+ await self._wake_event.wait()
+ self._wake_event.clear()
+
+ while True:
+ async with self._lock:
+ pending = list(self._pending.values())
+ if not pending:
+ break
+ await self._poll_pending(pending)
+ if self._closed:
+ return
+ await asyncio.sleep(self._poll_interval)
+
+ if self._closed:
+ return
+
+ async def _poll_pending(self, pending: List[PendingCommitTask]) -> None:
+ coroutines = [self._poll_one(item) for item in pending]
+ results = await asyncio.gather(*coroutines, return_exceptions=True)
+ completed_ids = [task_id for task_id in results if isinstance(task_id, str)]
+ if not completed_ids:
+ return
+ async with self._lock:
+ for task_id in completed_ids:
+ self._pending.pop(task_id, None)
+
+ async def _poll_one(self, item: PendingCommitTask) -> Optional[str]:
+ _, body = await self._client.request_json(
+ api="get_task",
+ method="GET",
+ path=f"/api/v1/tasks/{item.task_id}",
+ phase=self._phase_state.current,
+ session_id=item.session_id,
+ cycle_index=item.cycle_index,
+ task_id=item.task_id,
+ )
+ if not isinstance(body, dict) or body.get("status") != "ok":
+ return None
+ result = body.get("result") or {}
+ task_status = result.get("status")
+ if task_status not in {"completed", "failed"}:
+ return None
+
+ created_at = to_float(result.get("created_at"))
+ updated_at = to_float(result.get("updated_at"))
+ server_duration_ms = None
+ if created_at is not None and updated_at is not None:
+ server_duration_ms = max(updated_at - created_at, 0.0) * 1000.0
+ local_duration_ms = (time.perf_counter() - item.local_started_monotonic) * 1000.0
+ task_result = result.get("result") or {}
+ self._recorder.add_task(
+ CommitTaskEvent(
+ task_id=item.task_id,
+ session_id=item.session_id,
+ origin_phase=item.origin_phase,
+ completion_phase=self._phase_state.current,
+ status=task_status,
+ created_at=created_at,
+ updated_at=updated_at,
+ server_duration_ms=server_duration_ms,
+ local_duration_ms=local_duration_ms,
+ active_count_updated=task_result.get("active_count_updated"),
+ memories_extracted=task_result.get("memories_extracted"),
+ error=result.get("error"),
+ cycle_index=item.cycle_index,
+ polled_at=utc_now(),
+ )
+ )
+ return item.task_id
+
+
+class BenchmarkRunner:
+ def __init__(self, config: BenchmarkConfig) -> None:
+ self.config = config
+ self.random = random.Random(config.seed)
+ self.recorder = Recorder()
+ self.phase_state = PhaseState()
+ self.phase_metadata: List[PhaseMetadata] = []
+ self.phase_durations: Dict[str, float] = {}
+ self.session_ids: List[str] = []
+ self.session_queue: asyncio.Queue[str] = asyncio.Queue()
+ self.session_cycle_counts: Dict[str, int] = {}
+ self.extract_semaphore = asyncio.Semaphore(max(1, config.extract_concurrency))
+ self.client = BenchmarkHTTPClient(config, self.recorder)
+ self.task_poller = CommitTaskPoller(
+ client=self.client,
+ recorder=self.recorder,
+ phase_state=self.phase_state,
+ poll_interval=config.task_poll_interval,
+ )
+
+ async def run(self) -> int:
+ poller_task = asyncio.create_task(self.task_poller.run())
+ exit_code = 0
+ try:
+ await self._preflight()
+ await self._create_sessions()
+ await self._run_phase(
+ phase="baseline",
+ duration_seconds=self.config.baseline_seconds,
+ enable_readers=self.config.reader_concurrency > 0,
+ enable_writers=False,
+ enable_sampler=self.config.observer_interval > 0,
+ )
+ await self._run_phase(
+ phase="mixed_load",
+ duration_seconds=self.config.mixed_seconds,
+ enable_readers=self.config.reader_concurrency > 0,
+ enable_writers=self.config.writer_concurrency > 0 and bool(self.session_ids),
+ enable_sampler=self.config.observer_interval > 0,
+ )
+ await self._run_phase(
+ phase="recovery",
+ duration_seconds=self.config.recovery_seconds,
+ enable_readers=self.config.reader_concurrency > 0,
+ enable_writers=False,
+ enable_sampler=self.config.observer_interval > 0,
+ )
+ if self.config.task_drain_timeout > 0:
+ self.phase_state.current = "drain"
+ await self.task_poller.drain(self.config.task_drain_timeout)
+ except RuntimeError as exc:
+ self.recorder.add_note(f"fatal: {exc}")
+ print(f"[fatal] {exc}", file=sys.stderr)
+ exit_code = 1
+ finally:
+ await self.task_poller.close()
+ await poller_task
+ await self.task_poller.finalize_incomplete()
+ if self.config.cleanup and self.session_ids:
+ await self._cleanup_sessions()
+ await self.client.aclose()
+
+ self._write_outputs()
+ self._print_summary()
+ return exit_code
+
+ async def _preflight(self) -> None:
+ self.phase_state.current = "setup"
+ _, health_body = await self.client.request_json(
+ api="health",
+ method="GET",
+ path="/health",
+ phase="setup",
+ )
+ if not isinstance(health_body, dict) or health_body.get("status") != "ok":
+ raise RuntimeError("server health check failed")
+
+ _, status_body = await self.client.request_json(
+ api="system_status",
+ method="GET",
+ path="/api/v1/system/status",
+ phase="setup",
+ )
+ if not isinstance(status_body, dict) or status_body.get("status") != "ok":
+ raise RuntimeError("authenticated system status request failed")
+
+ _, models_body = await self.client.request_json(
+ api="observer_models",
+ method="GET",
+ path="/api/v1/observer/models",
+ phase="setup",
+ )
+ model_result = (models_body or {}).get("result") if isinstance(models_body, dict) else None
+ model_note = self._extract_model_note(model_result)
+ if model_note:
+ self.recorder.add_note(model_note)
+
+ if self.config.extract_ratio > 0:
+ preflight_result = await self._run_extract_preflight()
+ if preflight_result:
+ self.recorder.add_note(preflight_result)
+ if self.config.require_extract_load:
+ raise RuntimeError(preflight_result)
+
+ async def _run_extract_preflight(self) -> Optional[str]:
+ _, create_body = await self.client.request_json(
+ api="create_session",
+ method="POST",
+ path="/api/v1/sessions",
+ phase="setup",
+ )
+ session_id = extract_session_id(create_body)
+ if not session_id:
+ return "extract preflight could not create session"
+
+ try:
+ payload = {
+ "role": "user",
+ "content": build_message_content(
+ session_id=session_id,
+ cycle_index=0,
+ message_index=0,
+ size=self.config.message_size,
+ ),
+ }
+ await self.client.request_json(
+ api="add_message",
+ method="POST",
+ path=f"/api/v1/sessions/{session_id}/messages",
+ phase="setup",
+ session_id=session_id,
+ cycle_index=0,
+ json_payload=payload,
+ )
+ _, extract_body = await self.client.request_json(
+ api="extract",
+ method="POST",
+ path=f"/api/v1/sessions/{session_id}/extract",
+ phase="setup",
+ session_id=session_id,
+ cycle_index=0,
+ )
+ if not isinstance(extract_body, dict) or extract_body.get("status") != "ok":
+ return "extract preflight request failed"
+ result = extract_body.get("result")
+ if isinstance(result, list) and not result:
+ return (
+ "extract preflight returned empty result; long-tail load may be weak if models are "
+ "not configured"
+ )
+ return None
+ finally:
+ await self.client.request_json(
+ api="delete_session",
+ method="DELETE",
+ path=f"/api/v1/sessions/{session_id}",
+ phase="setup",
+ session_id=session_id,
+ )
+
+ def _extract_model_note(self, model_result: Any) -> Optional[str]:
+ if not isinstance(model_result, dict):
+ return None
+ is_healthy = model_result.get("is_healthy")
+ status = model_result.get("status")
+ if is_healthy is False:
+ return f"observer/models reports unhealthy state; extract load may not be representative: {status}"
+ return None
+
+ async def _create_sessions(self) -> None:
+ if self.config.session_count <= 0:
+ return
+ for _ in range(self.config.session_count):
+ _, body = await self.client.request_json(
+ api="create_session",
+ method="POST",
+ path="/api/v1/sessions",
+ phase="setup",
+ )
+ session_id = extract_session_id(body)
+ if not session_id:
+ raise RuntimeError("failed to create benchmark sessions")
+ self.session_ids.append(session_id)
+ self.session_cycle_counts[session_id] = 0
+ await self.session_queue.put(session_id)
+
+ async def _cleanup_sessions(self) -> None:
+ self.phase_state.current = "cleanup"
+ for session_id in self.session_ids:
+ await self.client.request_json(
+ api="delete_session",
+ method="DELETE",
+ path=f"/api/v1/sessions/{session_id}",
+ phase="cleanup",
+ session_id=session_id,
+ )
+
+ async def _run_phase(
+ self,
+ *,
+ phase: str,
+ duration_seconds: float,
+ enable_readers: bool,
+ enable_writers: bool,
+ enable_sampler: bool,
+ ) -> None:
+ if duration_seconds <= 0:
+ return
+
+ self.phase_state.current = phase
+ stop_event = asyncio.Event()
+ tasks: List[asyncio.Task[Any]] = []
+
+ if enable_readers:
+ for worker_id in range(self.config.reader_concurrency):
+ tasks.append(asyncio.create_task(self._reader_worker(phase, worker_id, stop_event)))
+ if enable_writers:
+ for worker_id in range(self.config.writer_concurrency):
+ tasks.append(asyncio.create_task(self._writer_worker(phase, worker_id, stop_event)))
+ if enable_sampler:
+ tasks.append(asyncio.create_task(self._sampler_worker(phase, stop_event)))
+
+ phase_started = time.perf_counter()
+ started_wall = utc_now()
+ await asyncio.sleep(duration_seconds)
+ stop_event.set()
+ if tasks:
+ await asyncio.gather(*tasks, return_exceptions=True)
+ phase_duration = time.perf_counter() - phase_started
+ ended_wall = utc_now()
+ self.phase_metadata.append(
+ PhaseMetadata(
+ phase=phase,
+ started_at=started_wall,
+ ended_at=ended_wall,
+ duration_seconds=phase_duration,
+ )
+ )
+ self.phase_durations[phase] = phase_duration
+
+ async def _writer_worker(self, phase: str, worker_id: int, stop_event: asyncio.Event) -> None:
+ while not stop_event.is_set():
+ session_id = await self._borrow_session(stop_event)
+ if not session_id:
+ return
+ try:
+ cycle_index = self.session_cycle_counts[session_id]
+ self.session_cycle_counts[session_id] += 1
+ await self._run_session_cycle(
+ phase=phase,
+ worker_id=worker_id,
+ session_id=session_id,
+ cycle_index=cycle_index,
+ )
+ finally:
+ await self.session_queue.put(session_id)
+
+ async def _run_session_cycle(
+ self,
+ *,
+ phase: str,
+ worker_id: int,
+ session_id: str,
+ cycle_index: int,
+ ) -> None:
+ successful_messages = 0
+ for message_index in range(self.config.messages_per_commit):
+ payload = {
+ "role": "user",
+ "content": build_message_content(
+ session_id=session_id,
+ cycle_index=cycle_index,
+ message_index=message_index,
+ size=self.config.message_size,
+ ),
+ }
+ _, body = await self.client.request_json(
+ api="add_message",
+ method="POST",
+ path=f"/api/v1/sessions/{session_id}/messages",
+ phase=phase,
+ session_id=session_id,
+ cycle_index=cycle_index,
+ worker_id=worker_id,
+ json_payload=payload,
+ )
+ if isinstance(body, dict) and body.get("status") == "ok":
+ successful_messages += 1
+
+ if successful_messages <= 0:
+ return
+
+ if self.config.extract_ratio > 0 and self.random.random() < self.config.extract_ratio:
+ async with self.extract_semaphore:
+ await self.client.request_json(
+ api="extract",
+ method="POST",
+ path=f"/api/v1/sessions/{session_id}/extract",
+ phase=phase,
+ session_id=session_id,
+ cycle_index=cycle_index,
+ worker_id=worker_id,
+ )
+
+ _, body = await self.client.request_json(
+ api="commit",
+ method="POST",
+ path=f"/api/v1/sessions/{session_id}/commit",
+ phase=phase,
+ session_id=session_id,
+ cycle_index=cycle_index,
+ worker_id=worker_id,
+ )
+ task_id = extract_task_id(body)
+ if task_id:
+ await self.task_poller.register(
+ PendingCommitTask(
+ task_id=task_id,
+ session_id=session_id,
+ origin_phase=phase,
+ cycle_index=cycle_index,
+ local_started_monotonic=time.perf_counter(),
+ )
+ )
+
+ async def _reader_worker(self, phase: str, worker_id: int, stop_event: asyncio.Event) -> None:
+ while not stop_event.is_set():
+ payload = {
+ "query": self.random.choice(self.config.find_queries),
+ "limit": self.config.find_limit,
+ }
+ if self.config.find_target_uri:
+ payload["target_uri"] = self.config.find_target_uri
+ if self.config.find_score_threshold is not None:
+ payload["score_threshold"] = self.config.find_score_threshold
+ await self.client.request_json(
+ api="find",
+ method="POST",
+ path="/api/v1/search/find",
+ phase=phase,
+ worker_id=worker_id,
+ json_payload=payload,
+ )
+
+ async def _sampler_worker(self, phase: str, stop_event: asyncio.Event) -> None:
+ sample_specs = [
+ ("system_status", "GET", "/api/v1/system/status"),
+ ("observer_queue", "GET", "/api/v1/observer/queue"),
+ ("observer_system", "GET", "/api/v1/observer/system"),
+ ]
+ while not stop_event.is_set():
+ for api, method, path in sample_specs:
+ started = time.perf_counter()
+ response, body = await self.client.request_json(
+ api=api,
+ method=method,
+ path=path,
+ phase=phase,
+ )
+ latency_ms = (time.perf_counter() - started) * 1000.0
+ success = response is not None and self.client._is_success(
+ response.status_code if response else None,
+ body,
+ )
+ self.recorder.add_sample(
+ ObserverSample(
+ api=api,
+ phase=phase,
+ sampled_at=utc_now(),
+ elapsed_ms_since_run_start=(
+ time.perf_counter() - self.client.run_start_monotonic
+ )
+ * 1000.0,
+ latency_ms=latency_ms,
+ success=success,
+ is_healthy=extract_boolean(body, "result", "is_healthy"),
+ has_errors=extract_boolean(body, "result", "has_errors"),
+ payload=body if isinstance(body, dict) else None,
+ error_message=extract_error(
+ body, response.status_code if response else None
+ )[1]
+ if response is not None and not success
+ else None,
+ )
+ )
+ if stop_event.is_set():
+ break
+ if stop_event.is_set():
+ return
+ try:
+ await asyncio.wait_for(stop_event.wait(), timeout=self.config.observer_interval)
+ except asyncio.TimeoutError:
+ continue
+
+ async def _borrow_session(self, stop_event: asyncio.Event) -> Optional[str]:
+ while not stop_event.is_set():
+ try:
+ return await asyncio.wait_for(self.session_queue.get(), timeout=0.2)
+ except asyncio.TimeoutError:
+ continue
+ return None
+
+ def _write_outputs(self) -> None:
+ output_dir = Path(self.config.output_dir)
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ request_summary_rows = build_request_summary_rows(
+ events=self.recorder.request_events,
+ phase_durations=self.phase_durations,
+ total_run_duration=total_duration_seconds(self.phase_metadata),
+ )
+ task_summary_rows = build_task_summary_rows(self.recorder.task_events)
+ human_summary_zh = render_human_summary_zh(
+ config=self.config,
+ output_dir=self.config.output_dir,
+ notes=self.recorder.notes,
+ phase_metadata=self.phase_metadata,
+ request_summary_rows=request_summary_rows,
+ request_events=self.recorder.request_events,
+ task_summary_rows=task_summary_rows,
+ task_events=self.recorder.task_events,
+ )
+
+ write_json(output_dir / "run_config.json", asdict(self.config))
+ write_json(
+ output_dir / "phases.json",
+ [asdict(item) for item in self.phase_metadata],
+ )
+ write_json(
+ output_dir / "run_summary.json",
+ self._build_run_summary(
+ request_summary_rows=request_summary_rows,
+ task_summary_rows=task_summary_rows,
+ human_summary_zh=human_summary_zh,
+ ),
+ )
+ write_text(output_dir / "summary_zh.txt", human_summary_zh)
+ write_jsonl(output_dir / "request_events.jsonl", self.recorder.request_events)
+ write_jsonl(output_dir / "task_events.jsonl", self.recorder.task_events)
+ write_jsonl(output_dir / "observer_samples.jsonl", self.recorder.observer_samples)
+
+ write_csv(
+ output_dir / "request_summary.csv",
+ request_summary_rows,
+ )
+ write_csv(
+ output_dir / "request_windows.csv",
+ build_request_window_rows(
+ events=self.recorder.request_events,
+ window_seconds=self.config.window_seconds,
+ ),
+ )
+ write_csv(
+ output_dir / "task_summary.csv",
+ task_summary_rows,
+ )
+
+ def _build_run_summary(
+ self,
+ *,
+ request_summary_rows: List[Dict[str, Any]],
+ task_summary_rows: List[Dict[str, Any]],
+ human_summary_zh: str,
+ ) -> Dict[str, Any]:
+ find_delta = build_find_phase_delta(request_summary_rows)
+ return {
+ "notes": self.recorder.notes,
+ "phase_metadata": [asdict(item) for item in self.phase_metadata],
+ "request_summary": request_summary_rows,
+ "task_summary": task_summary_rows,
+ "find_phase_delta": find_delta,
+ "human_summary_zh": human_summary_zh,
+ "created_at": utc_now(),
+ }
+
+ def _print_summary(self) -> None:
+ request_summary_rows = build_request_summary_rows(
+ events=self.recorder.request_events,
+ phase_durations=self.phase_durations,
+ total_run_duration=total_duration_seconds(self.phase_metadata),
+ )
+ task_summary_rows = build_task_summary_rows(self.recorder.task_events)
+ print(
+ "\n"
+ + render_human_summary_zh(
+ config=self.config,
+ output_dir=self.config.output_dir,
+ notes=self.recorder.notes,
+ phase_metadata=self.phase_metadata,
+ request_summary_rows=request_summary_rows,
+ request_events=self.recorder.request_events,
+ task_summary_rows=task_summary_rows,
+ task_events=self.recorder.task_events,
+ )
+ )
+
+
+def parse_args(argv: Optional[List[str]] = None) -> BenchmarkConfig:
+ server_host = os.getenv("SERVER_HOST", "127.0.0.1")
+ server_port = int(os.getenv("SERVER_PORT", "1933"))
+ default_server_url = f"http://{server_host}:{server_port}"
+ default_output_dir = (
+ Path(__file__).resolve().parents[1]
+ / "results"
+ / "session_contention"
+ / datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
+ )
+
+ parser = argparse.ArgumentParser(
+ description="Reproduce session addMessage/extract/commit contention against concurrent find traffic.",
+ )
+ parser.add_argument("--server-url", default=default_server_url)
+ parser.add_argument("--api-key", default=os.getenv("OPENVIKING_API_KEY", "test-root-api-key"))
+ parser.add_argument("--account", default=os.getenv("OPENVIKING_ACCOUNT", "default"))
+ parser.add_argument("--user", default=os.getenv("OPENVIKING_USER", "default"))
+ parser.add_argument("--request-timeout", type=float, default=30.0)
+ parser.add_argument("--sessions", type=int, default=8)
+ parser.add_argument("--writer-concurrency", type=int, default=8)
+ parser.add_argument("--reader-concurrency", type=int, default=4)
+ parser.add_argument("--extract-concurrency", type=int, default=4)
+ parser.add_argument("--messages-per-commit", type=int, default=5)
+ parser.add_argument("--extract-ratio", type=float, default=0.5)
+ parser.add_argument("--message-size", type=int, default=768)
+ parser.add_argument("--baseline-seconds", type=float, default=30.0)
+ parser.add_argument("--mixed-seconds", type=float, default=120.0)
+ parser.add_argument("--recovery-seconds", type=float, default=30.0)
+ parser.add_argument("--window-seconds", type=float, default=5.0)
+ parser.add_argument("--observer-interval", type=float, default=5.0)
+ parser.add_argument("--task-poll-interval", type=float, default=1.0)
+ parser.add_argument("--task-drain-timeout", type=float, default=30.0)
+ parser.add_argument("--output-dir", default=str(default_output_dir))
+ parser.add_argument("--cleanup", action="store_true")
+ parser.add_argument("--require-extract-load", action="store_true")
+ parser.add_argument(
+ "--find-query",
+ action="append",
+ dest="find_queries",
+ default=[],
+ help="Repeat to add multiple find queries.",
+ )
+ parser.add_argument("--find-limit", type=int, default=10)
+ parser.add_argument("--find-target-uri", default="")
+ parser.add_argument("--find-score-threshold", type=float, default=None)
+ parser.add_argument("--seed", type=int, default=42)
+
+ args = parser.parse_args(argv)
+ find_queries = args.find_queries or list(DEFAULT_FIND_QUERIES)
+
+ config = BenchmarkConfig(
+ server_url=args.server_url,
+ api_key=args.api_key,
+ account=args.account,
+ user=args.user,
+ request_timeout=args.request_timeout,
+ session_count=max(0, args.sessions),
+ writer_concurrency=max(0, args.writer_concurrency),
+ reader_concurrency=max(0, args.reader_concurrency),
+ extract_concurrency=max(1, args.extract_concurrency),
+ messages_per_commit=max(1, args.messages_per_commit),
+ extract_ratio=min(max(args.extract_ratio, 0.0), 1.0),
+ message_size=max(128, args.message_size),
+ baseline_seconds=max(0.0, args.baseline_seconds),
+ mixed_seconds=max(0.0, args.mixed_seconds),
+ recovery_seconds=max(0.0, args.recovery_seconds),
+ window_seconds=max(args.window_seconds, 1.0),
+ observer_interval=0.0 if args.observer_interval <= 0 else max(args.observer_interval, 0.1),
+ task_poll_interval=max(args.task_poll_interval, 0.1),
+ task_drain_timeout=max(0.0, args.task_drain_timeout),
+ output_dir=args.output_dir,
+ cleanup=args.cleanup,
+ require_extract_load=args.require_extract_load,
+ find_queries=find_queries,
+ find_limit=max(1, args.find_limit),
+ find_target_uri=args.find_target_uri,
+ find_score_threshold=args.find_score_threshold,
+ seed=args.seed,
+ )
+ if config.writer_concurrency > 0 and config.session_count <= 0:
+ parser.error("--sessions must be > 0 when --writer-concurrency is enabled")
+ return config
+
+
+def maybe_json(response: httpx.Response) -> Optional[Dict[str, Any]]:
+ try:
+ body = response.json()
+ except ValueError:
+ return None
+ return body if isinstance(body, dict) else {"value": body}
+
+
+def extract_error(
+ body: Optional[Dict[str, Any]], status_code: Optional[int]
+) -> tuple[Optional[str], Optional[str]]:
+ if not isinstance(body, dict):
+ if status_code is None:
+ return None, None
+ return None, f"http status {status_code}"
+ error = body.get("error")
+ if isinstance(error, dict):
+ return error.get("code"), truncate_error_message(error.get("message"))
+ if body.get("status") not in {None, "ok"}:
+ return body.get("status"), truncate_error_message(json.dumps(body, ensure_ascii=False))
+ if status_code is not None and status_code >= 400:
+ return None, f"http status {status_code}"
+ return None, None
+
+
+def extract_session_id(body: Optional[Dict[str, Any]]) -> Optional[str]:
+ if not isinstance(body, dict):
+ return None
+ result = body.get("result")
+ if not isinstance(result, dict):
+ return None
+ session_id = result.get("session_id")
+ return session_id if isinstance(session_id, str) else None
+
+
+def extract_task_id(body: Optional[Dict[str, Any]]) -> Optional[str]:
+ if not isinstance(body, dict):
+ return None
+ result = body.get("result")
+ if not isinstance(result, dict):
+ return None
+ task_id = result.get("task_id")
+ return task_id if isinstance(task_id, str) and task_id else None
+
+
+def build_message_content(
+ *, session_id: str, cycle_index: int, message_index: int, size: int
+) -> str:
+ prefix = (
+ f"session={session_id} cycle={cycle_index} message={message_index}. "
+ "We discussed project goals, deployment constraints, user preferences, debugging notes, "
+ "timelines, risks, and follow-up actions. "
+ )
+ detail = (
+ "The user prefers production-safe changes, wants clear rollback steps, and asked for "
+ "memory extraction to keep decisions, entities, and events. "
+ "We also covered resource bottlenecks, queue backlog, response latency, and how read "
+ "traffic regressed during heavy write pressure. "
+ )
+ content = prefix
+ while len(content) < size:
+ content += detail
+ return content[:size]
+
+
+def truncate_error_message(message: Optional[str]) -> Optional[str]:
+ if message is None:
+ return None
+ if len(message) <= MAX_ERROR_MESSAGE_LEN:
+ return message
+ return message[:MAX_ERROR_MESSAGE_LEN] + "...[truncated]"
+
+
+def utc_now() -> str:
+ return datetime.now(UTC).isoformat(timespec="milliseconds").replace("+00:00", "Z")
+
+
+def total_duration_seconds(phases: List[PhaseMetadata]) -> float:
+ return sum(item.duration_seconds for item in phases)
+
+
+def percentile(values: Iterable[float], pct: float) -> Optional[float]:
+ ordered = sorted(float(value) for value in values)
+ if not ordered:
+ return None
+ if len(ordered) == 1:
+ return ordered[0]
+ rank = (pct / 100.0) * (len(ordered) - 1)
+ lower = math.floor(rank)
+ upper = math.ceil(rank)
+ if lower == upper:
+ return ordered[int(rank)]
+ weight = rank - lower
+ return ordered[lower] + (ordered[upper] - ordered[lower]) * weight
+
+
+def build_request_summary_rows(
+ *,
+ events: List[RequestEvent],
+ phase_durations: Dict[str, float],
+ total_run_duration: float,
+) -> List[Dict[str, Any]]:
+ rows: List[Dict[str, Any]] = []
+ rows.extend(
+ _build_request_summary_for_groups(
+ events=events,
+ grouping=lambda event: (event.phase, event.api),
+ duration_lookup=phase_durations,
+ )
+ )
+ overall_groups = _build_request_summary_for_groups(
+ events=events,
+ grouping=lambda event: ("ALL", event.api),
+ duration_lookup={"ALL": total_run_duration},
+ )
+ rows.extend(overall_groups)
+ return sorted(rows, key=lambda row: (row["phase"], row["api"]))
+
+
+def _build_request_summary_for_groups(
+ *,
+ events: List[RequestEvent],
+ grouping,
+ duration_lookup: Dict[str, float],
+) -> List[Dict[str, Any]]:
+ groups: Dict[tuple[str, str], List[RequestEvent]] = {}
+ for event in events:
+ key = grouping(event)
+ groups.setdefault(key, []).append(event)
+
+ rows: List[Dict[str, Any]] = []
+ for (phase, api), api_events in groups.items():
+ latencies = [event.latency_ms for event in api_events]
+ successes = sum(1 for event in api_events if event.success)
+ failures = len(api_events) - successes
+ timeouts = sum(1 for event in api_events if event.timeout)
+ exceptions = sum(1 for event in api_events if event.exception_type)
+ status_counts: Dict[str, int] = {}
+ for event in api_events:
+ key = str(event.status_code) if event.status_code is not None else "exception"
+ status_counts[key] = status_counts.get(key, 0) + 1
+ duration = max(duration_lookup.get(phase, 0.0), 1e-9)
+ row = {
+ "phase": phase,
+ "api": api,
+ "requests": len(api_events),
+ "successes": successes,
+ "failures": failures,
+ "timeouts": timeouts,
+ "exceptions": exceptions,
+ "success_rate": round((successes / len(api_events)) * 100.0, 4),
+ "qps": round(len(api_events) / duration, 4),
+ "avg_ms": round(sum(latencies) / len(latencies), 4),
+ "p50_ms": round_optional(percentile(latencies, 50)),
+ "p90_ms": round_optional(percentile(latencies, 90)),
+ "p95_ms": round_optional(percentile(latencies, 95)),
+ "p99_ms": round_optional(percentile(latencies, 99)),
+ "max_ms": round_optional(max(latencies) if latencies else None),
+ "slow_gt_1s": sum(
+ 1 for latency in latencies if latency > DEFAULT_SLOW_THRESHOLDS_MS[0]
+ ),
+ "slow_gt_3s": sum(
+ 1 for latency in latencies if latency > DEFAULT_SLOW_THRESHOLDS_MS[1]
+ ),
+ "slow_gt_5s": sum(
+ 1 for latency in latencies if latency > DEFAULT_SLOW_THRESHOLDS_MS[2]
+ ),
+ "status_codes": json.dumps(status_counts, sort_keys=True),
+ }
+ rows.append(row)
+ return rows
+
+
+def build_request_window_rows(
+ *,
+ events: List[RequestEvent],
+ window_seconds: float,
+) -> List[Dict[str, Any]]:
+ groups: Dict[tuple[int, str, str], List[RequestEvent]] = {}
+ for event in events:
+ window_index = int((event.elapsed_ms_since_run_start / 1000.0) // window_seconds)
+ key = (window_index, event.phase, event.api)
+ groups.setdefault(key, []).append(event)
+
+ rows: List[Dict[str, Any]] = []
+ for (window_index, phase, api), window_events in sorted(groups.items()):
+ latencies = [event.latency_ms for event in window_events]
+ successes = sum(1 for event in window_events if event.success)
+ rows.append(
+ {
+ "window_index": window_index,
+ "window_start_sec": round(window_index * window_seconds, 4),
+ "window_end_sec": round((window_index + 1) * window_seconds, 4),
+ "phase": phase,
+ "api": api,
+ "requests": len(window_events),
+ "successes": successes,
+ "failures": len(window_events) - successes,
+ "success_rate": round((successes / len(window_events)) * 100.0, 4),
+ "qps": round(len(window_events) / window_seconds, 4),
+ "p95_ms": round_optional(percentile(latencies, 95)),
+ "p99_ms": round_optional(percentile(latencies, 99)),
+ "max_ms": round_optional(max(latencies) if latencies else None),
+ }
+ )
+ return rows
+
+
+def build_task_summary_rows(events: List[CommitTaskEvent]) -> List[Dict[str, Any]]:
+ groups: Dict[str, List[CommitTaskEvent]] = {}
+ for event in events:
+ groups.setdefault(event.status, []).append(event)
+
+ rows: List[Dict[str, Any]] = []
+ for status, status_events in sorted(groups.items()):
+ server_latencies = [
+ event.server_duration_ms
+ for event in status_events
+ if event.server_duration_ms is not None
+ ]
+ local_latencies = [event.local_duration_ms for event in status_events]
+ successes = sum(1 for event in status_events if event.status == "completed")
+ rows.append(
+ {
+ "status": status,
+ "tasks": len(status_events),
+ "successes": successes,
+ "success_rate": round((successes / len(status_events)) * 100.0, 4),
+ "p50_server_duration_ms": round_optional(percentile(server_latencies, 50)),
+ "p95_server_duration_ms": round_optional(percentile(server_latencies, 95)),
+ "p99_server_duration_ms": round_optional(percentile(server_latencies, 99)),
+ "max_server_duration_ms": round_optional(
+ max(server_latencies) if server_latencies else None
+ ),
+ "p50_local_duration_ms": round_optional(percentile(local_latencies, 50)),
+ "p95_local_duration_ms": round_optional(percentile(local_latencies, 95)),
+ "p99_local_duration_ms": round_optional(percentile(local_latencies, 99)),
+ "max_local_duration_ms": round_optional(
+ max(local_latencies) if local_latencies else None
+ ),
+ }
+ )
+ return rows
+
+
+def build_find_phase_delta(summary_rows: List[Dict[str, Any]]) -> Optional[Dict[str, float]]:
+ baseline = next(
+ (row for row in summary_rows if row["phase"] == "baseline" and row["api"] == "find"),
+ None,
+ )
+ mixed = next(
+ (row for row in summary_rows if row["phase"] == "mixed_load" and row["api"] == "find"),
+ None,
+ )
+ if not baseline or not mixed:
+ return None
+ baseline_p95 = baseline.get("p95_ms")
+ baseline_p99 = baseline.get("p99_ms")
+ mixed_p95 = mixed.get("p95_ms")
+ mixed_p99 = mixed.get("p99_ms")
+ if not all(metric is not None for metric in [baseline_p95, baseline_p99, mixed_p95, mixed_p99]):
+ return None
+ return {
+ "baseline_p95_ms": baseline_p95,
+ "mixed_p95_ms": mixed_p95,
+ "p95_delta_percent": percent_change(baseline_p95, mixed_p95),
+ "baseline_p99_ms": baseline_p99,
+ "mixed_p99_ms": mixed_p99,
+ "p99_delta_percent": percent_change(baseline_p99, mixed_p99),
+ "baseline_success_rate": baseline["success_rate"],
+ "mixed_success_rate": mixed["success_rate"],
+ "success_rate_delta_percent": mixed["success_rate"] - baseline["success_rate"],
+ }
+
+
+def find_request_summary_row(
+ summary_rows: List[Dict[str, Any]],
+ *,
+ api: str,
+ phase: str,
+) -> Optional[Dict[str, Any]]:
+ return next((row for row in summary_rows if row["api"] == api and row["phase"] == phase), None)
+
+
+def phase_target_seconds(config: BenchmarkConfig, phase: str) -> Optional[float]:
+ mapping = {
+ "baseline": config.baseline_seconds,
+ "mixed_load": config.mixed_seconds,
+ "recovery": config.recovery_seconds,
+ }
+ return mapping.get(phase)
+
+
+def build_phase_overview_rows(
+ config: BenchmarkConfig,
+ phase_metadata: List[PhaseMetadata],
+) -> List[Dict[str, Optional[float]]]:
+ rows: List[Dict[str, Optional[float]]] = []
+ for item in phase_metadata:
+ target = phase_target_seconds(config, item.phase)
+ delta = None if target is None else item.duration_seconds - target
+ rows.append(
+ {
+ "phase": item.phase,
+ "target_seconds": round_optional(target),
+ "actual_seconds": round_optional(item.duration_seconds),
+ "delta_seconds": round_optional(delta),
+ }
+ )
+ return rows
+
+
+def build_api_error_breakdown(
+ events: List[RequestEvent],
+ *,
+ api: str,
+ phase: Optional[str] = None,
+) -> Dict[str, Any]:
+ filtered = [
+ event for event in events if event.api == api and (phase is None or event.phase == phase)
+ ]
+ exception_counts: Dict[str, int] = {}
+ error_counts: Dict[str, int] = {}
+ for event in filtered:
+ if event.exception_type:
+ exception_counts[event.exception_type] = (
+ exception_counts.get(event.exception_type, 0) + 1
+ )
+ key = event.error_code or event.exception_type
+ if key:
+ error_counts[key] = error_counts.get(key, 0) + 1
+ return {
+ "requests": len(filtered),
+ "successes": sum(1 for event in filtered if event.success),
+ "failures": sum(1 for event in filtered if not event.success),
+ "timeouts": sum(1 for event in filtered if event.timeout),
+ "exception_counts": exception_counts,
+ "error_counts": error_counts,
+ }
+
+
+def format_phase_name_cn(phase: str) -> str:
+ mapping = {
+ "setup": "预热",
+ "baseline": "基线阶段",
+ "mixed_load": "混合压测阶段",
+ "recovery": "恢复阶段",
+ "drain": "收尾等待阶段",
+ "cleanup": "清理阶段",
+ "ALL": "全程",
+ }
+ return mapping.get(phase, phase)
+
+
+def format_seconds(value: Optional[float]) -> str:
+ if value is None:
+ return "n/a"
+ return f"{value:.1f}s"
+
+
+def format_percent(value: Optional[float]) -> str:
+ if value is None:
+ return "n/a"
+ return f"{value:.2f}%"
+
+
+def format_delta_percent(value: Optional[float]) -> str:
+ if value is None:
+ return "n/a"
+ sign = "+" if value >= 0 else ""
+ return f"{sign}{value:.2f}%"
+
+
+def format_delta_seconds(value: Optional[float]) -> str:
+ if value is None:
+ return "n/a"
+ sign = "+" if value >= 0 else ""
+ return f"{sign}{value:.1f}s"
+
+
+def format_change(old: Optional[float], new: Optional[float], *, unit: str = "ms") -> str:
+ if old is None or new is None:
+ return "n/a"
+ if unit == "ms":
+ return (
+ f"{old:.2f}{unit} -> {new:.2f}{unit} ({format_delta_percent(percent_change(old, new))})"
+ )
+ return f"{old:.2f} -> {new:.2f} ({format_delta_percent(percent_change(old, new))})"
+
+
+def format_qps_change(old: Optional[float], new: Optional[float]) -> str:
+ if old is None or new is None:
+ return "n/a"
+ return f"{old:.2f} -> {new:.2f} ({format_delta_percent(percent_change(old, new))})"
+
+
+def render_human_summary_zh(
+ *,
+ config: BenchmarkConfig,
+ output_dir: str,
+ notes: List[str],
+ phase_metadata: List[PhaseMetadata],
+ request_summary_rows: List[Dict[str, Any]],
+ request_events: List[RequestEvent],
+ task_summary_rows: List[Dict[str, Any]],
+ task_events: List[CommitTaskEvent],
+) -> str:
+ lines: List[str] = []
+ lines.append("=== OpenViking Session 竞争压测摘要 ===")
+ lines.append(f"结果目录: {output_dir}")
+
+ if notes:
+ lines.append("")
+ lines.append("说明:")
+ for note in notes:
+ lines.append(f"- {note}")
+
+ phase_rows = build_phase_overview_rows(config, phase_metadata)
+ baseline_find = find_request_summary_row(request_summary_rows, api="find", phase="baseline")
+ mixed_find = find_request_summary_row(request_summary_rows, api="find", phase="mixed_load")
+ recovery_find = find_request_summary_row(request_summary_rows, api="find", phase="recovery")
+ mixed_add = find_request_summary_row(
+ request_summary_rows, api="add_message", phase="mixed_load"
+ )
+ mixed_commit = find_request_summary_row(request_summary_rows, api="commit", phase="mixed_load")
+ mixed_extract = find_request_summary_row(
+ request_summary_rows, api="extract", phase="mixed_load"
+ )
+ baseline_status = find_request_summary_row(
+ request_summary_rows, api="system_status", phase="baseline"
+ )
+ mixed_status = find_request_summary_row(
+ request_summary_rows, api="system_status", phase="mixed_load"
+ )
+ baseline_queue = find_request_summary_row(
+ request_summary_rows, api="observer_queue", phase="baseline"
+ )
+ mixed_queue = find_request_summary_row(
+ request_summary_rows, api="observer_queue", phase="mixed_load"
+ )
+ find_delta = build_find_phase_delta(request_summary_rows)
+ extract_breakdown = build_api_error_breakdown(request_events, api="extract", phase="mixed_load")
+ completed_tasks = next(
+ (row for row in task_summary_rows if row["status"] == "completed"),
+ None,
+ )
+ incomplete_tasks = next(
+ (row for row in task_summary_rows if row["status"] == "incomplete"),
+ None,
+ )
+ total_task_count = len(task_events)
+
+ lines.append("")
+ lines.append("一、核心结论")
+ if baseline_find and mixed_find and find_delta:
+ lines.append(
+ "- 已明确复现读接口退化:`find` 在混合压测阶段的 p95 从 "
+ f"{baseline_find['p95_ms']:.2f}ms 升到 {mixed_find['p95_ms']:.2f}ms,"
+ f"增幅 {find_delta['p95_delta_percent']:.2f}%;p99 从 "
+ f"{baseline_find['p99_ms']:.2f}ms 升到 {mixed_find['p99_ms']:.2f}ms,"
+ f"增幅 {find_delta['p99_delta_percent']:.2f}%。"
+ )
+ lines.append(
+ "- `find` 吞吐也下降了:QPS 从 "
+ f"{baseline_find['qps']:.2f} 降到 {mixed_find['qps']:.2f},"
+ f"变化 {format_delta_percent(percent_change(baseline_find['qps'], mixed_find['qps']))}。"
+ )
+ if recovery_find and baseline_find and mixed_find:
+ lines.append(
+ "- 恢复阶段没有完全回到基线:`find` p95 为 "
+ f"{recovery_find['p95_ms']:.2f}ms,仍高于基线 "
+ f"{format_delta_percent(percent_change(baseline_find['p95_ms'], recovery_find['p95_ms']))};"
+ "但相比混合压测阶段已经有明显回落。"
+ )
+ if mixed_extract:
+ lines.append(
+ "- 长尾压力主要来自 `extract`:混合压测阶段共 "
+ f"{mixed_extract['requests']} 次调用,成功率 {mixed_extract['success_rate']:.2f}%,"
+ f"p95 {mixed_extract['p95_ms']:.2f}ms。"
+ )
+ if extract_breakdown["timeouts"] > 0:
+ lines.append(
+ "- `extract` 失败几乎全是客户端超时:"
+ f"{extract_breakdown['timeouts']}/{extract_breakdown['requests']} 次超时,"
+ f"主异常是 {format_top_counts(extract_breakdown['exception_counts'])}。"
+ )
+ if mixed_commit and completed_tasks:
+ lines.append(
+ "- `commit` 接口本身不是最重的部分:前台 `commit` p95 只有 "
+ f"{mixed_commit['p95_ms']:.2f}ms;真正重的是后台任务,已完成任务的后台 p95 达 "
+ f"{completed_tasks['p95_server_duration_ms']:.2f}ms。"
+ )
+ if incomplete_tasks:
+ lines.append(
+ "- 后台积压明显:本次共跟踪到 "
+ f"{total_task_count} 个 `commit` 背景任务,其中 {incomplete_tasks['tasks']} 个在压测结束"
+ "并等待 drain 后仍未完成。"
+ )
+
+ lines.append("")
+ lines.append("二、阶段时长")
+ for row in phase_rows:
+ extra = ""
+ if row["delta_seconds"] is not None and row["delta_seconds"] > 1:
+ extra = ",实际时长明显长于目标值,通常说明脚本在等待 in-flight 会话周期收尾"
+ lines.append(
+ f"- {format_phase_name_cn(row['phase'])}: 目标 {format_seconds(row['target_seconds'])},"
+ f"实际 {format_seconds(row['actual_seconds'])},偏差 {format_delta_seconds(row['delta_seconds'])}{extra}"
+ )
+
+ lines.append("")
+ lines.append("三、关键指标对比")
+ if baseline_find and mixed_find and recovery_find:
+ lines.append(
+ "- `find`:"
+ f" 基线 p95={baseline_find['p95_ms']:.2f}ms / p99={baseline_find['p99_ms']:.2f}ms / qps={baseline_find['qps']:.2f};"
+ f" 压测中 p95={mixed_find['p95_ms']:.2f}ms / p99={mixed_find['p99_ms']:.2f}ms / qps={mixed_find['qps']:.2f};"
+ f" 恢复期 p95={recovery_find['p95_ms']:.2f}ms / p99={recovery_find['p99_ms']:.2f}ms / qps={recovery_find['qps']:.2f}。"
+ )
+ if mixed_add:
+ lines.append(
+ "- `add_message`: 混合压测阶段 "
+ f"requests={mixed_add['requests']},p50={mixed_add['p50_ms']:.2f}ms,"
+ f"p95={mixed_add['p95_ms']:.2f}ms,p99={mixed_add['p99_ms']:.2f}ms。"
+ )
+ if mixed_commit:
+ lines.append(
+ "- `commit`: 混合压测阶段 "
+ f"requests={mixed_commit['requests']},p50={mixed_commit['p50_ms']:.2f}ms,"
+ f"p95={mixed_commit['p95_ms']:.2f}ms,p99={mixed_commit['p99_ms']:.2f}ms。"
+ )
+ if mixed_extract:
+ lines.append(
+ "- `extract`: 混合压测阶段 "
+ f"requests={mixed_extract['requests']},success_rate={mixed_extract['success_rate']:.2f}%,"
+ f"timeouts={extract_breakdown['timeouts']},p95={mixed_extract['p95_ms']:.2f}ms。"
+ )
+ if completed_tasks:
+ lines.append(
+ "- `commit` 背景任务(completed):"
+ f" tasks={completed_tasks['tasks']},p50={format_metric(completed_tasks['p50_server_duration_ms'])},"
+ f" p95={format_metric(completed_tasks['p95_server_duration_ms'])},"
+ f" p99={format_metric(completed_tasks['p99_server_duration_ms'])}。"
+ )
+ if incomplete_tasks:
+ lines.append(
+ "- `commit` 背景任务(incomplete):"
+ f" tasks={incomplete_tasks['tasks']},本地等待 p95={format_metric(incomplete_tasks['p95_local_duration_ms'])}。"
+ )
+ if baseline_status and mixed_status:
+ lines.append(
+ "- `system_status`: p95 "
+ f"{format_change(baseline_status['p95_ms'], mixed_status['p95_ms'])}。"
+ )
+ if baseline_queue and mixed_queue:
+ lines.append(
+ "- `observer_queue`: p95 "
+ f"{format_change(baseline_queue['p95_ms'], mixed_queue['p95_ms'])}。"
+ )
+
+ lines.append("")
+ lines.append("四、怎么理解这次结果")
+ lines.append(
+ "- `find` 没有报错,但延迟和吞吐同时变差,这比“报错”更说明问题:读请求被明显挤压了。"
+ )
+ lines.append("- `extract` 的大量 30 秒超时说明长尾请求已经被稳定制造出来了,压测目标基本达成。")
+ lines.append(
+ "- `commit` 前台接口看起来还好,但后台任务非常慢,说明资源竞争更可能发生在后续提取/索引阶段,而不是 HTTP 返回这一步。"
+ )
+ lines.append(
+ "- 如果你要拿这次结果给别人看,最应该盯的是三组数字:"
+ "`find` 基线 vs 压测 p95/p99、`extract` 超时比例、`commit` 背景任务完成时长。"
+ )
+
+ return "\n".join(lines)
+
+
+def format_top_counts(counts: Dict[str, int], limit: int = 3) -> str:
+ if not counts:
+ return "无"
+ ordered = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
+ return ", ".join(f"{key}={value}" for key, value in ordered[:limit])
+
+
+def percent_change(old: float, new: float) -> float:
+ if old == 0:
+ return 0.0 if new == 0 else 100.0
+ return ((new - old) / old) * 100.0
+
+
+def round_optional(value: Optional[float], ndigits: int = 4) -> Optional[float]:
+ if value is None:
+ return None
+ return round(value, ndigits)
+
+
+def write_json(path: Path, data: Any) -> None:
+ with path.open("w", encoding="utf-8") as handle:
+ json.dump(data, handle, indent=2, ensure_ascii=False)
+
+
+def write_text(path: Path, content: str) -> None:
+ path.write_text(content, encoding="utf-8")
+
+
+def write_jsonl(path: Path, rows: Iterable[Any]) -> None:
+ with path.open("w", encoding="utf-8") as handle:
+ for row in rows:
+ if hasattr(row, "to_dict"):
+ row = row.to_dict()
+ handle.write(json.dumps(row, ensure_ascii=False) + "\n")
+
+
+def write_csv(path: Path, rows: List[Dict[str, Any]]) -> None:
+ if not rows:
+ path.write_text("", encoding="utf-8")
+ return
+ fieldnames = list(rows[0].keys())
+ with path.open("w", encoding="utf-8", newline="") as handle:
+ writer = csv.DictWriter(handle, fieldnames=fieldnames)
+ writer.writeheader()
+ writer.writerows(rows)
+
+
+def extract_boolean(body: Optional[Dict[str, Any]], *keys: str) -> Optional[bool]:
+ current: Any = body
+ for key in keys:
+ if not isinstance(current, dict):
+ return None
+ current = current.get(key)
+ return current if isinstance(current, bool) else None
+
+
+def to_float(value: Any) -> Optional[float]:
+ if isinstance(value, (float, int)):
+ return float(value)
+ return None
+
+
+def format_metric(value: Optional[float]) -> str:
+ if value is None:
+ return "n/a"
+ return f"{value:.2f}ms"
+
+
+async def async_main(argv: Optional[List[str]] = None) -> int:
+ config = parse_args(argv)
+ runner = BenchmarkRunner(config)
+ return await runner.run()
+
+
+def main(argv: Optional[List[str]] = None) -> int:
+ try:
+ return asyncio.run(async_main(argv))
+ except KeyboardInterrupt:
+ print("\n[stopped] benchmark interrupted by user", file=sys.stderr)
+ return 130
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/benchmark/locomo/README.md b/benchmark/locomo/README.md
index cbb1bf460..8a0737745 100644
--- a/benchmark/locomo/README.md
+++ b/benchmark/locomo/README.md
@@ -10,12 +10,19 @@ benchmark/locomo/
│ ├── run_eval.py # 运行 QA 评估
│ ├── judge.py # LLM 裁判打分
│ ├── import_to_ov.py # 导入数据到 OpenViking
-│ ├── stat_judge_result.py # 统计评分结果
-│ ├── run_full_eval.sh # 一键运行完整评测流程
-│ ├── test_data/ # 测试数据目录
+│ ├── import_and_eval_one.sh # 单题/批量测试脚本
+│ ├── stat_judge_result.py # 统计评分结果
+│ ├── run_full_eval.sh # 一键运行完整评测流程
+│ ├── data/ # 测试数据目录
│ └── result/ # 评测结果目录
└── openclaw/ # OpenClaw 评测脚本
- └── eval.py # OpenClaw 评估脚本
+ ├── import_to_ov.py # 导入数据到 OpenViking
+ ├── eval.py # OpenClaw 评估脚本 (ingest/qa)
+ ├── judge.py # LLM 裁判打分(适配 OpenClaw)
+ ├── stat_judge_result.py # 统计评分结果和 token 使用
+ ├── run_full_eval.sh # 一键运行完整评测流程
+ ├── data/ # 测试数据目录
+ └── result/ # 评测结果目录
```
---
@@ -28,11 +35,33 @@ benchmark/locomo/
```bash
cd benchmark/locomo/vikingbot
-bash run_full_eval.sh
+bash run_full_eval.sh # 完整流程
+bash run_full_eval.sh --skip-import # 跳过导入,仅评测
```
该脚本会依次执行以下四个步骤:
+### 单题/批量测试
+
+使用 `import_and_eval_one.sh` 可以快速测试单个问题或批量测试某个 sample:
+
+```bash
+cd benchmark/locomo/vikingbot
+```
+
+**单题测试:**
+```bash
+./import_and_eval_one.sh 0 2 # sample 索引 0, question 2
+./import_and_eval_one.sh conv-26 2 # sample_id conv-26, question 2
+./import_and_eval_one.sh conv-26 2 --skip-import # 跳过导入
+```
+
+**批量测试单个 sample:**
+```bash
+./import_and_eval_one.sh conv-26 # conv-26 所有问题
+./import_and_eval_one.sh conv-26 --skip-import
+```
+
### 分步使用说明
#### 步骤 1: 导入对话数据
@@ -44,7 +73,7 @@ python import_to_ov.py --input <数据文件路径> [选项]
```
**参数说明:**
-- `--input`: 输入文件路径(JSON 或 TXT 格式),默认 `./test_data/locomo10.json`
+- `--input`: 输入文件路径(JSON 或 TXT 格式),默认 `./data/locomo10.json`
- `--sample`: 指定样本索引(0-based),默认处理所有样本
- `--sessions`: 指定会话范围,例如 `1-4` 或 `3`,默认所有会话
- `--parallel`: 并发导入数,默认 5
@@ -55,10 +84,10 @@ python import_to_ov.py --input <数据文件路径> [选项]
**示例:**
```bash
# 导入第一个样本的 1-4 会话
-python import_to_ov.py --input ./test_data/locomo10.json --sample 0 --sessions 1-4
+python import_to_ov.py --input ./data/locomo10.json --sample 0 --sessions 1-4
# 强制重新导入所有数据
-python import_to_ov.py --input ./test_data/locomo10.json --force-ingest
+python import_to_ov.py --input ./data/locomo10.json --force-ingest
```
#### 步骤 2: 运行 QA 评估
@@ -70,7 +99,7 @@ python run_eval.py <输入数据> [选项]
```
**参数说明:**
-- `input`: 输入 JSON/CSV 文件路径,默认 `./test_data/locomo10.json`
+- `input`: 输入 JSON/CSV 文件路径,默认 `./data/locomo10.json`
- `--output`: 输出 CSV 文件路径,默认 `./result/locomo_qa_result.csv`
- `--sample`: 指定样本索引
- `--count`: 运行的 QA 问题数量,默认全部
@@ -82,7 +111,7 @@ python run_eval.py <输入数据> [选项]
python run_eval.py
# 指定输入输出文件,使用 20 线程
-python run_eval.py ./test_data/locomo_qa_1528.csv --output ./result/my_result.csv --threads 20
+python run_eval.py ./data/locomo_qa_1528.csv --output ./result/my_result.csv --threads 20
```
#### 步骤 3: LLM 裁判打分
@@ -126,9 +155,91 @@ python stat_judge_result.py --input <评分结果文件>
## OpenClaw 评测流程
-使用 `openclaw/eval.py` 进行 OpenClaw 评测,该脚本有两种模式:
+### 完整一键评测
+
+使用 `openclaw/run_full_eval.sh` 可以一键运行完整评测流程:
+
+```bash
+cd benchmark/locomo/openclaw
+bash run_full_eval.sh # 只导入 OpenViking(跳过已导入的)
+bash run_full_eval.sh --with-claw-import # 同时导入 OpenViking 和 OpenClaw(并行执行)
+bash run_full_eval.sh --skip-import # 跳过导入步骤,直接运行 QA 评估
+bash run_full_eval.sh --force-ingest # 强制重新导入所有数据
+bash run_full_eval.sh --sample 0 # 只处理第 0 个 sample
+```
+
+**脚本参数说明:**
+
+| 参数 | 说明 |
+|------|------|
+| `--skip-import` | 跳过导入步骤,直接运行 QA 评估 |
+| `--with-claw-import` | 同时导入 OpenViking 和 OpenClaw(并行执行) |
+| `--force-ingest` | 强制重新导入所有数据(忽略已导入记录) |
+| `--sample ` | 只处理指定的 sample(0-based) |
+
+**脚本执行流程:**
+1. 导入数据到 OpenViking(可选同时导入 OpenClaw)
+2. 等待 60 秒确保数据导入完成
+3. 运行 QA 评估(`eval.py qa`,输出到 `result/qa_results.csv`)
+4. 裁判打分(`judge.py`,并行度 40)
+5. 统计结果(`stat_judge_result.py`,同时统计 QA 和 Import 的 token 使用)
+
+**脚本内部配置参数:**
+
+在 `run_full_eval.sh` 脚本顶部可以修改以下配置:
+
+| 变量 | 说明 | 默认值 |
+|------|------|---------------------------|
+| `INPUT_FILE` | 输入数据文件路径 | `../data/locomo10.json` |
+| `RESULT_DIR` | 结果输出目录 | `./result` |
+| `GATEWAY_TOKEN` | OpenClaw Gateway Token | 需要设置为实际 openclaw 网关 token |
+
+### 分步使用说明
+
+OpenClaw 评测包含以下脚本:
+- `import_to_ov.py`: 导入数据到 OpenViking
+- `eval.py`: OpenClaw 评估脚本(ingest/qa 两种模式)
+- `judge.py`: LLM 裁判打分
+- `stat_judge_result.py`: 统计评分结果和 token 使用
+
+---
+
+#### import_to_ov.py - 导入对话数据到 OpenViking
-### 模式 1: ingest - 导入对话数据到OpenClaw
+```bash
+python import_to_ov.py [选项]
+```
+
+**参数说明:**
+- `--input`: 输入文件路径(JSON 或 TXT),默认 `../data/locomo10.json`
+- `--sample`: 指定样本索引(0-based)
+- `--sessions`: 指定会话范围,如 `1-4`
+- `--question-index`: 根据 question 的 evidence 自动推断需要的 session
+- `--force-ingest`: 强制重新导入
+- `--no-user-agent-id`: 不传入 user_id 和 agent_id 给 OpenViking 客户端
+- `--openviking-url`: OpenViking 服务地址,默认 `http://localhost:1933`
+- `--success-csv`: 成功记录 CSV 路径,默认 `./result/import_success.csv`
+- `--error-log`: 错误日志路径,默认 `./result/import_errors.log`
+
+**示例:**
+```bash
+# 导入所有数据(跳过已导入的)
+python import_to_ov.py
+
+# 强制重新导入,不使用 user/agent id
+python import_to_ov.py --force-ingest --no-user-agent-id
+
+# 只导入第 0 个 sample
+python import_to_ov.py --sample 0
+```
+
+---
+
+#### eval.py - OpenClaw 评估脚本
+
+该脚本有两种模式:
+
+##### 模式 1: ingest - 导入对话数据到 OpenClaw
```bash
python eval.py ingest <输入文件> [选项]
@@ -137,37 +248,83 @@ python eval.py ingest <输入文件> [选项]
**参数说明:**
- `--sample`: 指定样本索引
- `--sessions`: 指定会话范围,如 `1-4`
-- `--viking`: 使用 OpenViking 而非 OpenClaw 导入
- `--force-ingest`: 强制重新导入
- `--agent-id`: Agent ID,默认 `locomo-eval`
+- `--token`: OpenClaw Gateway Token
**示例:**
```bash
# 导入第一个样本的 1-4 会话到 OpenClaw
-python eval.py ingest locomo10.json --sample 0 --sessions 1-4
-
-# 导入到 OpenViking
-python eval.py ingest locomo10.json --sample 0 --viking
+python eval.py ingest locomo10.json --sample 0 --sessions 1-4 --token
```
-### 模式 2: qa - 运行 QA 评估
-- 该评测制定了指定了`X-OpenClaw-Session-Key`,确保每次openclaw使用相同的session_id。Token计算将统计`session.jsonl`文件中的所有assistant轮次的Token消耗。每道题目执行完后会清空session.jsonl文件。
-- 该评测仅支持单线程运行,不支持并发。
-- 需先执行一次,查看`.openclaw/agents/{your_agent_id}/sessions/`下的session文件ID,作为`--session-id`参数的值开始完整评测。
+##### 模式 2: qa - 运行 QA 评估
+
+- 该评测指定了 `X-OpenClaw-Session-Key`,确保每次 OpenClaw 使用相同的 session_id
+- Token 计算统计 `session.jsonl` 文件中的所有 assistant 轮次的 Token 消耗
+- 每道题目执行完后会归档 session 文件
+- 支持并发运行(`--parallel` 参数)
+- 问题会自动添加时间上下文(从最后一个 session 提取)
+
```bash
python eval.py qa <输入文件> [选项]
```
**参数说明:**
-- `--output`: 输出文件路径
+- `--output`: 输出文件路径(不含 .csv 后缀)
- `--sample`: 指定样本索引
- `--count`: 运行的 QA 问题数量
- `--user`: 用户 ID,默认 `eval-1`
+- `--parallel`: 并发数,默认 10,最大 40
- `--token`: OpenClaw Gateway Token(或设置 `OPENCLAW_GATEWAY_TOKEN` 环境变量)
**示例:**
```bash
-python eval.py qa locomo10.json --sample 0 --output qa_results.txt
+# 运行所有 sample 的 QA 评估
+python eval.py qa locomo10.json --token --parallel 15
+
+# 只运行第 0 个 sample
+python eval.py qa locomo10.json --sample 0 --output qa_results_sample0
+```
+
+---
+
+#### judge.py - LLM 裁判打分
+
+```bash
+python judge.py [选项]
+```
+
+**参数说明:**
+- `--input`: QA 结果 CSV 文件路径
+- `--parallel`: 并发请求数,默认 40
+
+**示例:**
+```bash
+python judge.py --input ./result/qa_results.csv --parallel 40
+```
+
+---
+
+#### stat_judge_result.py - 统计结果
+
+同时统计 QA 结果和 OpenViking Import 的 token 使用:
+
+```bash
+python stat_judge_result.py [选项]
+```
+
+**参数说明:**
+- `--input`: QA 结果 CSV 文件路径,默认 `./result/qa_results_sample0.csv`
+- `--import-csv`: Import 成功 CSV 文件路径,默认 `./result/import_success.csv`
+
+**输出统计包括:**
+- QA 结果统计:正确率、token 使用(no-cache、cacheRead、output)
+- OpenViking Import 统计:embedding_tokens、vlm_tokens、total_tokens
+
+**示例:**
+```bash
+python stat_judge_result.py --input ./result/qa_results_sample0.csv --import-csv ./result/import_success.csv
```
---
diff --git a/benchmark/locomo/mem0/README.md b/benchmark/locomo/mem0/README.md
new file mode 100644
index 000000000..366ce04a5
--- /dev/null
+++ b/benchmark/locomo/mem0/README.md
@@ -0,0 +1,158 @@
+# LoCoMo Benchmark — mem0 Evaluation
+
+Evaluate mem0 memory retrieval on the [LoCoMo](https://github.com/snap-stanford/locomo) benchmark using OpenClaw as the agent.
+
+## Overview
+
+Two-phase pipeline:
+
+1. **Ingest** — Import LoCoMo conversations into mem0 (one `user_id` per sample)
+2. **Eval** — Send QA questions to OpenClaw agent (which recalls from mem0), then judge answers with an LLM
+
+## Prerequisites
+
+- [OpenClaw](https://openclaw.ai) installed and configured
+- `openclaw-mem0` plugin installed (`~/.openclaw/extensions/openclaw-mem0`)
+- `~/.openclaw/openclaw.json` with `plugins.slots.memory = "openclaw-mem0"`
+- API keys in `~/.openviking_benchmark_env`:
+
+```env
+MEM0_API_KEY=m0-...
+ARK_API_KEY=... # Volcengine ARK, used for judge LLM
+```
+
+- Python dependencies:
+
+```bash
+uv sync --frozen --extra dev
+```
+
+## Data
+
+LoCoMo 10-sample dataset at `benchmark/locomo/data/locomo10.json`:
+
+- 10 samples (conversations between two people)
+- 1986 QA pairs across 5 categories:
+ - 1: single-hop
+ - 2: multi-hop
+ - 3: temporal
+ - 4: world-knowledge
+ - 5: adversarial (skipped by default)
+
+## Step 1 — Ingest
+
+Import conversations into mem0. Each sample is stored under `user_id = sample_id` (e.g. `conv-26`).
+
+```bash
+# Ingest all 10 samples
+python ingest.py
+
+# Ingest a single sample
+python ingest.py --sample conv-26
+
+# Force re-ingest (ignore existing records)
+python ingest.py --sample conv-26 --force-ingest
+
+# Clear all ingest records and start fresh
+python ingest.py --clear-ingest-record
+```
+
+Key options:
+
+| Option | Description |
+|--------|-------------|
+| `--sample` | Sample ID (e.g. `conv-26`) or index (0-based). Default: all |
+| `--sessions` | Session range, e.g. `1-4` or `3`. Default: all |
+| `--limit` | Max samples to process |
+| `--force-ingest` | Re-ingest even if already recorded |
+| `--clear-ingest-record` | Clear `.ingest_record.json` before running |
+
+Ingest records are saved to `result/.ingest_record.json` to avoid duplicate ingestion.
+
+## Step 2 — Eval
+
+Send QA questions to OpenClaw agent and optionally judge answers.
+
+Before each sample, `eval.py` automatically:
+1. Updates `~/.openclaw/openclaw.json` to set `openclaw-mem0.config.userId = sample_id`
+2. Restarts the OpenClaw gateway to pick up the new config
+3. Verifies the correct `userId` is active via a dummy request
+
+```bash
+# Run QA + judge for all samples (6 concurrent threads)
+python eval.py --threads 6 --judge
+
+# Single sample
+python eval.py --sample conv-26 --threads 6 --judge
+
+# First 12 questions only
+python eval.py --sample conv-26 --count 12 --threads 6 --judge
+
+# Judge-only (grade existing responses in CSV)
+python eval.py --judge-only
+```
+
+Key options:
+
+| Option | Description |
+|--------|-------------|
+| `--sample` | Sample ID or index. Default: all |
+| `--count` | Max QA items to process |
+| `--threads` | Concurrent threads per sample (default: 10) |
+| `--judge` | Auto-judge each response after answering |
+| `--judge-only` | Skip QA, only grade ungraded rows in existing CSV |
+| `--no-skip-adversarial` | Include category-5 adversarial questions |
+| `--openclaw-url` | OpenClaw gateway URL (default: `http://127.0.0.1:18789`) |
+| `--openclaw-token` | Auth token (or `OPENCLAW_GATEWAY_TOKEN` env var) |
+| `--judge-base-url` | Judge API base URL (default: Volcengine ARK) |
+| `--judge-model` | Judge model (default: `doubao-seed-2-0-pro-260215`) |
+| `--output` | Output CSV path (default: `result/qa_results.csv`) |
+
+Results are written to `result/qa_results.csv`. Failed (`[ERROR]`) rows are automatically removed at the start of each run and retried.
+
+## Output
+
+`result/qa_results.csv` columns:
+
+| Column | Description |
+|--------|-------------|
+| `sample_id` | Conversation sample ID |
+| `question_id` | Unique question ID (e.g. `conv-26_qa0`) |
+| `question` / `answer` | Question and gold answer |
+| `category` / `category_name` | Question category |
+| `response` | Agent response |
+| `input_tokens` / `output_tokens` / `total_tokens` | LLM token usage (all turns summed) |
+| `time_cost` | End-to-end latency (seconds) |
+| `result` | `CORRECT` or `WRONG` |
+| `reasoning` | Judge's reasoning |
+
+## Summary Output
+
+After eval completes:
+
+```
+=== Token & Latency Summary ===
+ Total input tokens : 123456
+ Avg time per query : 18.3s
+
+=== Accuracy Summary ===
+ Overall: 512/1540 = 33.25%
+ By category:
+ multi-hop : 120/321 = 37.38%
+ single-hop : 98/282 = 34.75%
+ temporal : 28/96 = 29.17%
+ world-knowledge : 266/841 = 31.63%
+```
+
+## Delete mem0 Data
+
+```bash
+# Delete a specific sample
+python delete_user.py conv-26
+
+# Delete all samples from the dataset
+python delete_user.py --from-data
+
+# Delete first N samples
+python delete_user.py --from-data --limit 3
+```
diff --git a/benchmark/locomo/mem0/delete_user.py b/benchmark/locomo/mem0/delete_user.py
new file mode 100644
index 000000000..462ae8c09
--- /dev/null
+++ b/benchmark/locomo/mem0/delete_user.py
@@ -0,0 +1,84 @@
+"""
+Delete all memories for one or more mem0 users.
+
+Usage:
+ # Delete a single user
+ python delete_user.py conv-26
+
+ # Delete multiple users
+ python delete_user.py conv-26 conv-31 conv-45
+
+ # Delete first N users from locomo10.json
+ python delete_user.py --from-data --limit 2
+
+ # Delete all users from locomo10.json
+ python delete_user.py --from-data
+"""
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+load_dotenv(Path.home() / ".openviking_benchmark_env")
+
+try:
+ from mem0 import MemoryClient
+except ImportError:
+ print("Error: mem0 package not installed. Run: pip install mem0ai", file=sys.stderr)
+ sys.exit(1)
+
+SCRIPT_DIR = Path(__file__).parent.resolve()
+DEFAULT_DATA_PATH = str(SCRIPT_DIR / ".." / "data" / "locomo10.json")
+
+
+def delete_user(client: MemoryClient, user_id: str) -> bool:
+ try:
+ client.delete_all(user_id=user_id)
+ print(f" [OK] {user_id}")
+ return True
+ except Exception as e:
+ print(f" [ERROR] {user_id}: {e}")
+ return False
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description="Delete all mem0 memories for given user(s)")
+ parser.add_argument("users", nargs="*", help="user_id(s) to delete (e.g. conv-26 conv-31)")
+ parser.add_argument("--api-key", default=None, help="mem0 API key (or MEM0_API_KEY env var)")
+ parser.add_argument("--from-data", action="store_true", help="load user_ids from locomo10.json")
+ parser.add_argument("--input", default=DEFAULT_DATA_PATH, help="path to locomo10.json")
+ parser.add_argument("--limit", type=int, default=None, help="max users to delete (with --from-data)")
+ args = parser.parse_args()
+
+ api_key = args.api_key or os.environ.get("MEM0_API_KEY", "")
+ if not api_key:
+ print("Error: mem0 API key required (--api-key or MEM0_API_KEY env var)", file=sys.stderr)
+ sys.exit(1)
+
+ # Convert bare sample_ids (e.g. "conv-26") to mem0 user_id format
+ user_ids: list[str] = list(args.users)
+
+ if args.from_data:
+ with open(args.input, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ if args.limit:
+ data = data[: args.limit]
+ user_ids += [s["sample_id"] for s in data]
+
+ if not user_ids:
+ print("Error: no users specified. Pass user_ids or use --from-data", file=sys.stderr)
+ sys.exit(1)
+
+ user_ids = list(dict.fromkeys(user_ids)) # deduplicate, preserve order
+ print(f"Deleting memories for {len(user_ids)} user(s)...")
+
+ client = MemoryClient(api_key=api_key)
+ ok = sum(delete_user(client, uid) for uid in user_ids)
+ print(f"\nDone: {ok}/{len(user_ids)} succeeded")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/locomo/mem0/eval.py b/benchmark/locomo/mem0/eval.py
new file mode 100644
index 000000000..fa273a718
--- /dev/null
+++ b/benchmark/locomo/mem0/eval.py
@@ -0,0 +1,837 @@
+"""
+Evaluate LoCoMo QA via mem0 + OpenClaw (agent mode).
+
+Questions are sent to an OpenClaw agent which calls mem0 internally.
+Before each request, ~/.openclaw/openclaw.json is updated so that the
+openclaw-mem0 plugin uses userId = sample_id, giving each conversation
+sample its own isolated memory namespace.
+
+Prerequisites:
+ - Conversations already ingested into mem0 via ingest.py (user_id = sample_id)
+ - OpenClaw running locally with the openclaw-mem0 plugin installed
+
+Usage:
+ # Run QA + auto-judge
+ python eval.py --openclaw-url http://127.0.0.1:18789 --openclaw-token xxx \\
+ --judge --judge-token xxx
+
+ # Single sample
+ python eval.py --sample conv-26 --openclaw-token xxx
+
+ # Only judge an existing result CSV (skip QA)
+ python eval.py --judge-only --output result/qa_results.csv --judge-token xxx
+"""
+
+import argparse
+import csv
+import json
+import os
+import subprocess
+import sys
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Optional
+
+import requests
+from dotenv import load_dotenv
+
+load_dotenv(Path.home() / ".openviking_benchmark_env")
+
+SCRIPT_DIR = Path(__file__).parent.resolve()
+DEFAULT_DATA_PATH = str(SCRIPT_DIR / ".." / "data" / "locomo10.json")
+DEFAULT_OUTPUT_PATH = str(SCRIPT_DIR / "result" / "qa_results.csv")
+DEFAULT_OPENCLAW_URL = "http://127.0.0.1:18789"
+DEFAULT_SESSION_KEY = "locomo-eval"
+OPENCLAW_CONFIG_PATH = Path.home() / ".openclaw" / "openclaw.json"
+
+# Serialize openclaw config updates across threads so each request sees the right userId
+_openclaw_config_lock = threading.Lock()
+
+# ---------------------------------------------------------------------------
+# openclaw.json config helpers
+# ---------------------------------------------------------------------------
+
+def _update_openclaw_mem0_user(sample_id: str) -> None:
+ """
+ Rewrite ~/.openclaw/openclaw.json so that openclaw-mem0 uses sample_id as userId.
+ Also ensures the plugin is enabled.
+ Must be called while holding _openclaw_config_lock.
+ """
+ with open(OPENCLAW_CONFIG_PATH, "r", encoding="utf-8") as f:
+ config = json.load(f)
+
+ entries = config.setdefault("plugins", {}).setdefault("entries", {})
+ mem0_entry = entries.setdefault("openclaw-mem0", {})
+ mem0_entry["enabled"] = True
+ mem0_entry.setdefault("config", {})["userId"] = sample_id
+
+ tmp = str(OPENCLAW_CONFIG_PATH) + ".tmp"
+ with open(tmp, "w", encoding="utf-8") as f:
+ json.dump(config, f, indent=2, ensure_ascii=False)
+ os.replace(tmp, str(OPENCLAW_CONFIG_PATH))
+
+
+def _restart_openclaw_gateway(base_url: str, sample_id: str, startup_timeout: int = 30) -> None:
+ """
+ Kill the running openclaw gateway process and restart it.
+ Waits until the gateway is ready to accept requests.
+ Must be called while holding _openclaw_config_lock.
+ """
+ # Kill existing gateway
+ try:
+ subprocess.run(["pkill", "-f", "openclaw gateway"], capture_output=True)
+ except Exception as e:
+ print(f" [gateway] pkill failed: {e}", file=sys.stderr)
+
+ # Start new gateway in background
+ try:
+ subprocess.Popen(
+ ["openclaw", "gateway"],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL,
+ )
+ except Exception as e:
+ raise RuntimeError(f"Failed to start openclaw gateway: {e}")
+
+ # Wait for process to fully start before checking health
+ time.sleep(3)
+
+ # Wait until gateway is ready
+ health_url = f"{base_url.rstrip('/')}/health"
+ deadline = time.time() + startup_timeout
+ while time.time() < deadline:
+ try:
+ resp = requests.get(health_url, timeout=2)
+ if resp.status_code < 500:
+ break
+ except Exception:
+ pass
+ time.sleep(0.5)
+ else:
+ raise RuntimeError(f"openclaw gateway did not become ready within {startup_timeout}s")
+
+ # Verify the correct userId is active by sending a dummy request and checking session log
+ _verify_openclaw_user(base_url, sample_id, max_retries=3)
+
+
+def _verify_openclaw_user(base_url: str, expected_user: str, max_retries: int = 3) -> None:
+ """
+ Send a dummy request and check the session jsonl to confirm
+ openclaw-mem0 is searching with the correct userId.
+ Retries up to max_retries times with 3s interval.
+ """
+ verify_session_key = f"locomo-verify-{expected_user}-{int(time.time())}"
+ url = f"{base_url.rstrip('/')}/v1/responses"
+ headers = {
+ "Content-Type": "application/json",
+ "X-OpenClaw-Session-Key": verify_session_key,
+ }
+ payload = {
+ "model": "openclaw",
+ "input": "What did we talk about recently?",
+ "stream": False,
+ }
+
+ for attempt in range(max_retries):
+ try:
+ resp = requests.post(url, json=payload, headers=headers, timeout=120)
+ resp.raise_for_status()
+ except Exception as e:
+ print(f" [verify] request failed: {e}", file=sys.stderr)
+ time.sleep(3)
+ continue
+
+ # Wait for session jsonl to be written
+ time.sleep(1)
+ session_id = get_openclaw_session_id(verify_session_key)
+ if not session_id:
+ time.sleep(3)
+ continue
+
+ # Check session log for the userId in the memories context
+ sessions_dir = os.path.expanduser("~/.openclaw/agents/main/sessions")
+ jsonl_path = os.path.join(sessions_dir, f"{session_id}.jsonl")
+ try:
+ with open(jsonl_path, "r", encoding="utf-8") as f:
+ content = f.read()
+ if f'user "{expected_user}"' in content or f'user \\"{expected_user}\\"' in content:
+ print(f" [verify] userId confirmed: {expected_user}", file=sys.stderr)
+ return
+ else:
+ print(f" [verify] userId mismatch, retrying in 3s...", file=sys.stderr)
+ except Exception:
+ pass
+ time.sleep(3)
+
+ raise RuntimeError(f"openclaw userId did not switch to {expected_user} after {max_retries} retries")
+
+
+CATEGORY_NAMES = {
+ 1: "single-hop",
+ 2: "multi-hop",
+ 3: "temporal",
+ 4: "world-knowledge",
+ 5: "adversarial",
+}
+
+# ---------------------------------------------------------------------------
+# LoCoMo data loading
+# ---------------------------------------------------------------------------
+
+def load_locomo_data(path: str, sample_id: Optional[str] = None) -> list[dict]:
+ with open(path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ if sample_id is not None:
+ try:
+ idx = int(sample_id)
+ if idx < 0 or idx >= len(data):
+ raise ValueError(f"Sample index {idx} out of range (0-{len(data) - 1})")
+ return [data[idx]]
+ except ValueError:
+ pass
+ matched = [s for s in data if s.get("sample_id") == sample_id]
+ if not matched:
+ raise ValueError(f"sample_id '{sample_id}' not found")
+ return matched
+
+ return data
+
+
+def get_sample_last_session_date(sample: dict) -> Optional[str]:
+ """Return the date of the last session as YYYY-MM-DD, or None."""
+ conv = sample.get("conversation", {})
+ session_keys = [k for k in conv if k.startswith("session_") and "date_time" not in k]
+ if not session_keys:
+ return None
+
+ def sess_num(k: str) -> int:
+ try:
+ return int(k.split("_")[1])
+ except ValueError:
+ return 0
+
+ for sk in sorted(session_keys, key=sess_num, reverse=True):
+ if conv.get(sk):
+ dt_key = f"{sk}_date_time"
+ date_str = conv.get(dt_key, "")
+ if date_str and " on " in date_str:
+ try:
+ from datetime import datetime
+ date_part = date_str.split(" on ")[-1]
+ dt = datetime.strptime(date_part.strip(), "%d %B, %Y")
+ return dt.strftime("%Y-%m-%d")
+ except ValueError:
+ pass
+ return None
+
+
+def load_qa_items(
+ data: list[dict],
+ skip_adversarial: bool = True,
+ question_index: Optional[int] = None,
+ count: Optional[int] = None,
+) -> list[dict]:
+ items = []
+ for sample in data:
+ sample_id = sample.get("sample_id", "")
+ question_time = get_sample_last_session_date(sample)
+
+ for q_idx, qa in enumerate(sample.get("qa", [])):
+ if question_index is not None and q_idx != question_index:
+ continue
+ category = qa.get("category", 0)
+ if skip_adversarial and str(category) == "5":
+ continue
+ items.append(
+ {
+ "sample_id": sample_id,
+ "question_index": q_idx,
+ "question_id": f"{sample_id}_qa{q_idx}",
+ "question": qa["question"],
+ "answer": str(qa["answer"]),
+ "category": category,
+ "category_name": CATEGORY_NAMES.get(category, "unknown"),
+ "evidence": qa.get("evidence", []),
+ "question_time": question_time,
+ }
+ )
+
+ if count is not None:
+ items = items[:count]
+ return items
+
+
+# ---------------------------------------------------------------------------
+# CSV helpers
+# ---------------------------------------------------------------------------
+
+QA_FIELDNAMES = [
+ "sample_id",
+ "question_index",
+ "question_id",
+ "question",
+ "answer",
+ "category",
+ "category_name",
+ "question_time",
+ "evidence",
+ "response",
+ "input_tokens",
+ "output_tokens",
+ "total_tokens",
+ "time_cost",
+ "result",
+ "reasoning",
+ "timestamp",
+]
+
+
+def load_processed_ids(output_path: str) -> set[str]:
+ processed: set[str] = set()
+ if not os.path.exists(output_path):
+ return processed
+ try:
+ with open(output_path, "r", encoding="utf-8", newline="") as f:
+ for row in csv.DictReader(f):
+ if row.get("response"):
+ processed.add(row.get("question_id", ""))
+ except Exception as e:
+ print(f"[WARN] Error reading {output_path}: {e}", file=sys.stderr)
+ return processed
+
+
+def save_row(output_path: str, row: dict, write_lock: threading.Lock) -> None:
+ with write_lock:
+ file_exists = os.path.exists(output_path)
+ with open(output_path, "a", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=QA_FIELDNAMES, extrasaction="ignore")
+ if not file_exists:
+ writer.writeheader()
+ writer.writerow(row)
+ f.flush()
+
+
+# ---------------------------------------------------------------------------
+# OpenClaw agent
+# ---------------------------------------------------------------------------
+
+def extract_openclaw_text(body: dict) -> str:
+ """Extract assistant text from /v1/responses API response."""
+ try:
+ for item in body.get("output", []):
+ if item.get("type") == "message":
+ for content in item.get("content", []):
+ if content.get("type") == "output_text":
+ return content.get("text", "")
+ for item in body.get("output", []):
+ if "text" in item:
+ return item["text"]
+ for content in item.get("content", []):
+ if "text" in content:
+ return content["text"]
+ except Exception:
+ pass
+ return f"[ERROR: could not parse response: {body}]"
+
+
+def get_openclaw_session_id(session_key: str) -> Optional[str]:
+ # main agent sessions
+ sessions_file = os.path.expanduser("~/.openclaw/agents/main/sessions/sessions.json")
+ try:
+ with open(sessions_file, "r") as f:
+ data = json.load(f)
+ return data.get(session_key, {}).get("sessionId")
+ except Exception:
+ return None
+
+
+
+def parse_session_tokens(session_id: str, agent_id: str) -> dict:
+ """Sum up all LLM usage across all assistant messages in the session jsonl."""
+ sessions_dir = os.path.expanduser(f"~/.openclaw/agents/{agent_id}/sessions")
+ src = os.path.join(sessions_dir, f"{session_id}.jsonl")
+ total_input = total_output = total_cache_read = 0
+ try:
+ with open(src, "r", encoding="utf-8") as f:
+ for line in f:
+ line = line.strip()
+ if not line:
+ continue
+ obj = json.loads(line)
+ if obj.get("type") == "message" and obj.get("message", {}).get("role") == "assistant":
+ usage = obj["message"].get("usage", {})
+ total_input += usage.get("input", 0)
+ total_output += usage.get("output", 0)
+ total_cache_read += usage.get("cacheRead", 0)
+ except Exception:
+ pass
+ return {
+ "input_tokens": total_input,
+ "output_tokens": total_output,
+ "total_tokens": total_input + total_output + total_cache_read,
+ }
+
+
+def send_to_openclaw(
+ question: str,
+ sample_id: str,
+ base_url: str,
+ token: str,
+ question_time: Optional[str] = None,
+ question_id: Optional[str] = None,
+ retries: int = 2,
+) -> tuple[str, dict, float]:
+ """
+ Send a question to an OpenClaw agent.
+
+ Before each request we update ~/.openclaw/openclaw.json to set the
+ openclaw-mem0 userId = sample_id, providing per-sample memory isolation.
+ A global lock serializes these config writes so concurrent threads don't
+ clobber each other's userId.
+
+ Returns (response_text, usage, time_cost).
+ """
+ # Send only the question as input so mem0 semantic search isn't polluted by the date prefix.
+ input_text = question
+
+ # Use a unique session key per question to avoid cross-thread session collision.
+ session_key = f"{DEFAULT_SESSION_KEY}-{question_id}" if question_id else DEFAULT_SESSION_KEY
+
+ url = f"{base_url.rstrip('/')}/v1/responses"
+ headers = {
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {token}",
+ "X-OpenClaw-Session-Key": session_key,
+ }
+ payload = {
+ "model": "openclaw",
+ "input": input_text,
+ "stream": False,
+ "user": sample_id,
+ }
+
+ last_exc: Optional[Exception] = None
+ t0 = time.time()
+ for attempt in range(retries + 1):
+ try:
+ resp = requests.post(url, json=payload, headers=headers, timeout=300)
+ resp.raise_for_status()
+ body = resp.json()
+ response_text = extract_openclaw_text(body)
+
+ # Wait for openclaw to flush the session jsonl before parsing tokens
+ time.sleep(1)
+ session_id = get_openclaw_session_id(session_key)
+ if session_id:
+ usage = parse_session_tokens(session_id, "main")
+ else:
+ usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+
+ return response_text, usage, time.time() - t0
+ except Exception as e:
+ last_exc = e
+ if attempt < retries:
+ print(f" [retry {attempt + 1}/{retries}] {e}", file=sys.stderr)
+
+ raise RuntimeError(f"OpenClaw request failed after {retries + 1} attempts: {last_exc}")
+
+
+# ---------------------------------------------------------------------------
+# LLM judge
+# ---------------------------------------------------------------------------
+
+JUDGE_SYSTEM_PROMPT = "You are an expert grader that determines if answers to questions match a gold standard answer"
+
+JUDGE_ACCURACY_PROMPT = """Your task is to label an answer to a question as 'CORRECT' or 'WRONG'. You will be given the following data:
+ (1) a question (posed by one user to another user),
+ (2) a 'gold' (ground truth) answer,
+ (3) a generated answer
+which you will score as CORRECT/WRONG.
+
+The point of the question is to ask about something one user should know about the other user based on their prior conversations.
+The gold answer will usually be a concise and short answer that includes the referenced topic, for example:
+Question: Do you remember what I got the last time I went to Hawaii?
+Gold answer: A shell necklace
+The generated answer might be much longer, but you should be generous with your grading - as long as it touches on the same topic as the gold answer, it should be counted as CORRECT.
+
+For time related questions, the gold answer will be a specific date, month, year, etc. The generated answer might be much longer or use relative time references (like "last Tuesday" or "next month"), but you should be generous with your grading - as long as it refers to the same date or time period as the gold answer, it should be counted as CORRECT. Even if the format differs (e.g., "May 7th" vs "7 May"), consider it CORRECT if it's the same date.
+
+Now it's time for the real question:
+Question: {question}
+Gold answer: {gold_answer}
+Generated answer: {response}
+
+First, provide a short (one sentence) explanation of your reasoning, then finish with CORRECT or WRONG.
+Do NOT include both CORRECT and WRONG in your response, or it will break the evaluation script.
+
+Respond with JSON only: {{"reasoning": "your explanation", "is_correct": "CORRECT" or "WRONG"}}"""
+
+
+
+def judge_answer(
+ question: str,
+ gold_answer: str,
+ response: str,
+ judge_base_url: str,
+ judge_token: str,
+ judge_model: str,
+) -> tuple[str, str]:
+ from openai import OpenAI
+ client = OpenAI(base_url=judge_base_url, api_key=judge_token)
+ prompt = JUDGE_ACCURACY_PROMPT.format(
+ question=question, gold_answer=gold_answer, response=response
+ )
+ try:
+ resp = client.chat.completions.create(
+ model=judge_model,
+ messages=[
+ {"role": "system", "content": JUDGE_SYSTEM_PROMPT},
+ {"role": "user", "content": prompt},
+ ],
+ temperature=0,
+ timeout=60,
+ )
+ content = resp.choices[0].message.content.strip()
+ start, end = content.find("{"), content.rfind("}")
+ if start != -1 and end != -1:
+ parsed = json.loads(content[start : end + 1])
+ label = "CORRECT" if parsed.get("is_correct", "WRONG").strip().upper() == "CORRECT" else "WRONG"
+ return label, parsed.get("reasoning", "")
+ return "WRONG", f"[PARSE ERROR] {content}"
+ except Exception as e:
+ return "WRONG", f"[API ERROR] {e}"
+
+
+# ---------------------------------------------------------------------------
+# Accuracy summary
+# ---------------------------------------------------------------------------
+
+def print_accuracy(rows: list[dict]) -> None:
+ graded = [r for r in rows if r.get("result") in ("CORRECT", "WRONG")]
+ if not graded:
+ print("\n[INFO] No graded results to summarize.", file=sys.stderr)
+ return
+
+ correct_total = sum(1 for r in graded if r["result"] == "CORRECT")
+ print("\n=== Accuracy Summary ===", file=sys.stderr)
+ print(f" Overall: {correct_total}/{len(graded)} = {correct_total/len(graded):.2%}", file=sys.stderr)
+
+ by_cat: dict[str, list[str]] = {}
+ for r in graded:
+ cat = r.get("category_name") or str(r.get("category", "?"))
+ by_cat.setdefault(cat, []).append(r["result"])
+
+ print(" By category:", file=sys.stderr)
+ for cat, results in sorted(by_cat.items()):
+ n = sum(1 for r in results if r == "CORRECT")
+ print(f" {cat:20s}: {n}/{len(results)} = {n/len(results):.2%}", file=sys.stderr)
+
+
+# ---------------------------------------------------------------------------
+# Main runners
+# ---------------------------------------------------------------------------
+
+def run_qa(args: argparse.Namespace) -> None:
+ openclaw_token = args.openclaw_token or os.environ.get("OPENCLAW_GATEWAY_TOKEN", "")
+
+ judge_token = args.judge_token or os.environ.get("ARK_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
+ if args.judge and not judge_token:
+ print(
+ "Error: judge token required (--judge-token or OPENAI_API_KEY env var)",
+ file=sys.stderr,
+ )
+ sys.exit(1)
+
+ data = load_locomo_data(args.input, args.sample)
+ qa_items = load_qa_items(
+ data,
+ skip_adversarial=args.skip_adversarial,
+ question_index=args.question_index,
+ count=args.count,
+ )
+ print(f"[INFO] {len(qa_items)} QA items loaded", file=sys.stderr)
+
+ Path(args.output).parent.mkdir(parents=True, exist_ok=True)
+
+ # Remove ERROR rows from CSV before loading processed ids
+ if os.path.exists(args.output):
+ with open(args.output, "r", encoding="utf-8", newline="") as f:
+ reader = csv.DictReader(f)
+ fieldnames = reader.fieldnames or QA_FIELDNAMES
+ clean_rows = [r for r in reader if not r.get("response", "").startswith("[ERROR]")]
+ with open(args.output, "w", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
+ writer.writeheader()
+ writer.writerows(clean_rows)
+
+ processed_ids = load_processed_ids(args.output)
+ remaining = [qa for qa in qa_items if qa["question_id"] not in processed_ids]
+ print(
+ f"[INFO] {len(processed_ids)} already done, {len(remaining)} remaining",
+ file=sys.stderr,
+ )
+
+ if not remaining:
+ print("[INFO] All questions already processed.", file=sys.stderr)
+ else:
+ write_lock = threading.Lock()
+ total = len(remaining)
+
+ # Group remaining questions by sample_id to minimize gateway restarts
+ from collections import defaultdict
+ by_sample: dict[str, list[tuple[int, dict]]] = defaultdict(list)
+ for i, qa in enumerate(remaining):
+ by_sample[qa["sample_id"]].append((i + 1, qa))
+
+ def run_one(qa: dict, idx: int) -> None:
+ print(
+ f" [{idx}/{total}] {qa['question_id']}: {qa['question'][:60]}...",
+ file=sys.stderr,
+ )
+ try:
+ response, usage, time_cost = send_to_openclaw(
+ qa["question"],
+ qa["sample_id"],
+ args.openclaw_url,
+ openclaw_token,
+ qa.get("question_time"),
+ qa["question_id"],
+ )
+ except Exception as e:
+ response = f"[ERROR] {e}"
+ usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+ time_cost = 0.0
+
+ result_label, reasoning = "", ""
+ if args.judge and response and not response.startswith("[ERROR]"):
+ result_label, reasoning = judge_answer(
+ qa["question"],
+ qa["answer"],
+ response,
+ args.judge_base_url or args.openclaw_url,
+ judge_token,
+ args.judge_model,
+ )
+
+ row = {
+ "sample_id": qa["sample_id"],
+ "question_index": qa["question_index"],
+ "question_id": qa["question_id"],
+ "question": qa["question"],
+ "answer": qa["answer"],
+ "category": qa["category"],
+ "category_name": qa["category_name"],
+ "question_time": qa.get("question_time", ""),
+ "evidence": json.dumps(qa.get("evidence", [])),
+ "response": response,
+ "input_tokens": usage.get("input_tokens", 0),
+ "output_tokens": usage.get("output_tokens", 0),
+ "total_tokens": usage.get("total_tokens", 0),
+ "time_cost": round(time_cost, 2),
+ "result": result_label,
+ "reasoning": reasoning,
+ "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+ }
+ save_row(args.output, row, write_lock)
+ label_str = f" → {result_label}" if result_label else ""
+ print(f" [{idx}/{total}] done {time_cost:.1f}s{label_str}", file=sys.stderr)
+
+ # Process sample by sample: restart gateway once per sample to pick up new userId
+ for sample_id, qa_list in by_sample.items():
+ print(f"\n[INFO] Switching to sample {sample_id}, restarting openclaw gateway...", file=sys.stderr)
+ with _openclaw_config_lock:
+ _update_openclaw_mem0_user(sample_id)
+ _restart_openclaw_gateway(args.openclaw_url, sample_id)
+ print(f"[INFO] Gateway ready, running {len(qa_list)} questions for {sample_id}", file=sys.stderr)
+
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
+ futures = {
+ executor.submit(run_one, qa, idx): qa
+ for idx, qa in qa_list
+ }
+ for fut in as_completed(futures):
+ try:
+ fut.result()
+ except Exception as e:
+ qa = futures[fut]
+ print(f" [ERROR] {qa['question_id']}: {e}", file=sys.stderr)
+
+ # Print token and latency summary
+ try:
+ with open(args.output, "r", encoding="utf-8", newline="") as f:
+ rows = list(csv.DictReader(f))
+ total_input = sum(int(r.get("input_tokens") or 0) for r in rows)
+ total_input_with_cache = sum(
+ int(r.get("total_tokens") or 0) - int(r.get("output_tokens") or 0) for r in rows
+ )
+ times = [float(r["time_cost"]) for r in rows if r.get("time_cost")]
+ avg_time = sum(times) / len(times) if times else 0.0
+ print(f"\n=== Token & Latency Summary ===", file=sys.stderr)
+ print(f" Total input tokens : {total_input}", file=sys.stderr)
+ print(f" Total input tokens (with cache): {total_input_with_cache}", file=sys.stderr)
+ print(f" Avg time per query : {avg_time:.1f}s", file=sys.stderr)
+ except Exception:
+ pass
+
+ if args.judge:
+ try:
+ with open(args.output, "r", encoding="utf-8", newline="") as f:
+ print_accuracy(list(csv.DictReader(f)))
+ except Exception:
+ pass
+
+
+def run_judge_only(args: argparse.Namespace) -> None:
+ """Grade responses in an existing CSV that lack a result label."""
+ if not os.path.exists(args.output):
+ print(f"Error: output file not found: {args.output}", file=sys.stderr)
+ sys.exit(1)
+
+ judge_token = args.judge_token or os.environ.get("ARK_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
+ if not judge_token:
+ print(
+ "Error: judge token required (--judge-token or ARK_API_KEY env var)",
+ file=sys.stderr,
+ )
+ sys.exit(1)
+
+ with open(args.output, "r", encoding="utf-8", newline="") as f:
+ reader = csv.DictReader(f)
+ fieldnames = list(reader.fieldnames or QA_FIELDNAMES)
+ rows = list(reader)
+
+ for extra in ("result", "reasoning"):
+ if extra not in fieldnames:
+ fieldnames.append(extra)
+
+ ungraded_indices = [i for i, row in enumerate(rows) if not row.get("result")]
+ print(f"[INFO] {len(rows)} rows total, {len(ungraded_indices)} ungraded", file=sys.stderr)
+
+ if not ungraded_indices:
+ print("[INFO] All rows already graded.", file=sys.stderr)
+ print_accuracy(rows)
+ return
+
+ judge_base_url = args.judge_base_url or "https://ark.cn-beijing.volces.com/api/v3"
+ file_lock = threading.Lock()
+
+ def grade_one(idx: int) -> None:
+ row = rows[idx]
+ label, reasoning = judge_answer(
+ row.get("question", ""),
+ row.get("answer", ""),
+ row.get("response", ""),
+ judge_base_url,
+ judge_token,
+ args.judge_model,
+ )
+ row["result"] = label
+ row["reasoning"] = reasoning
+ with file_lock:
+ tmp = args.output + ".tmp"
+ with open(tmp, "w", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
+ writer.writeheader()
+ writer.writerows(rows)
+ os.replace(tmp, args.output)
+ print(f" Graded {row.get('question_id','?')}: {label}", file=sys.stderr)
+
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
+ futures = [executor.submit(grade_one, idx) for idx in ungraded_indices]
+ for fut in as_completed(futures):
+ try:
+ fut.result()
+ except Exception as e:
+ print(f"[ERROR] grading failed: {e}", file=sys.stderr)
+
+ print_accuracy(rows)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+ parser = argparse.ArgumentParser(
+ description="Evaluate LoCoMo QA via OpenClaw agent (mem0-backed)",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=__doc__,
+ )
+
+ # Data selection
+ parser.add_argument("--input", default=DEFAULT_DATA_PATH, help="Path to locomo10.json")
+ parser.add_argument("--output", default=DEFAULT_OUTPUT_PATH, help="Path to output CSV")
+ parser.add_argument(
+ "--sample",
+ default=None,
+ help="Sample index (int) or sample_id (e.g. conv-26). Default: all.",
+ )
+ parser.add_argument(
+ "--question-index",
+ type=int,
+ default=None,
+ help="Single question index (0-based) within the sample.",
+ )
+ parser.add_argument("--count", type=int, default=None, help="Max QA items to process.")
+ parser.add_argument(
+ "--no-skip-adversarial",
+ dest="skip_adversarial",
+ action="store_false",
+ default=True,
+ help="Include category-5 adversarial questions (skipped by default).",
+ )
+ parser.add_argument("--threads", type=int, default=10, help="Concurrent threads (default: 10)")
+
+ # OpenClaw
+ parser.add_argument(
+ "--openclaw-url",
+ default=DEFAULT_OPENCLAW_URL,
+ help=f"OpenClaw gateway URL (default: {DEFAULT_OPENCLAW_URL})",
+ )
+ parser.add_argument(
+ "--openclaw-token",
+ default=None,
+ help="OpenClaw auth token (or OPENCLAW_GATEWAY_TOKEN env var)",
+ )
+ # Judge
+ parser.add_argument(
+ "--judge",
+ action="store_true",
+ default=False,
+ help="Auto-judge each response right after answering.",
+ )
+ parser.add_argument(
+ "--judge-only",
+ action="store_true",
+ default=False,
+ help="Skip QA; only grade ungraded responses in the existing --output CSV.",
+ )
+ parser.add_argument(
+ "--judge-base-url",
+ default="https://ark.cn-beijing.volces.com/api/v3",
+ help="OpenAI-compatible API base URL for judge (default: Volcengine ARK)",
+ )
+ parser.add_argument(
+ "--judge-token",
+ default=None,
+ help="API token for judge (or ARK_API_KEY / OPENAI_API_KEY env var)",
+ )
+ parser.add_argument(
+ "--judge-model",
+ default="doubao-seed-2-0-pro-260215",
+ help="Judge model (default: doubao-seed-2-0-pro-260215)",
+ )
+
+ args = parser.parse_args()
+
+ if args.judge_only:
+ run_judge_only(args)
+ else:
+ run_qa(args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/locomo/mem0/ingest.py b/benchmark/locomo/mem0/ingest.py
new file mode 100644
index 000000000..406d82bb0
--- /dev/null
+++ b/benchmark/locomo/mem0/ingest.py
@@ -0,0 +1,453 @@
+"""
+Ingest LoCoMo conversations into mem0.
+
+Each sample gets an isolated mem0 namespace keyed by sample_id (e.g. "conv-26").
+speaker_a → "user" role, speaker_b → "assistant" role (following memorybench convention).
+
+Usage:
+ # Ingest all samples
+ python ingest.py
+
+ # Ingest a specific sample
+ python ingest.py --sample conv-26
+
+ # Ingest specific sessions
+ python ingest.py --sample conv-26 --sessions 1-4
+
+ # Force re-ingest even if already done
+ python ingest.py --force-ingest
+
+ # Set mem0 API key via env or flag
+ MEM0_API_KEY=xxx python ingest.py
+ python ingest.py --api-key xxx
+"""
+
+import argparse
+import asyncio
+import json
+import os
+import sys
+import time
+from pathlib import Path
+from typing import Any, Optional
+
+import requests
+from dotenv import load_dotenv
+load_dotenv(Path.home() / ".openviking_benchmark_env")
+
+try:
+ from mem0 import MemoryClient
+except ImportError:
+ print("Error: mem0 package not installed. Run: pip install mem0ai", file=sys.stderr)
+ sys.exit(1)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+SCRIPT_DIR = Path(__file__).parent.resolve()
+DEFAULT_DATA_PATH = str(SCRIPT_DIR / ".." / "data" / "locomo10.json")
+DEFAULT_RECORD_PATH = str(SCRIPT_DIR / "result" / ".ingest_record.json")
+DEFAULT_LOG_PATH = str(SCRIPT_DIR / "result" / "ingest_errors.log")
+
+MEM0_API_URL = "https://api.mem0.ai"
+
+# Must match the userId format used by openclaw-mem0 plugin:
+# effectiveUserId(sample_id, "agent:locomo-mem0:eval") = "{sample_id}:agent:locomo-mem0"
+
+# Same custom instructions as memorybench mem0 provider
+CUSTOM_INSTRUCTIONS = """Extract memories from group chat conversations between two people. Each message is prefixed with the speaker's name in brackets (e.g. [Alice]: text).
+
+Guidelines:
+1. Always include the speaker's name in the memory, never use generic terms like "user"
+2. Extract memories for both speakers equally
+3. Each memory should be self-contained with full context: who, what, when
+4. Include specific details: dates, places, names of activities, emotional states
+5. Cover all meaningful topics: life events, plans, hobbies, relationships, opinions"""
+
+
+# ---------------------------------------------------------------------------
+# LoCoMo data loading
+# ---------------------------------------------------------------------------
+
+def load_locomo_data(path: str, sample_id: Optional[str] = None) -> list[dict]:
+ """Load LoCoMo JSON and optionally filter to one sample by sample_id or numeric index."""
+ with open(path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ if sample_id is not None:
+ # Try numeric index first
+ try:
+ idx = int(sample_id)
+ if idx < 0 or idx >= len(data):
+ raise ValueError(f"Sample index {idx} out of range (0-{len(data) - 1})")
+ return [data[idx]]
+ except ValueError:
+ pass
+ # Try matching sample_id string
+ matched = [s for s in data if s.get("sample_id") == sample_id]
+ if not matched:
+ raise ValueError(f"sample_id '{sample_id}' not found")
+ return matched
+
+ return data
+
+
+def parse_session_range(s: str) -> tuple[int, int]:
+ """Parse '1-4' or '3' into (lo, hi) inclusive tuple."""
+ if "-" in s:
+ lo, hi = s.split("-", 1)
+ return int(lo), int(hi)
+ n = int(s)
+ return n, n
+
+
+def build_session_messages(
+ item: dict,
+ session_range: Optional[tuple[int, int]] = None,
+) -> list[dict]:
+ """
+ Extract sessions from a LoCoMo sample.
+
+ Returns list of dicts with keys:
+ - messages: list of {role, content} for mem0
+ - meta: session metadata
+ """
+ conv = item["conversation"]
+ speaker_a = conv["speaker_a"]
+ speaker_b = conv["speaker_b"]
+
+ session_keys = sorted(
+ [k for k in conv if k.startswith("session_") and not k.endswith("_date_time")],
+ key=lambda k: int(k.split("_")[1]),
+ )
+
+ sessions = []
+ for sk in session_keys:
+ sess_num = int(sk.split("_")[1])
+ if session_range:
+ lo, hi = session_range
+ if sess_num < lo or sess_num > hi:
+ continue
+
+ raw_messages = conv[sk]
+ if not isinstance(raw_messages, list) or not raw_messages:
+ continue
+
+ dt_key = f"{sk}_date_time"
+ date_time = conv.get(dt_key, "")
+
+ messages = []
+ if date_time:
+ messages.append({"role": "user", "content": f"[System]: This conversation took place on {date_time}."})
+ for msg in raw_messages:
+ speaker = msg.get("speaker", "")
+ text = msg.get("text", "")
+ messages.append({"role": "user", "content": f"[{speaker}]: {text}"})
+
+ sessions.append(
+ {
+ "messages": messages,
+ "meta": {
+ "sample_id": item["sample_id"],
+ "session_key": sk,
+ "date_time": date_time,
+ "speaker_a": speaker_a,
+ "speaker_b": speaker_b,
+ },
+ }
+ )
+
+ return sessions
+
+
+# ---------------------------------------------------------------------------
+# Ingest record (progress tracking)
+# ---------------------------------------------------------------------------
+
+def load_ingest_record(path: str) -> dict:
+ try:
+ with open(path, "r", encoding="utf-8") as f:
+ return json.load(f)
+ except (FileNotFoundError, json.JSONDecodeError):
+ return {}
+
+
+def save_ingest_record(record: dict, path: str) -> None:
+ Path(path).parent.mkdir(parents=True, exist_ok=True)
+ with open(path, "w", encoding="utf-8") as f:
+ json.dump(record, f, indent=2, ensure_ascii=False)
+
+
+def is_already_ingested(sample_id: str, session_key: str, record: dict) -> bool:
+ key = f"mem0:{sample_id}:{session_key}"
+ return key in record and record[key].get("success", False)
+
+
+def mark_ingested(
+ sample_id: str,
+ session_key: str,
+ record: dict,
+ event_ids: list[str],
+ meta: Optional[dict] = None,
+) -> None:
+ key = f"mem0:{sample_id}:{session_key}"
+ record[key] = {
+ "success": True,
+ "timestamp": int(time.time()),
+ "event_ids": event_ids,
+ "meta": meta or {},
+ }
+
+
+def write_error_log(path: str, sample_id: str, session_key: str, error: str) -> None:
+ Path(path).parent.mkdir(parents=True, exist_ok=True)
+ ts = time.strftime("%Y-%m-%d %H:%M:%S")
+ with open(path, "a", encoding="utf-8") as f:
+ f.write(f"[{ts}] ERROR [{sample_id}/{session_key}]: {error}\n")
+
+
+# ---------------------------------------------------------------------------
+# mem0 event polling
+# ---------------------------------------------------------------------------
+
+def poll_events(api_key: str, event_ids: list[str], timeout_sec: int = 600) -> dict[str, str]:
+ """
+ Poll mem0 event statuses until all complete or timeout.
+ Returns {event_id: final_status}.
+ """
+ pending = set(event_ids)
+ statuses: dict[str, str] = {}
+ backoff = 0.5
+ start = time.time()
+
+ while pending:
+ if time.time() - start > timeout_sec:
+ for eid in pending:
+ statuses[eid] = "TIMEOUT"
+ break
+
+ done_this_round = set()
+ for event_id in list(pending):
+ try:
+ resp = requests.get(
+ f"{MEM0_API_URL}/v1/event/{event_id}/",
+ headers={"Authorization": f"Token {api_key}"},
+ timeout=30,
+ )
+ if resp.ok:
+ status = resp.json().get("status", "UNKNOWN")
+ if status in ("SUCCEEDED", "FAILED"):
+ statuses[event_id] = status
+ done_this_round.add(event_id)
+ except Exception as e:
+ print(f" [poll] Error checking event {event_id}: {e}", file=sys.stderr)
+
+ pending -= done_this_round
+ if pending:
+ time.sleep(backoff)
+ backoff = min(backoff * 1.5, 5.0)
+
+ return statuses
+
+
+# ---------------------------------------------------------------------------
+# Core ingest logic
+# ---------------------------------------------------------------------------
+
+def ingest_session(
+ client: MemoryClient,
+ api_key: str,
+ messages: list[dict],
+ user_id: str,
+ meta: dict,
+ wait_for_indexing: bool = True,
+) -> list[str]:
+ """
+ Add one session's messages to mem0.
+ Returns list of event_ids (may be empty if async_mode=False or API returns none).
+ """
+ add_kwargs: dict[str, Any] = {
+ "user_id": user_id,
+ "version": "v2",
+ "enable_graph": False,
+ "async_mode": False,
+ "metadata": {
+ "session_key": meta.get("session_key", ""),
+ "date_time": meta.get("date_time", ""),
+ "speaker_a": meta.get("speaker_a", ""),
+ "speaker_b": meta.get("speaker_b", ""),
+ },
+ }
+
+ result = client.add(messages, **add_kwargs)
+
+ event_ids: list[str] = []
+ if isinstance(result, list):
+ for item in result:
+ if isinstance(item, dict) and item.get("event_id"):
+ event_ids.append(item["event_id"])
+ elif isinstance(result, dict) and result.get("event_id"):
+ event_ids.append(result["event_id"])
+
+ if wait_for_indexing and event_ids:
+ statuses = poll_events(api_key, event_ids)
+ failed = [eid for eid, s in statuses.items() if s != "SUCCEEDED"]
+ if failed:
+ raise RuntimeError(f"Events failed/timed-out: {failed}")
+
+ return event_ids
+
+
+def run_ingest(args: argparse.Namespace) -> None:
+ api_key = args.api_key or os.environ.get("MEM0_API_KEY", "")
+ if not api_key:
+ print("Error: mem0 API key required (--api-key or MEM0_API_KEY env var)", file=sys.stderr)
+ sys.exit(1)
+
+ client = MemoryClient(api_key=api_key)
+
+ # Set project-level custom instructions once
+ try:
+ client.update_project(custom_instructions=CUSTOM_INSTRUCTIONS)
+ print("[INFO] Updated mem0 project custom instructions", file=sys.stderr)
+ except Exception as e:
+ print(f"[WARN] Could not set custom instructions: {e}", file=sys.stderr)
+
+ session_range = parse_session_range(args.sessions) if args.sessions else None
+
+ # Load / clear ingest record
+ if args.clear_ingest_record:
+ ingest_record: dict = {}
+ save_ingest_record(ingest_record, args.record)
+ print("[INFO] Cleared existing ingest records", file=sys.stderr)
+ else:
+ ingest_record = load_ingest_record(args.record)
+
+ samples = load_locomo_data(args.input, args.sample)
+ if args.limit:
+ samples = samples[: args.limit]
+ print(f"[INFO] Loaded {len(samples)} sample(s)", file=sys.stderr)
+
+ total_sessions = 0
+ success_count = 0
+ skip_count = 0
+ error_count = 0
+
+ for item in samples:
+ sample_id: str = item["sample_id"]
+ sessions = build_session_messages(item, session_range)
+ print(f"\n=== Sample {sample_id} ({len(sessions)} sessions) ===", file=sys.stderr)
+
+ for sess in sessions:
+ meta = sess["meta"]
+ session_key = meta["session_key"]
+ label = f"{session_key} ({meta['date_time']})"
+ total_sessions += 1
+
+ if not args.force_ingest and is_already_ingested(sample_id, session_key, ingest_record):
+ print(f" [{label}] SKIP (already ingested)", file=sys.stderr)
+ skip_count += 1
+ continue
+
+ print(f" [{label}] ingesting {len(sess['messages'])} messages ...", file=sys.stderr)
+ t0 = time.time()
+
+ try:
+ event_ids = ingest_session(
+ client,
+ api_key,
+ sess["messages"],
+ user_id=sample_id,
+ meta=meta,
+ wait_for_indexing=args.wait_indexing,
+ )
+ elapsed = time.time() - t0
+ mark_ingested(sample_id, session_key, ingest_record, event_ids, meta)
+ save_ingest_record(ingest_record, args.record)
+ print(
+ f" [{label}] OK events={len(event_ids)} {elapsed:.1f}s",
+ file=sys.stderr,
+ )
+ success_count += 1
+ except Exception as e:
+ elapsed = time.time() - t0
+ print(f" [{label}] ERROR: {e} {elapsed:.1f}s", file=sys.stderr)
+ write_error_log(args.error_log, sample_id, session_key, str(e))
+ error_count += 1
+
+ print(f"\n=== Ingest summary ===", file=sys.stderr)
+ print(f" Total sessions: {total_sessions}", file=sys.stderr)
+ print(f" Succeeded: {success_count}", file=sys.stderr)
+ print(f" Skipped: {skip_count}", file=sys.stderr)
+ print(f" Failed: {error_count}", file=sys.stderr)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+ parser = argparse.ArgumentParser(description="Ingest LoCoMo conversations into mem0")
+ parser.add_argument(
+ "--input",
+ default=DEFAULT_DATA_PATH,
+ help="Path to locomo10.json (default: ../data/locomo10.json)",
+ )
+ parser.add_argument(
+ "--api-key",
+ default=None,
+ help="mem0 API key (or set MEM0_API_KEY env var)",
+ )
+ parser.add_argument(
+ "--sample",
+ default=None,
+ help="Sample index (0-based int) or sample_id string (e.g. conv-26). Default: all.",
+ )
+ parser.add_argument(
+ "--limit",
+ type=int,
+ default=None,
+ help="Max number of samples to ingest. Default: all.",
+ )
+ parser.add_argument(
+ "--sessions",
+ default=None,
+ help="Session range, e.g. '1-4' or '3'. Default: all.",
+ )
+ parser.add_argument(
+ "--record",
+ default=DEFAULT_RECORD_PATH,
+ help=f"Path to ingest progress record (default: {DEFAULT_RECORD_PATH})",
+ )
+ parser.add_argument(
+ "--error-log",
+ default=DEFAULT_LOG_PATH,
+ help=f"Path to error log (default: {DEFAULT_LOG_PATH})",
+ )
+ parser.add_argument(
+ "--force-ingest",
+ action="store_true",
+ default=False,
+ help="Re-ingest even if already recorded as done",
+ )
+ parser.add_argument(
+ "--clear-ingest-record",
+ action="store_true",
+ default=False,
+ help="Clear all existing ingest records before running",
+ )
+ parser.add_argument(
+ "--no-wait-indexing",
+ dest="wait_indexing",
+ action="store_false",
+ default=True,
+ help="Don't wait for mem0 async indexing to complete (faster but no status check)",
+ )
+
+ args = parser.parse_args()
+ run_ingest(args)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/locomo/openclaw/eval.py b/benchmark/locomo/openclaw/eval.py
index 744d441eb..4bc323da9 100644
--- a/benchmark/locomo/openclaw/eval.py
+++ b/benchmark/locomo/openclaw/eval.py
@@ -22,15 +22,20 @@
import os
import sys
import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+from pathlib import Path
+from threading import Lock
import requests
# Configuration constants
DEFAULT_BASE_URL = "http://127.0.0.1:18789"
-DEFAULT_SESSION_KEY = "eval-test-2"
DEFAULT_AGENT_ID = "locomo-eval"
DEFAULT_INGEST_RECORD_PATH = ".ingest_record.json"
-DEFAULT_OV_COMMAND = ["ov", "add-memory"]
+
+# CSV write lock for thread safety
+csv_lock = Lock()
# ---------------------------------------------------------------------------
@@ -180,6 +185,56 @@ def build_session_messages(
return sessions
+# ---------------------------------------------------------------------------
+# Question time helpers
+# ---------------------------------------------------------------------------
+
+def parse_locomo_datetime(date_str: str) -> datetime | None:
+ """解析 LoCoMo 时间格式,如 '1:56 pm on 8 May, 2023'"""
+ try:
+ # 移除时间部分,只保留日期 "8 May, 2023"
+ if " on " in date_str:
+ date_part = date_str.split(" on ")[-1]
+ return datetime.strptime(date_part.strip(), "%d %B, %Y")
+ except ValueError:
+ pass
+ return None
+
+
+def get_sample_question_time(sample: dict) -> str | None:
+ """从 sample 的 conversation 中提取最后一个有内容 session 的时间,返回 ISO 格式日期"""
+ conversation = sample.get("conversation", {})
+
+ # 找所有 session_N 字段(非 date_time)
+ session_keys = [
+ k for k in conversation.keys() if k.startswith("session_") and "date_time" not in k
+ ]
+ if not session_keys:
+ return None
+
+ # 按 session 编号排序,找到最后一个有内容的
+ def get_session_num(key):
+ try:
+ return int(key.replace("session_", ""))
+ except ValueError:
+ return 0
+
+ session_keys.sort(key=get_session_num, reverse=True)
+
+ for session_key in session_keys:
+ if conversation.get(session_key): # 有内容
+ # 找到对应的 date_time
+ session_num = get_session_num(session_key)
+ dt_key = f"session_{session_num}_date_time"
+ date_str = conversation.get(dt_key)
+ if date_str:
+ dt = parse_locomo_datetime(date_str)
+ if dt:
+ return dt.strftime("%Y-%m-%d")
+
+ return None
+
+
# ---------------------------------------------------------------------------
# Ingest record helpers (avoid duplicate ingestion)
# ---------------------------------------------------------------------------
@@ -260,6 +315,51 @@ def extract_response_text(response_json: dict) -> str:
return f"[ERROR: could not extract text from response: {response_json}]"
+def get_session_id_from_key(session_key: str, user: str, agent_id: str = "main") -> str | None:
+ """Search all agents' sessions.json files for the session_key and return sessionFile path.
+ Returns the full path to the session JSONL file if found, None otherwise.
+ """
+ agents_base_dir = os.path.expanduser("~/.openclaw/agents")
+
+ if not os.path.exists(agents_base_dir):
+ print(f" [session] Agents directory not found: {agents_base_dir}", file=sys.stderr)
+ return None
+
+ # Iterate through all agent directories
+ for agent_name in os.listdir(agents_base_dir):
+ agent_dir = os.path.join(agents_base_dir, agent_name)
+ if not os.path.isdir(agent_dir):
+ continue
+
+ sessions_dir = os.path.join(agent_dir, "sessions")
+ sessions_file = os.path.join(sessions_dir, "sessions.json")
+
+ if not os.path.exists(sessions_file):
+ continue
+
+ try:
+ with open(sessions_file, "r") as f:
+ data = json.load(f)
+
+ # Search for the session_key in this sessions.json
+ for key, value in data.items():
+ if session_key in key and isinstance(value, dict):
+ session_file = value.get("sessionFile")
+ if session_file:
+ print(f" [session] Found sessionFile in agent '{agent_name}': {session_file}", file=sys.stderr)
+ return session_file
+
+ except json.JSONDecodeError as e:
+ print(f" [session] Error parsing {sessions_file}: {e}", file=sys.stderr)
+ continue
+ except IOError as e:
+ print(f" [session] Error reading {sessions_file}: {e}", file=sys.stderr)
+ continue
+
+ print(f" [session] session_key '{session_key}' not found in any agent's sessions.json", file=sys.stderr)
+ return None
+
+
def get_session_id(user: str, agent_id: str = "main") -> str | None:
"""Read the current session ID for the given user from sessions.json."""
sessions_file = os.path.expanduser(f"~/.openclaw/agents/{agent_id}/sessions/sessions.json")
@@ -279,46 +379,85 @@ def get_session_id(user: str, agent_id: str = "main") -> str | None:
return None
-def reset_session(session_id: str, agent_id: str = "main") -> str | None:
- """Archive the session .jsonl file by renaming it with a timestamp suffix.
+def reset_session(session_path: str, agent_id: str = "main") -> str | None:
+ """Rename the session .jsonl file with a timestamp suffix.
+ Accepts either a session_id or a full path to the session file.
Returns the new filename if successful, None otherwise.
"""
- sessions_dir = os.path.expanduser(f"~/.openclaw/agents/{agent_id}/sessions")
- src = os.path.join(sessions_dir, f"{session_id}.jsonl")
- dst = f"{src}.{int(time.time())}"
+ # Check if session_path is already a full path
+ if os.path.isabs(session_path) and os.path.exists(session_path):
+ src = session_path
+ else:
+ # Treat as session_id
+ sessions_dir = os.path.expanduser(f"~/.openclaw/agents/{agent_id}/sessions")
+ src = os.path.join(sessions_dir, f"{session_path}.jsonl")
+
+ if not os.path.exists(src):
+ print(f" [backup] Session file not found: {src}", file=sys.stderr)
+ return None
+
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
+ dst = f"{src}.{timestamp}"
try:
os.rename(src, dst)
new_filename = os.path.basename(dst)
- print(f" [reset] archived {session_id}.jsonl -> {new_filename}", file=sys.stderr)
+ print(f" [backup] renamed {os.path.basename(src)} -> {new_filename}", file=sys.stderr)
return new_filename
- except FileNotFoundError:
- print(f" [reset] Session file not found: {src}", file=sys.stderr)
- return None
except IOError as e:
- print(f" [reset] could not archive session file: {e}", file=sys.stderr)
+ print(f" [backup] could not rename session file: {e}", file=sys.stderr)
return None
-def viking_ingest(msg: str) -> None:
- """Save a message to OpenViking via `ov add-memory`."""
- import subprocess
- result = subprocess.run(
- DEFAULT_OV_COMMAND + [msg],
- capture_output=True,
- text=True,
- )
- if result.returncode != 0:
- raise RuntimeError(result.stderr.strip() or f"ov exited with code {result.returncode}")
+def calculate_usage_from_jsonl(jsonl_filename: str, agent_id: str = "main") -> dict:
+ """Calculate token usage from archived JSONL file."""
+ # Check if jsonl_filename is already a full path
+ if os.path.isabs(jsonl_filename) and os.path.exists(jsonl_filename):
+ jsonl_full_path = jsonl_filename
+ else:
+ sessions_dir = os.path.expanduser(f"~/.openclaw/agents/{agent_id}/sessions")
+ jsonl_full_path = os.path.join(sessions_dir, jsonl_filename)
+
+ usage = {
+ "input_tokens": 0,
+ "output_tokens": 0,
+ "cacheRead": 0,
+ "cacheWrite": 0,
+ "total_tokens": 0,
+ }
+
+ if not os.path.exists(jsonl_full_path):
+ return usage
+
+ try:
+ with open(jsonl_full_path, "r", encoding="utf-8") as f:
+ for line in f:
+ if not line.strip():
+ continue
+ entry = json.loads(line)
+ if entry.get("type") == "message" and entry.get("message", {}).get("role") == "assistant":
+ entry_usage = entry.get("message", {}).get("usage", {})
+ usage["input_tokens"] += entry_usage.get("input", 0)
+ usage["output_tokens"] += entry_usage.get("output", 0)
+ usage["cacheRead"] += entry_usage.get("cacheRead", 0)
+ usage["cacheWrite"] += entry_usage.get("cacheWrite", 0)
+ usage["total_tokens"] += entry_usage.get("totalTokens", 0)
+ except json.JSONDecodeError as e:
+ print(f" [usage] Error parsing JSONL file: {e}", file=sys.stderr)
+ except IOError as e:
+ print(f" [usage] Error reading JSONL file: {e}", file=sys.stderr)
+
+ return usage
def send_message_with_retry(
- base_url: str, token: str, user: str, message: str, retries: int = 2, agent_id: str = DEFAULT_AGENT_ID
+ base_url: str, token: str, user: str, message: str, retries: int = 2,
+ agent_id: str = DEFAULT_AGENT_ID, session_key: str | None = None
) -> tuple[str, dict]:
"""Call send_message with up to `retries` retries on failure."""
last_exc = None
for attempt in range(retries + 1):
try:
- return send_message(base_url, token, user, message, agent_id)
+ return send_message(base_url, token, user, message, agent_id, session_key)
except Exception as e:
last_exc = e
if attempt < retries:
@@ -327,7 +466,8 @@ def send_message_with_retry(
def send_message(
- base_url: str, token: str, user: str, message: str, agent_id: str = DEFAULT_AGENT_ID
+ base_url: str, token: str, user: str, message: str,
+ agent_id: str = DEFAULT_AGENT_ID, session_key: str | None = None
) -> tuple[str, dict]:
"""Send a single message to the OpenClaw responses API.
@@ -337,9 +477,10 @@ def send_message(
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {token}",
- "X-OpenClaw-Agent-ID": agent_id,
- "X-OpenClaw-Session-Key": DEFAULT_SESSION_KEY
+ "X-OpenClaw-Agent-ID": agent_id
}
+ if session_key:
+ headers["X-OpenClaw-Session-Key"] = session_key
payload = {
"model": "openclaw",
"input": message,
@@ -413,62 +554,36 @@ def run_ingest(
preview = msg.replace("\n", " | ")[:80]
print(f" [{label}] {preview}...", file=sys.stderr)
- if args.viking:
- try:
- viking_ingest(msg)
- print(f" -> [viking] saved", file=sys.stderr)
- results.append({
- "sample_id": sample_id,
- "session": meta["session_key"],
- "user": user_key,
- "reply": "[viking] saved",
- "usage": {},
- })
- # Mark as successfully ingested
- mark_ingested(args.agent_id, user_key, sample_id, meta['session_key'], ingest_record, {
- "mode": "viking",
- "date_time": meta['date_time']
- })
- except Exception as e:
- print(f" -> [ERROR] {e}", file=sys.stderr)
- results.append({
- "sample_id": sample_id,
- "session": meta["session_key"],
- "user": user_key,
- "reply": f"[ERROR] {e}",
- "usage": {},
- })
- else:
- try:
- reply, usage = send_message(args.base_url, args.token, user_key, msg, args.agent_id)
- print(f" -> {reply[:80]}{'...' if len(reply) > 80 else ''}", file=sys.stderr)
- results.append({
- "sample_id": sample_id,
- "session": meta["session_key"],
- "user": user_key,
- "reply": reply,
- "usage": usage,
- })
- # Mark as successfully ingested
- mark_ingested(args.agent_id, user_key, sample_id, meta['session_key'], ingest_record, {
- "mode": "openclaw",
- "date_time": meta['date_time'],
- "usage": usage
- })
- except Exception as e:
- print(f" -> [ERROR] {e}", file=sys.stderr)
- results.append({
- "sample_id": sample_id,
- "session": meta["session_key"],
- "user": user_key,
- "reply": f"[ERROR] {e}",
- "usage": {},
- })
-
- if session_id is None:
- session_id = get_session_id(user_key, args.agent_id)
- if session_id:
- reset_session(session_id, args.agent_id)
+ try:
+ reply, usage = send_message(args.base_url, args.token, user_key, msg, args.agent_id)
+ print(f" -> {reply[:80]}{'...' if len(reply) > 80 else ''}", file=sys.stderr)
+ results.append({
+ "sample_id": sample_id,
+ "session": meta["session_key"],
+ "user": user_key,
+ "reply": reply,
+ "usage": usage,
+ })
+ # Mark as successfully ingested
+ mark_ingested(args.agent_id, user_key, sample_id, meta['session_key'], ingest_record, {
+ "mode": "openclaw",
+ "date_time": meta['date_time'],
+ "usage": usage
+ })
+ except Exception as e:
+ print(f" -> [ERROR] {e}", file=sys.stderr)
+ results.append({
+ "sample_id": sample_id,
+ "session": meta["session_key"],
+ "user": user_key,
+ "reply": f"[ERROR] {e}",
+ "usage": {},
+ })
+
+ if session_id is None:
+ session_id = get_session_id(user_key, args.agent_id)
+ if session_id:
+ reset_session(session_id, args.agent_id)
if args.output:
try:
@@ -544,6 +659,89 @@ def run_ingest(
# QA: run QA questions and compare with expected answers
# ---------------------------------------------------------------------------
+def process_single_question(
+ sample_id: str,
+ sample_idx: int,
+ original_qi: int,
+ qa: dict,
+ args: argparse.Namespace,
+ csv_path: str,
+ question_time: str | None = None,
+) -> dict:
+ """Process a single QA question. Returns the record."""
+ question = qa["question"]
+ expected = str(qa["answer"])
+ category = qa.get("category", "")
+ evidence = qa.get("evidence", [])
+
+ # Generate unique session_key based on sample_id + question_index
+ session_key = f"qa-{sample_id}-q{original_qi}"
+ user_key = args.user or f"eval-{sample_idx}"
+
+ print(f" [{sample_idx}] Q{original_qi}: {question[:60]}{'...' if len(question) > 60 else ''}", file=sys.stderr)
+ # 如果有 question_time,注入到 prompt 中
+ if question_time:
+ input_msg = f"Current date: {question_time}. Answer the question directly: {question}"
+ else:
+ input_msg = f"Answer the question directly: {question}"
+
+ jsonl_filename = ""
+ try:
+ response, api_usage = send_message_with_retry(
+ args.base_url, args.token, sample_id, input_msg, 2, args.agent_id, session_key
+ )
+ print(f" [{sample_idx}] A: {response[:60]}{'...' if len(response) > 60 else ''}", file=sys.stderr)
+
+ # Get sessionFile path from sessions.json using session_key
+ session_file_path = get_session_id_from_key(session_key, user_key, args.agent_id)
+ jsonl_filename = ""
+
+ # Archive the session file if we found it
+ if session_file_path:
+ jsonl_filename = reset_session(session_file_path, args.agent_id)
+
+ # Calculate usage from JSONL file if available, otherwise use API usage
+ if jsonl_filename and session_file_path:
+ # Use the directory from session_file_path and the archived filename
+ usage = calculate_usage_from_jsonl(os.path.join(os.path.dirname(session_file_path), jsonl_filename), args.agent_id)
+ print(f" [{sample_idx}] tokens (from JSONL): in={usage['input_tokens']} out={usage['output_tokens']} cacheRead={usage['cacheRead']} cacheWrite={usage['cacheWrite']} total={usage['total_tokens']}", file=sys.stderr)
+ else:
+ usage = {
+ "input_tokens": api_usage.get("input_tokens", 0),
+ "output_tokens": api_usage.get("output_tokens", 0),
+ "cacheRead": api_usage.get("cacheRead", 0),
+ "cacheWrite": api_usage.get("cacheWrite", 0),
+ "total_tokens": api_usage.get("total_tokens", 0),
+ }
+ print(f" [{sample_idx}] tokens (from API): in={usage['input_tokens']} out={usage['output_tokens']} cacheRead={usage['cacheRead']} cacheWrite={usage['cacheWrite']} total={usage['total_tokens']}", file=sys.stderr)
+
+ except Exception as e:
+ response = f"[ERROR] {e}"
+ usage = {}
+ jsonl_filename = ""
+ print(f" [{sample_idx}] A: {response}", file=sys.stderr)
+
+ record = {
+ "sample_id": sample_id,
+ "sample_idx": sample_idx,
+ "qi": original_qi,
+ "question": question,
+ "expected": expected,
+ "response": response,
+ "category": category,
+ "evidence": evidence,
+ "usage": usage,
+ "jsonl_filename": jsonl_filename,
+ }
+
+ # Save to CSV with lock for thread safety
+ with csv_lock:
+ save_record_to_csv(csv_path, record)
+ print(f" [{sample_idx}] Saved to CSV: Q{original_qi}", file=sys.stderr)
+
+ return record
+
+
def run_sample_qa(
item: dict,
sample_idx: int,
@@ -551,9 +749,10 @@ def run_sample_qa(
executed_records: set,
csv_path: str,
) -> tuple[list[dict], dict]:
- """Process QA for a single sample. Returns (records, sample_usage)."""
+ """Process QA for a single sample with concurrent question execution. Returns (records, sample_usage)."""
sample_id = item["sample_id"]
user_key = args.user or f"eval-{sample_idx}"
+ question_time = get_sample_question_time(item)
qas = [q for q in item.get("qa", []) if str(q.get("category", "")) != "5"]
if args.count is not None:
qas = qas[:args.count]
@@ -570,133 +769,37 @@ def run_sample_qa(
if not qas:
print(f"\n=== Sample {sample_id} [{sample_idx}] (user={user_key}) ===", file=sys.stderr)
print(f" All QA questions already executed, skipping sample.", file=sys.stderr)
- return [], {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
-
- jsonl_path = f"{args.output}.{sample_idx}.jsonl" if args.output else None
-
- sample_usage = {"input_tokens": 0, "output_tokens": 0, "cacheRead": 0, "cacheWrite": 0, "total_tokens": 0}
- records = []
- session_id = None
+ return [], {"input_tokens": 0, "output_tokens": 0, "cacheRead": 0, "cacheWrite": 0, "total_tokens": 0}
print(f"\n=== Sample {sample_id} [{sample_idx}] (user={user_key}) ===", file=sys.stderr)
- print(f" Running {len(qas)} QA question(s)...", file=sys.stderr)
+ if question_time:
+ print(f" Question time context: {question_time}", file=sys.stderr)
+ print(f" Running {len(qas)} QA question(s) with max {args.parallel} workers...", file=sys.stderr)
- jsonl_file = None
- if jsonl_path:
- try:
- jsonl_file = open(jsonl_path, "w", encoding="utf-8")
- except IOError as e:
- print(f"Warning: Could not open JSONL file {jsonl_path}: {e}", file=sys.stderr)
+ records = []
+ sample_usage = {"input_tokens": 0, "output_tokens": 0, "cacheRead": 0, "cacheWrite": 0, "total_tokens": 0}
- try:
+ # Use ThreadPoolExecutor for concurrent question execution
+ with ThreadPoolExecutor(max_workers=args.parallel) as executor:
+ futures = []
for original_qi, qa in qas:
- question = qa["question"]
- expected = str(qa["answer"])
- category = qa.get("category", "")
- evidence = qa.get("evidence", [])
-
- print(f" [{sample_idx}] Q{original_qi}: {question[:60]}{'...' if len(question) > 60 else ''}", file=sys.stderr)
-
- jsonl_filename = ""
+ future = executor.submit(
+ process_single_question,
+ sample_id, sample_idx, original_qi, qa, args, csv_path, question_time
+ )
+ futures.append(future)
+
+ # Collect results
+ for future in as_completed(futures):
try:
- response, api_usage = send_message_with_retry(
- args.base_url, args.token, user_key, question, 2, args.agent_id,
- )
- print(f" [{sample_idx}] A: {response[:60]}{'...' if len(response) > 60 else ''}", file=sys.stderr)
-
- # Use provided session_id if available, otherwise get from system
- if args.session_id:
- session_id = args.session_id
- elif session_id is None:
- session_id = get_session_id(user_key, args.agent_id)
-
- # Reset session and get archived filename
- if session_id:
- jsonl_filename = reset_session(session_id, args.agent_id)
-
- # Use API usage by default
- usage = api_usage
- # Calculate usage from JSONL file if session_id is provided and we have the archived file
- if args.session_id and jsonl_filename:
- # Parse the archived JSONL file to calculate usage
- sessions_dir = os.path.expanduser(f"~/.openclaw/agents/{args.agent_id}/sessions")
- jsonl_full_path = os.path.join(sessions_dir, jsonl_filename)
- if os.path.exists(jsonl_full_path):
- total_input = 0
- total_output = 0
- total_cache_read = 0
- total_cache_write = 0
- total_total_tokens = 0
- try:
- with open(jsonl_full_path, "r", encoding="utf-8") as f:
- for line in f:
- if not line.strip():
- continue
- entry = json.loads(line)
- if entry.get("type") == "message" and entry.get("message", {}).get("role") == "assistant":
- entry_usage = entry.get("message", {}).get("usage", {})
- total_input += entry_usage.get("input", 0)
- total_output += entry_usage.get("output", 0)
- total_cache_read += entry_usage.get("cacheRead", 0)
- total_cache_write += entry_usage.get("cacheWrite", 0)
- total_total_tokens += entry_usage.get("totalTokens", 0)
- usage = {
- "input_tokens": total_input,
- "output_tokens": total_output,
- "cacheRead": total_cache_read,
- "cacheWrite": total_cache_write,
- "total_tokens": total_total_tokens,
- }
- print(f" [{sample_idx}] tokens (from JSONL): in={total_input} out={total_output} cacheRead={total_cache_read} cacheWrite={total_cache_write} total={total_total_tokens}", file=sys.stderr)
- except json.JSONDecodeError as e:
- print(f" [{sample_idx}] Error parsing JSONL file: {e}, using API usage", file=sys.stderr)
- print(f" [{sample_idx}] tokens (from API): in={usage.get('input_tokens',0)} out={usage.get('output_tokens',0)} cacheRead={usage.get('cacheRead',0)} cacheWrite={usage.get('cacheWrite',0)} total={usage.get('total_tokens',0)}", file=sys.stderr)
- except IOError as e:
- print(f" [{sample_idx}] Error reading JSONL file: {e}, using API usage", file=sys.stderr)
- print(f" [{sample_idx}] tokens (from API): in={usage.get('input_tokens',0)} out={usage.get('output_tokens',0)} cacheRead={usage.get('cacheRead',0)} cacheWrite={usage.get('cacheWrite',0)} total={usage.get('total_tokens',0)}", file=sys.stderr)
- else:
- print(f" [{sample_idx}] JSONL file not found: {jsonl_full_path}, using API usage", file=sys.stderr)
- print(f" [{sample_idx}] tokens (from API): in={usage.get('input_tokens',0)} out={usage.get('output_tokens',0)} cacheRead={usage.get('cacheRead',0)} cacheWrite={usage.get('cacheWrite',0)} total={usage.get('total_tokens',0)}", file=sys.stderr)
- else:
- print(f" [{sample_idx}] tokens (from API): in={usage.get('input_tokens',0)} out={usage.get('output_tokens',0)} cacheRead={usage.get('cacheRead',0)} cacheWrite={usage.get('cacheWrite',0)} total={usage.get('total_tokens',0)}", file=sys.stderr)
-
+ record = future.result()
+ records.append(record)
+ # Accumulate usage
+ usage = record.get("usage", {})
for k in sample_usage:
sample_usage[k] += usage.get(k, 0)
except Exception as e:
- response = f"[ERROR] {e}"
- usage = {}
- jsonl_filename = ""
- print(f" [{sample_idx}] A: {response}", file=sys.stderr)
-
- record = {
- "sample_id": sample_id,
- "sample_idx": sample_idx,
- "qi": original_qi,
- "question": question,
- "expected": expected,
- "response": response,
- "category": category,
- "evidence": evidence,
- "usage": usage,
- "jsonl_filename": jsonl_filename,
- }
- records.append(record)
-
- # Save to CSV immediately after successful execution
- save_record_to_csv(csv_path, record)
- print(f" [{sample_idx}] Saved to CSV: Q{original_qi}", file=sys.stderr)
-
- if jsonl_file:
- try:
- jsonl_file.write(json.dumps(record, ensure_ascii=False) + "\n")
- jsonl_file.flush()
- except IOError as e:
- print(f"Warning: Error writing to JSONL file: {e}", file=sys.stderr)
-
- finally:
- if jsonl_file:
- jsonl_file.close()
- print(f" [{sample_idx}] written to {jsonl_path}", file=sys.stderr)
+ print(f" [{sample_idx}] Error in question task: {e}", file=sys.stderr)
return records, sample_usage
@@ -725,7 +828,7 @@ def save_record_to_csv(csv_path: str, record: dict) -> None:
"sample_id", "sample_idx", "qi", "question", "expected",
"response", "category", "evidence", "input_tokens",
"output_tokens", "cacheRead", "cacheWrite", "total_tokens",
- "timestamp", "jsonl_filename"
+ "timestamp", "jsonl_filename", "result", "reasoning"
]
# Flatten usage fields
@@ -738,6 +841,8 @@ def save_record_to_csv(csv_path: str, record: dict) -> None:
flat_record["total_tokens"] = usage.get("total_tokens", 0)
flat_record["timestamp"] = time.strftime("%Y-%m-%d %H:%M:%S")
flat_record["jsonl_filename"] = flat_record.get("jsonl_filename", "")
+ flat_record["result"] = "" # 默认为空,由 judge.py 填充
+ flat_record["reasoning"] = "" # 默认为空,由 judge.py 填充
try:
with open(csv_path, "a", encoding="utf-8", newline="") as f:
@@ -760,26 +865,20 @@ def run_qa(
print("Error: QA mode only works with LoCoMo JSON files", file=sys.stderr)
sys.exit(1)
+ # Ensure parallel is within reasonable bounds (1-40)
+ args.parallel = max(1, min(40, args.parallel))
+
samples = load_locomo_data(args.input, args.sample)
print(f" user: {args.user or 'eval-{sample_idx}'}", file=sys.stderr)
- print(f" running in single-thread mode", file=sys.stderr)
+ print(f" running with {args.parallel} concurrent workers", file=sys.stderr)
# Load already executed records from CSV
- csv_path = f"{args.output}.csv" if args.output else "qa_results.csv"
+ csv_path = f"{args.output}.csv" if args.output else args.default_csv_path
+ # 确保输出目录存在
+ os.makedirs(os.path.dirname(csv_path), exist_ok=True)
executed_records = load_executed_records(csv_path)
print(f" Loaded {len(executed_records)} already executed records from {csv_path}", file=sys.stderr)
- # Clean up existing session file if session_id is provided
- if args.session_id:
- sessions_dir = os.path.expanduser(f"~/.openclaw/agents/{args.agent_id}/sessions")
- session_file = os.path.join(sessions_dir, f"{args.session_id}.jsonl")
- if os.path.exists(session_file):
- try:
- os.remove(session_file)
- print(f" Cleaned up existing session file: {os.path.basename(session_file)}", file=sys.stderr)
- except Exception as e:
- print(f" Warning: Could not remove existing session file: {e}", file=sys.stderr)
-
results_list = []
for idx, item in enumerate(samples):
result = run_sample_qa(item, idx + 1, args, executed_records, csv_path)
@@ -792,7 +891,31 @@ def run_qa(
print(f"\n total tokens: in={total_usage['input_tokens']} out={total_usage['output_tokens']} total={total_usage['total_tokens']}", file=sys.stderr)
+ # Generate timestamp once for all backups
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
+ import shutil
+
+ # Backup CSV file with timestamp
+ if os.path.exists(csv_path):
+ csv_path_obj = Path(csv_path)
+ backup_csv_path = csv_path_obj.parent / f"{csv_path_obj.stem}_{timestamp}{csv_path_obj.suffix}"
+ try:
+ shutil.copy2(csv_path, backup_csv_path)
+ print(f" CSV backed up to: {backup_csv_path}", file=sys.stderr)
+ except Exception as e:
+ print(f"Warning: Failed to backup CSV file: {e}", file=sys.stderr)
+
if args.output:
+ # Backup output summary file too
+ if os.path.exists(args.output):
+ output_path_obj = Path(args.output)
+ backup_output_path = output_path_obj.parent / f"{output_path_obj.stem}_{timestamp}{output_path_obj.suffix}"
+ try:
+ shutil.copy2(args.output, backup_output_path)
+ print(f" Summary backed up to: {backup_output_path}", file=sys.stderr)
+ except Exception as e:
+ print(f"Warning: Failed to backup summary file: {e}", file=sys.stderr)
+
try:
with open(args.output, "w", encoding="utf-8") as f:
f.write("=== TOTAL USAGE ===\n")
@@ -820,6 +943,10 @@ def parse_session_range(s: str) -> tuple[int, int]:
def main():
+ # 基于脚本所在目录计算默认 CSV 路径
+ script_dir = Path(__file__).parent.resolve()
+ default_csv_path = str(script_dir / "result" / "qa_results.csv")
+
parser = argparse.ArgumentParser(description="Evaluate OpenClaw responses")
parser.add_argument("mode", choices=["ingest", "qa"], help="Mode: ingest (load conversations) or qa (run QA eval)")
parser.add_argument("input", help="Path to test file (.txt or .json)")
@@ -868,15 +995,9 @@ def main():
parser.add_argument(
"-p", "--parallel",
type=int,
- default=1,
+ default=10,
metavar="N",
- help="QA mode: number of samples to process concurrently (max 10, default 1).",
- )
- parser.add_argument(
- "--viking",
- action="store_true",
- default=False,
- help="Ingest mode: save to OpenViking via `ov add-memory` instead of OpenClaw.",
+ help="QA mode: number of questions to process concurrently (max 40, default 10).",
)
parser.add_argument(
"--agent-id",
@@ -886,7 +1007,7 @@ def main():
parser.add_argument(
"--session-id",
default=None,
- help="Session ID for API requests. If provided, will use this session ID and calculate token usage from corresponding JSONL file.",
+ help="Session ID for API requests (ingest mode only).",
)
parser.add_argument(
"--force-ingest",
@@ -901,8 +1022,10 @@ def main():
help="Clear all existing ingest records before running",
)
args = parser.parse_args()
+ # 添加默认 CSV 路径到 args
+ args.default_csv_path = default_csv_path
- if not args.token and not getattr(args, "viking", False):
+ if not args.token:
print("Error: --token or OPENCLAW_GATEWAY_TOKEN env var is required", file=sys.stderr)
sys.exit(1)
diff --git a/benchmark/locomo/openclaw/import_to_ov.py b/benchmark/locomo/openclaw/import_to_ov.py
new file mode 100644
index 000000000..02d9d6578
--- /dev/null
+++ b/benchmark/locomo/openclaw/import_to_ov.py
@@ -0,0 +1,669 @@
+"""
+OpenViking data import tool.
+
+Import conversations from LoCoMo JSON or plain text files into OpenViking memory.
+
+Usage:
+ # Import LoCoMo JSON conversations
+ uv run python import_to_ov.py locomo10.json --sample 0 --sessions 1-4
+
+ # Import plain text conversations
+ uv run python import_to_ov.py example.txt
+"""
+
+import argparse
+import asyncio
+import csv
+import json
+import sys
+import time
+import traceback
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import List, Dict, Any, Tuple, Optional
+
+import openviking as ov
+
+
+def _get_session_number(session_key: str) -> int:
+ """Extract session number from session key."""
+ return int(session_key.split("_")[1])
+
+
+def parse_test_file(path: str) -> List[Dict[str, Any]]:
+ """Parse txt test file into sessions.
+
+ Each session is a dict with:
+ - messages: list of user message strings
+ """
+ with open(path, "r", encoding="utf-8") as f:
+ content = f.read()
+
+ raw_sessions = content.split("---\n")
+ sessions = []
+
+ for raw in raw_sessions:
+ lines = [line for line in raw.strip().splitlines() if line.strip()]
+ if not lines:
+ continue
+
+ messages = []
+ for line in lines:
+ if not line.startswith("eval:"): # Skip eval lines
+ messages.append(line)
+
+ if messages:
+ sessions.append({"messages": messages})
+
+ return sessions
+
+
+def load_locomo_data(
+ path: str,
+ sample_index: Optional[int] = None,
+) -> List[Dict[str, Any]]:
+ """Load LoCoMo JSON and optionally filter to one sample."""
+ with open(path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ if sample_index is not None:
+ if sample_index < 0 or sample_index >= len(data):
+ raise ValueError(f"Sample index {sample_index} out of range (0-{len(data) - 1})")
+ return [data[sample_index]]
+ return data
+
+
+def build_session_messages(
+ item: Dict[str, Any],
+ session_range: Optional[Tuple[int, int]] = None,
+) -> List[Dict[str, Any]]:
+ """Build session messages for one LoCoMo sample.
+
+ Returns list of dicts with keys: messages, meta.
+ Each dict represents a session with multiple messages (user/assistant role).
+ """
+ conv = item["conversation"]
+ speakers = f"{conv['speaker_a']} & {conv['speaker_b']}"
+
+ session_keys = sorted(
+ [k for k in conv if k.startswith("session_") and not k.endswith("_date_time")],
+ key=_get_session_number,
+ )
+
+ sessions = []
+ for sk in session_keys:
+ sess_num = _get_session_number(sk)
+ if session_range:
+ lo, hi = session_range
+ if sess_num < lo or sess_num > hi:
+ continue
+
+ dt_key = f"{sk}_date_time"
+ date_time = conv.get(dt_key, "")
+
+ # Extract messages with all as user role, including speaker in content
+ messages = []
+ for idx, msg in enumerate(conv[sk]):
+ speaker = msg.get("speaker", "unknown")
+ text = msg.get("text", "")
+ messages.append(
+ {"role": "user", "text": f"[{speaker}]: {text}", "speaker": speaker, "index": idx}
+ )
+
+ sessions.append(
+ {
+ "messages": messages,
+ "meta": {
+ "sample_id": item["sample_id"],
+ "session_key": sk,
+ "date_time": date_time,
+ "speakers": speakers,
+ },
+ }
+ )
+
+ return sessions
+
+
+# ---------------------------------------------------------------------------
+# Ingest record helpers (avoid duplicate ingestion)
+# ---------------------------------------------------------------------------
+
+
+def load_success_csv(csv_path: str = "./result/import_success.csv") -> set:
+ """加载成功导入的CSV记录,返回已成功的键集合"""
+ success_keys = set()
+ if Path(csv_path).exists():
+ with open(csv_path, "r", encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ key = f"viking:{row['sample_id']}:{row['session']}"
+ success_keys.add(key)
+ return success_keys
+
+
+def write_success_record(
+ record: Dict[str, Any], csv_path: str = "./result/import_success.csv"
+) -> None:
+ """写入成功记录到CSV文件"""
+ file_exists = Path(csv_path).exists()
+ fieldnames = [
+ "timestamp",
+ "sample_id",
+ "session",
+ "date_time",
+ "speakers",
+ "embedding_tokens",
+ "vlm_tokens",
+ "llm_input_tokens",
+ "llm_output_tokens",
+ "total_tokens",
+ ]
+
+ with open(csv_path, "a", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
+ if not file_exists:
+ writer.writeheader()
+
+ writer.writerow(
+ {
+ "timestamp": record["timestamp"],
+ "sample_id": record["sample_id"],
+ "session": record["session"],
+ "date_time": record.get("meta", {}).get("date_time", ""),
+ "speakers": record.get("meta", {}).get("speakers", ""),
+ "embedding_tokens": record["token_usage"].get("embedding", 0),
+ "vlm_tokens": record["token_usage"].get("vlm", 0),
+ "llm_input_tokens": record["token_usage"].get("llm_input", 0),
+ "llm_output_tokens": record["token_usage"].get("llm_output", 0),
+ "total_tokens": record["token_usage"].get("total", 0),
+ }
+ )
+
+
+def write_error_record(
+ record: Dict[str, Any], error_path: str = "./result/import_errors.log"
+) -> None:
+ """写入错误记录到日志文件"""
+ with open(error_path, "a", encoding="utf-8") as f:
+ timestamp = record["timestamp"]
+ sample_id = record["sample_id"]
+ session = record["session"]
+ error = record["error"]
+ f.write(f"[{timestamp}] ERROR [{sample_id}/{session}]: {error}\n")
+
+
+def is_already_ingested(
+ sample_id: str | int,
+ session_key: str,
+ success_keys: Optional[set] = None,
+) -> bool:
+ """Check if a specific session has already been successfully ingested."""
+ key = f"viking:{sample_id}:{session_key}"
+ return success_keys is not None and key in success_keys
+
+
+# ---------------------------------------------------------------------------
+# OpenViking import
+# ---------------------------------------------------------------------------
+def _parse_token_usage(commit_result: Dict[str, Any]) -> Dict[str, int]:
+ """解析Token使用数据(从commit返回的telemetry或task result中提取)"""
+ # 尝试从 task result 中提取(task 完成后包含完整 token_usage)
+ if "result" in commit_result:
+ result = commit_result["result"]
+ if "token_usage" in result:
+ tu = result["token_usage"]
+ embedding = tu.get("embedding", {})
+ llm = tu.get("llm", {})
+ # embedding 格式可能是 {"total": N} 或 {"total_tokens": N}
+ embed_total = embedding.get("total", embedding.get("total_tokens", 0))
+ llm_total = llm.get("total", llm.get("total_tokens", 0))
+ return {
+ "embedding": embed_total,
+ "vlm": llm_total,
+ "llm_input": llm.get("input", 0),
+ "llm_output": llm.get("output", 0),
+ "total": tu.get("total", {}).get("total_tokens", embed_total + llm_total),
+ }
+
+ # 从 commit 响应的 telemetry 中提取
+ telemetry = commit_result.get("telemetry", {}).get("summary", {})
+ tokens = telemetry.get("tokens", {})
+ return {
+ "embedding": tokens.get("embedding", {}).get("total", 0),
+ "vlm": tokens.get("llm", {}).get("total", 0),
+ "llm_input": tokens.get("llm", {}).get("input", 0),
+ "llm_output": tokens.get("llm", {}).get("output", 0),
+ "total": tokens.get("total", 0),
+ }
+
+
+async def viking_ingest(
+ messages: List[Dict[str, Any]],
+ openviking_url: str,
+ session_time: Optional[str] = None,
+ user_id: Optional[str] = None,
+ agent_id: Optional[str] = None,
+) -> Dict[str, int]:
+ """Save messages to OpenViking via OpenViking SDK client.
+ Returns token usage dict with embedding and vlm token counts.
+
+ Args:
+ messages: List of message dicts with role and text
+ openviking_url: OpenViking service URL
+ session_time: Session time string (e.g., "9:36 am on 2 April, 2023")
+ user_id: User identifier for separate userspace (e.g., "conv-26")
+ agent_id: Agent identifier for separate agentspace (e.g., "conv-26")
+ """
+ # 解析 session_time - 为每条消息计算递增的时间戳
+ base_datetime = None
+ if session_time:
+ try:
+ base_datetime = datetime.strptime(session_time, "%I:%M %p on %d %B, %Y")
+ except ValueError:
+ print(f"Warning: Failed to parse session_time: {session_time}", file=sys.stderr)
+
+ # Create client
+ client_kwargs = {"url": openviking_url}
+ if user_id is not None:
+ client_kwargs["user"] = user_id
+ if agent_id is not None:
+ client_kwargs["agent_id"] = agent_id
+ client = ov.AsyncHTTPClient(**client_kwargs)
+ await client.initialize()
+
+ try:
+ # Create session
+ create_res = await client.create_session()
+ session_id = create_res["session_id"]
+
+ # Add messages one by one with created_at
+ for idx, msg in enumerate(messages):
+ msg_created_at = None
+ if base_datetime:
+ # 每条消息递增1秒,确保时间顺序
+ msg_dt = base_datetime + timedelta(seconds=idx)
+ msg_created_at = msg_dt.isoformat()
+
+ await client.add_message(
+ session_id=session_id,
+ role=msg["role"],
+ parts=[{"type": "text", "text": msg["text"]}],
+ created_at=msg_created_at,
+ )
+
+ # Commit
+ result = await client.commit_session(session_id, telemetry=True)
+
+ # Accept both "committed" and "accepted" as success - accepted means the session was archived
+ if result.get("status") not in ("committed", "accepted"):
+ raise RuntimeError(f"Commit failed: {result}")
+
+ # 等待 task 完成以获取准确 token 消耗
+ task_id = result.get("task_id")
+ if task_id:
+ # 轮询任务状态直到完成
+ max_attempts = 3600 # 最多等待1小时
+ for attempt in range(max_attempts):
+ task = await client.get_task(task_id)
+ status = task.get("status") if task else "unknown"
+ if status == "completed":
+ token_usage = _parse_token_usage(task)
+ break
+ elif status in ("failed", "cancelled", "unknown"):
+ raise RuntimeError(f"Task {task_id} {status}: {task}")
+ await asyncio.sleep(1)
+ else:
+ raise RuntimeError(f"Task {task_id} timed out after {max_attempts} attempts")
+ else:
+ token_usage = {"embedding": 0, "vlm": 0, "total": 0}
+
+ # Get trace_id from commit result
+ trace_id = result.get("trace_id", "")
+ return {"token_usage": token_usage, "task_id": task_id, "trace_id": trace_id}
+
+ finally:
+ await client.close()
+
+
+def parse_session_range(s: str) -> Tuple[int, int]:
+ """Parse '1-4' or '3' into (lo, hi) inclusive tuple."""
+ if "-" in s:
+ lo, hi = s.split("-", 1)
+ return int(lo), int(hi)
+ n = int(s)
+ return n, n
+
+
+async def process_single_session(
+ messages: List[Dict[str, Any]],
+ sample_id: str | int,
+ session_key: str,
+ meta: Dict[str, Any],
+ run_time: str,
+ args: argparse.Namespace,
+) -> Dict[str, Any]:
+ """处理单个会话的导入任务"""
+ try:
+ # 根据参数决定是否使用 sample_id 作为 user_id 和 agent_id
+ user_id = str(sample_id) if not args.no_user_agent_id else None
+ agent_id = str(sample_id) if not args.no_user_agent_id else None
+ result = await viking_ingest(
+ messages,
+ args.openviking_url,
+ meta.get("date_time"),
+ user_id=user_id,
+ agent_id=agent_id,
+ )
+ token_usage = result["token_usage"]
+ task_id = result.get("task_id")
+ trace_id = result.get("trace_id", "")
+ embedding_tokens = token_usage.get("embedding", 0)
+ vlm_tokens = token_usage.get("vlm", 0)
+ print(
+ f" -> [COMPLETED] [{sample_id}/{session_key}] embed={embedding_tokens}, vlm={vlm_tokens}, task_id={task_id}, trace_id={trace_id}",
+ file=sys.stderr,
+ )
+
+ # Write success record
+ result = {
+ "timestamp": run_time,
+ "sample_id": sample_id,
+ "session": session_key,
+ "status": "success",
+ "meta": meta,
+ "token_usage": token_usage,
+ "embedding_tokens": embedding_tokens,
+ "vlm_tokens": vlm_tokens,
+ "task_id": task_id,
+ "trace_id": trace_id,
+ }
+
+ # 写入成功CSV
+ write_success_record(result, args.success_csv)
+
+ return result
+
+ except Exception as e:
+ print(f" -> [ERROR] [{sample_id}/{session_key}] {e}", file=sys.stderr)
+ traceback.print_exc(file=sys.stderr)
+
+ # Write error record
+ result = {
+ "timestamp": run_time,
+ "sample_id": sample_id,
+ "session": session_key,
+ "status": "error",
+ "error": str(e),
+ }
+
+ # 写入错误日志
+ write_error_record(result, args.error_log)
+
+ return result
+
+
+async def run_import(args: argparse.Namespace) -> None:
+ session_range = parse_session_range(args.sessions) if args.sessions else None
+
+ # 如果指定了 question-index,自动从 evidence 推断需要的 session
+ if args.question_index is not None and not args.sessions:
+ # 加载数据获取 question 的 evidence
+ with open(args.input, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ # 获取 sample
+ sample_idx = args.sample if args.sample is not None else 0
+ if sample_idx < 0 or sample_idx >= len(data):
+ raise ValueError(f"sample index {sample_idx} out of range")
+ sample = data[sample_idx]
+
+ # 获取 question 的 evidence
+ qa_items = sample.get("qa", [])
+ if args.question_index < 0 or args.question_index >= len(qa_items):
+ raise ValueError(f"question index {args.question_index} out of range")
+ qa = qa_items[args.question_index]
+ evidence_list = qa.get("evidence", [])
+
+ # 从 evidence 提取 session 号 (D1:3 -> session 1)
+ session_nums = set()
+ for ev in evidence_list:
+ try:
+ # D1:3 -> session 1
+ sess_num = int(ev.split(":")[0][1:])
+ session_nums.add(sess_num)
+ except (ValueError, IndexError):
+ pass
+
+ if session_nums:
+ min_sess = min(session_nums)
+ max_sess = max(session_nums)
+ session_range = (min_sess, max_sess)
+ print(
+ f"[INFO] Auto-detected sessions from evidence: {min_sess}-{max_sess}",
+ file=sys.stderr,
+ )
+
+ # 加载成功CSV记录用于去重
+ success_keys = set()
+ if not args.force_ingest:
+ success_keys = load_success_csv(args.success_csv)
+ print(
+ f"[INFO] Loaded {len(success_keys)} existing success records from {args.success_csv}",
+ file=sys.stderr,
+ )
+
+ # Write run header
+ run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+ skipped_count = 0
+ success_count = 0
+ error_count = 0
+ total_embedding_tokens = 0
+ total_vlm_tokens = 0
+
+ if args.input.endswith(".json"):
+ # LoCoMo JSON format
+ samples = load_locomo_data(args.input, args.sample)
+
+ # 为每个 sample 创建独立的处理协程
+ async def process_sample(item):
+ sample_id = item["sample_id"]
+ sessions = build_session_messages(item, session_range)
+
+ print(f"\n=== Sample {sample_id} ===", file=sys.stderr)
+ print(f" {len(sessions)} session(s) to import", file=sys.stderr)
+
+ # 同一 sample 内串行处理所有 sessions
+ for sess in sessions:
+ meta = sess["meta"]
+ messages = sess["messages"]
+ session_key = meta["session_key"]
+ label = f"{session_key} ({meta['date_time']})"
+
+ # Skip already ingested sessions unless force-ingest is enabled
+ if not args.force_ingest and is_already_ingested(
+ sample_id, session_key, success_keys
+ ):
+ print(
+ f" [{label}] [SKIP] already imported (use --force-ingest to reprocess)",
+ file=sys.stderr,
+ )
+ nonlocal skipped_count
+ skipped_count += 1
+ continue
+
+ # Preview messages
+ preview = " | ".join(
+ [f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]]
+ )
+ print(f" [{label}] {preview}", file=sys.stderr)
+
+ # 串行执行(等待完成后再处理下一个 session)
+ await process_single_session(
+ messages=messages,
+ sample_id=sample_id,
+ session_key=session_key,
+ meta=meta,
+ run_time=run_time,
+ args=args,
+ )
+
+ # 不同 sample 之间并行执行
+ tasks = [asyncio.create_task(process_sample(item)) for item in samples]
+ results = await asyncio.gather(*tasks, return_exceptions=True)
+
+ else:
+ # Plain text format
+ sessions = parse_test_file(args.input)
+ print(f"Found {len(sessions)} session(s) in text file", file=sys.stderr)
+
+ for idx, session in enumerate(sessions, start=1):
+ session_key = f"txt-session-{idx}"
+ print(f"\n=== Text Session {idx} ===", file=sys.stderr)
+
+ # Skip already ingested sessions unless force-ingest is enabled
+ if not args.force_ingest and is_already_ingested(
+ "txt", session_key, success_keys
+ ):
+ print(
+ f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr
+ )
+ skipped_count += 1
+ continue
+
+ # For plain text, all messages as user role
+ messages = []
+ for i, text in enumerate(session["messages"]):
+ messages.append(
+ {"role": "user", "text": text.strip(), "speaker": "user", "index": i}
+ )
+
+ preview = " | ".join([f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]])
+ print(f" {preview}", file=sys.stderr)
+
+ # 创建异步任务
+ task = asyncio.create_task(
+ process_single_session(
+ messages=messages,
+ sample_id="txt",
+ session_key=session_key,
+ meta={"session_index": idx},
+ run_time=run_time,
+ args=args,
+ )
+ )
+ tasks.append(task)
+
+ # 等待所有 sample 处理完成
+ print(
+ f"\n[INFO] Starting import with {len(tasks)} tasks to process",
+ file=sys.stderr,
+ )
+ await asyncio.gather(*tasks, return_exceptions=True)
+
+ # 从成功 CSV 统计结果
+ if Path(args.success_csv).exists():
+ with open(args.success_csv, "r", encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ success_count += 1
+ total_embedding_tokens += int(row.get("embedding_tokens", 0) or 0)
+ total_vlm_tokens += int(row.get("vlm_tokens", 0) or 0)
+
+ # Final summary
+ total_processed = success_count + error_count + skipped_count
+ print(f"\n=== Import summary ===", file=sys.stderr)
+ print(f"Total sessions: {total_processed}", file=sys.stderr)
+ print(f"Successfully imported: {success_count}", file=sys.stderr)
+ print(f"Failed: {error_count}", file=sys.stderr)
+ print(f"Skipped (already imported): {skipped_count}", file=sys.stderr)
+ print(f"\n=== Token usage summary ===", file=sys.stderr)
+ print(f"Total Embedding tokens: {total_embedding_tokens}", file=sys.stderr)
+ print(f"Total VLM tokens: {total_vlm_tokens}", file=sys.stderr)
+ if success_count > 0:
+ print(
+ f"Average Embedding per session: {total_embedding_tokens // success_count}",
+ file=sys.stderr,
+ )
+ print(f"Average VLM per session: {total_vlm_tokens // success_count}", file=sys.stderr)
+ print(f"\nResults saved to:", file=sys.stderr)
+ print(f" - Success records: {args.success_csv}", file=sys.stderr)
+ print(f" - Error logs: {args.error_log}", file=sys.stderr)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+
+def main():
+ # 基于脚本所在目录计算默认数据文件路径
+ script_dir = Path(__file__).parent.resolve()
+ default_input = str(script_dir / ".." / "data" / "locomo10.json")
+
+ parser = argparse.ArgumentParser(description="Import conversations into OpenViking")
+ parser.add_argument(
+ "--input",
+ default=default_input,
+ help="Path to input file (.txt or LoCoMo .json)",
+ )
+ parser.add_argument(
+ "--success-csv",
+ default="./result/import_success.csv",
+ help="Path to success records CSV file (default: import_success.csv)",
+ )
+ parser.add_argument(
+ "--error-log",
+ default="./result/import_errors.log",
+ help="Path to error log file (default: import_errors.log)",
+ )
+ parser.add_argument(
+ "--openviking-url",
+ default="http://localhost:1933",
+ help="OpenViking service URL (default: http://localhost:1933)",
+ )
+ parser.add_argument(
+ "--sample",
+ type=int,
+ default=None,
+ help="LoCoMo JSON: sample index (0-based). Default: all samples.",
+ )
+ parser.add_argument(
+ "--sessions",
+ default=None,
+ help="LoCoMo JSON: session range, e.g. '1-4' or '3'. Default: all sessions.",
+ )
+ parser.add_argument(
+ "--question-index",
+ type=int,
+ default=None,
+ help="LoCoMo JSON: question index (0-based). When specified, auto-detect required sessions from question's evidence.",
+ )
+ parser.add_argument(
+ "--force-ingest",
+ action="store_true",
+ default=False,
+ help="Force re-import even if already recorded as completed",
+ )
+ parser.add_argument(
+ "--no-user-agent-id",
+ action="store_true",
+ default=False,
+ help="Do not pass user_id and agent_id to OpenViking client",
+ )
+ args = parser.parse_args()
+
+ # 确保输出目录存在
+ Path(args.success_csv).parent.mkdir(parents=True, exist_ok=True)
+ Path(args.error_log).parent.mkdir(parents=True, exist_ok=True)
+
+ try:
+ asyncio.run(run_import(args))
+ except ValueError as e:
+ print(f"Error: {e}", file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/locomo/openclaw/judge.py b/benchmark/locomo/openclaw/judge.py
new file mode 100644
index 000000000..f89bbc688
--- /dev/null
+++ b/benchmark/locomo/openclaw/judge.py
@@ -0,0 +1,203 @@
+import argparse
+import csv
+import json
+import os
+import asyncio
+from openai import AsyncOpenAI
+from dotenv import load_dotenv
+from pathlib import Path
+
+# 加载本地环境变量文件
+env_file = Path.home() / ".openviking_benchmark_env"
+load_dotenv(env_file)
+
+
+async def grade_answer(
+ llm_client, model: str, question: str, gold_answer: str, response: str
+) -> tuple[bool, str]:
+ system_prompt = """
+ You are an expert grader that determines if answers to questions match a gold standard answer
+ """
+
+ ACCURACY_PROMPT = f"""
+ Your task is to label an answer to a question as 'CORRECT' or 'WRONG'. You will be given the following data:
+ (1) a question (posed by one user to another user),
+ (2) a 'gold' (ground truth) answer,
+ (3) a generated answer
+ which you will score as CORRECT/WRONG.
+
+ The point of the question is to ask about something one user should know about the other user based on their prior conversations.
+ The gold answer will usually be a concise and short answer that includes the referenced topic, for example:
+ Question: Do you remember what I got the last time I went to Hawaii?
+ Gold answer: A shell necklace
+ The generated answer might be much longer, but you should be generous with your grading - as long as it touches on the same topic as the gold answer, it should be counted as CORRECT.
+
+ For time related questions, the gold answer will be a specific date, month, year, etc. The generated answer might be much longer or use relative time references (like "last Tuesday" or "next month"), but you should be generous with your grading - as long as it refers to the same date or time period as the gold answer, it should be counted as CORRECT. Even if the format differs (e.g., "May 7th" vs "7 May"), consider it CORRECT if it's the same date.
+
+ Now it's time for the real question:
+ Question: {question}
+ Gold answer: {gold_answer}
+ Generated answer: {response}
+
+ First, provide a short (one sentence) explanation of your reasoning, then finish with CORRECT or WRONG.
+ Do NOT include both CORRECT and WRONG in your response, or it will break the evaluation script.
+
+ Respond with JSON only: {{"is_correct": "CORRECT" or "WRONG", "reasoning": "your explanation"}}
+ """
+
+ try:
+ resp = await llm_client.chat.completions.create(
+ model=model,
+ messages=[
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": ACCURACY_PROMPT},
+ ],
+ temperature=0,
+ timeout=60,
+ )
+ content = resp.choices[0].message.content.strip()
+ # 提取JSON内容
+ start_idx = content.find("{")
+ end_idx = content.rfind("}")
+ if start_idx != -1 and end_idx != -1:
+ json_str = content[start_idx : end_idx + 1].strip()
+ result = json.loads(json_str)
+ is_correct = result.get("is_correct", "WRONG").strip().upper() == "CORRECT"
+ reasoning = result.get("reasoning", "")
+ return is_correct, reasoning
+ return False, f"[PARSE ERROR] Invalid response: {content}"
+ except Exception as e:
+ return False, f"[API ERROR] {str(e)}"
+
+
+def load_answers(input_path: str) -> tuple[list[dict], list[str]]:
+ """加载待评分的回答,返回所有行和表头"""
+ if not os.path.exists(input_path):
+ raise FileNotFoundError(f"Input file not found: {input_path}")
+
+ with open(input_path, "r", encoding="utf-8", newline="") as f:
+ reader = csv.DictReader(f)
+ fieldnames = reader.fieldnames.copy()
+ # 新增reasoning列如果不存在
+ if "reasoning" not in fieldnames:
+ fieldnames.append("reasoning")
+ rows = list(reader)
+ return rows, fieldnames
+
+
+async def main():
+ parser = argparse.ArgumentParser(
+ description="VikingBot QA judge script, same logic as openclaw evaluation"
+ )
+ parser.add_argument(
+ "--input",
+ default="./result/locomo_qa_result_only_sys_memory.csv",
+ help="Path to QA result csv file, default: ./result/locomo_qa_result.csv",
+ )
+ parser.add_argument(
+ "--base-url",
+ default="https://ark.cn-beijing.volces.com/api/v3",
+ help="Volcengine API base URL, default: https://ark.cn-beijing.volces.com/api/v3",
+ )
+ parser.add_argument(
+ "--token",
+ default=os.getenv("ARK_API_KEY", os.getenv("OPENAI_API_KEY", "")),
+ help="Volcengine API token, default from ARK_API_KEY or OPENAI_API_KEY env var",
+ )
+ parser.add_argument(
+ "--model",
+ default="doubao-seed-2-0-pro-260215",
+ help="Judge model name, default: doubao-seed-2-0-pro-260215",
+ )
+ parser.add_argument(
+ "--parallel", type=int, default=5, help="Parallel request count, default: 5"
+ )
+ args = parser.parse_args()
+
+ if not args.token:
+ print("Error: API token is required")
+ print("\n请通过以下方式设置 API key:")
+ print(" 1. 创建 ~/.openviking_benchmark_env 文件,内容如下:")
+ print(" ARK_API_KEY=你的key")
+ print(" 2. 或者通过 --token 参数传入")
+ print(" 3. 或者设置环境变量: export ARK_API_KEY=你的key")
+ exit(1)
+
+ # 加载数据
+ rows, fieldnames = load_answers(args.input)
+
+ # 筛选掉 category=5 的行,只处理未评分的行
+ valid_rows = []
+ ungraded = []
+ for i, row in enumerate(rows):
+ category = row.get("category", "")
+ if category == "5":
+ continue
+ valid_rows.append(i)
+ if not row.get("result"):
+ ungraded.append(i)
+
+ total = len(rows)
+ valid_total = len(valid_rows)
+ print(f"Total answers: {total}, valid (category != 5): {valid_total}, ungraded: {len(ungraded)}")
+
+ if not ungraded:
+ print("All valid answers already graded, exit")
+ return
+
+ # 初始化OpenAI客户端
+ client = AsyncOpenAI(base_url=args.base_url, api_key=args.token)
+
+ # 并发处理
+ semaphore = asyncio.Semaphore(args.parallel)
+ file_lock = asyncio.Lock() # 用于同步文件写入
+
+ async def save_results():
+ """保存当前所有结果到CSV文件,使用临时文件+原子替换避免文件损坏"""
+ async with file_lock:
+ temp_file = f"{args.input}.tmp"
+ with open(temp_file, "w", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
+ writer.writeheader()
+ writer.writerows(rows)
+ os.replace(temp_file, args.input)
+
+ async def process_row(idx):
+ async with semaphore:
+ row = rows[idx]
+ question = row["question"]
+ # 兼容两种列名: expected (eval.py) 或 answer (vikingbot)
+ gold = row.get("expected") or row.get("answer")
+ response = row["response"]
+ print(f"Grading {idx + 1}/{total}: {question[:60]}...")
+ is_correct, reasoning = await grade_answer(client, args.model, question, gold, response)
+ row["result"] = "CORRECT" if is_correct else "WRONG"
+ row["reasoning"] = reasoning
+
+ # 处理完一条就立即保存结果
+ await save_results()
+ print(f"Saved result for {idx + 1}/{total}: {row['result']}")
+
+ return idx, row
+
+ tasks = [process_row(idx) for idx in ungraded]
+ await asyncio.gather(*tasks)
+
+ # 统计结果
+ correct = 0
+ total_graded = 0
+ for row in rows:
+ category = row.get("category", "")
+ if category == "5":
+ continue
+ if row.get("result"):
+ total_graded += 1
+ if row.get("result") == "CORRECT":
+ correct += 1
+ accuracy = correct / total_graded if total_graded > 0 else 0.0
+ print(f"\nGrading completed: {correct}/{total_graded} correct, accuracy: {accuracy:.2%}")
+ print(f"All results saved to {args.input}")
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/benchmark/locomo/openclaw/run_full_eval.sh b/benchmark/locomo/openclaw/run_full_eval.sh
new file mode 100755
index 000000000..9429e7b2d
--- /dev/null
+++ b/benchmark/locomo/openclaw/run_full_eval.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+set -e
+
+: '
+OpenClaw 完整评估流程脚本
+
+用法:
+ ./run_full_eval.sh # 只导入 OpenViking (所有 samples)
+ ./run_full_eval.sh --with-claw-import # 同时导入 OpenViking 和 OpenClaw (所有 samples)
+ ./run_full_eval.sh --skip-import # 跳过导入步骤 (所有 samples)
+ ./run_full_eval.sh --sample 0 # 只处理第 0 个 sample
+ ./run_full_eval.sh --sample 1 --with-claw-import # 只处理第 1 个 sample,同时导入 OpenClaw
+ ./run_full_eval.sh --force-ingest # 强制重新导入所有数据
+'
+
+# 基于脚本所在目录计算数据文件路径
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+INPUT_FILE="$SCRIPT_DIR/../data/locomo10.json"
+RESULT_DIR="$SCRIPT_DIR/result"
+OUTPUT_CSV="$RESULT_DIR/qa_results.csv"
+GATEWAY_TOKEN="90f2d2dc2f7b4d50cb943d3d3345e667bb3e9bcb7ec3a1fb"
+
+
+# 解析参数
+SKIP_IMPORT=false
+WITH_CLAW_IMPORT=false
+FORCE_INGEST=false
+SAMPLE_IDX=""
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --skip-import)
+ SKIP_IMPORT=true
+ shift
+ ;;
+ --with-claw-import)
+ WITH_CLAW_IMPORT=true
+ shift
+ ;;
+ --force-ingest)
+ FORCE_INGEST=true
+ shift
+ ;;
+ --sample)
+ if [ -z "$2" ] || [[ "$2" == --* ]]; then
+ echo "错误: --sample 需要一个参数 (sample index, 0-based)"
+ exit 1
+ fi
+ SAMPLE_IDX="$2"
+ shift 2
+ ;;
+ *)
+ echo "警告: 未知参数 $1"
+ shift
+ ;;
+ esac
+done
+
+# 构建 sample 参数
+SAMPLE_ARG=""
+if [ -n "$SAMPLE_IDX" ]; then
+ SAMPLE_ARG="--sample $SAMPLE_IDX"
+ # 如果指定了 sample,修改输出文件名以避免覆盖
+ OUTPUT_CSV="$RESULT_DIR/qa_results_sample${SAMPLE_IDX}.csv"
+fi
+
+# 构建 force-ingest 参数
+FORCE_INGEST_ARG=""
+if [ "$FORCE_INGEST" = true ]; then
+ FORCE_INGEST_ARG="--force-ingest"
+fi
+
+# 确保结果目录存在
+mkdir -p "$RESULT_DIR"
+
+# Step 1: 导入数据
+if [ "$SKIP_IMPORT" = false ]; then
+ if [ "$WITH_CLAW_IMPORT" = true ]; then
+ echo "[1/5] 导入数据到 OpenViking 和 OpenClaw..."
+
+ # 后台运行 OpenViking 导入
+ python "$SCRIPT_DIR/import_to_ov.py" --no-user-agent-id --input "$INPUT_FILE" $FORCE_INGEST_ARG $SAMPLE_ARG > "$RESULT_DIR/import_ov.log" 2>&1 &
+ PID_OV=$!
+
+ # 后台运行 OpenClaw 导入
+ python "$SCRIPT_DIR/eval.py" ingest "$INPUT_FILE" $FORCE_INGEST_ARG --token "$GATEWAY_TOKEN" $SAMPLE_ARG > "$RESULT_DIR/import_claw.log" 2>&1 &
+ PID_CLAW=$!
+
+ # 等待两个导入任务完成
+ wait $PID_OV $PID_CLAW
+ else
+ echo "[1/5] 导入数据到 OpenViking..."
+ python "$SCRIPT_DIR/import_to_ov.py" --no-user-agent-id --input "$INPUT_FILE" $FORCE_INGEST_ARG $SAMPLE_ARG
+ fi
+
+ echo "导入完成,等待 1 分钟..."
+ sleep 60
+else
+ echo "[1/5] 跳过导入数据..."
+fi
+
+# Step 2: 运行 QA 模型(默认输出到 result/qa_results.csv)
+echo "[2/5] 运行 QA 评估..."
+python "$SCRIPT_DIR/eval.py" qa "$INPUT_FILE" --token "$GATEWAY_TOKEN" $SAMPLE_ARG --parallel 15 --output "${OUTPUT_CSV%.csv}"
+
+# Step 3: 裁判打分
+echo "[3/5] 裁判打分..."
+python "$SCRIPT_DIR/judge.py" --input "$OUTPUT_CSV" --parallel 40
+
+# Step 4: 计算结果
+echo "[4/5] 计算结果..."
+python "$SCRIPT_DIR/stat_judge_result.py" --input "$OUTPUT_CSV"
+
+echo "[5/5] 完成!"
+echo "结果文件: $OUTPUT_CSV"
diff --git a/benchmark/locomo/openclaw/stat_judge_result.py b/benchmark/locomo/openclaw/stat_judge_result.py
new file mode 100644
index 000000000..63816e004
--- /dev/null
+++ b/benchmark/locomo/openclaw/stat_judge_result.py
@@ -0,0 +1,161 @@
+import argparse
+import csv
+import os
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Statistics for judge result csv")
+ parser.add_argument(
+ "--input",
+ default="./result/qa_results_sample0.csv",
+ help="Path to judge result csv file, default: ./result/qa_results_sample0.csv",
+ )
+ parser.add_argument(
+ "--import-csv",
+ default="./result/import_success.csv",
+ help="Path to import_success.csv file for OpenViking token stats, default: ./result/import_success.csv",
+ )
+ args = parser.parse_args()
+
+ output_lines = []
+
+ # 统计 QA 结果
+ if os.path.exists(args.input):
+ qa_stats = process_qa_results(args.input)
+ output_lines.extend(qa_stats)
+ else:
+ output_lines.append(f"Warning: QA result file not found: {args.input}")
+
+ # 统计 Import token
+ if os.path.exists(args.import_csv):
+ if output_lines:
+ output_lines.append("")
+ import_stats = process_import_csv(args.import_csv)
+ output_lines.extend(import_stats)
+ else:
+ output_lines.append(f"Warning: Import CSV file not found: {args.import_csv}")
+
+ # 打印到控制台
+ for line in output_lines:
+ print(line)
+
+ # 写入summary.txt
+ if args.input:
+ summary_path = os.path.join(os.path.dirname(args.input), "summary.txt")
+ elif args.import_csv:
+ summary_path = os.path.join(os.path.dirname(args.import_csv), "summary.txt")
+ else:
+ summary_path = "./result/summary.txt"
+
+ os.makedirs(os.path.dirname(summary_path), exist_ok=True)
+ with open(summary_path, "w", encoding="utf-8") as f:
+ f.write("\n".join(output_lines) + "\n")
+ print(f"\nSummary saved to {summary_path}")
+
+
+def process_qa_results(input_path: str) -> list[str]:
+ """处理 QA 结果 CSV"""
+ # 统计所有题目 (排除 category=5)
+ correct = 0
+ wrong = 0
+ total_no_cache_tokens = 0 # input_tokens
+ total_cache_read_tokens = 0 # cacheRead
+ total_output_tokens = 0 # output_tokens
+ total_input_tokens = 0 # no_cache + cacheRead
+ valid_rows = 0
+
+ with open(input_path, "r", encoding="utf-8", newline="") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ # 检查 category 是否为 5,跳过
+ category = row.get("category", "")
+ if category == "5":
+ continue
+
+ valid_rows += 1
+
+ # 统计结果
+ result = row.get("result", "").strip().upper()
+ if result == "CORRECT":
+ correct += 1
+ elif result == "WRONG":
+ wrong += 1
+
+ # 统计token
+ try:
+ no_cache = int(row.get("input_tokens", 0))
+ cache_read = int(row.get("cacheRead", 0))
+ output = int(row.get("output_tokens", 0))
+
+ total_no_cache_tokens += no_cache
+ total_cache_read_tokens += cache_read
+ total_output_tokens += output
+ total_input_tokens += no_cache + cache_read
+ except (ValueError, TypeError):
+ pass
+
+ total_graded = correct + wrong
+ accuracy = correct / total_graded if total_graded > 0 else 0.0
+
+ # 平均 token 消耗
+ avg_no_cache = total_no_cache_tokens / valid_rows if valid_rows > 0 else 0.0
+ avg_cache_read = total_cache_read_tokens / valid_rows if valid_rows > 0 else 0.0
+ avg_output = total_output_tokens / valid_rows if valid_rows > 0 else 0.0
+ avg_total_input = total_input_tokens / valid_rows if valid_rows > 0 else 0.0
+
+ return [
+ "=== Judge Result Statistics (excluding category=5) ===",
+ f"Total rows: {valid_rows:,}",
+ f"Graded rows: {total_graded:,}",
+ f"Correct: {correct:,}",
+ f"Wrong: {wrong:,}",
+ f"Accuracy: {accuracy:.2%}",
+ f"\nToken usage (QA):",
+ f" Total no-cache tokens (input_tokens): {total_no_cache_tokens:,}",
+ f" Total cacheRead tokens: {total_cache_read_tokens:,}",
+ f" Total output tokens: {total_output_tokens:,}",
+ f" Total input tokens (no-cache + cacheRead): {total_input_tokens:,}",
+ f" Avg no-cache tokens: {avg_no_cache:,.2f}",
+ f" Avg cacheRead tokens: {avg_cache_read:,.2f}",
+ f" Avg output tokens: {avg_output:,.2f}",
+ f" Avg total input tokens: {avg_total_input:,.2f}",
+ ]
+
+
+def process_import_csv(input_path: str) -> list[str]:
+ """处理 import_success.csv 的 token 统计"""
+ total_embedding = 0
+ total_vlm = 0
+ total_total = 0
+ valid_rows = 0
+
+ with open(input_path, "r", encoding="utf-8", newline="") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ valid_rows += 1
+ try:
+ total_embedding += int(row.get("embedding_tokens", 0))
+ total_vlm += int(row.get("vlm_tokens", 0))
+ total_total += int(row.get("total_tokens", 0))
+ except (ValueError, TypeError):
+ pass
+
+ avg_embedding = total_embedding / valid_rows if valid_rows > 0 else 0.0
+ avg_vlm = total_vlm / valid_rows if valid_rows > 0 else 0.0
+ avg_total = total_total / valid_rows if valid_rows > 0 else 0.0
+
+ return [
+ "=== OpenViking Import Token Statistics ===",
+ f"Total sessions: {valid_rows:,}",
+ f"\nToken usage (Import):",
+ f" Total embedding tokens: {total_embedding:,}",
+ f" Total VLM tokens: {total_vlm:,}",
+ f" Total tokens: {total_total:,}",
+ f" Avg embedding tokens: {avg_embedding:,.2f}",
+ f" Avg VLM tokens: {avg_vlm:,.2f}",
+ f" Avg total tokens: {avg_total:,.2f}",
+ ]
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/locomo/vikingbot/import_and_eval_one.sh b/benchmark/locomo/vikingbot/import_and_eval_one.sh
new file mode 100755
index 000000000..3289fcb14
--- /dev/null
+++ b/benchmark/locomo/vikingbot/import_and_eval_one.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+# 单题/批量测试脚本:导入对话 + 提问验证
+#
+# Usage:
+# ./import_and_eval_one.sh 0 2 # sample 0, question 2 (单题)
+# ./import_and_eval_one.sh conv-26 2 # sample_id conv-26, question 2 (单题)
+# ./import_and_eval_one.sh conv-26 # sample_id conv-26, 所有问题 (批量)
+# ./import_and_eval_one.sh conv-26 2 --skip-import # 跳过导入,直接评测
+# ./import_and_eval_one.sh conv-26 --skip-import # 跳过导入,批量评测
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+SKIP_IMPORT=false
+
+# 解析参数
+for arg in "$@"; do
+ if [ "$arg" = "--skip-import" ]; then
+ SKIP_IMPORT=true
+ fi
+done
+
+# 过滤掉 --skip-import 获取实际参数
+ARGS=()
+for arg in "$@"; do
+ if [ "$arg" != "--skip-import" ]; then
+ ARGS+=("$arg")
+ fi
+done
+
+SAMPLE=${ARGS[0]}
+QUESTION_INDEX=${ARGS[1]}
+INPUT_FILE="$SCRIPT_DIR/../data/locomo10.json"
+
+if [ -z "$SAMPLE" ]; then
+ echo "Usage: $0 [question_index] [--skip-import]"
+ echo " sample_index: 数字索引 (0,1,2...) 或 sample_id (conv-26)"
+ echo " question_index: 问题索引 (可选),不传则测试该 sample 的所有问题"
+ echo " --skip-import: 跳过导入步骤,直接使用已导入的数据进行评测"
+ exit 1
+fi
+
+# 判断是数字还是 sample_id
+if [[ "$SAMPLE" =~ ^-?[0-9]+$ ]]; then
+ SAMPLE_INDEX=$SAMPLE
+ SAMPLE_ID_FOR_CMD=$SAMPLE_INDEX
+ echo "Using sample index: $SAMPLE_INDEX"
+else
+ # 通过 sample_id 查找索引
+ SAMPLE_INDEX=$(python3 -c "
+import json
+data = json.load(open('$INPUT_FILE'))
+for i, s in enumerate(data):
+ if s.get('sample_id') == '$SAMPLE':
+ print(i)
+ break
+else:
+ print('NOT_FOUND')
+")
+ if [ "$SAMPLE_INDEX" = "NOT_FOUND" ]; then
+ echo "Error: sample_id '$SAMPLE' not found"
+ exit 1
+ fi
+ SAMPLE_ID_FOR_CMD=$SAMPLE
+ echo "Using sample_id: $SAMPLE (index: $SAMPLE_INDEX)"
+fi
+
+# 判断是单题模式还是批量模式
+if [ -n "$QUESTION_INDEX" ]; then
+ # ========== 单题模式 ==========
+ echo "=== 单题模式: sample $SAMPLE, question $QUESTION_INDEX ==="
+
+ # 导入对话(只导入 question 对应的 session)
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[1/3] Skipping import (--skip-import)"
+ else
+ echo "[1/3] Importing sample $SAMPLE_INDEX, question $QUESTION_INDEX..."
+ python benchmark/locomo/vikingbot/import_to_ov.py \
+ --input "$INPUT_FILE" \
+ --sample "$SAMPLE_INDEX" \
+ --question-index "$QUESTION_INDEX" \
+ --force-ingest
+
+ echo "Waiting for data processing..."
+ sleep 3
+ fi
+
+ # 运行评测
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[1/2] Running evaluation (skip-import mode)..."
+ else
+ echo "[2/3] Running evaluation..."
+ fi
+ if [[ "$SAMPLE" =~ ^-?[0-9]+$ ]]; then
+ # 数字索引用默认输出文件
+ OUTPUT_FILE=./result/locomo_qa_result.csv
+ python benchmark/locomo/vikingbot/run_eval.py \
+ "$INPUT_FILE" \
+ --sample "$SAMPLE_ID_FOR_CMD" \
+ --question-index "$QUESTION_INDEX" \
+ --count 1
+ else
+ # sample_id 模式直接更新批量结果文件
+ OUTPUT_FILE=./result/locomo_${SAMPLE}_result.csv
+ python benchmark/locomo/vikingbot/run_eval.py \
+ "$INPUT_FILE" \
+ --sample "$SAMPLE_ID_FOR_CMD" \
+ --question-index "$QUESTION_INDEX" \
+ --count 1 \
+ --output "$OUTPUT_FILE" \
+ --update-mode
+ fi
+
+ # 运行 Judge 评分
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[2/2] Running judge..."
+ else
+ echo "[3/3] Running judge..."
+ fi
+ python benchmark/locomo/vikingbot/judge.py --input "$OUTPUT_FILE" --parallel 1
+
+ # 输出结果
+ echo ""
+ echo "=== 评测结果 ==="
+ python3 -c "
+import csv
+import json
+
+question_index = $QUESTION_INDEX
+
+with open('$OUTPUT_FILE') as f:
+ reader = csv.DictReader(f)
+ rows = list(reader)
+
+# 找到指定 question_index 的结果
+row = None
+for r in rows:
+ if int(r.get('question_index', -1)) == question_index:
+ row = r
+ break
+
+if row is None:
+ # 没找到则用最后一条
+ row = rows[-1]
+
+# 解析 evidence_text
+evidence_text = json.loads(row.get('evidence_text', '[]'))
+evidence_str = '\\n'.join(evidence_text) if evidence_text else ''
+
+print(f\"问题: {row['question']}\")
+print(f\"期望答案: {row['answer']}\")
+print(f\"模型回答: {row['response']}\")
+print(f\"证据原文:\\n{evidence_str}\")
+print(f\"结果: {row.get('result', 'N/A')}\")
+print(f\"原因: {row.get('reasoning', 'N/A')}\")
+"
+
+else
+ # ========== 批量模式 ==========
+ echo "=== 批量模式: sample $SAMPLE, 所有问题 ==="
+
+ # 获取该 sample 的问题数量
+ QUESTION_COUNT=$(python3 -c "
+import json
+data = json.load(open('$INPUT_FILE'))
+sample = data[$SAMPLE_INDEX]
+print(len(sample.get('qa', [])))
+")
+ echo "Found $QUESTION_COUNT questions for sample $SAMPLE"
+
+ # 导入所有 sessions
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[1/4] Skipping import (--skip-import)"
+ else
+ echo "[1/4] Importing all sessions for sample $SAMPLE_INDEX..."
+ python benchmark/locomo/vikingbot/import_to_ov.py \
+ --input "$INPUT_FILE" \
+ --sample "$SAMPLE_INDEX" \
+ --force-ingest
+
+ echo "Waiting for data processing..."
+ sleep 10
+ fi
+
+ # 运行评测(所有问题)
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[1/3] Running evaluation for all questions (skip-import mode)..."
+ else
+ echo "[2/4] Running evaluation for all questions..."
+ fi
+ OUTPUT_FILE=./result/locomo_${SAMPLE}_result.csv
+ python benchmark/locomo/vikingbot/run_eval.py \
+ "$INPUT_FILE" \
+ --sample "$SAMPLE_ID_FOR_CMD" \
+ --output "$OUTPUT_FILE" \
+ --threads 5
+
+ # 运行 Judge 评分
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[2/3] Running judge..."
+ else
+ echo "[3/4] Running judge..."
+ fi
+ python benchmark/locomo/vikingbot/judge.py --input "$OUTPUT_FILE" --parallel 5
+
+ # 输出统计结果
+ if [ "$SKIP_IMPORT" = "true" ]; then
+ echo "[3/3] Calculating statistics..."
+ else
+ echo "[4/4] Calculating statistics..."
+ fi
+ python benchmark/locomo/vikingbot/stat_judge_result.py --input "$OUTPUT_FILE"
+
+ echo ""
+ echo "=== 批量评测完成 ==="
+ echo "结果文件: $OUTPUT_FILE"
+fi
\ No newline at end of file
diff --git a/benchmark/locomo/vikingbot/import_to_ov.py b/benchmark/locomo/vikingbot/import_to_ov.py
index 9d68ad520..a6b23c461 100644
--- a/benchmark/locomo/vikingbot/import_to_ov.py
+++ b/benchmark/locomo/vikingbot/import_to_ov.py
@@ -68,7 +68,7 @@ def load_locomo_data(
if sample_index is not None:
if sample_index < 0 or sample_index >= len(data):
- raise ValueError(f"Sample index {sample_index} out of range (0-{len(data)-1})")
+ raise ValueError(f"Sample index {sample_index} out of range (0-{len(data) - 1})")
return [data[sample_index]]
return data
@@ -106,22 +106,21 @@ def build_session_messages(
for idx, msg in enumerate(conv[sk]):
speaker = msg.get("speaker", "unknown")
text = msg.get("text", "")
- messages.append({
- "role": "user",
- "text": f"[{speaker}]: {text}",
- "speaker": speaker,
- "index": idx
- })
-
- sessions.append({
- "messages": messages,
- "meta": {
- "sample_id": item["sample_id"],
- "session_key": sk,
- "date_time": date_time,
- "speakers": speakers,
- },
- })
+ messages.append(
+ {"role": "user", "text": f"[{speaker}]: {text}", "speaker": speaker, "index": idx}
+ )
+
+ sessions.append(
+ {
+ "messages": messages,
+ "meta": {
+ "sample_id": item["sample_id"],
+ "session_key": sk,
+ "date_time": date_time,
+ "speakers": speakers,
+ },
+ }
+ )
return sessions
@@ -130,6 +129,7 @@ def build_session_messages(
# Ingest record helpers (avoid duplicate ingestion)
# ---------------------------------------------------------------------------
+
def load_success_csv(csv_path: str = "./result/import_success.csv") -> set:
"""加载成功导入的CSV记录,返回已成功的键集合"""
success_keys = set()
@@ -142,33 +142,48 @@ def load_success_csv(csv_path: str = "./result/import_success.csv") -> set:
return success_keys
-def write_success_record(record: Dict[str, Any], csv_path: str = "./result/import_success.csv") -> None:
+def write_success_record(
+ record: Dict[str, Any], csv_path: str = "./result/import_success.csv"
+) -> None:
"""写入成功记录到CSV文件"""
file_exists = Path(csv_path).exists()
- fieldnames = ["timestamp", "sample_id", "session", "date_time", "speakers",
- "embedding_tokens", "vlm_tokens", "llm_input_tokens",
- "llm_output_tokens", "total_tokens"]
+ fieldnames = [
+ "timestamp",
+ "sample_id",
+ "session",
+ "date_time",
+ "speakers",
+ "embedding_tokens",
+ "vlm_tokens",
+ "llm_input_tokens",
+ "llm_output_tokens",
+ "total_tokens",
+ ]
with open(csv_path, "a", encoding="utf-8", newline="") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
if not file_exists:
writer.writeheader()
- writer.writerow({
- "timestamp": record["timestamp"],
- "sample_id": record["sample_id"],
- "session": record["session"],
- "date_time": record.get("meta", {}).get("date_time", ""),
- "speakers": record.get("meta", {}).get("speakers", ""),
- "embedding_tokens": record["token_usage"].get("embedding", 0),
- "vlm_tokens": record["token_usage"].get("vlm", 0),
- "llm_input_tokens": record["token_usage"].get("llm_input", 0),
- "llm_output_tokens": record["token_usage"].get("llm_output", 0),
- "total_tokens": record["token_usage"].get("total", 0)
- })
-
-
-def write_error_record(record: Dict[str, Any], error_path: str = "./result/import_errors.log") -> None:
+ writer.writerow(
+ {
+ "timestamp": record["timestamp"],
+ "sample_id": record["sample_id"],
+ "session": record["session"],
+ "date_time": record.get("meta", {}).get("date_time", ""),
+ "speakers": record.get("meta", {}).get("speakers", ""),
+ "embedding_tokens": record["token_usage"].get("embedding", 0),
+ "vlm_tokens": record["token_usage"].get("vlm", 0),
+ "llm_input_tokens": record["token_usage"].get("llm_input", 0),
+ "llm_output_tokens": record["token_usage"].get("llm_output", 0),
+ "total_tokens": record["token_usage"].get("total", 0),
+ }
+ )
+
+
+def write_error_record(
+ record: Dict[str, Any], error_path: str = "./result/import_errors.log"
+) -> None:
"""写入错误记录到日志文件"""
with open(error_path, "a", encoding="utf-8") as f:
timestamp = record["timestamp"]
@@ -187,7 +202,9 @@ def load_ingest_record(record_path: str = "./result/.ingest_record.json") -> Dic
return {}
-def save_ingest_record(record: Dict[str, Any], record_path: str = "./result/.ingest_record.json") -> None:
+def save_ingest_record(
+ record: Dict[str, Any], record_path: str = "./result/.ingest_record.json"
+) -> None:
"""Save ingest record to file."""
with open(record_path, "w", encoding="utf-8") as f:
json.dump(record, f, indent=2, ensure_ascii=False)
@@ -224,27 +241,44 @@ def mark_ingested(
# ---------------------------------------------------------------------------
# OpenViking import
# ---------------------------------------------------------------------------
-def _parse_token_usage(task_result: Dict[str, Any]) -> Dict[str, int]:
- """解析Token使用数据(从get_task返回的result中提取)"""
- result_data = task_result.get("result", {})
- token_usage = result_data.get("token_usage", {})
- llm_tokens = token_usage.get("llm", {})
- embedding_tokens = token_usage.get("embedding", {})
- total_tokens = token_usage.get("total", {})
+def _parse_token_usage(commit_result: Dict[str, Any]) -> Dict[str, int]:
+ """解析Token使用数据(从commit返回的telemetry或task result中提取)"""
+ # 尝试从 task result 中提取(task 完成后包含完整 token_usage)
+ if "result" in commit_result:
+ result = commit_result["result"]
+ if "token_usage" in result:
+ tu = result["token_usage"]
+ embedding = tu.get("embedding", {})
+ llm = tu.get("llm", {})
+ # embedding 格式可能是 {"total": N} 或 {"total_tokens": N}
+ embed_total = embedding.get("total", embedding.get("total_tokens", 0))
+ llm_total = llm.get("total", llm.get("total_tokens", 0))
+ return {
+ "embedding": embed_total,
+ "vlm": llm_total,
+ "llm_input": llm.get("input", 0),
+ "llm_output": llm.get("output", 0),
+ "total": tu.get("total", {}).get("total_tokens", embed_total + llm_total),
+ }
+
+ # 从 commit 响应的 telemetry 中提取
+ telemetry = commit_result.get("telemetry", {}).get("summary", {})
+ tokens = telemetry.get("tokens", {})
return {
- "embedding": embedding_tokens.get("total_tokens", 0),
- "vlm": llm_tokens.get("total_tokens", 0),
- "llm_input": llm_tokens.get("prompt_tokens", 0),
- "llm_output": llm_tokens.get("completion_tokens", 0),
- "total": total_tokens.get("total_tokens", 0)
+ "embedding": tokens.get("embedding", {}).get("total", 0),
+ "vlm": tokens.get("llm", {}).get("total", 0),
+ "llm_input": tokens.get("llm", {}).get("input", 0),
+ "llm_output": tokens.get("llm", {}).get("output", 0),
+ "total": tokens.get("total", 0),
}
async def viking_ingest(
messages: List[Dict[str, Any]],
openviking_url: str,
- semaphore: asyncio.Semaphore,
- session_time: Optional[str] = None
+ session_time: Optional[str] = None,
+ user_id: Optional[str] = None,
+ agent_id: Optional[str] = None,
) -> Dict[str, int]:
"""Save messages to OpenViking via OpenViking SDK client.
Returns token usage dict with embedding and vlm token counts.
@@ -252,8 +286,9 @@ async def viking_ingest(
Args:
messages: List of message dicts with role and text
openviking_url: OpenViking service URL
- semaphore: Async semaphore for concurrency control
session_time: Session time string (e.g., "9:36 am on 2 April, 2023")
+ user_id: User identifier for separate userspace (e.g., "conv-26")
+ agent_id: Agent identifier for separate agentspace (e.g., "conv-26")
"""
# 解析 session_time - 为每条消息计算递增的时间戳
base_datetime = None
@@ -263,74 +298,67 @@ async def viking_ingest(
except ValueError:
print(f"Warning: Failed to parse session_time: {session_time}", file=sys.stderr)
- # 使用信号量控制并发
- async with semaphore:
- # Create client
- client = ov.AsyncHTTPClient(url=openviking_url)
- await client.initialize()
-
- try:
- # Create session
- create_res = await client.create_session()
- session_id = create_res["session_id"]
-
- # Add messages one by one with created_at
- for idx, msg in enumerate(messages):
- msg_created_at = None
- if base_datetime:
- # 每条消息递增1秒,确保时间顺序
- msg_dt = base_datetime + timedelta(seconds=idx)
- msg_created_at = msg_dt.isoformat()
-
- await client.add_message(
- session_id=session_id,
- role=msg["role"],
- parts=[{"type": "text", "text": msg["text"]}],
- created_at=msg_created_at
- )
+ # Create client
+ client = ov.AsyncHTTPClient(
+ url=openviking_url,
+ user=user_id,
+ agent_id=agent_id,
+ )
+ await client.initialize()
- # Commit
- commit_result = await client.commit_session(session_id, telemetry=True)
+ try:
+ # Create session
+ create_res = await client.create_session()
+ session_id = create_res["session_id"]
+
+ # Add messages one by one with created_at
+ for idx, msg in enumerate(messages):
+ msg_created_at = None
+ if base_datetime:
+ # 每条消息递增1秒,确保时间顺序
+ msg_dt = base_datetime + timedelta(seconds=idx)
+ msg_created_at = msg_dt.isoformat()
+
+ await client.add_message(
+ session_id=session_id,
+ role=msg["role"],
+ parts=[{"type": "text", "text": msg["text"]}],
+ created_at=msg_created_at,
+ )
- if commit_result.get("status") != "accepted":
- raise RuntimeError(f"Commit failed: {commit_result}")
+ # Commit
+ result = await client.commit_session(session_id, telemetry=True)
- # 获取异步任务ID并轮询任务完成状态
- task_id = commit_result.get("task_id")
- if not task_id:
- raise RuntimeError(f"No task_id in commit result: {commit_result}")
+ # Accept both "committed" and "accepted" as success - accepted means the session was archived
+ if result.get("status") not in ("committed", "accepted"):
+ raise RuntimeError(f"Commit failed: {result}")
+ # 等待 task 完成以获取准确 token 消耗
+ task_id = result.get("task_id")
+ if task_id:
# 轮询任务状态直到完成
max_attempts = 1200 # 最多等待20分钟
for attempt in range(max_attempts):
- task_result = await client.get_task(task_id)
- task_status = task_result.get("status")
- if task_status == "completed":
+ task = await client.get_task(task_id)
+ status = task.get("status") if task else "unknown"
+ if status == "completed":
+ token_usage = _parse_token_usage(task)
break
- elif task_status in ("failed", "cancelled"):
- raise RuntimeError(f"Task {task_id} {task_status}: {task_result.get('error')}")
- # 等待1秒后重试
+ elif status in ("failed", "cancelled", "unknown"):
+ raise RuntimeError(f"Task {task_id} {status}: {task}")
await asyncio.sleep(1)
else:
raise RuntimeError(f"Task {task_id} timed out after {max_attempts} attempts")
+ else:
+ token_usage = {"embedding": 0, "vlm": 0, "total": 0}
- # 从任务结果中提取token使用情况
- token_usage = _parse_token_usage(task_result)
+ # Get trace_id from commit result
+ trace_id = result.get("trace_id", "")
+ return {"token_usage": token_usage, "task_id": task_id, "trace_id": trace_id}
- return token_usage
+ finally:
+ await client.close()
- finally:
- await client.close()
-
-
-def sync_viking_ingest(messages: List[Dict[str, Any]], openviking_url: str, session_time: Optional[str] = None) -> Dict[str, int]:
- """Synchronous wrapper for viking_ingest to maintain existing API."""
- semaphore = asyncio.Semaphore(1) # 同步调用时使用信号量为1
- return asyncio.run(viking_ingest(messages, openviking_url, semaphore, session_time))
-
-# ---------------------------------------------------------------------------
-# Main import logic
-# ---------------------------------------------------------------------------
def parse_session_range(s: str) -> Tuple[int, int]:
"""Parse '1-4' or '3' into (lo, hi) inclusive tuple."""
@@ -349,17 +377,26 @@ async def process_single_session(
run_time: str,
ingest_record: Dict[str, Any],
args: argparse.Namespace,
- semaphore: asyncio.Semaphore
) -> Dict[str, Any]:
"""处理单个会话的导入任务"""
try:
- token_usage = await viking_ingest(messages, args.openviking_url, semaphore, meta.get("date_time"))
- print(f" -> [SUCCESS] [{sample_id}/{session_key}] imported to OpenViking", file=sys.stderr)
-
- # Extract token counts
+ # 使用 sample_id 作为 user_id 和 agent_id,实现独立的 userspace/agentspace
+ result = await viking_ingest(
+ messages,
+ args.openviking_url,
+ meta.get("date_time"),
+ user_id=str(sample_id),
+ agent_id=str(sample_id),
+ )
+ token_usage = result["token_usage"]
+ task_id = result.get("task_id")
+ trace_id = result.get("trace_id", "")
embedding_tokens = token_usage.get("embedding", 0)
vlm_tokens = token_usage.get("vlm", 0)
- print(f" -> [USAGE] [{sample_id}/{session_key}] Embedding tokens: {embedding_tokens}, VLM tokens: {vlm_tokens}", file=sys.stderr)
+ print(
+ f" -> [COMPLETED] [{sample_id}/{session_key}] embed={embedding_tokens}, vlm={vlm_tokens}, task_id={task_id}, trace_id={trace_id}",
+ file=sys.stderr,
+ )
# Write success record
result = {
@@ -370,7 +407,9 @@ async def process_single_session(
"meta": meta,
"token_usage": token_usage,
"embedding_tokens": embedding_tokens,
- "vlm_tokens": vlm_tokens
+ "vlm_tokens": vlm_tokens,
+ "task_id": task_id,
+ "trace_id": trace_id,
}
# 写入成功CSV
@@ -392,7 +431,7 @@ async def process_single_session(
"sample_id": sample_id,
"session": session_key,
"status": "error",
- "error": str(e)
+ "error": str(e),
}
# 写入错误日志
@@ -402,11 +441,46 @@ async def process_single_session(
async def run_import(args: argparse.Namespace) -> None:
- # 初始化信号量控制并发
- semaphore = asyncio.Semaphore(args.parallel)
-
session_range = parse_session_range(args.sessions) if args.sessions else None
+ # 如果指定了 question-index,自动从 evidence 推断需要的 session
+ if args.question_index is not None and not args.sessions:
+ # 加载数据获取 question 的 evidence
+ with open(args.input, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ # 获取 sample
+ sample_idx = args.sample if args.sample is not None else 0
+ if sample_idx < 0 or sample_idx >= len(data):
+ raise ValueError(f"sample index {sample_idx} out of range")
+ sample = data[sample_idx]
+
+ # 获取 question 的 evidence
+ qa_items = sample.get("qa", [])
+ if args.question_index < 0 or args.question_index >= len(qa_items):
+ raise ValueError(f"question index {args.question_index} out of range")
+ qa = qa_items[args.question_index]
+ evidence_list = qa.get("evidence", [])
+
+ # 从 evidence 提取 session 号 (D1:3 -> session 1)
+ session_nums = set()
+ for ev in evidence_list:
+ try:
+ # D1:3 -> session 1
+ sess_num = int(ev.split(":")[0][1:])
+ session_nums.add(sess_num)
+ except (ValueError, IndexError):
+ pass
+
+ if session_nums:
+ min_sess = min(session_nums)
+ max_sess = max(session_nums)
+ session_range = (min_sess, max_sess)
+ print(
+ f"[INFO] Auto-detected sessions from evidence: {min_sess}-{max_sess}",
+ file=sys.stderr,
+ )
+
# Handle ingest record operations
if args.clear_ingest_record:
ingest_record = {}
@@ -419,7 +493,10 @@ async def run_import(args: argparse.Namespace) -> None:
success_keys = set()
if not args.force_ingest:
success_keys = load_success_csv(args.success_csv)
- print(f"[INFO] Loaded {len(success_keys)} existing success records from {args.success_csv}", file=sys.stderr)
+ print(
+ f"[INFO] Loaded {len(success_keys)} existing success records from {args.success_csv}",
+ file=sys.stderr,
+ )
# Write run header
run_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
@@ -429,19 +506,20 @@ async def run_import(args: argparse.Namespace) -> None:
error_count = 0
total_embedding_tokens = 0
total_vlm_tokens = 0
- tasks: List[asyncio.Task] = []
if args.input.endswith(".json"):
# LoCoMo JSON format
samples = load_locomo_data(args.input, args.sample)
- for item in samples:
+ # 为每个 sample 创建独立的处理协程
+ async def process_sample(item):
sample_id = item["sample_id"]
sessions = build_session_messages(item, session_range)
print(f"\n=== Sample {sample_id} ===", file=sys.stderr)
print(f" {len(sessions)} session(s) to import", file=sys.stderr)
+ # 同一 sample 内串行处理所有 sessions
for sess in sessions:
meta = sess["meta"]
messages = sess["messages"]
@@ -449,29 +527,35 @@ async def run_import(args: argparse.Namespace) -> None:
label = f"{session_key} ({meta['date_time']})"
# Skip already ingested sessions unless force-ingest is enabled
- if not args.force_ingest and is_already_ingested(sample_id, session_key, ingest_record, success_keys):
- print(f" [{label}] [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr)
- skipped_count += 1
+ if not args.force_ingest and is_already_ingested(
+ sample_id, session_key, ingest_record, success_keys
+ ):
+ print(
+ f" [{label}] [SKIP] already imported (use --force-ingest to reprocess)",
+ file=sys.stderr,
+ )
continue
# Preview messages
- preview = " | ".join([f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]])
+ preview = " | ".join(
+ [f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]]
+ )
print(f" [{label}] {preview}", file=sys.stderr)
- # 创建异步任务
- task = asyncio.create_task(
- process_single_session(
- messages=messages,
- sample_id=sample_id,
- session_key=session_key,
- meta=meta,
- run_time=run_time,
- ingest_record=ingest_record,
- args=args,
- semaphore=semaphore
- )
+ # 串行执行(等待完成后再处理下一个 session)
+ await process_single_session(
+ messages=messages,
+ sample_id=sample_id,
+ session_key=session_key,
+ meta=meta,
+ run_time=run_time,
+ ingest_record=ingest_record,
+ args=args,
)
- tasks.append(task)
+
+ # 不同 sample 之间并行执行
+ tasks = [asyncio.create_task(process_sample(item)) for item in samples]
+ results = await asyncio.gather(*tasks, return_exceptions=True)
else:
# Plain text format
@@ -483,20 +567,21 @@ async def run_import(args: argparse.Namespace) -> None:
print(f"\n=== Text Session {idx} ===", file=sys.stderr)
# Skip already ingested sessions unless force-ingest is enabled
- if not args.force_ingest and is_already_ingested("txt", session_key, ingest_record, success_keys):
- print(f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr)
+ if not args.force_ingest and is_already_ingested(
+ "txt", session_key, ingest_record, success_keys
+ ):
+ print(
+ f" [SKIP] already imported (use --force-ingest to reprocess)", file=sys.stderr
+ )
skipped_count += 1
continue
# For plain text, all messages as user role
messages = []
for i, text in enumerate(session["messages"]):
- messages.append({
- "role": "user",
- "text": text.strip(),
- "speaker": "user",
- "index": i
- })
+ messages.append(
+ {"role": "user", "text": text.strip(), "speaker": "user", "index": i}
+ )
preview = " | ".join([f"{msg['role']}: {msg['text'][:30]}..." for msg in messages[:3]])
print(f" {preview}", file=sys.stderr)
@@ -511,30 +596,25 @@ async def run_import(args: argparse.Namespace) -> None:
run_time=run_time,
ingest_record=ingest_record,
args=args,
- semaphore=semaphore
)
)
tasks.append(task)
- # 等待所有任务完成
- print(f"\n[INFO] Starting import with {args.parallel} concurrent workers, {len(tasks)} tasks to process", file=sys.stderr)
- results = await asyncio.gather(*tasks, return_exceptions=True)
-
- # 统计结果
- for result in results:
- if isinstance(result, Exception):
- error_count += 1
- print(f"[UNEXPECTED ERROR] Task failed with exception: {result}", file=sys.stderr)
- if hasattr(result, '__traceback__'):
- traceback.print_exception(type(result), result, result.__traceback__, file=sys.stderr)
- continue
+ # 等待所有 sample 处理完成
+ print(
+ f"\n[INFO] Starting import with {len(tasks)} tasks to process",
+ file=sys.stderr,
+ )
+ await asyncio.gather(*tasks, return_exceptions=True)
- if result["status"] == "success":
- success_count += 1
- total_embedding_tokens += result["embedding_tokens"]
- total_vlm_tokens += result["vlm_tokens"]
- elif result["status"] == "error":
- error_count += 1
+ # 从成功 CSV 统计结果
+ if Path(args.success_csv).exists():
+ with open(args.success_csv, "r", encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ success_count += 1
+ total_embedding_tokens += int(row.get("embedding_tokens", 0) or 0)
+ total_vlm_tokens += int(row.get("vlm_tokens", 0) or 0)
# Final summary
total_processed = success_count + error_count + skipped_count
@@ -547,7 +627,10 @@ async def run_import(args: argparse.Namespace) -> None:
print(f"Total Embedding tokens: {total_embedding_tokens}", file=sys.stderr)
print(f"Total VLM tokens: {total_vlm_tokens}", file=sys.stderr)
if success_count > 0:
- print(f"Average Embedding per session: {total_embedding_tokens // success_count}", file=sys.stderr)
+ print(
+ f"Average Embedding per session: {total_embedding_tokens // success_count}",
+ file=sys.stderr,
+ )
print(f"Average VLM per session: {total_vlm_tokens // success_count}", file=sys.stderr)
print(f"\nResults saved to:", file=sys.stderr)
print(f" - Success records: {args.success_csv}", file=sys.stderr)
@@ -558,12 +641,17 @@ async def run_import(args: argparse.Namespace) -> None:
# CLI
# ---------------------------------------------------------------------------
+
def main():
+ # 基于脚本所在目录计算默认数据文件路径
+ script_dir = Path(__file__).parent.resolve()
+ default_input = str(script_dir / ".." / "data" / "locomo10.json")
+
parser = argparse.ArgumentParser(description="Import conversations into OpenViking")
parser.add_argument(
"--input",
- default="./test_data/locomo10.json",
- help="Path to input file (.txt or LoCoMo .json)"
+ default=default_input,
+ help="Path to input file (.txt or LoCoMo .json)",
)
parser.add_argument(
"--success-csv",
@@ -580,12 +668,6 @@ def main():
default="http://localhost:1933",
help="OpenViking service URL (default: http://localhost:1933)",
)
- parser.add_argument(
- "--parallel",
- type=int,
- default=5,
- help="Number of concurrent import workers (default: 5)",
- )
parser.add_argument(
"--sample",
type=int,
@@ -597,6 +679,12 @@ def main():
default=None,
help="LoCoMo JSON: session range, e.g. '1-4' or '3'. Default: all sessions.",
)
+ parser.add_argument(
+ "--question-index",
+ type=int,
+ default=None,
+ help="LoCoMo JSON: question index (0-based). When specified, auto-detect required sessions from question's evidence.",
+ )
parser.add_argument(
"--force-ingest",
action="store_true",
diff --git a/benchmark/locomo/vikingbot/judge.py b/benchmark/locomo/vikingbot/judge.py
index 0b2e171f6..65a510fc2 100644
--- a/benchmark/locomo/vikingbot/judge.py
+++ b/benchmark/locomo/vikingbot/judge.py
@@ -5,8 +5,11 @@
import asyncio
from openai import AsyncOpenAI
from dotenv import load_dotenv
+from pathlib import Path
-load_dotenv()
+# 加载本地环境变量文件
+env_file = Path.home() / ".openviking_benchmark_env"
+load_dotenv(env_file)
async def grade_answer(
@@ -112,7 +115,12 @@ async def main():
args = parser.parse_args()
if not args.token:
- print("Error: API token is required, set ARK_API_KEY env var or pass via --token")
+ print("Error: API token is required")
+ print("\n请通过以下方式设置 API key:")
+ print(" 1. 创建 ~/.openviking_benchmark_env 文件,内容如下:")
+ print(" ARK_API_KEY=你的key")
+ print(" 2. 或者通过 --token 参数传入")
+ print(" 3. 或者设置环境变量: export ARK_API_KEY=你的key")
exit(1)
# 加载数据
diff --git a/benchmark/locomo/vikingbot/run_eval.py b/benchmark/locomo/vikingbot/run_eval.py
index 1799aec49..2d38a0454 100644
--- a/benchmark/locomo/vikingbot/run_eval.py
+++ b/benchmark/locomo/vikingbot/run_eval.py
@@ -7,9 +7,93 @@
import re
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+from pathlib import Path
-def load_csv_qa(input_path: str, count: int | None = None) -> list[dict]:
+def get_evidence_text(evidence_list: list, sample: dict) -> list[str]:
+ """根据 evidence 列表获取原始对话文本
+
+ evidence 格式: ['D1:3', 'D2:5'] -> session_1 第3条, session_2 第5条
+ """
+ if not evidence_list:
+ return []
+
+ conv = sample.get("conversation", {})
+ results = []
+
+ for ev in evidence_list:
+ # 解析 D1:3 -> session_1, index 2
+ try:
+ parts = ev.split(":")
+ session_num = int(parts[0][1:]) # D1 -> 1
+ msg_index = int(parts[1]) - 1 # 3 -> index 2
+
+ session_key = f"session_{session_num}"
+ session_messages = conv.get(session_key, [])
+
+ if msg_index < len(session_messages):
+ msg = session_messages[msg_index]
+ text = msg.get("text", "")
+ speaker = msg.get("speaker", "")
+ results.append(f"{speaker}: {text}")
+ else:
+ results.append(f"[{ev}: out of range]")
+ except (ValueError, IndexError):
+ results.append(f"[{ev}: invalid format]")
+
+ return results
+
+
+def parse_locomo_datetime(date_str: str) -> datetime | None:
+ """解析 LoCoMo 时间格式,如 '1:56 pm on 8 May, 2023'"""
+ try:
+ # 移除时间部分,只保留日期 "8 May, 2023"
+ if " on " in date_str:
+ date_part = date_str.split(" on ")[-1]
+ return datetime.strptime(date_part.strip(), "%d %B, %Y")
+ except ValueError:
+ pass
+ return None
+
+
+def get_sample_question_time(sample: dict) -> str | None:
+ """从 sample 的 conversation 中提取最后一个有内容 session 的时间,返回 ISO 格式日期"""
+ conversation = sample.get("conversation", {})
+
+ # 找所有 session_N 字段(非 date_time)
+ session_keys = [
+ k for k in conversation.keys() if k.startswith("session_") and "date_time" not in k
+ ]
+ if not session_keys:
+ return None
+
+ # 按 session 编号排序,找到最后一个有内容的
+ def get_session_num(key):
+ try:
+ return int(key.replace("session_", ""))
+ except ValueError:
+ return 0
+
+ session_keys.sort(key=get_session_num, reverse=True)
+
+ for session_key in session_keys:
+ if conversation.get(session_key): # 有内容
+ # 找到对应的 date_time
+ session_num = get_session_num(session_key)
+ dt_key = f"session_{session_num}_date_time"
+ date_str = conversation.get(dt_key)
+ if date_str:
+ dt = parse_locomo_datetime(date_str)
+ if dt:
+ return dt.strftime("%Y-%m-%d")
+
+ return None
+
+
+def load_csv_qa(
+ input_path: str, count: int | None = None, default_time: str | None = None
+) -> list[dict]:
"""从CSV文件加载QA数据,取sample_id和question字段"""
qa_list = []
with open(input_path, "r", encoding="utf-8", newline="") as f:
@@ -22,6 +106,7 @@ def load_csv_qa(input_path: str, count: int | None = None) -> list[dict]:
"answer": row.get("answer", ""),
"category": "",
"evidence": [],
+ "question_time": default_time,
}
)
@@ -31,48 +116,139 @@ def load_csv_qa(input_path: str, count: int | None = None) -> list[dict]:
def load_locomo_qa(
- input_path: str, sample_index: int | None = None, count: int | None = None
+ input_path: str,
+ sample_index: int | None = None,
+ count: int | None = None,
+ default_time: str | None = None,
+ question_index: int | None = None,
+ invalid_questions: set | None = None,
) -> list[dict]:
- """加载LoCoMo数据集的QA部分,支持JSON和CSV格式"""
+ """加载LoCoMo数据集的QA部分,支持JSON和CSV格式
+
+ Args:
+ invalid_questions: 无效题目问题内容集合,用于标记无效题目
+ """
if input_path.lower().endswith(".csv"):
- return load_csv_qa(input_path, count)
+ return load_csv_qa(input_path, count, default_time)
# 原有JSON格式处理逻辑
with open(input_path, "r", encoding="utf-8") as f:
data = json.load(f)
qa_list = []
+ # 支持数字索引或 sample_id (如 "conv-26")
if sample_index is not None:
- if sample_index < 0 or sample_index >= len(data):
- raise ValueError(f"sample index {sample_index} out of range (0-{len(data) - 1})")
- samples = [data[sample_index]]
+ # 尝试解析为数字索引
+ try:
+ idx = int(sample_index)
+ if idx < 0 or idx >= len(data):
+ raise ValueError(f"sample index {idx} out of range (0-{len(data) - 1})")
+ samples = [data[idx]]
+ except ValueError:
+ # 尝试匹配 sample_id
+ matched = [s for s in data if s.get("sample_id") == sample_index]
+ if not matched:
+ raise ValueError(f"sample_id '{sample_index}' not found")
+ samples = matched
else:
samples = data
for sample in samples:
sample_id = sample.get("sample_id", "")
- for qa in sample.get("qa", []):
+ question_time = get_sample_question_time(sample)
+ qa_items = sample.get("qa", [])
+
+ # 如果指定了 question_index,只返回那一个问题
+ if question_index is not None:
+ if question_index < 0 or question_index >= len(qa_items):
+ raise ValueError(
+ f"question index {question_index} out of range (0-{len(qa_items) - 1})"
+ )
+ qa = qa_items[question_index]
+ evidence_list = qa.get("evidence", [])
+ question_id = f"{sample_id}_qa{question_index}"
qa_list.append(
{
"sample_id": sample_id,
+ "question_id": question_id,
+ "question_index": question_index,
"question": qa["question"],
"answer": qa["answer"],
"category": qa.get("category", ""),
- "evidence": qa.get("evidence", []),
+ "evidence": evidence_list,
+ "evidence_text": get_evidence_text(evidence_list, sample),
+ "question_time": question_time,
+ "is_invalid": qa["question"] in invalid_questions
+ if invalid_questions
+ else False,
}
)
+ else:
+ for q_idx, qa in enumerate(qa_items):
+ evidence_list = qa.get("evidence", [])
+ question_id = f"{sample_id}_qa{q_idx}"
+ qa_list.append(
+ {
+ "sample_id": sample_id,
+ "question_id": question_id,
+ "question_index": q_idx,
+ "question": qa["question"],
+ "answer": qa["answer"],
+ "category": qa.get("category", ""),
+ "evidence": evidence_list,
+ "evidence_text": get_evidence_text(evidence_list, sample),
+ "question_time": question_time,
+ "is_invalid": qa["question"] in invalid_questions
+ if invalid_questions
+ else False,
+ }
+ )
if count is not None:
qa_list = qa_list[:count]
return qa_list
-def run_vikingbot_chat(question: str) -> tuple[str, dict, float, int, list]:
+def run_vikingbot_chat(
+ question: str,
+ question_time: str | None = None,
+ sample_id: str | None = None,
+ question_id: str | None = None,
+) -> tuple[str, dict, float, int, list]:
"""执行vikingbot chat命令,返回回答、token使用情况、耗时(秒)、迭代次数、使用的工具列表"""
- input = f"Answer the question directly: {question}"
+ # 先执行 /new 命令清除会话
+ if sample_id:
+ new_cmd = [
+ "vikingbot",
+ "chat",
+ "-m",
+ "/new",
+ "-e",
+ "--sender",
+ sample_id,
+ "--session",
+ question_id,
+ ]
+ try:
+ # print(f'new_cmd={new_cmd}')
+ subprocess.run(new_cmd, capture_output=True, text=True, timeout=60)
+ except Exception:
+ # 忽略 /new 命令的错误
+ pass
+
+ # 如果有 question_time,注入到 prompt 中
+ if question_time:
+ input = f"Current date: {question_time}. Answer the question directly: {question}"
+ else:
+ input = f"Answer the question directly: {question}"
+
cmd = ["vikingbot", "chat", "-m", input, "-e"]
+ # 添加 --sender 作为 user_id,--session 作为 agent_id,实现访问独立 userspace
+ if sample_id:
+ cmd.extend(["--sender", sample_id, "--session", question_id])
start_time = time.time()
try:
+ # print(f'cmd={cmd}')
result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=300)
end_time = time.time()
time_cost = end_time - start_time
@@ -114,50 +290,95 @@ def run_vikingbot_chat(question: str) -> tuple[str, dict, float, int, list]:
def load_processed_questions(output_path: str) -> set:
- """加载已处理的问题集合,避免重复执行"""
- processed = set()
- if os.path.exists(output_path):
- with open(output_path, "r", encoding="utf-8", newline="") as f:
- reader = csv.DictReader(f)
- for row in reader:
- processed.add(row["question"])
- return processed
+ """加载已处理的问题集合(已禁用,每次重新运行)"""
+ # 注意:去重逻辑已禁用,每次运行都会重新执行所有问题
+ return set()
def main():
+ # 基于脚本所在目录计算默认数据文件路径
+ script_dir = Path(__file__).parent.resolve()
+ default_input = str(script_dir / ".." / "data" / "locomo10.json")
+ default_errors = str(script_dir / ".." / "data" / "errors.json")
+
parser = argparse.ArgumentParser(description="VikingBot QA evaluation script")
parser.add_argument(
"input",
nargs="?",
- default="./test_data/locomo10.json",
- help="Path to locomo10.json file, default: ./test_data/locomo10.json",
+ default=default_input,
+ help="Path to locomo10.json file",
)
parser.add_argument(
"--output",
default="./result/locomo_qa_result.csv",
help="Path to output csv file, default: ./result/locomo_qa_result.csv",
)
+ parser.add_argument(
+ "--errors",
+ default=default_errors,
+ help="Path to invalid questions JSON file",
+ )
parser.add_argument(
"--sample",
+ type=str,
+ default=None,
+ help="LoCoMo sample index (0-based) or sample_id (e.g., conv-26)",
+ )
+ parser.add_argument(
+ "--question-index",
type=int,
default=None,
- help="LoCoMo sample index (0-based), default all samples",
+ help="Question index (0-based) for single question testing",
)
parser.add_argument(
"--count", type=int, default=None, help="Number of QA questions to run, default all"
)
parser.add_argument(
- "--threads", type=int, default=5, help="Number of concurrent threads, default: 5"
+ "--threads", type=int, default=40, help="Number of concurrent threads, default: 40"
+ )
+ parser.add_argument(
+ "--update-mode",
+ action="store_true",
+ help="Update mode: if output file exists, update matching question_index rows instead of overwriting",
)
args = parser.parse_args()
+ # 如果指定了 question-index,自动设置 count=1
+ if args.question_index is not None and args.count is None:
+ args.count = 1
+
# 确保输出目录存在
os.makedirs(os.path.dirname(args.output), exist_ok=True)
- # 加载QA数据
- qa_list = load_locomo_qa(args.input, args.sample, args.count)
+ # 加载无效题目集合(按问题内容匹配,因为 errors.json 索引可能与数据不匹配)
+ invalid_questions = set()
+ errors_path = os.path.expanduser(args.errors)
+ if os.path.exists(errors_path):
+ with open(errors_path, "r", encoding="utf-8") as f:
+ errors_data = json.load(f)
+ # 按问题内容建立集合
+ if errors_data and isinstance(errors_data[0], dict):
+ invalid_questions = {item["question"] for item in errors_data}
+ else:
+ invalid_questions = set(errors_data)
+ print(f"Loaded {len(invalid_questions)} invalid questions from {errors_path}")
+ else:
+ print(f"No errors file found at {errors_path}, is_invalid will be False for all questions")
+
+ # 加载QA数据(所有题目,包括无效题目,只标记 is_invalid)
+ qa_list = load_locomo_qa(
+ args.input,
+ args.sample,
+ args.count,
+ question_index=args.question_index,
+ invalid_questions=invalid_questions,
+ )
total = len(qa_list)
+ # 过滤掉 category=5 的问题
+ qa_list = [qa for qa in qa_list if str(qa.get("category")) != "5"]
+ print(f"Filtered to {len(qa_list)} questions after removing category=5")
+
# 加载已处理的问题
processed_questions = load_processed_questions(args.output)
remaining = total - len(processed_questions)
@@ -167,77 +388,135 @@ def main():
fieldnames = [
"sample_id",
+ "question_index",
+ "result",
+ "is_invalid",
"question",
"answer",
+ "category",
+ "question_time",
+ "evidence",
+ "evidence_text",
"response",
"token_usage",
"time_cost",
"iteration",
"tools_used_names",
- "result",
]
- # 打开CSV文件,不存在则创建写表头,存在则追加
- file_exists = os.path.exists(args.output)
+
# 创建线程锁,确保多线程写文件安全
write_lock = threading.Lock()
- with open(args.output, "a+", encoding="utf-8", newline="") as f:
- writer = csv.DictWriter(f, fieldnames=fieldnames)
- if not file_exists:
+ # 存储处理后的新行
+ new_rows = []
+ processed_count = 0
+
+ # 过滤掉已经处理过的问题
+ remaining_qa = [qa for qa in qa_list if qa["question"] not in processed_questions]
+ remaining_count = len(remaining_qa)
+ print(
+ f"Starting evaluation with {args.threads} concurrent threads, {remaining_count} questions to process"
+ )
+
+ def process_qa(qa_item, idx, total_count):
+ """单个QA处理函数,供多线程调用"""
+ question = qa_item["question"]
+ answer = qa_item["answer"]
+ question_time = qa_item.get("question_time")
+ # 使用 question_id 作为 session_id,实现完全独立并行
+ sample_id = qa_item.get("sample_id")
+ question_id = qa_item.get("question_id")
+ print(f"Processing {idx}/{total_count}: {question[:60]}...")
+ if question_time:
+ print(f" [time context: {question_time}]")
+
+ response, token_usage, time_cost, iteration, tools_used_names = run_vikingbot_chat(
+ question, question_time, sample_id, question_id
+ )
+
+ row = {
+ "sample_id": qa_item["sample_id"],
+ "question_index": qa_item.get("question_index", ""),
+ "result": "",
+ "question": question,
+ "answer": answer,
+ "category": qa_item.get("category", ""),
+ "question_time": question_time or "",
+ "evidence": json.dumps(qa_item.get("evidence", [])),
+ "evidence_text": json.dumps(qa_item.get("evidence_text", [])),
+ "response": response,
+ "token_usage": json.dumps(token_usage, ensure_ascii=False),
+ "time_cost": round(time_cost, 2),
+ "iteration": iteration,
+ "tools_used_names": json.dumps(tools_used_names, ensure_ascii=False),
+ "is_invalid": qa_item.get("is_invalid", False),
+ }
+
+ # 线程安全的结果收集
+ with write_lock:
+ nonlocal processed_count
+ new_rows.append(row)
+ processed_questions.add(question)
+ processed_count += 1
+ print(f"Completed {processed_count}/{total_count}, time cost: {round(time_cost, 2)}s")
+ return True
+
+ # 使用线程池处理:全局并行,每个 question 独立 session
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
+ # 提交所有任务
+ futures = []
+ for idx, qa_item in enumerate(remaining_qa, 1):
+ futures.append(executor.submit(process_qa, qa_item, idx, remaining_count))
+
+ # 等待所有任务完成
+ for future in as_completed(futures):
+ try:
+ future.result()
+ except Exception as e:
+ print(f"Error processing QA item: {str(e)}")
+
+ # 写文件逻辑
+ if args.update_mode and os.path.exists(args.output):
+ # 更新模式:读取现有文件,更新匹配行
+ print(f"Update mode: updating existing file {args.output}")
+ with open(args.output, "r", encoding="utf-8", newline="") as f:
+ reader = csv.DictReader(f)
+ existing_rows = list(reader)
+ existing_fieldnames = reader.fieldnames or fieldnames
+
+ # 更新匹配的行
+ updated_count = 0
+ for new_row in new_rows:
+ q_idx = str(new_row.get("question_index", ""))
+ found = False
+ for row in existing_rows:
+ if str(row.get("question_index", "")) == q_idx:
+ row.update(new_row)
+ found = True
+ updated_count += 1
+ break
+ if not found:
+ existing_rows.append(new_row)
+ updated_count += 1
+
+ # 写回文件
+ with open(args.output, "w", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=existing_fieldnames)
+ writer.writeheader()
+ writer.writerows(existing_rows)
+
+ print(f"Updated {updated_count} rows in {args.output}")
+ else:
+ # 普通模式:覆盖写入
+ if os.path.exists(args.output):
+ os.remove(args.output)
+
+ with open(args.output, "w", encoding="utf-8", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
- f.flush()
-
- processed_count = len(processed_questions)
- # 过滤掉已经处理过的问题
- remaining_qa = [qa for qa in qa_list if qa["question"] not in processed_questions]
- remaining_count = len(remaining_qa)
- print(f"Starting evaluation with {args.threads} concurrent threads, {remaining_count} questions to process")
-
- def process_qa(qa_item, idx, total_count):
- """单个QA处理函数,供多线程调用"""
- question = qa_item["question"]
- answer = qa_item["answer"]
- print(f"Processing {idx}/{total_count}: {question[:60]}...")
-
- response, token_usage, time_cost, iteration, tools_used_names = run_vikingbot_chat(question)
-
- row = {
- "sample_id": qa_item["sample_id"],
- "question": question,
- "answer": answer,
- "response": response,
- "token_usage": json.dumps(token_usage, ensure_ascii=False),
- "time_cost": round(time_cost, 2),
- "iteration": iteration,
- "tools_used_names": json.dumps(tools_used_names, ensure_ascii=False),
- "result": "",
- }
-
- # 线程安全的文件写入
- with write_lock:
- nonlocal processed_count
- writer.writerow(row)
- f.flush()
- processed_questions.add(question)
- processed_count += 1
- print(f"Completed {processed_count}/{total}, time cost: {round(time_cost, 2)}s")
- return True
-
- # 使用线程池处理
- with ThreadPoolExecutor(max_workers=args.threads) as executor:
- # 提交所有任务
- futures = []
- for idx, qa_item in enumerate(remaining_qa, 1):
- futures.append(executor.submit(process_qa, qa_item, idx, remaining_count))
-
- # 等待所有任务完成
- for future in as_completed(futures):
- try:
- future.result()
- except Exception as e:
- print(f"Error processing QA item: {str(e)}")
-
- print(f"Evaluation completed, results saved to {args.output}")
+ writer.writerows(new_rows)
+
+ print(f"Evaluation completed, results saved to {args.output}")
if __name__ == "__main__":
diff --git a/benchmark/locomo/vikingbot/run_full_eval.sh b/benchmark/locomo/vikingbot/run_full_eval.sh
index 72d58f739..08746e774 100755
--- a/benchmark/locomo/vikingbot/run_full_eval.sh
+++ b/benchmark/locomo/vikingbot/run_full_eval.sh
@@ -2,29 +2,31 @@
set -e
-# Step 1: 导入数据
-echo "[1/4] 导入数据..."
-python bot/eval/locomo/import_to_ov.py --input ~/.test_data/locomo10.json --force-ingest
-
-echo "等待 3 分钟..."
-sleep 180
+# 基于脚本所在目录计算数据文件路径
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+INPUT_FILE="$SCRIPT_DIR/../data/locomo10.json"
+
+# Step 1: 导入数据(可跳过)
+if [ "$1" != "--skip-import" ]; then
+ echo "[1/4] 导入数据..."
+ python benchmark/locomo/vikingbot/import_to_ov.py --input $INPUT_FILE --force-ingest
+ echo "等待 1 分钟..."
+ sleep 60
+else
+ echo "[1/4] 跳过导入数据..."
+fi
# Step 2: 评估
echo "[2/4] 评估..."
-python bot/eval/locomo/run_eval.py ~/.test_data/locomo_qa_1528.csv --output ./result/locomo_result_multi_read_all.csv --threads 20
+python benchmark/locomo/vikingbot/run_eval.py $INPUT_FILE --output ./result/locomo_result_multi_read_all.csv
-echo "等待 3 分钟..."
-sleep 180
# Step 3: 裁判打分
echo "[3/4] 裁判打分..."
-python bot/eval/locomo/judge.py --token 0a2b68f6-4df3-48f5-81b9-f85fe0af9cef --input ./result/locomo_result_multi_read_all.csv --parallel 10
-
-echo "等待 3 分钟..."
-sleep 180
+python benchmark/locomo/vikingbot/judge.py --input ./result/locomo_result_multi_read_all.csv --parallel 40
# Step 4: 计算结果
echo "[4/4] 计算结果..."
-python bot/eval/locomo/stat_judge_result.py --input ./result/locomo_result_multi_read_all.csv
+python benchmark/locomo/vikingbot/stat_judge_result.py --input ./result/locomo_result_multi_read_all.csv
echo "完成!"
\ No newline at end of file
diff --git a/benchmark/locomo/vikingbot/stat_judge_result.py b/benchmark/locomo/vikingbot/stat_judge_result.py
index 2d7ebd8d6..298d0c708 100644
--- a/benchmark/locomo/vikingbot/stat_judge_result.py
+++ b/benchmark/locomo/vikingbot/stat_judge_result.py
@@ -17,6 +17,7 @@ def main():
print(f"Error: File not found: {args.input}")
exit(1)
+ # 统计所有题目 (排除 category=5)
correct = 0
wrong = 0
total_time = 0.0
@@ -26,23 +27,53 @@ def main():
valid_rows = 0
total_iteration = 0
+ # 统计 is_valid=True 的题目 (排除 category=5)
+ valid_only_correct = 0
+ valid_only_wrong = 0
+ valid_only_total_time = 0.0
+ valid_only_total_prompt_tokens = 0
+ valid_only_total_completion_tokens = 0
+ valid_only_total_tokens = 0
+ valid_only_rows = 0
+ valid_only_total_iteration = 0
+
with open(args.input, "r", encoding="utf-8", newline="") as f:
reader = csv.DictReader(f)
for row in reader:
+ # 检查 category 是否为 5,跳过
+ category = row.get("category", "")
+ if category == "5":
+ continue
+
valid_rows += 1
+
+ # 检查是否是无效题目
+ is_invalid = row.get("is_invalid", "").lower() == "true"
+ is_valid = not is_invalid
+
# 统计结果
result = row.get("result", "").strip().upper()
if result == "CORRECT":
correct += 1
+ if is_valid:
+ valid_only_correct += 1
elif result == "WRONG":
wrong += 1
+ if is_valid:
+ valid_only_wrong += 1
total_iteration += int(row.get("iteration", "0"))
+ if is_valid:
+ valid_only_total_iteration += int(row.get("iteration", "0"))
+
# 统计耗时
time_cost = row.get("time_cost", "")
if time_cost:
try:
- total_time += float(time_cost)
+ time_val = float(time_cost)
+ total_time += time_val
+ if is_valid:
+ valid_only_total_time += time_val
except (ValueError, TypeError):
pass
@@ -54,15 +85,45 @@ def main():
total_prompt_tokens += token_data.get("prompt_tokens", 0)
total_completion_tokens += token_data.get("completion_tokens", 0)
total_tokens += token_data.get("total_tokens", 0)
+
+ if is_valid:
+ valid_only_total_prompt_tokens += token_data.get("prompt_tokens", 0)
+ valid_only_total_completion_tokens += token_data.get("completion_tokens", 0)
+ valid_only_total_tokens += token_data.get("total_tokens", 0)
except json.JSONDecodeError:
pass
+ if is_valid:
+ valid_only_rows += 1
+
total_graded = correct + wrong
accuracy = correct / total_graded if total_graded > 0 else 0.0
avg_time = total_time / valid_rows if valid_rows > 0 else 0.0
+ # is_valid=True 题目的统计 (排除 category=5)
+ valid_only_total_graded = valid_only_correct + valid_only_wrong
+ valid_only_accuracy = (
+ valid_only_correct / valid_only_total_graded if valid_only_total_graded > 0 else 0.0
+ )
+ valid_only_avg_time = valid_only_total_time / valid_only_rows if valid_only_rows > 0 else 0.0
+
+ # 平均 token 消耗
+ avg_prompt_tokens = total_prompt_tokens / valid_rows if valid_rows > 0 else 0.0
+ avg_completion_tokens = total_completion_tokens / valid_rows if valid_rows > 0 else 0.0
+ avg_total_tokens = total_tokens / valid_rows if valid_rows > 0 else 0.0
+
+ valid_only_avg_prompt_tokens = (
+ valid_only_total_prompt_tokens / valid_only_rows if valid_only_rows > 0 else 0.0
+ )
+ valid_only_avg_completion_tokens = (
+ valid_only_total_completion_tokens / valid_only_rows if valid_only_rows > 0 else 0.0
+ )
+ valid_only_avg_total_tokens = (
+ valid_only_total_tokens / valid_only_rows if valid_only_rows > 0 else 0.0
+ )
+
output_lines = [
- "=== Judge Result Statistics ===",
+ "=== Judge Result Statistics (excluding category=5) ===",
f"Total rows: {valid_rows}",
f"Graded rows: {total_graded}",
f"Correct: {correct}",
@@ -74,6 +135,25 @@ def main():
f" Total prompt tokens: {total_prompt_tokens}",
f" Total completion tokens: {total_completion_tokens}",
f" Total tokens: {total_tokens}",
+ f" Avg prompt tokens: {avg_prompt_tokens:.2f}",
+ f" Avg completion tokens: {avg_completion_tokens:.2f}",
+ f" Avg total tokens: {avg_total_tokens:.2f}",
+ "",
+ "=== Valid Questions Only (is_valid=True, excluding category=5) ===",
+ f"Valid rows: {valid_only_rows}",
+ f"Valid graded rows: {valid_only_total_graded}",
+ f"Valid correct: {valid_only_correct}",
+ f"Valid wrong: {valid_only_wrong}",
+ f"Valid accuracy: {valid_only_accuracy:.2%}",
+ f"\nAverage time cost: {valid_only_avg_time:.2f}s",
+ f"\nAverage iteration: {valid_only_total_iteration / valid_only_rows if valid_only_rows > 0 else 0.0:.2f}",
+ f"\nToken usage:",
+ f" Total prompt tokens: {valid_only_total_prompt_tokens}",
+ f" Total completion tokens: {valid_only_total_completion_tokens}",
+ f" Total tokens: {valid_only_total_tokens}",
+ f" Avg prompt tokens: {valid_only_avg_prompt_tokens:.2f}",
+ f" Avg completion tokens: {valid_only_avg_completion_tokens:.2f}",
+ f" Avg total tokens: {valid_only_avg_total_tokens:.2f}",
]
# 打印到控制台
diff --git a/bot/README.md b/bot/README.md
index a7d994797..0661cd3ab 100644
--- a/bot/README.md
+++ b/bot/README.md
@@ -258,6 +258,7 @@ Provider configuration is read from OpenViking config (`vlm` section in `ov.conf
> - **Groq** provides free voice transcription via Whisper. If configured, Telegram voice messages will be automatically transcribed.
> - **Zhipu Coding Plan**: If you're on Zhipu's coding plan, set `"apiBase": "https://open.bigmodel.cn/api/coding/paas/v4"` in your zhipu provider config.
> - **MiniMax (Mainland China)**: If your API key is from MiniMax's mainland China platform (minimaxi.com), set `"apiBase": "https://api.minimaxi.com/v1"` in your minimax provider config.
+> - **MiniMax Recommended Models**: `MiniMax-M2.7` (peak performance) and `MiniMax-M2.7-highspeed` (faster, more agile). Configure with `"model": "MiniMax-M2.7"` in your agent config.
| Provider | Purpose | Get API Key |
|----------|---------|-------------|
diff --git a/bot/scripts/restart_openviking_server.sh b/bot/scripts/restart_openviking_server.sh
index d8f1caee4..167d5a0a7 100755
--- a/bot/scripts/restart_openviking_server.sh
+++ b/bot/scripts/restart_openviking_server.sh
@@ -42,8 +42,29 @@ echo "Bot URL: $BOT_URL"
echo "Bot Port: $BOT_PORT"
echo ""
-# Step 0: Kill existing vikingbot processes
-echo "Step 0: Stopping existing vikingbot processes..."
+# Step 0: Kill process on port and delete data directory
+echo "Step 0: Killing process on port $PORT..."
+if lsof -i :"$PORT" > /dev/null 2>&1; then
+ pid=$(lsof -ti :"$PORT")
+ kill -9 "$pid" 2>/dev/null || true
+ sleep 1
+ echo " ✓ Killed process $pid on port $PORT"
+else
+ echo " ✓ No process found on port $PORT"
+fi
+
+echo ""
+echo "Step 0b: Deleting data directory /Users/bytedance/.openviking/data..."
+if [ -d "/Users/bytedance/.openviking/data" ]; then
+ rm -rf /Users/bytedance/.openviking/data
+ echo " ✓ Deleted /Users/bytedance/.openviking/data"
+else
+ echo " ✓ Data directory does not exist"
+fi
+
+# Kill existing vikingbot processes
+echo ""
+echo "Step 0c: Stopping existing vikingbot processes..."
if pgrep -f "vikingbot.*openapi" > /dev/null 2>&1 || pgrep -f "vikingbot.*gateway" > /dev/null 2>&1; then
pkill -f "vikingbot.*openapi" 2>/dev/null || true
pkill -f "vikingbot.*gateway" 2>/dev/null || true
@@ -53,36 +74,20 @@ else
echo " ✓ No existing vikingbot processes found"
fi
-# Step 1: Kill existing openviking-server processes
-echo "Step 1: Stopping existing openviking-server processes..."
-if pgrep -f "openviking-server" > /dev/null 2>&1; then
- pkill -f "openviking-server" 2>/dev/null || true
- sleep 2
- # Force kill if still running
- if pgrep -f "openviking-server" > /dev/null 2>&1; then
- echo " Force killing remaining processes..."
- pkill -9 -f "openviking-server" 2>/dev/null || true
- sleep 1
- fi
- echo " ✓ Stopped existing processes"
-else
- echo " ✓ No existing processes found"
-fi
-
-# Step 2: Wait for port to be released
+# Step 1: Verify port is free
echo ""
-echo "Step 2: Waiting for port $PORT to be released..."
-for i in {1..10}; do
- if ! lsof -i :"$PORT" > /dev/null 2>&1; then
- echo " ✓ Port $PORT is free"
- break
- fi
+echo "Step 1: Verifying port $PORT is free..."
+if lsof -i :"$PORT" > /dev/null 2>&1; then
+ echo " ✗ Port $PORT is still in use, trying to force kill..."
+ pid=$(lsof -ti :"$PORT")
+ kill -9 "$pid" 2>/dev/null || true
sleep 1
-done
+fi
+echo " ✓ Port $PORT is free"
-# Step 3: Start openviking-server with --with-bot
+# Step 2: Start openviking-server with --with-bot
echo ""
-echo "Step 3: Starting openviking-server with Bot API..."
+echo "Step 2: Starting openviking-server with Bot API..."
echo " Command: openviking-server --with-bot --port $PORT --bot-url $BOT_URL"
echo ""
@@ -102,9 +107,9 @@ openviking-server \
SERVER_PID=$!
echo " Server PID: $SERVER_PID"
-# Step 4: Wait for server to start
+# Step 3: Wait for server to start
echo ""
-echo "Step 4: Waiting for server to be ready..."
+echo "Step 3: Waiting for server to be ready..."
sleep 3
# First check if server is responding at all
diff --git a/bot/scripts/test_restart_openviking_server.sh b/bot/scripts/test_restart_openviking_server.sh
index ef8a86af3..547d62a6d 100755
--- a/bot/scripts/test_restart_openviking_server.sh
+++ b/bot/scripts/test_restart_openviking_server.sh
@@ -55,17 +55,9 @@ fi
mkdir -p "$TEST_DATA_DIR"
echo " ✓ Created clean $TEST_DATA_DIR"
-# Step 1: Kill existing vikingbot processes
+# Step 1: Clean up test data directory (skip vikingbot kill)
echo ""
-echo "Step 1: Stopping existing vikingbot processes..."
-if pgrep -f "vikingbot.*openapi" > /dev/null 2>&1 || pgrep -f "vikingbot.*gateway" > /dev/null 2>&1; then
- pkill -f "vikingbot.*openapi" 2>/dev/null || true
- pkill -f "vikingbot.*gateway" 2>/dev/null || true
- sleep 2
- echo " ✓ Stopped existing vikingbot processes"
-else
- echo " ✓ No existing vikingbot processes found"
-fi
+echo "Step 1: Skipping vikingbot kill (will only kill by port)..."
# Step 2: Kill existing openviking-server on specific port
echo ""
@@ -73,8 +65,6 @@ echo "Step 2: Stopping openviking-server on port $PORT..."
PID=$(lsof -ti :$PORT 2>/dev/null || true)
if [ -n "$PID" ]; then
echo " Found PID: $PID"
- pkill -f "vikingbot.*openapi" 2>/dev/null || true
- pkill -f "vikingbot.*gateway" 2>/dev/null || true
kill $PID 2>/dev/null || true
sleep 2
# Force kill if still running
@@ -124,10 +114,7 @@ echo ""
export OPENVIKING_CONFIG_FILE="$TEST_CONFIG"
# Start server
-openviking-server \
- --with-bot \
- --port "$PORT" \
- --bot-url "$BOT_URL"
+openviking-server --port "$PORT"
SERVER_PID=$!
echo " Server PID: $SERVER_PID"
diff --git a/bot/tests/test_minimax_provider.py b/bot/tests/test_minimax_provider.py
new file mode 100644
index 000000000..26c1ee10b
--- /dev/null
+++ b/bot/tests/test_minimax_provider.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
+# SPDX-License-Identifier: AGPL-3.0
+"""Tests for MiniMax provider support (MiniMax-M2.7, MiniMax-M2.7-highspeed)."""
+
+import pytest
+
+from vikingbot.providers.registry import find_by_model, find_by_name, PROVIDERS, ProviderSpec
+
+
+class TestMiniMaxRegistry:
+ """Tests for MiniMax provider registry entries."""
+
+ def test_minimax_spec_exists(self):
+ """MiniMax must be registered in the PROVIDERS tuple."""
+ spec = find_by_name("minimax")
+ assert spec is not None, "MiniMax provider not found in registry"
+ assert isinstance(spec, ProviderSpec)
+
+ def test_minimax_spec_fields(self):
+ """Verify MiniMax ProviderSpec has correct field values."""
+ spec = find_by_name("minimax")
+ assert spec.name == "minimax"
+ assert spec.env_key == "MINIMAX_API_KEY"
+ assert spec.display_name == "MiniMax"
+ assert spec.litellm_prefix == "minimax"
+ assert "minimax/" in spec.skip_prefixes
+ assert spec.default_api_base == "https://api.minimax.io/v1"
+ assert not spec.is_gateway
+ assert not spec.is_local
+
+ def test_minimax_m2_7_matched_by_keyword(self):
+ """MiniMax-M2.7 should be matched to the minimax ProviderSpec."""
+ spec = find_by_model("MiniMax-M2.7")
+ assert spec is not None, "MiniMax-M2.7 not matched to any provider"
+ assert spec.name == "minimax"
+
+ def test_minimax_m2_7_highspeed_matched_by_keyword(self):
+ """MiniMax-M2.7-highspeed should be matched to the minimax ProviderSpec."""
+ spec = find_by_model("MiniMax-M2.7-highspeed")
+ assert spec is not None, "MiniMax-M2.7-highspeed not matched to any provider"
+ assert spec.name == "minimax"
+
+ def test_minimax_keyword_is_case_insensitive(self):
+ """Model name matching must be case-insensitive."""
+ for model in ("minimax-m2.7", "MINIMAX-M2.7", "MiniMax-M2.7"):
+ spec = find_by_model(model)
+ assert spec is not None, f"{model!r} not matched"
+ assert spec.name == "minimax"
+
+ def test_minimax_api_base_uses_international_domain(self):
+ """Default API base must point to the international endpoint."""
+ spec = find_by_name("minimax")
+ assert spec.default_api_base.startswith("https://api.minimax.io"), (
+ "Default base URL must use international domain api.minimax.io, "
+ "not the mainland China domain api.minimaxi.com"
+ )
+
+
+class TestMiniMaxModelPrefixResolution:
+ """Tests for LiteLLM model prefix resolution with MiniMax models."""
+
+ def _resolve_model(self, model: str) -> str:
+ """Reproduce _resolve_model logic from LiteLLMProvider."""
+ from vikingbot.providers.registry import find_by_model
+
+ spec = find_by_model(model)
+ if spec and spec.litellm_prefix:
+ if not any(model.startswith(s) for s in spec.skip_prefixes):
+ model = f"{spec.litellm_prefix}/{model}"
+ return model
+
+ def test_m2_7_gets_minimax_prefix(self):
+ """MiniMax-M2.7 should be prefixed as minimax/MiniMax-M2.7."""
+ resolved = self._resolve_model("MiniMax-M2.7")
+ assert resolved == "minimax/MiniMax-M2.7"
+
+ def test_m2_7_highspeed_gets_minimax_prefix(self):
+ """MiniMax-M2.7-highspeed should be prefixed as minimax/MiniMax-M2.7-highspeed."""
+ resolved = self._resolve_model("MiniMax-M2.7-highspeed")
+ assert resolved == "minimax/MiniMax-M2.7-highspeed"
+
+ def test_already_prefixed_model_not_double_prefixed(self):
+ """Model already carrying minimax/ prefix must not be double-prefixed."""
+ resolved = self._resolve_model("minimax/MiniMax-M2.7")
+ assert resolved == "minimax/MiniMax-M2.7"
+
+
+class TestMiniMaxSystemMessageHandling:
+ """Tests for MiniMax system message merging in both LLM providers."""
+
+ # ------------------------------------------------------------------ #
+ # Helpers
+ # ------------------------------------------------------------------ #
+
+ def _handle_system_litellm(self, model: str, messages: list[dict]) -> list[dict]:
+ """Call the LiteLLMProvider._handle_system_message without a real provider."""
+ from vikingbot.providers.litellm_provider import LiteLLMProvider
+
+ # Instantiate without a real API key — we only call the static-ish helper.
+ provider = LiteLLMProvider.__new__(LiteLLMProvider)
+ provider._gateway = None
+ return provider._handle_system_message(model, messages)
+
+ def _handle_system_openai_compat(self, model: str, messages: list[dict]) -> list[dict]:
+ """Call the OpenAICompatibleProvider._handle_system_message."""
+ from vikingbot.providers.openai_compatible_provider import OpenAICompatibleProvider
+
+ provider = OpenAICompatibleProvider.__new__(OpenAICompatibleProvider)
+ return provider._handle_system_message(model, messages)
+
+ # ------------------------------------------------------------------ #
+ # LiteLLMProvider tests (model name after prefix resolution)
+ # ------------------------------------------------------------------ #
+
+ def test_litellm_system_message_merged_for_m2_7(self):
+ """System message is merged into the first user message for minimax/MiniMax-M2.7."""
+ messages = [
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Hello!"},
+ ]
+ result = self._handle_system_litellm("minimax/MiniMax-M2.7", messages)
+ assert all(m["role"] != "system" for m in result), "System message not removed"
+ user_content = next(m["content"] for m in result if m["role"] == "user")
+ assert "You are a helpful assistant." in user_content
+ assert "Hello!" in user_content
+
+ def test_litellm_system_message_merged_for_m2_7_highspeed(self):
+ """System message is merged for minimax/MiniMax-M2.7-highspeed."""
+ messages = [
+ {"role": "system", "content": "Be concise."},
+ {"role": "user", "content": "What is 2+2?"},
+ ]
+ result = self._handle_system_litellm("minimax/MiniMax-M2.7-highspeed", messages)
+ assert all(m["role"] != "system" for m in result)
+ user_content = next(m["content"] for m in result if m["role"] == "user")
+ assert "Be concise." in user_content
+
+ def test_litellm_multiple_system_messages_combined(self):
+ """Multiple system messages are combined before merging."""
+ messages = [
+ {"role": "system", "content": "Rule 1."},
+ {"role": "system", "content": "Rule 2."},
+ {"role": "user", "content": "Go!"},
+ ]
+ result = self._handle_system_litellm("minimax/MiniMax-M2.7", messages)
+ assert all(m["role"] != "system" for m in result)
+ user_content = next(m["content"] for m in result if m["role"] == "user")
+ assert "Rule 1." in user_content
+ assert "Rule 2." in user_content
+
+ def test_litellm_no_system_message_passthrough(self):
+ """Messages without a system role are returned unchanged."""
+ messages = [
+ {"role": "user", "content": "Hello!"},
+ {"role": "assistant", "content": "Hi there!"},
+ ]
+ result = self._handle_system_litellm("minimax/MiniMax-M2.7", messages)
+ assert result == messages
+
+ def test_litellm_non_minimax_model_not_affected(self):
+ """System messages for non-MiniMax models must not be touched."""
+ messages = [
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Hello!"},
+ ]
+ result = self._handle_system_litellm("anthropic/claude-opus-4-5", messages)
+ assert result == messages
+
+ # ------------------------------------------------------------------ #
+ # OpenAICompatibleProvider tests (raw model name, no prefix)
+ # ------------------------------------------------------------------ #
+
+ def test_openai_compat_system_message_merged_for_m2_7(self):
+ """System message is merged for MiniMax-M2.7 in OpenAICompatibleProvider."""
+ messages = [
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Hello!"},
+ ]
+ result = self._handle_system_openai_compat("MiniMax-M2.7", messages)
+ assert all(m["role"] != "system" for m in result)
+ user_content = next(m["content"] for m in result if m["role"] == "user")
+ assert "You are a helpful assistant." in user_content
+
+ def test_openai_compat_no_system_message_passthrough(self):
+ """Messages without a system role pass through unchanged."""
+ messages = [
+ {"role": "user", "content": "Hello!"},
+ ]
+ result = self._handle_system_openai_compat("MiniMax-M2.7", messages)
+ assert result == messages
+
+ def test_openai_compat_system_only_creates_user_message(self):
+ """System-only messages create a synthetic user message."""
+ messages = [
+ {"role": "system", "content": "You are a bot."},
+ ]
+ result = self._handle_system_openai_compat("MiniMax-M2.7", messages)
+ assert any(m["role"] == "user" for m in result)
+ assert all(m["role"] != "system" for m in result)
diff --git a/bot/vikingbot/agent/context.py b/bot/vikingbot/agent/context.py
index 54a5b47ae..a687d2a19 100644
--- a/bot/vikingbot/agent/context.py
+++ b/bot/vikingbot/agent/context.py
@@ -68,7 +68,7 @@ def _ensure_templates_once(self):
self._templates_ensured = True
async def build_system_prompt(
- self, session_key: SessionKey, current_message: str, history: list[dict[str, Any]]
+ self, session_key: SessionKey, current_message: str, history: list[dict[str, Any]], ov_tools_enable: bool = True
) -> str:
"""
Build the system prompt from bootstrap files, memory, and skills.
@@ -95,18 +95,6 @@ async def build_system_prompt(
f"## Sandbox Environment\n\nYou are running in a sandboxed environment. All file operations and command execution are restricted to the sandbox directory.\nThe sandbox root directory is `{sandbox_cwd}` (use relative paths for all operations)."
)
- # Add session context
- session_context = "## Current Session"
- if session_key and session_key.type:
- session_context += f"\nChannel: {session_key.type}"
- if self._is_group_chat:
- session_context += (
- f"\n**Group chat session.** Current user ID: {self._sender_id}\n"
- f"Multiple users can participate in this conversation. Each user message is prefixed with the user ID in brackets like @. "
- f"You should pay attention to who is speaking to understand the context. "
- )
- parts.append(session_context)
-
# Bootstrap files
bootstrap = self._load_bootstrap_files()
if bootstrap:
@@ -135,22 +123,23 @@ async def build_system_prompt(
{skills_summary}""")
- # Viking user profile
- start = _time.time()
- profile = await self.memory.get_viking_user_profile(
- workspace_id=workspace_id, user_id=self._sender_id
- )
- cost = round(_time.time() - start, 2)
- logger.info(
- f"[READ_USER_PROFILE]: cost {cost}s, profile={profile[:50] if profile else 'None'}"
- )
- if profile:
- parts.append(f"## Current user's information\n{profile}")
+ # Viking user profile (only if ov tools are enabled)
+ if ov_tools_enable:
+ start = _time.time()
+ profile = await self.memory.get_viking_user_profile(
+ workspace_id=workspace_id, user_id=self._sender_id
+ )
+ cost = round(_time.time() - start, 2)
+ logger.info(
+ f"[READ_USER_PROFILE]: cost {cost}s, profile={profile[:50] if profile else 'None'}"
+ )
+ if profile:
+ parts.append(f"## Current user's information\n{profile}")
return "\n\n---\n\n".join(parts)
async def _build_user_memory(
- self, session_key: SessionKey, current_message: str, sender_id: str
+ self, session_key: SessionKey, current_message: str, sender_id: str, ov_tools_enable: bool = True
) -> str:
"""
Build the system prompt from bootstrap files, memory, and skills.
@@ -166,23 +155,38 @@ async def _build_user_memory(
tz = _time.strftime("%Z") or "UTC"
parts.append(f"## Current Time: {now} ({tz})")
+ # Add session context
+ session_context = "## Current Session"
+ if session_key and session_key.type:
+ session_context += f"\nChannel: {session_key.type}"
+ if self._is_group_chat:
+ session_context += (
+ f"\n**Group chat session.** Current user ID: {self._sender_id}\n"
+ f"Multiple users can participate in this conversation. Each user message is prefixed with the user ID in brackets like @. "
+ f"You should pay attention to who is speaking to understand the context. "
+ )
+ parts.append(session_context)
+
workspace_id = self.sandbox_manager.to_workspace_id(session_key)
- # Viking agent memory
- start = _time.time()
- viking_memory = await self.memory.get_viking_memory_context(
- current_message=current_message, workspace_id=workspace_id, sender_id=sender_id
- )
- cost = round(_time.time() - start, 2)
- logger.info(
- f"[READ_USER_MEMORY]: cost {cost}s, memory={viking_memory[:50] if viking_memory else 'None'}"
- )
- if viking_memory:
- parts.append(
- f"## Long term memory about this conversation.\n"
- f"You do not need to use tool to search again:\n"
- f"{viking_memory}"
+ # Viking agent memory (only if ov tools are enabled)
+ if ov_tools_enable:
+ start = _time.time()
+ viking_memory = await self.memory.get_viking_memory_context(
+ current_message=current_message, workspace_id=workspace_id, sender_id=sender_id
+ )
+ logger.info(f'viking_memory={viking_memory}')
+ cost = round(_time.time() - start, 2)
+ logger.info(
+ f"[READ_USER_MEMORY]: cost {cost}s, memory={viking_memory[:50] if viking_memory else 'None'}"
)
+ if viking_memory:
+ parts.append(
+ f"## openviking_search(query=[user_query])\n"
+ f"{viking_memory}"
+ )
+
+ parts.append("Reply in the same language as the user's query, ignoring the language of the reference materials. User's query:")
return "\n\n---\n\n".join(parts)
@@ -220,11 +224,10 @@ async def _get_identity(self, session_key: SessionKey) -> str:
2. OpenViking workspace: managed via OpenViking tools
- Custom skills: {workspace_display}/skills/{{skill-name}}/SKILL.md
-IMPORTANT: When responding to direct questions or conversations, reply directly with your text response.
-Please keep your reply in the same language as the user's message.
-Only use the 'message' tool when you need to send a message to a specific chat channel (like WhatsApp).
-For normal conversation, just respond with text - do not call the message tool.
-Always be helpful, accurate, and concise. When using tools, think step by step: what you know, what you need, and why you chose this tool.
+IMPORTANT:
+- When responding to direct questions or conversations, reply directly with your text response.
+- Only use the 'message' tool when you need to send a message to a specific chat channel (like WhatsApp).For normal conversation, just respond with text - do not call the message tool.
+- Always be helpful, accurate, and concise. When using tools, think step by step: what you know, what you need, and why you chose this tool.
## Memory
- Remember important facts: using openviking_memory_commit tool to commit"""
@@ -248,6 +251,7 @@ async def build_messages(
current_message: str,
media: list[str] | None = None,
session_key: SessionKey | None = None,
+ ov_tools_enable: bool = True,
) -> list[dict[str, Any]]:
"""
Build the complete message list for an LLM call.
@@ -257,6 +261,7 @@ async def build_messages(
current_message: The new user message.
media: Optional list of local file paths for images/media.
session_key: Optional session key.
+ ov_tools_enable: Whether to enable OpenViking tools and memory.
Returns:
List of messages including system prompt.
@@ -264,7 +269,7 @@ async def build_messages(
messages = []
# System prompt
- system_prompt = await self.build_system_prompt(session_key, current_message, history)
+ system_prompt = await self.build_system_prompt(session_key, current_message, history, ov_tools_enable=ov_tools_enable)
messages.append({"role": "system", "content": system_prompt})
# logger.debug(f"system_prompt: {system_prompt}")
@@ -273,7 +278,7 @@ async def build_messages(
messages.extend(history)
# User
- user_info = await self._build_user_memory(session_key, current_message, self._sender_id)
+ user_info = await self._build_user_memory(session_key, current_message, self._sender_id, ov_tools_enable=ov_tools_enable)
messages.append({"role": "user", "content": user_info})
# Current message (with optional image attachments)
diff --git a/bot/vikingbot/agent/loop.py b/bot/vikingbot/agent/loop.py
index 2bfef8e51..df0d31478 100644
--- a/bot/vikingbot/agent/loop.py
+++ b/bot/vikingbot/agent/loop.py
@@ -217,6 +217,7 @@ async def _run_agent_loop(
session_key: SessionKey,
publish_events: bool = True,
sender_id: str | None = None,
+ ov_tools_enable: bool = True,
) -> tuple[str | None, list[dict], dict[str, int], int]:
"""
Run the core agent loop: call LLM, execute tools, repeat until done.
@@ -225,6 +226,7 @@ async def _run_agent_loop(
messages: Initial message list
session_key: Session key for tool execution context
publish_events: Whether to publish ITERATION/REASONING/TOOL_CALL events to the bus
+ ov_tools_enable: Whether to enable OpenViking tools for this session
Returns:
tuple of (final_content, tools_used)
@@ -252,7 +254,7 @@ async def _run_agent_loop(
response = await self.provider.chat(
messages=messages,
- tools=self.tools.get_definitions(),
+ tools=self.tools.get_definitions(ov_tools_enable=ov_tools_enable),
model=self.model,
session_id=session_key.safe_name(),
)
@@ -396,7 +398,7 @@ async def check_long_running():
max_ticks = 7
while not long_running_notified and tick_count < max_ticks:
- await asyncio.sleep(40)
+ await asyncio.sleep(60)
if long_running_notified:
break
if msg.metadata:
@@ -440,6 +442,18 @@ async def check_long_running():
else:
cmd = msg.content.strip().lower()
if cmd == "/new":
+ # Clone session for async consolidation, then immediately clear original
+ if not self._check_cmd_auth(msg):
+ return OutboundMessage(
+ session_key=msg.session_key, content="🐈 Sorry, you are not authorized to use this command.",
+ metadata=msg.metadata
+ )
+ session.clear()
+ await self.sessions.save(session)
+ return OutboundMessage(
+ session_key=msg.session_key, content="🐈 New session started. Session history droped.", metadata=msg.metadata
+ )
+ elif cmd == "/compact":
# Clone session for async consolidation, then immediately clear original
if not self._check_cmd_auth(msg):
return OutboundMessage(
@@ -484,7 +498,7 @@ async def check_long_running():
await self.sessions.save(session)
return OutboundMessage(
session_key=msg.session_key,
- content=None,
+ content="",
metadata=msg.metadata,
event_type=OutboundEventType.NO_REPLY,
)
@@ -514,14 +528,16 @@ async def check_long_running():
eval=self._eval,
)
+ ov_tools_enable = self._get_ov_tools_enable(session_key)
# Build initial messages (use get_history for LLM-formatted messages)
messages = await message_context.build_messages(
history=session.get_history(),
current_message=msg.content,
media=msg.media if msg.media else None,
session_key=msg.session_key,
+ ov_tools_enable=ov_tools_enable,
)
- # logger.info(f"New messages: {messages}")
+ logger.info(f"New messages: {messages}")
# Run agent loop
final_content, tools_used, token_usage, iteration = await self._run_agent_loop(
@@ -529,6 +545,7 @@ async def check_long_running():
session_key=session_key,
publish_events=True,
sender_id=msg.sender_id,
+ ov_tools_enable=ov_tools_enable,
)
# Log response preview
@@ -565,6 +582,29 @@ async def check_long_running():
except asyncio.CancelledError:
pass
+ def _get_channel_config(self, session_key: SessionKey):
+ """Get channel config for a session key.
+
+ Args:
+ session_key: Session key to get channel config for
+
+ Returns:
+ Channel config object if found, None otherwise
+ """
+ return self.config.channels_config.get_channel_by_key(session_key.channel_key())
+
+ def _get_ov_tools_enable(self, session_key: SessionKey) -> bool:
+ """Get ov_tools_enable setting from channel config.
+
+ Args:
+ session_key: Session key to get channel config for
+
+ Returns:
+ True if ov tools should be enabled, False otherwise
+ """
+ channel_config = self._get_channel_config(session_key)
+ return getattr(channel_config, "ov_tools_enable", True) if channel_config else True
+
async def _process_system_message(self, msg: InboundMessage) -> OutboundMessage | None:
"""
Process a system message (e.g., subagent announce).
@@ -577,15 +617,23 @@ async def _process_system_message(self, msg: InboundMessage) -> OutboundMessage
session = self.sessions.get_or_create(msg.session_key)
# Build messages with the announce content
+ ov_tools_enable = self._get_ov_tools_enable(msg.session_key)
messages = await self.context.build_messages(
- history=session.get_history(), current_message=msg.content, session_key=msg.session_key
+ history=session.get_history(),
+ current_message=msg.content,
+ session_key=msg.session_key,
+ ov_tools_enable=ov_tools_enable,
)
+ # Check channel config for ov_tools_enable setting
+ ov_tools_enable = self._get_ov_tools_enable(msg.session_key)
+
# Run agent loop (no events published)
final_content, tools_used, token_usage, iteration = await self._run_agent_loop(
messages=messages,
session_key=msg.session_key,
publish_events=False,
+ ov_tools_enable=ov_tools_enable,
)
if final_content is None or (
@@ -741,12 +789,11 @@ def _check_cmd_auth(self, msg: InboundMessage) -> bool:
allow_from = []
if self.config.ov_server and self.config.ov_server.admin_user_id:
allow_from.append(self.config.ov_server.admin_user_id)
- for channel in self.config.channels_config.get_all_channels():
- if channel.channel_key() == msg.session_key.channel_key():
- allow_cmd = getattr(channel, 'allow_cmd_from', [])
- if allow_cmd:
- allow_from.extend(allow_cmd)
- break
+ channel_config = self._get_channel_config(msg.session_key)
+ if channel_config:
+ allow_cmd = getattr(channel_config, 'allow_cmd_from', [])
+ if allow_cmd:
+ allow_from.extend(allow_cmd)
# If channel not found or sender not in allow_from list, ignore message
if msg.sender_id not in allow_from:
diff --git a/bot/vikingbot/agent/memory.py b/bot/vikingbot/agent/memory.py
index bfe0ed4d7..abc8f21f0 100644
--- a/bot/vikingbot/agent/memory.py
+++ b/bot/vikingbot/agent/memory.py
@@ -23,20 +23,90 @@ def read_long_term(self) -> str:
return self.memory_file.read_text(encoding="utf-8")
return ""
- def _parse_viking_memory(self, result: Any) -> str:
- if result and len(result) > 0:
- user_memories = []
- for idx, memory in enumerate(result, start=1):
- user_memories.append(
- f"\n"
- f" {getattr(memory, 'abstract', '')}\n"
- f" {getattr(memory, 'uri', '')}\n"
- f" {getattr(memory, 'is_leaf', False)}\n"
- f" {getattr(memory, 'score', 0.0)}\n"
+ async def _parse_viking_memory(
+ self, result: Any, client: Any, min_score: float = 0.3, max_chars: int = 4000
+ ) -> str:
+ """Parse viking memory with score filtering and character limit.
+ Automatically reads full content for memories above threshold.
+
+ Args:
+ result: Memory search results
+ client: VikingClient instance to read content
+ min_score: Minimum score threshold (default: 0.4)
+ max_chars: Maximum character limit for output (default: 4000)
+
+ Returns:
+ Formatted memory string within character limit
+ """
+ if not result or len(result) == 0:
+ return ""
+
+ # Filter by min_score and sort by score descending
+ filtered_memories = [
+ memory for memory in result if getattr(memory, "score", 0.0) >= min_score
+ ]
+ filtered_memories.sort(key=lambda m: getattr(m, "score", 0.0), reverse=True)
+
+ user_memories = []
+ total_chars = 0
+
+ for idx, memory in enumerate(filtered_memories, start=1):
+ uri = getattr(memory, "uri", "")
+ abstract = getattr(memory, "abstract", "")
+ score = getattr(memory, "score", 0.0)
+
+ # First, try to build full memory with content
+ try:
+ content = await client.read_content(uri, level="read")
+ except Exception:
+ content = ""
+
+ if content:
+ # Try full version first (no abstract when content is present)
+ memory_str = (
+ f'\n'
+ f" {uri}\n"
+ f" {score}\n"
+ f" {content}\n"
+ f""
+ )
+ else:
+ # No content available, use link-only version
+ memory_str = (
+ f'\n'
+ f" {uri}\n"
+ f" {score}\n"
f""
)
- return "\n".join(user_memories)
- return ""
+
+ # Check if adding this memory would exceed the limit
+ memory_chars = len(memory_str)
+ if user_memories:
+ memory_chars += 1
+
+ if total_chars + memory_chars <= max_chars:
+ user_memories.append(memory_str)
+ total_chars += memory_chars
+ else:
+ # If full version is too big, try link-only version
+ link_only_str = (
+ f'\n'
+ f" {uri}\n"
+ f" {score}\n"
+ f""
+ )
+ link_chars = len(link_only_str)
+ if user_memories:
+ link_chars += 1
+
+ if total_chars + link_chars <= max_chars:
+ user_memories.append(link_only_str)
+ total_chars += link_chars
+ else:
+ # Even link-only is too big, skip this memory
+ continue
+
+ return "\n".join(user_memories)
def write_long_term(self, content: str) -> None:
self.memory_file.write_text(content, encoding="utf-8")
@@ -49,21 +119,36 @@ def get_memory_context(self) -> str:
long_term = self.read_long_term()
return f"## Long-term Memory\n{long_term}" if long_term else ""
- async def get_viking_memory_context(self, current_message: str, workspace_id: str, sender_id: str) -> str:
+ async def get_viking_memory_context(
+ self, current_message: str, workspace_id: str, sender_id: str
+ ) -> str:
try:
config = load_config().ov_server
admin_user_id = config.admin_user_id
- user_id = sender_id if config.mode == "remote" else admin_user_id
+ user_id = sender_id
+ logger.info(f'workspace_id={workspace_id}')
+ logger.info(f'user_id={user_id}')
+ logger.info(f'admin_user_id={admin_user_id}')
client = await VikingClient.create(agent_id=workspace_id)
- result = await client.search_memory(query=current_message, user_id=user_id, agent_user_id=admin_user_id, limit=5)
+ result = await client.search_memory(
+ query=current_message, user_id=user_id, agent_user_id=admin_user_id, limit=30
+ )
if not result:
return ""
- user_memory = self._parse_viking_memory(result["user_memory"])
- agent_memory = self._parse_viking_memory(result["agent_memory"])
- return (
- f"### user memories:\n{user_memory}\n"
- f"### agent memories:\n{agent_memory}"
- )
+
+ # Log raw search results for debugging
+ memory_list = []
+ memory_list.append(f'user_memory[{len(result['user_memory'])}]:')
+
+ for i, mem in enumerate(result['user_memory']):
+ memory_list.append(f"{i},{getattr(mem, 'uri', '')},{getattr(mem, 'score', 0)}")
+ memory_list.append(f'agent_memory[{len(result['agent_memory'])}]:')
+ for i, mem in enumerate(result['agent_memory']):
+ memory_list.append(f"{i},{getattr(mem, 'uri', '')},{getattr(mem, 'score', 0)}")
+ logger.info(f"[RAW_MEMORIES]\n{'\n'.join(memory_list)}")
+ user_memory = await self._parse_viking_memory(result["user_memory"], client, min_score=0.35)
+ agent_memory = await self._parse_viking_memory(result["agent_memory"], client, min_score=0.35, max_chars=2000)
+ return f"### user memories:\n{user_memory}\n### agent memories:\n{agent_memory}"
except Exception as e:
logger.error(f"[READ_USER_MEMORY]: search error. {e}")
return ""
@@ -73,4 +158,4 @@ async def get_viking_user_profile(self, workspace_id: str, user_id: str) -> str:
result = await client.read_user_profile(user_id)
if not result:
return ""
- return result
\ No newline at end of file
+ return result
diff --git a/bot/vikingbot/agent/tools/registry.py b/bot/vikingbot/agent/tools/registry.py
index 628e2bd44..1797ebce9 100644
--- a/bot/vikingbot/agent/tools/registry.py
+++ b/bot/vikingbot/agent/tools/registry.py
@@ -100,13 +100,17 @@ def has(self, name: str) -> bool:
"""
return name in self._tools
- def get_definitions(self) -> list[dict[str, Any]]:
+ def get_definitions(self, ov_tools_enable: bool = True) -> list[dict[str, Any]]:
"""
Get all tool definitions in OpenAI format.
Converts all registered tools to the OpenAI function schema format,
suitable for use with OpenAI's function calling API.
+ Args:
+ ov_tools_enable: Whether to include OpenViking tools. If False,
+ tools with names starting with "openviking_" will be excluded.
+
Returns:
List of tool schemas in OpenAI format, where each schema contains
the tool's type, name, description, and parameters.
@@ -116,7 +120,10 @@ def get_definitions(self) -> list[dict[str, Any]]:
>>> for defn in definitions:
... print(f"Tool: {defn['function']['name']}")
"""
- return [tool.to_schema() for tool in self._tools.values()]
+ tools = self._tools.values()
+ if not ov_tools_enable:
+ tools = [tool for tool in tools if not tool.name.startswith("openviking_")]
+ return [tool.to_schema() for tool in tools]
async def execute(
self,
diff --git a/bot/vikingbot/channels/base.py b/bot/vikingbot/channels/base.py
index 1cf2b3d00..924d3ca2d 100644
--- a/bot/vikingbot/channels/base.py
+++ b/bot/vikingbot/channels/base.py
@@ -145,6 +145,7 @@ async def _handle_message(
sender_id: str,
chat_id: str,
content: str,
+ need_reply: bool = True,
media: list[str] | None = None,
metadata: dict[str, Any] | None = None,
) -> None:
@@ -174,6 +175,7 @@ async def _handle_message(
chat_id=chat_id,
),
sender_id=str(sender_id),
+ need_reply=need_reply,
content=content,
media=media or [],
metadata=metadata or {},
diff --git a/bot/vikingbot/channels/feishu.py b/bot/vikingbot/channels/feishu.py
index bfe75501d..451b23b9f 100644
--- a/bot/vikingbot/channels/feishu.py
+++ b/bot/vikingbot/channels/feishu.py
@@ -809,12 +809,10 @@ async def _on_message(self, data: "P2ImMessageReceiveV1") -> None:
# 6. 检查是否需要处理该消息
should_process = await self._check_should_process(chat_type, chat_id, message, is_mentioned)
- if not should_process:
- return
# 7. 添加已读表情
config = load_config()
- if config.mode != BotMode.DEBUG:
+ if config.mode != BotMode.DEBUG and should_process:
await self._add_reaction(message_id, "MeMeMe")
# 8. 处理@占位符
@@ -840,6 +838,7 @@ async def _on_message(self, data: "P2ImMessageReceiveV1") -> None:
chat_id=final_chat_id,
content=content,
media=media if media else None,
+ need_reply=should_process,
metadata={
"message_id": message_id,
"chat_type": chat_type,
diff --git a/bot/vikingbot/config/loader.py b/bot/vikingbot/config/loader.py
index 9aae2f826..bc49b0a4d 100644
--- a/bot/vikingbot/config/loader.py
+++ b/bot/vikingbot/config/loader.py
@@ -4,11 +4,14 @@
import os
from pathlib import Path
from typing import Any
+
from loguru import logger
+
from vikingbot.config.schema import Config
CONFIG_PATH = None
+
def get_config_path() -> Path:
"""Get the path to ov.conf config file.
@@ -24,9 +27,7 @@ def _resolve_ov_conf_path() -> Path:
# Check environment variable first
env_path = os.environ.get("OPENVIKING_CONFIG_FILE")
if env_path:
- path = Path(env_path).expanduser()
- if path.exists():
- return path
+ return Path(env_path).expanduser()
# Default path
return Path.home() / ".openviking" / "ov.conf"
@@ -222,4 +223,4 @@ def camel_to_snake(name: str) -> str:
def snake_to_camel(name: str) -> str:
"""Convert snake_case to camelCase."""
components = name.split("_")
- return components[0] + "".join(x.title() for x in components[1:])
\ No newline at end of file
+ return components[0] + "".join(x.title() for x in components[1:])
diff --git a/bot/vikingbot/config/schema.py b/bot/vikingbot/config/schema.py
index 0ae4bfff3..fcefdcee3 100644
--- a/bot/vikingbot/config/schema.py
+++ b/bot/vikingbot/config/schema.py
@@ -60,6 +60,7 @@ class BaseChannelConfig(BaseModel):
type: Any = ChannelType.TELEGRAM # Default for backwards compatibility
enabled: bool = True
+ ov_tools_enable: bool = True
def channel_id(self) -> str:
return "default"
@@ -403,6 +404,20 @@ def get_all_channels(self) -> list[BaseChannelConfig]:
result.append(item)
return result
+ def get_channel_by_key(self, channel_key: str) -> BaseChannelConfig | None:
+ """Get channel config by channel key.
+
+ Args:
+ channel_key: Channel key in format "type__channel_id"
+
+ Returns:
+ Channel config if found, None otherwise
+ """
+ for channel_config in self.get_all_channels():
+ if channel_config.channel_key() == channel_key:
+ return channel_config
+ return None
+
class AgentsConfig(BaseModel):
"""Agent configuration."""
diff --git a/bot/vikingbot/hooks/builtins/openviking_hooks.py b/bot/vikingbot/hooks/builtins/openviking_hooks.py
index 2cbd51e30..5d36abca4 100644
--- a/bot/vikingbot/hooks/builtins/openviking_hooks.py
+++ b/bot/vikingbot/hooks/builtins/openviking_hooks.py
@@ -1,5 +1,7 @@
import re
+import asyncio
from typing import Any
+from collections import defaultdict
from loguru import logger
@@ -42,11 +44,46 @@ async def _get_client(self, workspace_id: str) -> VikingClient:
async def execute(self, context: HookContext, **kwargs) -> Any:
vikingbot_session: Session = kwargs.get("session", {})
session_id = context.session_key.safe_name()
+ config = load_config()
+ admin_user_id = config.ov_server.admin_user_id
try:
client = await self._get_client(context.workspace_id)
- result = await client.commit(session_id, vikingbot_session.messages, load_config().ov_server.admin_user_id)
- return result
+
+ # 1. 提交全部的 message 到 admin
+ admin_result = await client.commit(session_id, vikingbot_session.messages, admin_user_id)
+
+ # 2. 根据 message 里的 sender_id 进行分组
+ messages_by_sender = defaultdict(list)
+ for msg in vikingbot_session.messages:
+ sender_id = msg.get("sender_id")
+ if sender_id and sender_id != admin_user_id:
+ messages_by_sender[sender_id].append(msg)
+
+ # 3. 带并发限制地提交到各个 user
+ user_results = []
+ if messages_by_sender:
+ # 限制最大并发数为 5
+ semaphore = asyncio.Semaphore(5)
+
+ async def commit_with_semaphore(user_id: str, user_messages: list):
+ async with semaphore:
+ return await client.commit(f"{session_id}_{user_id}", user_messages, user_id)
+
+ user_tasks = []
+ for user_id, user_messages in messages_by_sender.items():
+ task = commit_with_semaphore(user_id, user_messages)
+ user_tasks.append(task)
+
+ # 等待所有用户任务完成
+ user_results = await asyncio.gather(*user_tasks, return_exceptions=True)
+
+ return {
+ "success": True,
+ "admin_result": admin_result,
+ "user_results": user_results,
+ "users_count": len(messages_by_sender)
+ }
except Exception as e:
logger.exception(f"Failed to add message to OpenViking: {e}")
return {"success": False, "error": str(e)}
diff --git a/bot/vikingbot/openviking_mount/ov_server.py b/bot/vikingbot/openviking_mount/ov_server.py
index daa139799..7acb94beb 100644
--- a/bot/vikingbot/openviking_mount/ov_server.py
+++ b/bot/vikingbot/openviking_mount/ov_server.py
@@ -444,7 +444,9 @@ async def commit(self, session_id: str, messages: list[dict[str, Any]], user_id:
if not parts:
continue
- await session.add_message(role=role, parts=parts)
+ # 获取消息的时间戳,如果没有则使用当前时间
+ created_at = message.get("timestamp")
+ await session.add_message(role=role, parts=parts, created_at=created_at)
result = await session.commit_async()
if client is not self.client:
diff --git a/bot/vikingbot/providers/registry.py b/bot/vikingbot/providers/registry.py
index 0497543ec..758ff700f 100644
--- a/bot/vikingbot/providers/registry.py
+++ b/bot/vikingbot/providers/registry.py
@@ -242,12 +242,14 @@ def label(self) -> str:
),
# MiniMax: needs "minimax/" prefix for LiteLLM routing.
# Uses OpenAI-compatible API at api.minimax.io/v1.
+ # Recommended models: MiniMax-M2.7 (default), MiniMax-M2.7-highspeed (faster).
+ # Note: MiniMax does not support system messages; they are merged into the first user message.
ProviderSpec(
name="minimax",
keywords=("minimax",),
env_key="MINIMAX_API_KEY",
display_name="MiniMax",
- litellm_prefix="minimax", # MiniMax-M2.1 → minimax/MiniMax-M2.1
+ litellm_prefix="minimax", # MiniMax-M2.7 → minimax/MiniMax-M2.7
skip_prefixes=("minimax/", "openrouter/"),
env_extras=(),
is_gateway=False,
diff --git a/crates/ov_cli/LICENSE b/crates/LICENSE
similarity index 100%
rename from crates/ov_cli/LICENSE
rename to crates/LICENSE
diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs
index 766878e24..494f084bd 100644
--- a/crates/ov_cli/src/client.rs
+++ b/crates/ov_cli/src/client.rs
@@ -1,6 +1,7 @@
use reqwest::{Client as ReqwestClient, StatusCode};
use serde::de::DeserializeOwned;
use serde_json::Value;
+use std::collections::HashSet;
use std::fs::File;
use std::path::Path;
use tempfile::{Builder, NamedTempFile};
@@ -65,7 +66,10 @@ impl HttpClient {
let path = entry.path();
if path.is_file() {
let name = path.strip_prefix(dir_path).unwrap_or(path);
- zip.start_file(name.to_string_lossy(), options)?;
+ let name_str = name.to_str().ok_or_else(|| {
+ Error::InvalidPath(format!("Non-UTF-8 path: {}", name.to_string_lossy()))
+ })?;
+ zip.start_file(name_str, options)?;
let mut file = File::open(path)?;
std::io::copy(&mut file, &mut zip)?;
}
@@ -477,20 +481,63 @@ impl HttpClient {
// ============ Search Methods ============
+ fn build_tags_filter(tags: &str) -> Result {
+ let mut tag_list: Vec<&str> = tags
+ .split(',')
+ .map(|s| s.trim())
+ .filter(|s| !s.is_empty())
+ .collect();
+
+ let mut seen = HashSet::new();
+ tag_list.retain(|s| seen.insert(*s));
+
+ if tag_list.is_empty() {
+ return Err(Error::Client(
+ "'tags' must contain at least one non-empty tag".to_string(),
+ ));
+ }
+
+ let conds: Vec = tag_list
+ .into_iter()
+ .map(|s| {
+ serde_json::json!({
+ "op": "contains",
+ "field": "tags",
+ "substring": s
+ })
+ })
+ .collect();
+
+ Ok(if conds.len() == 1 {
+ conds[0].clone()
+ } else {
+ serde_json::json!({
+ "op": "and",
+ "conds": conds
+ })
+ })
+ }
+
pub async fn find(
&self,
query: String,
uri: String,
node_limit: i32,
threshold: Option,
+ tags: Option,
) -> Result {
- let body = serde_json::json!({
- "query": query,
- "target_uri": uri,
- "limit": node_limit,
- "score_threshold": threshold,
- });
- self.post("/api/v1/search/find", &body).await
+ let mut body_map = serde_json::Map::new();
+ body_map.insert("query".to_string(), serde_json::json!(query));
+ body_map.insert("target_uri".to_string(), serde_json::json!(uri));
+ body_map.insert("limit".to_string(), serde_json::json!(node_limit));
+ if let Some(t) = threshold {
+ body_map.insert("score_threshold".to_string(), serde_json::json!(t));
+ }
+ if let Some(t) = tags {
+ let filter = Self::build_tags_filter(&t)?;
+ body_map.insert("filter".to_string(), filter);
+ }
+ self.post("/api/v1/search/find", &serde_json::Value::Object(body_map)).await
}
pub async fn search(
@@ -500,15 +547,23 @@ impl HttpClient {
session_id: Option,
node_limit: i32,
threshold: Option,
+ tags: Option,
) -> Result {
- let body = serde_json::json!({
- "query": query,
- "target_uri": uri,
- "session_id": session_id,
- "limit": node_limit,
- "score_threshold": threshold,
- });
- self.post("/api/v1/search/search", &body).await
+ let mut body_map = serde_json::Map::new();
+ body_map.insert("query".to_string(), serde_json::json!(query));
+ body_map.insert("target_uri".to_string(), serde_json::json!(uri));
+ if let Some(s) = session_id {
+ body_map.insert("session_id".to_string(), serde_json::json!(s));
+ }
+ body_map.insert("limit".to_string(), serde_json::json!(node_limit));
+ if let Some(t) = threshold {
+ body_map.insert("score_threshold".to_string(), serde_json::json!(t));
+ }
+ if let Some(t) = tags {
+ let filter = Self::build_tags_filter(&t)?;
+ body_map.insert("filter".to_string(), filter);
+ }
+ self.post("/api/v1/search/search", &serde_json::Value::Object(body_map)).await
}
pub async fn grep(
@@ -518,6 +573,7 @@ impl HttpClient {
pattern: &str,
ignore_case: bool,
node_limit: i32,
+ level_limit: i32,
) -> Result {
let body = serde_json::json!({
"uri": uri,
@@ -525,6 +581,7 @@ impl HttpClient {
"pattern": pattern,
"case_insensitive": ignore_case,
"node_limit": node_limit,
+ "level_limit": level_limit,
});
self.post("/api/v1/search/grep", &body).await
}
@@ -560,6 +617,7 @@ impl HttpClient {
exclude: Option,
directly_upload_media: bool,
watch_interval: f64,
+ tags: Option,
) -> Result {
let path_obj = Path::new(path);
@@ -587,14 +645,20 @@ impl HttpClient {
"exclude": exclude,
"directly_upload_media": directly_upload_media,
"watch_interval": watch_interval,
+ "tags": tags,
});
self.post("/api/v1/resources", &body).await
} else if path_obj.is_file() {
+ let source_name = path_obj
+ .file_name()
+ .and_then(|n| n.to_str())
+ .map(|s| s.to_string());
let temp_file_id = self.upload_temp_file(path_obj).await?;
let body = serde_json::json!({
"temp_file_id": temp_file_id,
+ "source_name": source_name,
"to": to,
"parent": parent,
"reason": reason,
@@ -607,6 +671,7 @@ impl HttpClient {
"exclude": exclude,
"directly_upload_media": directly_upload_media,
"watch_interval": watch_interval,
+ "tags": tags,
});
self.post("/api/v1/resources", &body).await
@@ -727,12 +792,73 @@ impl HttpClient {
// ============ Pack Methods ============
- pub async fn export_ovpack(&self, uri: &str, to: &str) -> Result {
+ pub async fn export_ovpack(&self, uri: &str, to: &str) -> Result {
let body = serde_json::json!({
"uri": uri,
- "to": to,
});
- self.post("/api/v1/pack/export", &body).await
+
+ let url = format!("{}/api/v1/pack/export", self.base_url);
+ let response = self
+ .http
+ .post(&url)
+ .headers(self.build_headers())
+ .json(&body)
+ .send()
+ .await
+ .map_err(|e| Error::Network(format!("HTTP request failed: {}", e)))?;
+
+ let status = response.status();
+ if !status.is_success() {
+ // Try to parse error message as JSON
+ let json_result: Result = response
+ .json()
+ .await
+ .map_err(|e| Error::Network(format!("Failed to parse error response: {}", e)));
+
+ let error_msg = match json_result {
+ Ok(json) => json
+ .get("error")
+ .and_then(|e| e.get("message"))
+ .and_then(|m| m.as_str())
+ .map(|s| s.to_string())
+ .or_else(|| {
+ json.get("detail")
+ .and_then(|d| d.as_str())
+ .map(|s| s.to_string())
+ })
+ .unwrap_or_else(|| format!("HTTP error {}", status)),
+ Err(_) => format!("HTTP error {}", status),
+ };
+
+ return Err(Error::Api(error_msg));
+ }
+
+ // Download the file content
+ let bytes = response
+ .bytes()
+ .await
+ .map_err(|e| Error::Network(format!("Failed to read response bytes: {}", e)))?;
+
+ // Determine target path
+ let to_path = Path::new(to);
+ let final_path = if to_path.is_dir() {
+ let base_name = uri.trim_end_matches('/').split('/').last().unwrap_or("export");
+ to_path.join(format!("{}.ovpack", base_name))
+ } else if !to.ends_with(".ovpack") {
+ Path::new(&format!("{}.ovpack", to)).to_path_buf()
+ } else {
+ to_path.to_path_buf()
+ };
+
+ // Ensure parent directory exists
+ if let Some(parent) = final_path.parent() {
+ std::fs::create_dir_all(parent)?;
+ }
+
+ // Write file
+ std::fs::write(&final_path, bytes)?;
+
+ Ok(final_path.to_string_lossy().to_string())
}
pub async fn import_ovpack(
diff --git a/crates/ov_cli/src/commands/pack.rs b/crates/ov_cli/src/commands/pack.rs
index 82b5f2f2a..54341659a 100644
--- a/crates/ov_cli/src/commands/pack.rs
+++ b/crates/ov_cli/src/commands/pack.rs
@@ -9,7 +9,14 @@ pub async fn export(
format: OutputFormat,
compact: bool,
) -> Result<()> {
- let result = client.export_ovpack(uri, to).await?;
+ let file_path = client.export_ovpack(uri, to).await?;
+
+ // Output success message with the file path
+ let result = serde_json::json!({
+ "file": file_path,
+ "message": format!("Successfully exported to {}", file_path)
+ });
+
output_success(&result, format, compact);
Ok(())
}
diff --git a/crates/ov_cli/src/commands/resources.rs b/crates/ov_cli/src/commands/resources.rs
index dc29317f5..e2478f90b 100644
--- a/crates/ov_cli/src/commands/resources.rs
+++ b/crates/ov_cli/src/commands/resources.rs
@@ -17,6 +17,7 @@ pub async fn add_resource(
exclude: Option,
directly_upload_media: bool,
watch_interval: f64,
+ tags: Option,
format: OutputFormat,
compact: bool,
) -> Result<()> {
@@ -35,6 +36,7 @@ pub async fn add_resource(
exclude,
directly_upload_media,
watch_interval,
+ tags,
)
.await?;
output_success(&result, format, compact);
diff --git a/crates/ov_cli/src/commands/search.rs b/crates/ov_cli/src/commands/search.rs
index 02828fc02..6e48088ec 100644
--- a/crates/ov_cli/src/commands/search.rs
+++ b/crates/ov_cli/src/commands/search.rs
@@ -8,11 +8,12 @@ pub async fn find(
uri: &str,
node_limit: i32,
threshold: Option,
+ tags: Option,
output_format: OutputFormat,
compact: bool,
) -> Result<()> {
let result = client
- .find(query.to_string(), uri.to_string(), node_limit, threshold)
+ .find(query.to_string(), uri.to_string(), node_limit, threshold, tags)
.await?;
output_success(&result, output_format, compact);
Ok(())
@@ -25,6 +26,7 @@ pub async fn search(
session_id: Option,
node_limit: i32,
threshold: Option,
+ tags: Option,
output_format: OutputFormat,
compact: bool,
) -> Result<()> {
@@ -35,6 +37,7 @@ pub async fn search(
session_id,
node_limit,
threshold,
+ tags,
)
.await?;
output_success(&result, output_format, compact);
@@ -48,11 +51,12 @@ pub async fn grep(
pattern: &str,
ignore_case: bool,
node_limit: i32,
+ level_limit: i32,
output_format: OutputFormat,
compact: bool,
) -> Result<()> {
let result = client
- .grep(uri, exclude_uri, pattern, ignore_case, node_limit)
+ .grep(uri, exclude_uri, pattern, ignore_case, node_limit, level_limit)
.await?;
output_success(&result, output_format, compact);
Ok(())
diff --git a/crates/ov_cli/src/error.rs b/crates/ov_cli/src/error.rs
index 9a2df34ac..d117d406b 100644
--- a/crates/ov_cli/src/error.rs
+++ b/crates/ov_cli/src/error.rs
@@ -20,6 +20,9 @@ pub enum Error {
#[error("Output error: {0}")]
Output(String),
+ #[error("Invalid path: {0}")]
+ InvalidPath(String),
+
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
@@ -76,6 +79,7 @@ impl From for CliError {
Error::Client(msg) => CliError::new(format!("Client error: {}", msg)),
Error::Parse(msg) => CliError::new(format!("Parse error: {}", msg)),
Error::Output(msg) => CliError::new(format!("Output error: {}", msg)),
+ Error::InvalidPath(msg) => CliError::new(format!("Invalid path: {}", msg)),
Error::Io(e) => CliError::new(format!("IO error: {}", e)),
Error::Serialization(e) => CliError::new(format!("Serialization error: {}", e)),
Error::Zip(e) => CliError::new(format!("Zip error: {}", e)),
diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs
index 3bae0bf51..213ddc8f4 100644
--- a/crates/ov_cli/src/main.rs
+++ b/crates/ov_cli/src/main.rs
@@ -146,6 +146,9 @@ enum Commands {
/// Watch interval in minutes for automatic resource monitoring (0 = no monitoring)
#[arg(long, default_value = "0")]
watch_interval: f64,
+ /// Tags for the resource (comma-separated)
+ #[arg(long)]
+ tags: Option,
},
/// Add a skill into OpenViking
AddSkill {
@@ -377,6 +380,9 @@ enum Commands {
/// Score threshold
#[arg(short, long)]
threshold: Option,
+ /// Filter by tags (comma-separated)
+ #[arg(long)]
+ tags: Option,
},
/// Run context-aware retrieval
Search {
@@ -399,6 +405,9 @@ enum Commands {
/// Score threshold
#[arg(short, long)]
threshold: Option,
+ /// Filter by tags (comma-separated)
+ #[arg(long)]
+ tags: Option,
},
/// Run content pattern search
Grep {
@@ -421,6 +430,9 @@ enum Commands {
default_value = "256"
)]
node_limit: i32,
+ /// Maximum depth level to traverse (default: 10)
+ #[arg(short = 'L', long = "level-limit", default_value = "10")]
+ level_limit: i32,
},
/// Run file glob pattern search
Glob {
@@ -666,6 +678,7 @@ async fn main() {
exclude,
no_directly_upload_media,
watch_interval,
+ tags,
} => {
handle_add_resource(
path,
@@ -681,6 +694,7 @@ async fn main() {
exclude,
no_directly_upload_media,
watch_interval,
+ tags,
ctx,
)
.await
@@ -794,21 +808,24 @@ async fn main() {
uri,
node_limit,
threshold,
- } => handle_find(query, uri, node_limit, threshold, ctx).await,
+ tags,
+ } => handle_find(query, uri, node_limit, threshold, tags, ctx).await,
Commands::Search {
query,
uri,
session_id,
node_limit,
threshold,
- } => handle_search(query, uri, session_id, node_limit, threshold, ctx).await,
+ tags,
+ } => handle_search(query, uri, session_id, node_limit, threshold, tags, ctx).await,
Commands::Grep {
uri,
exclude_uri,
pattern,
ignore_case,
node_limit,
- } => handle_grep(uri, exclude_uri, pattern, ignore_case, node_limit, ctx).await,
+ level_limit,
+ } => handle_grep(uri, exclude_uri, pattern, ignore_case, node_limit, level_limit, ctx).await,
Commands::Glob {
pattern,
@@ -837,6 +854,7 @@ async fn handle_add_resource(
exclude: Option,
no_directly_upload_media: bool,
watch_interval: f64,
+ tags: Option,
ctx: CliContext,
) -> Result<()> {
let is_url =
@@ -910,6 +928,7 @@ async fn handle_add_resource(
exclude,
directly_upload_media,
watch_interval,
+ tags,
ctx.output_format,
ctx.compact,
)
@@ -1271,12 +1290,16 @@ async fn handle_find(
uri: String,
node_limit: i32,
threshold: Option,
+ tags: Option,
ctx: CliContext,
) -> Result<()> {
let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit)];
if let Some(t) = threshold {
params.push(format!("--threshold {}", t));
}
+ if let Some(t) = &tags {
+ params.push(format!("--tags {}", t));
+ }
params.push(format!("\"{}\"", query));
print_command_echo("ov find", ¶ms.join(" "), ctx.config.echo_command);
let client = ctx.get_client();
@@ -1286,6 +1309,7 @@ async fn handle_find(
&uri,
node_limit,
threshold,
+ tags,
ctx.output_format,
ctx.compact,
)
@@ -1298,6 +1322,7 @@ async fn handle_search(
session_id: Option,
node_limit: i32,
threshold: Option,
+ tags: Option,
ctx: CliContext,
) -> Result<()> {
let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit)];
@@ -1307,6 +1332,9 @@ async fn handle_search(
if let Some(t) = threshold {
params.push(format!("--threshold {}", t));
}
+ if let Some(t) = &tags {
+ params.push(format!("--tags {}", t));
+ }
params.push(format!("\"{}\"", query));
print_command_echo("ov search", ¶ms.join(" "), ctx.config.echo_command);
let client = ctx.get_client();
@@ -1317,6 +1345,7 @@ async fn handle_search(
session_id,
node_limit,
threshold,
+ tags,
ctx.output_format,
ctx.compact,
)
@@ -1433,9 +1462,24 @@ async fn handle_grep(
pattern: String,
ignore_case: bool,
node_limit: i32,
+ level_limit: i32,
ctx: CliContext,
) -> Result<()> {
- let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit)];
+ // Prevent grep from root directory to avoid excessive server load and timeouts
+ if uri == "viking://" || uri == "viking:///" {
+ eprintln!(
+ "Error: Cannot grep from root directory 'viking://'.\n\
+ Grep from root would search across all scopes (resources, user, agent, session, queue, temp),\n\
+ which may cause server timeout or excessive load.\n\
+ Please specify a more specific scope, e.g.:\n\
+ ov grep --uri=viking://resources '{}'\n\
+ ov grep --uri=viking://user '{}'",
+ pattern, pattern
+ );
+ std::process::exit(1);
+ }
+
+ let mut params = vec![format!("--uri={}", uri), format!("-n {}", node_limit), format!("-L {}", level_limit)];
if let Some(excluded) = &exclude_uri {
params.push(format!("-x {}", excluded));
}
@@ -1452,6 +1496,7 @@ async fn handle_grep(
&pattern,
ignore_case,
node_limit,
+ level_limit,
ctx.output_format,
ctx.compact,
)
diff --git a/crates/ragfs-python/Cargo.toml b/crates/ragfs-python/Cargo.toml
new file mode 100644
index 000000000..6506f20a3
--- /dev/null
+++ b/crates/ragfs-python/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "ragfs-python"
+version = "0.1.0"
+edition = "2021"
+description = "Python bindings for RAGFS - Rust AGFS filesystem"
+publish = false
+
+[lib]
+name = "ragfs_python"
+crate-type = ["cdylib"]
+
+[dependencies]
+ragfs = { path = "../ragfs" }
+pyo3 = { version = "0.27", features = ["extension-module"] }
+tokio = { version = "1", features = ["full"] }
+serde_json = "1.0"
diff --git a/crates/ragfs-python/pyproject.toml b/crates/ragfs-python/pyproject.toml
new file mode 100644
index 000000000..560397e40
--- /dev/null
+++ b/crates/ragfs-python/pyproject.toml
@@ -0,0 +1,11 @@
+[build-system]
+requires = ["maturin>=1.0,<2.0"]
+build-backend = "maturin"
+
+[project]
+name = "ragfs-python"
+version = "0.1.0"
+requires-python = ">=3.10"
+
+[tool.maturin]
+features = ["pyo3/extension-module"]
diff --git a/crates/ragfs-python/src/lib.rs b/crates/ragfs-python/src/lib.rs
new file mode 100644
index 000000000..6be96f4ed
--- /dev/null
+++ b/crates/ragfs-python/src/lib.rs
@@ -0,0 +1,457 @@
+//! Python bindings for RAGFS - Rust AGFS filesystem
+//!
+//! Provides `RAGFSBindingClient`, a PyO3 native class that is API-compatible
+//! with the existing Go-based `AGFSBindingClient`. This embeds the ragfs
+//! filesystem engine directly in the Python process (no HTTP server needed).
+
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::prelude::*;
+use pyo3::types::{PyBytes, PyDict, PyList};
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::UNIX_EPOCH;
+
+use ragfs::core::{ConfigValue, FileInfo, FileSystem, MountableFS, PluginConfig, WriteFlag};
+use ragfs::plugins::{KVFSPlugin, LocalFSPlugin, MemFSPlugin, QueueFSPlugin, ServerInfoFSPlugin, SQLFSPlugin};
+
+/// Convert a ragfs error into a Python RuntimeError
+fn to_py_err(e: ragfs::core::Error) -> PyErr {
+ PyRuntimeError::new_err(e.to_string())
+}
+
+/// Convert FileInfo to a Python dict matching the Go binding JSON format:
+/// {"name": str, "size": int, "mode": int, "modTime": str, "isDir": bool}
+fn file_info_to_py_dict(py: Python<'_>, info: &FileInfo) -> PyResult> {
+ let dict = PyDict::new(py);
+ dict.set_item("name", &info.name)?;
+ dict.set_item("size", info.size)?;
+ dict.set_item("mode", info.mode)?;
+
+ // modTime as RFC3339 string (Go binding format)
+ let secs = info
+ .mod_time
+ .duration_since(UNIX_EPOCH)
+ .unwrap_or_default()
+ .as_secs();
+ let mod_time = format_rfc3339(secs);
+ dict.set_item("modTime", mod_time)?;
+
+ dict.set_item("isDir", info.is_dir)?;
+ Ok(dict.into())
+}
+
+/// Format unix timestamp as RFC3339 string (simplified, UTC)
+fn format_rfc3339(secs: u64) -> String {
+ let s = secs;
+ let days = s / 86400;
+ let time_of_day = s % 86400;
+ let h = time_of_day / 3600;
+ let m = (time_of_day % 3600) / 60;
+ let sec = time_of_day % 60;
+
+ // Calculate date from days since epoch (simplified)
+ let (year, month, day) = days_to_ymd(days);
+ format!(
+ "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
+ year, month, day, h, m, sec
+ )
+}
+
+/// Convert days since Unix epoch to (year, month, day)
+fn days_to_ymd(days: u64) -> (u64, u64, u64) {
+ // Algorithm from http://howardhinnant.github.io/date_algorithms.html
+ let z = days + 719468;
+ let era = z / 146097;
+ let doe = z - era * 146097;
+ let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
+ let y = yoe + era * 400;
+ let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
+ let mp = (5 * doy + 2) / 153;
+ let d = doy - (153 * mp + 2) / 5 + 1;
+ let m = if mp < 10 { mp + 3 } else { mp - 9 };
+ let y = if m <= 2 { y + 1 } else { y };
+ (y, m, d)
+}
+
+/// Convert a Python dict to HashMap
+fn py_dict_to_config(dict: &Bound<'_, PyDict>) -> PyResult> {
+ let mut params = HashMap::new();
+ for (k, v) in dict.iter() {
+ let key: String = k.extract()?;
+ let value = if let Ok(s) = v.extract::() {
+ ConfigValue::String(s)
+ } else if let Ok(b) = v.extract::() {
+ ConfigValue::Bool(b)
+ } else if let Ok(i) = v.extract::() {
+ ConfigValue::Int(i)
+ } else {
+ ConfigValue::String(v.str()?.to_string())
+ };
+ params.insert(key, value);
+ }
+ Ok(params)
+}
+
+/// RAGFS Python Binding Client.
+///
+/// Embeds the ragfs filesystem engine directly in the Python process.
+/// API-compatible with the Go-based AGFSBindingClient.
+#[pyclass]
+struct RAGFSBindingClient {
+ fs: Arc,
+ rt: tokio::runtime::Runtime,
+}
+
+#[pymethods]
+impl RAGFSBindingClient {
+ /// Create a new RAGFS binding client.
+ ///
+ /// Initializes the filesystem engine with all built-in plugins registered.
+ #[new]
+ #[pyo3(signature = (config_path=None))]
+ fn new(config_path: Option<&str>) -> PyResult {
+ let _ = config_path; // reserved for future use
+
+ let rt = tokio::runtime::Runtime::new()
+ .map_err(|e| PyRuntimeError::new_err(format!("Failed to create runtime: {}", e)))?;
+
+ let fs = Arc::new(MountableFS::new());
+
+ // Register all built-in plugins
+ rt.block_on(async {
+ fs.register_plugin(MemFSPlugin).await;
+ fs.register_plugin(KVFSPlugin).await;
+ fs.register_plugin(QueueFSPlugin).await;
+ fs.register_plugin(SQLFSPlugin::new()).await;
+ fs.register_plugin(LocalFSPlugin::new()).await;
+ fs.register_plugin(ServerInfoFSPlugin::new()).await;
+ });
+
+ Ok(Self { fs, rt })
+ }
+
+ /// Check client health.
+ fn health(&self) -> PyResult> {
+ let mut m = HashMap::new();
+ m.insert("status".to_string(), "healthy".to_string());
+ Ok(m)
+ }
+
+ /// Get client capabilities.
+ fn get_capabilities(&self) -> PyResult>> {
+ Python::attach(|py| {
+ let mut m = HashMap::new();
+ m.insert("version".to_string(), "ragfs-python".into_pyobject(py)?.into_any().unbind());
+ let features = vec!["memfs", "kvfs", "queuefs", "sqlfs"];
+ m.insert("features".to_string(), features.into_pyobject(py)?.into_any().unbind());
+ Ok(m)
+ })
+ }
+
+ /// List directory contents.
+ ///
+ /// Returns a list of file info dicts with keys:
+ /// name, size, mode, modTime, isDir
+ fn ls(&self, path: String) -> PyResult> {
+ let fs = self.fs.clone();
+ let entries = self.rt.block_on(async move {
+ fs.read_dir(&path).await
+ }).map_err(to_py_err)?;
+
+ Python::attach(|py| {
+ let list = PyList::empty(py);
+ for entry in &entries {
+ let dict = file_info_to_py_dict(py, entry)?;
+ list.append(dict)?;
+ }
+ Ok(list.into())
+ })
+ }
+
+ /// Read file content.
+ ///
+ /// Args:
+ /// path: File path
+ /// offset: Starting position (default: 0)
+ /// size: Number of bytes to read (default: -1, read all)
+ /// stream: Not supported in binding mode
+ #[pyo3(signature = (path, offset=0, size=-1, stream=false))]
+ fn read(&self, path: String, offset: i64, size: i64, stream: bool) -> PyResult> {
+ if stream {
+ return Err(PyRuntimeError::new_err(
+ "Streaming not supported in binding mode",
+ ));
+ }
+
+ let fs = self.fs.clone();
+ let off = if offset < 0 { 0u64 } else { offset as u64 };
+ let sz = if size < 0 { 0u64 } else { size as u64 };
+
+ let data = self.rt.block_on(async move {
+ fs.read(&path, off, sz).await
+ }).map_err(to_py_err)?;
+
+ Python::attach(|py| {
+ Ok(PyBytes::new(py, &data).into())
+ })
+ }
+
+ /// Read file content (alias for read).
+ #[pyo3(signature = (path, offset=0, size=-1, stream=false))]
+ fn cat(&self, path: String, offset: i64, size: i64, stream: bool) -> PyResult> {
+ self.read(path, offset, size, stream)
+ }
+
+ /// Write data to file.
+ ///
+ /// Args:
+ /// path: File path
+ /// data: File content as bytes
+ #[pyo3(signature = (path, data, max_retries=3))]
+ fn write(&self, path: String, data: Vec, max_retries: i32) -> PyResult {
+ let _ = max_retries; // not applicable for local binding
+ let fs = self.fs.clone();
+ let len = data.len();
+ self.rt.block_on(async move {
+ fs.write(&path, &data, 0, WriteFlag::Create).await
+ }).map_err(to_py_err)?;
+
+ Ok(format!("Written {} bytes", len))
+ }
+
+ /// Create a new empty file.
+ fn create(&self, path: String) -> PyResult> {
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ fs.create(&path).await
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), "created".to_string());
+ Ok(m)
+ }
+
+ /// Create a directory.
+ #[pyo3(signature = (path, mode="755"))]
+ fn mkdir(&self, path: String, mode: &str) -> PyResult> {
+ let mode_int = u32::from_str_radix(mode, 8)
+ .map_err(|e| PyRuntimeError::new_err(format!("Invalid mode '{}': {}", mode, e)))?;
+
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ fs.mkdir(&path, mode_int).await
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), "created".to_string());
+ Ok(m)
+ }
+
+ /// Remove a file or directory.
+ #[pyo3(signature = (path, recursive=false))]
+ fn rm(&self, path: String, recursive: bool) -> PyResult> {
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ if recursive {
+ fs.remove_all(&path).await
+ } else {
+ fs.remove(&path).await
+ }
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), "deleted".to_string());
+ Ok(m)
+ }
+
+ /// Get file/directory information.
+ fn stat(&self, path: String) -> PyResult> {
+ let fs = self.fs.clone();
+ let info = self.rt.block_on(async move {
+ fs.stat(&path).await
+ }).map_err(to_py_err)?;
+
+ Python::attach(|py| {
+ let dict = file_info_to_py_dict(py, &info)?;
+ Ok(dict.into())
+ })
+ }
+
+ /// Rename/move a file or directory.
+ fn mv(&self, old_path: String, new_path: String) -> PyResult> {
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ fs.rename(&old_path, &new_path).await
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), "renamed".to_string());
+ Ok(m)
+ }
+
+ /// Change file permissions.
+ fn chmod(&self, path: String, mode: u32) -> PyResult> {
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ fs.chmod(&path, mode).await
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), "chmod ok".to_string());
+ Ok(m)
+ }
+
+ /// Touch a file (create if not exists, or update timestamp).
+ fn touch(&self, path: String) -> PyResult> {
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ // Try create; if already exists, write empty to update mtime
+ match fs.create(&path).await {
+ Ok(_) => Ok(()),
+ Err(_) => {
+ // File exists, write empty bytes to update timestamp
+ fs.write(&path, &[], 0, WriteFlag::None).await.map(|_| ())
+ }
+ }
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), "touched".to_string());
+ Ok(m)
+ }
+
+ /// List all mounted plugins.
+ fn mounts(&self) -> PyResult>> {
+ let fs = self.fs.clone();
+ let mount_list = self.rt.block_on(async move {
+ fs.list_mounts().await
+ });
+
+ let result: Vec> = mount_list
+ .into_iter()
+ .map(|(path, fstype)| {
+ let mut m = HashMap::new();
+ m.insert("path".to_string(), path);
+ m.insert("fstype".to_string(), fstype);
+ m
+ })
+ .collect();
+
+ Ok(result)
+ }
+
+ /// Mount a plugin dynamically.
+ ///
+ /// Args:
+ /// fstype: Filesystem type (e.g., "memfs", "sqlfs", "kvfs", "queuefs")
+ /// path: Mount path
+ /// config: Plugin configuration as dict
+ #[pyo3(signature = (fstype, path, config=None))]
+ fn mount(
+ &self,
+ fstype: String,
+ path: String,
+ config: Option<&Bound<'_, PyDict>>,
+ ) -> PyResult> {
+ let params = match config {
+ Some(dict) => py_dict_to_config(dict)?,
+ None => HashMap::new(),
+ };
+
+ let plugin_config = PluginConfig {
+ name: fstype.clone(),
+ mount_path: path.clone(),
+ params,
+ };
+
+ let fs = self.fs.clone();
+ self.rt.block_on(async move {
+ fs.mount(plugin_config).await
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert(
+ "message".to_string(),
+ format!("mounted {} at {}", fstype, path),
+ );
+ Ok(m)
+ }
+
+ /// Unmount a plugin.
+ fn unmount(&self, path: String) -> PyResult> {
+ let fs = self.fs.clone();
+ let path_clone = path.clone();
+ self.rt.block_on(async move {
+ fs.unmount(&path_clone).await
+ }).map_err(to_py_err)?;
+
+ let mut m = HashMap::new();
+ m.insert("message".to_string(), format!("unmounted {}", path));
+ Ok(m)
+ }
+
+ /// List all registered plugin names.
+ fn list_plugins(&self) -> PyResult> {
+ // Return names of built-in plugins
+ Ok(vec![
+ "memfs".to_string(),
+ "kvfs".to_string(),
+ "queuefs".to_string(),
+ "sqlfs".to_string(),
+ "localfs".to_string(),
+ "serverinfofs".to_string(),
+ ])
+ }
+
+ /// Get detailed plugin information.
+ fn get_plugins_info(&self) -> PyResult> {
+ self.list_plugins()
+ }
+
+ /// Load an external plugin (not supported in Rust binding).
+ fn load_plugin(&self, _library_path: String) -> PyResult> {
+ Err(PyRuntimeError::new_err(
+ "External plugin loading not supported in ragfs-python binding",
+ ))
+ }
+
+ /// Unload an external plugin (not supported in Rust binding).
+ fn unload_plugin(&self, _library_path: String) -> PyResult> {
+ Err(PyRuntimeError::new_err(
+ "External plugin unloading not supported in ragfs-python binding",
+ ))
+ }
+
+ /// Search for pattern in files (not yet implemented in ragfs).
+ #[pyo3(signature = (path, pattern, recursive=false, case_insensitive=false, stream=false, node_limit=None))]
+ fn grep(
+ &self,
+ path: String,
+ pattern: String,
+ recursive: bool,
+ case_insensitive: bool,
+ stream: bool,
+ node_limit: Option,
+ ) -> PyResult> {
+ let _ = (path, pattern, recursive, case_insensitive, stream, node_limit);
+ Err(PyRuntimeError::new_err(
+ "grep not yet implemented in ragfs-python",
+ ))
+ }
+
+ /// Calculate file digest (not yet implemented in ragfs).
+ #[pyo3(signature = (path, algorithm="xxh3"))]
+ fn digest(&self, path: String, algorithm: &str) -> PyResult> {
+ let _ = (path, algorithm);
+ Err(PyRuntimeError::new_err(
+ "digest not yet implemented in ragfs-python",
+ ))
+ }
+}
+
+/// Python module definition
+#[pymodule]
+fn ragfs_python(m: &Bound<'_, PyModule>) -> PyResult<()> {
+ m.add_class::()?;
+ Ok(())
+}
diff --git a/crates/ragfs/Cargo.toml b/crates/ragfs/Cargo.toml
new file mode 100644
index 000000000..4e2569c12
--- /dev/null
+++ b/crates/ragfs/Cargo.toml
@@ -0,0 +1,95 @@
+[package]
+name = "ragfs"
+version = "0.1.0"
+edition = "2021"
+authors = ["OpenViking Contributors"]
+description = "Rust implementation of AGFS - Aggregated File System for AI Agents"
+license = "Apache-2.0"
+repository = "https://github.com/OpenViking/openviking"
+keywords = ["filesystem", "agents", "rest-api", "plugin-system"]
+categories = ["filesystem", "network-programming"]
+
+[lib]
+name = "ragfs"
+path = "src/lib.rs"
+
+[[bin]]
+name = "ragfs-server"
+path = "src/server/main.rs"
+
+[[bin]]
+name = "ragfs-shell"
+path = "src/shell/main.rs"
+
+[dependencies]
+# Async runtime
+tokio = { version = "1.38", features = ["full"] }
+async-trait = "0.1"
+
+# HTTP server
+axum = "0.7"
+tower = "0.5"
+tower-http = { version = "0.5", features = ["trace", "cors"] }
+hyper = "1.0"
+
+# Serialization
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+serde_yaml = "0.9"
+
+# Configuration
+clap = { version = "4.5", features = ["derive", "env"] }
+
+# Logging
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
+
+# Path handling and filesystem
+path-clean = "1.0"
+
+# Data structures
+radix_trie = "0.2"
+
+# Error handling
+anyhow = "1.0"
+thiserror = "1.0"
+
+# UUIDs
+uuid = { version = "1.0", features = ["v4", "serde"] }
+
+# Time
+chrono = { version = "0.4", features = ["serde"] }
+
+# Bytes handling
+bytes = "1.5"
+
+# Database
+rusqlite = { version = "0.32", features = ["bundled"] }
+sqlx = { version = "0.8", features = ["runtime-tokio", "sqlite", "mysql"], optional = true }
+
+# AWS S3
+aws-config = { version = "1", features = ["behavior-version-latest"], optional = true }
+aws-sdk-s3 = { version = "1", optional = true }
+aws-types = { version = "1", optional = true }
+
+# Cache
+lru = "0.12"
+
+# Development dependencies
+[dev-dependencies]
+tempfile = "3.12"
+criterion = "0.5"
+
+[features]
+default = []
+s3 = ["aws-sdk-s3", "aws-config", "aws-types"]
+full = ["s3"]
+
+[profile.release]
+opt-level = 3
+lto = true
+strip = true
+codegen-units = 1
+
+[profile.dev]
+opt-level = 0
diff --git a/crates/ragfs/ORIGIN.md b/crates/ragfs/ORIGIN.md
new file mode 100644
index 000000000..453dbac44
--- /dev/null
+++ b/crates/ragfs/ORIGIN.md
@@ -0,0 +1,16 @@
+# RAGFS Origin
+
+This crate (RAGFS) is a Rust reimplementation of the AGFS project originally authored by [c44pt0r](https://github.com/c44pt0r).
+
+## Source
+
+RAGFS is based on the Go implementation of AGFS located at `third_party/agfs/` in this repository.
+
+## License
+
+The original AGFS project is open source. This Rust implementation maintains compatibility with and references the original AGFS license.
+
+## Switch
+export RAGFS_IMPL=auto (default to rust, with fallback to go)
+export RAGFS_IMPL=rust
+export RAGFS_IMPL=go
\ No newline at end of file
diff --git a/crates/ragfs/src/core/errors.rs b/crates/ragfs/src/core/errors.rs
new file mode 100644
index 000000000..b2f802842
--- /dev/null
+++ b/crates/ragfs/src/core/errors.rs
@@ -0,0 +1,149 @@
+//! Error types for RAGFS
+//!
+//! This module defines all error types used throughout the RAGFS system.
+//! We use `thiserror` for structured error definitions to ensure type safety
+//! and clear error messages.
+
+use std::io;
+use serde_json;
+
+/// Result type alias for RAGFS operations
+pub type Result = std::result::Result;
+
+/// Main error type for RAGFS operations
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ /// File or directory not found
+ #[error("not found: {0}")]
+ NotFound(String),
+
+ /// File or directory already exists
+ #[error("already exists: {0}")]
+ AlreadyExists(String),
+
+ /// Permission denied
+ #[error("permission denied: {0}")]
+ PermissionDenied(String),
+
+ /// Invalid path
+ #[error("invalid path: {0}")]
+ InvalidPath(String),
+
+ /// Not a directory
+ #[error("not a directory: {0}")]
+ NotADirectory(String),
+
+ /// Is a directory (when file operation expected)
+ #[error("is a directory: {0}")]
+ IsADirectory(String),
+
+ /// Directory not empty
+ #[error("directory not empty: {0}")]
+ DirectoryNotEmpty(String),
+
+ /// Invalid operation
+ #[error("invalid operation: {0}")]
+ InvalidOperation(String),
+
+ /// I/O error
+ #[error("I/O error: {0}")]
+ Io(#[from] io::Error),
+
+ /// Plugin error
+ #[error("plugin error: {0}")]
+ Plugin(String),
+
+ /// Configuration error
+ #[error("configuration error: {0}")]
+ Config(String),
+
+ /// Mount point not found
+ #[error("mount point not found: {0}")]
+ MountPointNotFound(String),
+
+ /// Mount point already exists
+ #[error("mount point already exists: {0}")]
+ MountPointExists(String),
+
+ /// Serialization error
+ #[error("serialization error: {0}")]
+ Serialization(String),
+
+ /// Network error
+ #[error("network error: {0}")]
+ Network(String),
+
+ /// Timeout error
+ #[error("operation timed out: {0}")]
+ Timeout(String),
+
+ /// Internal error
+ #[error("internal error: {0}")]
+ Internal(String),
+}
+
+impl From for Error {
+ fn from(err: serde_json::Error) -> Self {
+ Self::Serialization(err.to_string())
+ }
+}
+
+impl Error {
+ /// Create a NotFound error
+ pub fn not_found(path: impl Into) -> Self {
+ Self::NotFound(path.into())
+ }
+
+ /// Create an AlreadyExists error
+ pub fn already_exists(path: impl Into) -> Self {
+ Self::AlreadyExists(path.into())
+ }
+
+ /// Create a PermissionDenied error
+ pub fn permission_denied(path: impl Into) -> Self {
+ Self::PermissionDenied(path.into())
+ }
+
+ /// Create an InvalidPath error
+ pub fn invalid_path(path: impl Into) -> Self {
+ Self::InvalidPath(path.into())
+ }
+
+ /// Create a Plugin error
+ pub fn plugin(msg: impl Into) -> Self {
+ Self::Plugin(msg.into())
+ }
+
+ /// Create a Config error
+ pub fn config(msg: impl Into) -> Self {
+ Self::Config(msg.into())
+ }
+
+ /// Create an Internal error
+ pub fn internal(msg: impl Into) -> Self {
+ Self::Internal(msg.into())
+ }
+
+ /// Create an InvalidOperation error
+ pub fn invalid_operation(msg: impl Into) -> Self {
+ Self::InvalidOperation(msg.into())
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_error_creation() {
+ let err = Error::not_found("/test/path");
+ assert!(matches!(err, Error::NotFound(_)));
+ assert_eq!(err.to_string(), "not found: /test/path");
+ }
+
+ #[test]
+ fn test_error_display() {
+ let err = Error::permission_denied("/protected");
+ assert_eq!(err.to_string(), "permission denied: /protected");
+ }
+}
diff --git a/crates/ragfs/src/core/filesystem.rs b/crates/ragfs/src/core/filesystem.rs
new file mode 100644
index 000000000..de79ab329
--- /dev/null
+++ b/crates/ragfs/src/core/filesystem.rs
@@ -0,0 +1,220 @@
+//! FileSystem trait definition
+//!
+//! This module defines the core FileSystem trait that all filesystem implementations
+//! must implement. This provides a unified interface for file operations across
+//! different storage backends.
+
+use async_trait::async_trait;
+
+use super::errors::Result;
+use super::types::{FileInfo, WriteFlag};
+
+/// Core filesystem abstraction trait
+///
+/// All filesystem plugins must implement this trait to provide file operations.
+/// All methods are async to support I/O-bound operations efficiently.
+#[async_trait]
+pub trait FileSystem: Send + Sync {
+ /// Create an empty file at the specified path
+ ///
+ /// # Arguments
+ /// * `path` - The path where the file should be created
+ ///
+ /// # Errors
+ /// * `Error::AlreadyExists` - If a file already exists at the path
+ /// * `Error::NotFound` - If the parent directory doesn't exist
+ /// * `Error::PermissionDenied` - If permission is denied
+ async fn create(&self, path: &str) -> Result<()>;
+
+ /// Create a directory at the specified path
+ ///
+ /// # Arguments
+ /// * `path` - The path where the directory should be created
+ /// * `mode` - Unix-style permissions (e.g., 0o755)
+ ///
+ /// # Errors
+ /// * `Error::AlreadyExists` - If a directory already exists at the path
+ /// * `Error::NotFound` - If the parent directory doesn't exist
+ async fn mkdir(&self, path: &str, mode: u32) -> Result<()>;
+
+ /// Remove a file at the specified path
+ ///
+ /// # Arguments
+ /// * `path` - The path of the file to remove
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the file doesn't exist
+ /// * `Error::IsADirectory` - If the path points to a directory
+ async fn remove(&self, path: &str) -> Result<()>;
+
+ /// Recursively remove a file or directory
+ ///
+ /// # Arguments
+ /// * `path` - The path to remove
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the path doesn't exist
+ async fn remove_all(&self, path: &str) -> Result<()>;
+
+ /// Read file contents
+ ///
+ /// # Arguments
+ /// * `path` - The path of the file to read
+ /// * `offset` - Byte offset to start reading from
+ /// * `size` - Number of bytes to read (0 means read all)
+ ///
+ /// # Returns
+ /// The file contents as a byte vector
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the file doesn't exist
+ /// * `Error::IsADirectory` - If the path points to a directory
+ async fn read(&self, path: &str, offset: u64, size: u64) -> Result>;
+
+ /// Write data to a file
+ ///
+ /// # Arguments
+ /// * `path` - The path of the file to write
+ /// * `data` - The data to write
+ /// * `offset` - Byte offset to start writing at
+ /// * `flags` - Write flags (create, append, truncate, etc.)
+ ///
+ /// # Returns
+ /// The number of bytes written
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the file doesn't exist and Create flag not set
+ /// * `Error::IsADirectory` - If the path points to a directory
+ async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result;
+
+ /// List directory contents
+ ///
+ /// # Arguments
+ /// * `path` - The path of the directory to list
+ ///
+ /// # Returns
+ /// A vector of FileInfo for each entry in the directory
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the directory doesn't exist
+ /// * `Error::NotADirectory` - If the path is not a directory
+ async fn read_dir(&self, path: &str) -> Result>;
+
+ /// Get file or directory metadata
+ ///
+ /// # Arguments
+ /// * `path` - The path to get metadata for
+ ///
+ /// # Returns
+ /// FileInfo containing metadata
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the path doesn't exist
+ async fn stat(&self, path: &str) -> Result;
+
+ /// Rename/move a file or directory
+ ///
+ /// # Arguments
+ /// * `old_path` - The current path
+ /// * `new_path` - The new path
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If old_path doesn't exist
+ /// * `Error::AlreadyExists` - If new_path already exists
+ async fn rename(&self, old_path: &str, new_path: &str) -> Result<()>;
+
+ /// Change file permissions
+ ///
+ /// # Arguments
+ /// * `path` - The path of the file
+ /// * `mode` - New Unix-style permissions
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the path doesn't exist
+ async fn chmod(&self, path: &str, mode: u32) -> Result<()>;
+
+ /// Truncate a file to a specified size
+ ///
+ /// # Arguments
+ /// * `path` - The path of the file
+ /// * `size` - The new size in bytes
+ ///
+ /// # Errors
+ /// * `Error::NotFound` - If the file doesn't exist
+ /// * `Error::IsADirectory` - If the path points to a directory
+ async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+ // Default implementation: read, resize, write back
+ let mut data = self.read(path, 0, 0).await?;
+ data.resize(size as usize, 0);
+ self.write(path, &data, 0, WriteFlag::Truncate).await?;
+ Ok(())
+ }
+
+ /// Check if a path exists
+ ///
+ /// # Arguments
+ /// * `path` - The path to check
+ ///
+ /// # Returns
+ /// true if the path exists, false otherwise
+ async fn exists(&self, path: &str) -> bool {
+ self.stat(path).await.is_ok()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Mock filesystem for testing
+ struct MockFS;
+
+ #[async_trait]
+ impl FileSystem for MockFS {
+ async fn create(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> {
+ Ok(())
+ }
+
+ async fn remove(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn remove_all(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> {
+ Ok(vec![])
+ }
+
+ async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result {
+ Ok(_data.len() as u64)
+ }
+
+ async fn read_dir(&self, _path: &str) -> Result> {
+ Ok(vec![])
+ }
+
+ async fn stat(&self, _path: &str) -> Result {
+ Ok(FileInfo::new_file("test".to_string(), 0, 0o644))
+ }
+
+ async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> {
+ Ok(())
+ }
+ }
+
+ #[tokio::test]
+ async fn test_filesystem_trait() {
+ let fs = MockFS;
+ assert!(fs.exists("/test").await);
+ }
+}
diff --git a/crates/ragfs/src/core/mod.rs b/crates/ragfs/src/core/mod.rs
new file mode 100644
index 000000000..9b1e1730e
--- /dev/null
+++ b/crates/ragfs/src/core/mod.rs
@@ -0,0 +1,21 @@
+//! Core module for RAGFS
+//!
+//! This module contains the fundamental abstractions and types used throughout RAGFS:
+//! - Error types and Result alias
+//! - FileSystem trait for filesystem implementations
+//! - ServicePlugin trait for plugin system
+//! - MountableFS for routing operations to mounted plugins
+//! - Core data types (FileInfo, ConfigParameter, etc.)
+
+pub mod errors;
+pub mod filesystem;
+pub mod mountable;
+pub mod plugin;
+pub mod types;
+
+// Re-export commonly used types
+pub use errors::{Error, Result};
+pub use filesystem::FileSystem;
+pub use mountable::MountableFS;
+pub use plugin::{HealthStatus, PluginRegistry, ServicePlugin};
+pub use types::{ConfigParameter, ConfigValue, FileInfo, PluginConfig, WriteFlag};
diff --git a/crates/ragfs/src/core/mountable.rs b/crates/ragfs/src/core/mountable.rs
new file mode 100644
index 000000000..7bee90cfd
--- /dev/null
+++ b/crates/ragfs/src/core/mountable.rs
@@ -0,0 +1,629 @@
+//! MountableFS - A filesystem that routes operations to mounted plugins
+//!
+//! This module implements the core MountableFS which acts as a router,
+//! directing filesystem operations to the appropriate mounted plugin based
+//! on the path prefix.
+
+use async_trait::async_trait;
+use radix_trie::{Trie, TrieCommon};
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::RwLock;
+
+use super::errors::{Error, Result};
+use super::filesystem::FileSystem;
+use super::plugin::ServicePlugin;
+use super::types::{FileInfo, PluginConfig, WriteFlag};
+
+/// Information about a mounted filesystem
+#[derive(Clone)]
+struct MountInfo {
+ /// The mount path (e.g., "/memfs")
+ path: String,
+
+ /// The filesystem instance
+ fs: Arc,
+
+ /// The plugin that created this filesystem
+ plugin_name: String,
+}
+
+/// MountableFS routes filesystem operations to mounted plugins
+///
+/// This is the core component that allows multiple filesystem implementations
+/// to coexist at different mount points. It uses a radix trie for efficient
+/// path-based routing.
+pub struct MountableFS {
+ /// Radix trie for fast path lookup
+ mounts: Arc>>,
+
+ /// Plugin registry for creating new filesystem instances
+ registry: Arc>>>,
+}
+
+impl MountableFS {
+ /// Create a new MountableFS
+ pub fn new() -> Self {
+ Self {
+ mounts: Arc::new(RwLock::new(Trie::new())),
+ registry: Arc::new(RwLock::new(HashMap::new())),
+ }
+ }
+
+ /// Register a plugin
+ ///
+ /// # Arguments
+ /// * `plugin` - The plugin to register
+ pub async fn register_plugin(&self, plugin: P) {
+ let name = plugin.name().to_string();
+ let mut registry = self.registry.write().await;
+ registry.insert(name, Arc::new(plugin));
+ }
+
+ /// Mount a filesystem at the specified path
+ ///
+ /// # Arguments
+ /// * `config` - Plugin configuration including mount path
+ ///
+ /// # Errors
+ /// * `Error::MountPointExists` - If a filesystem is already mounted at this path
+ /// * `Error::Plugin` - If the plugin is not registered or initialization fails
+ pub async fn mount(&self, config: PluginConfig) -> Result<()> {
+ let mount_path = config.mount_path.clone();
+
+ // Normalize path (ensure it starts with / and doesn't end with /)
+ let normalized_path = normalize_path(&mount_path);
+
+ // Check if already mounted
+ {
+ let mounts = self.mounts.read().await;
+ if mounts.get(&normalized_path).is_some() {
+ return Err(Error::MountPointExists(normalized_path));
+ }
+ }
+
+ // Get plugin from registry
+ let plugin = {
+ let registry = self.registry.read().await;
+ registry
+ .get(&config.name)
+ .cloned()
+ .ok_or_else(|| Error::plugin(format!("Plugin '{}' not registered", config.name)))?
+ };
+
+ // Validate configuration
+ plugin.validate(&config).await?;
+
+ // Initialize filesystem
+ let fs = plugin.initialize(config.clone()).await?;
+
+ // Add to mounts
+ let mount_info = MountInfo {
+ path: normalized_path.clone(),
+ fs: Arc::from(fs),
+ plugin_name: config.name.clone(),
+ };
+
+ let mut mounts = self.mounts.write().await;
+ mounts.insert(normalized_path, mount_info);
+
+ Ok(())
+ }
+
+ /// Unmount a filesystem at the specified path
+ ///
+ /// # Arguments
+ /// * `path` - The mount path to unmount
+ ///
+ /// # Errors
+ /// * `Error::MountPointNotFound` - If no filesystem is mounted at this path
+ pub async fn unmount(&self, path: &str) -> Result<()> {
+ let normalized_path = normalize_path(path);
+
+ let mut mounts = self.mounts.write().await;
+ if mounts.remove(&normalized_path).is_none() {
+ return Err(Error::MountPointNotFound(normalized_path));
+ }
+
+ Ok(())
+ }
+
+ /// List all mount points
+ ///
+ /// # Returns
+ /// A vector of tuples containing (mount_path, plugin_name)
+ pub async fn list_mounts(&self) -> Vec<(String, String)> {
+ let mounts = self.mounts.read().await;
+ mounts
+ .iter()
+ .map(|(path, info)| (path.clone(), info.plugin_name.clone()))
+ .collect()
+ }
+
+ /// Find the mount point for a given path
+ ///
+ /// # Arguments
+ /// * `path` - The path to look up
+ ///
+ /// # Returns
+ /// A tuple of (mount_info, relative_path) where relative_path is the path
+ /// relative to the mount point
+ ///
+ /// # Errors
+ /// * `Error::MountPointNotFound` - If no mount point matches the path
+ async fn find_mount(&self, path: &str) -> Result<(MountInfo, String)> {
+ let normalized_path = normalize_path(path);
+ let mounts = self.mounts.read().await;
+
+ // Find the longest matching prefix using radix trie
+ // Check for exact match first
+ if let Some(mount_info) = mounts.get(&normalized_path) {
+ return Ok((mount_info.clone(), "/".to_string()));
+ }
+
+ // Iterate through ancestors to find longest prefix match
+ // Start with the longest possible prefix and work backwards
+ let mut current = normalized_path.as_str();
+ loop {
+ if let Some(mount_info) = mounts.get(current) {
+ let relative_path = if current == "/" {
+ normalized_path.clone()
+ } else {
+ normalized_path[current.len()..].to_string()
+ };
+ return Ok((mount_info.clone(), relative_path));
+ }
+
+ if current == "/" {
+ break;
+ }
+
+ // Find parent path by removing last component
+ match current.rfind('/') {
+ Some(0) => current = "/",
+ Some(pos) => current = ¤t[..pos],
+ None => break,
+ }
+ }
+
+ Err(Error::MountPointNotFound(normalized_path))
+ }
+}
+
+impl Default for MountableFS {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+/// Normalize a path by ensuring it starts with / and doesn't end with /
+fn normalize_path(path: &str) -> String {
+ let mut normalized = path.trim().to_string();
+
+ // Ensure starts with /
+ if !normalized.starts_with('/') {
+ normalized.insert(0, '/');
+ }
+
+ // Remove trailing / (except for root)
+ if normalized.len() > 1 && normalized.ends_with('/') {
+ normalized.pop();
+ }
+
+ normalized
+}
+
+// Implement FileSystem trait for MountableFS by delegating to mounted filesystems
+#[async_trait]
+impl FileSystem for MountableFS {
+ async fn create(&self, path: &str) -> Result<()> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.create(&rel_path).await
+ }
+
+ async fn mkdir(&self, path: &str, mode: u32) -> Result<()> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.mkdir(&rel_path, mode).await
+ }
+
+ async fn remove(&self, path: &str) -> Result<()> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.remove(&rel_path).await
+ }
+
+ async fn remove_all(&self, path: &str) -> Result<()> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.remove_all(&rel_path).await
+ }
+
+ async fn read(&self, path: &str, offset: u64, size: u64) -> Result> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.read(&rel_path, offset, size).await
+ }
+
+ async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.write(&rel_path, data, offset, flags).await
+ }
+
+ async fn read_dir(&self, path: &str) -> Result> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.read_dir(&rel_path).await
+ }
+
+ async fn stat(&self, path: &str) -> Result {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.stat(&rel_path).await
+ }
+
+ async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+ let (mount_info_old, rel_old) = self.find_mount(old_path).await?;
+ let (mount_info_new, rel_new) = self.find_mount(new_path).await?;
+
+ // Ensure both paths are on the same mount
+ if mount_info_old.path != mount_info_new.path {
+ return Err(Error::InvalidOperation(
+ "Cannot rename across different mount points".to_string(),
+ ));
+ }
+
+ mount_info_old.fs.rename(&rel_old, &rel_new).await
+ }
+
+ async fn chmod(&self, path: &str, mode: u32) -> Result<()> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.chmod(&rel_path, mode).await
+ }
+
+ async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+ let (mount_info, rel_path) = self.find_mount(path).await?;
+ mount_info.fs.truncate(&rel_path, size).await
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use std::collections::HashMap;
+
+ // Mock filesystem for testing
+ struct MockFS {
+ name: String,
+ }
+
+ impl MockFS {
+ fn new(name: &str) -> Self {
+ Self {
+ name: name.to_string(),
+ }
+ }
+ }
+
+ #[async_trait]
+ impl FileSystem for MockFS {
+ async fn create(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> {
+ Ok(())
+ }
+
+ async fn remove(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn remove_all(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> {
+ Ok(self.name.as_bytes().to_vec())
+ }
+
+ async fn write(&self, _path: &str, data: &[u8], _offset: u64, _flags: WriteFlag) -> Result {
+ Ok(data.len() as u64)
+ }
+
+ async fn read_dir(&self, _path: &str) -> Result> {
+ Ok(vec![])
+ }
+
+ async fn stat(&self, path: &str) -> Result {
+ Ok(FileInfo::new_file(path.to_string(), 0, 0o644))
+ }
+
+ async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> {
+ Ok(())
+ }
+
+ async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> {
+ Ok(())
+ }
+ }
+
+ // Mock plugin for testing
+ struct MockPlugin {
+ name: String,
+ }
+
+ impl MockPlugin {
+ fn new(name: &str) -> Self {
+ Self {
+ name: name.to_string(),
+ }
+ }
+ }
+
+ #[async_trait]
+ impl ServicePlugin for MockPlugin {
+ fn name(&self) -> &str {
+ &self.name
+ }
+
+ fn readme(&self) -> &str {
+ "Mock plugin for testing"
+ }
+
+ async fn validate(&self, _config: &PluginConfig) -> Result<()> {
+ Ok(())
+ }
+
+ async fn initialize(&self, _config: PluginConfig) -> Result> {
+ Ok(Box::new(MockFS::new(&self.name)))
+ }
+
+ fn config_params(&self) -> &[super::super::types::ConfigParameter] {
+ &[]
+ }
+ }
+
+ #[test]
+ fn test_normalize_path() {
+ assert_eq!(normalize_path("/test"), "/test");
+ assert_eq!(normalize_path("/test/"), "/test");
+ assert_eq!(normalize_path("test"), "/test");
+ assert_eq!(normalize_path("/"), "/");
+ assert_eq!(normalize_path(""), "/");
+ }
+
+ #[tokio::test]
+ async fn test_mountable_fs_creation() {
+ let mfs = MountableFS::new();
+ let mounts = mfs.list_mounts().await;
+ assert!(mounts.is_empty());
+ }
+
+ #[tokio::test]
+ async fn test_mount_and_unmount() {
+ let mfs = MountableFS::new();
+
+ // Register plugin
+ mfs.register_plugin(MockPlugin::new("mock")).await;
+
+ // Mount filesystem
+ let config = PluginConfig {
+ name: "mock".to_string(),
+ mount_path: "/mock".to_string(),
+ params: HashMap::new(),
+ };
+
+ assert!(mfs.mount(config).await.is_ok());
+
+ // Check mount list
+ let mounts = mfs.list_mounts().await;
+ assert_eq!(mounts.len(), 1);
+ assert_eq!(mounts[0].0, "/mock");
+ assert_eq!(mounts[0].1, "mock");
+
+ // Unmount
+ assert!(mfs.unmount("/mock").await.is_ok());
+
+ // Check mount list is empty
+ let mounts = mfs.list_mounts().await;
+ assert!(mounts.is_empty());
+ }
+
+ #[tokio::test]
+ async fn test_mount_duplicate_error() {
+ let mfs = MountableFS::new();
+ mfs.register_plugin(MockPlugin::new("mock")).await;
+
+ let config = PluginConfig {
+ name: "mock".to_string(),
+ mount_path: "/mock".to_string(),
+ params: HashMap::new(),
+ };
+
+ // First mount should succeed
+ assert!(mfs.mount(config.clone()).await.is_ok());
+
+ // Second mount at same path should fail
+ let result = mfs.mount(config).await;
+ assert!(result.is_err());
+ assert!(matches!(result.unwrap_err(), Error::MountPointExists(_)));
+ }
+
+ #[tokio::test]
+ async fn test_unmount_not_found() {
+ let mfs = MountableFS::new();
+
+ let result = mfs.unmount("/nonexistent").await;
+ assert!(result.is_err());
+ assert!(matches!(result.unwrap_err(), Error::MountPointNotFound(_)));
+ }
+
+ #[tokio::test]
+ async fn test_filesystem_operations() {
+ let mfs = MountableFS::new();
+ mfs.register_plugin(MockPlugin::new("mock")).await;
+
+ let config = PluginConfig {
+ name: "mock".to_string(),
+ mount_path: "/mock".to_string(),
+ params: HashMap::new(),
+ };
+
+ mfs.mount(config).await.unwrap();
+
+ // Test read operation
+ let data = mfs.read("/mock/test.txt", 0, 0).await.unwrap();
+ assert_eq!(data, b"mock");
+
+ // Test write operation
+ let written = mfs.write("/mock/test.txt", b"hello", 0, WriteFlag::Create).await.unwrap();
+ assert_eq!(written, 5);
+
+ // Test stat operation
+ let info = mfs.stat("/mock/test.txt").await.unwrap();
+ assert_eq!(info.name, "/test.txt");
+ }
+
+ #[tokio::test]
+ async fn test_path_routing() {
+ let mfs = MountableFS::new();
+ mfs.register_plugin(MockPlugin::new("mock1")).await;
+ mfs.register_plugin(MockPlugin::new("mock2")).await;
+
+ // Mount two filesystems
+ let config1 = PluginConfig {
+ name: "mock1".to_string(),
+ mount_path: "/fs1".to_string(),
+ params: HashMap::new(),
+ };
+
+ let config2 = PluginConfig {
+ name: "mock2".to_string(),
+ mount_path: "/fs2".to_string(),
+ params: HashMap::new(),
+ };
+
+ mfs.mount(config1).await.unwrap();
+ mfs.mount(config2).await.unwrap();
+
+ // Test routing to different filesystems
+ let data1 = mfs.read("/fs1/file.txt", 0, 0).await.unwrap();
+ assert_eq!(data1, b"mock1");
+
+ let data2 = mfs.read("/fs2/file.txt", 0, 0).await.unwrap();
+ assert_eq!(data2, b"mock2");
+ }
+
+ #[tokio::test]
+ async fn test_rename_across_mounts_error() {
+ let mfs = MountableFS::new();
+ mfs.register_plugin(MockPlugin::new("mock1")).await;
+ mfs.register_plugin(MockPlugin::new("mock2")).await;
+
+ let config1 = PluginConfig {
+ name: "mock1".to_string(),
+ mount_path: "/fs1".to_string(),
+ params: HashMap::new(),
+ };
+
+ let config2 = PluginConfig {
+ name: "mock2".to_string(),
+ mount_path: "/fs2".to_string(),
+ params: HashMap::new(),
+ };
+
+ mfs.mount(config1).await.unwrap();
+ mfs.mount(config2).await.unwrap();
+
+ // Try to rename across different mounts - should fail
+ let result = mfs.rename("/fs1/file.txt", "/fs2/file.txt").await;
+ assert!(result.is_err());
+ assert!(matches!(result.unwrap_err(), Error::InvalidOperation(_)));
+ }
+
+ #[tokio::test]
+ async fn test_concurrent_operations() {
+ use tokio::task;
+
+ let mfs = Arc::new(MountableFS::new());
+ mfs.register_plugin(MockPlugin::new("mock")).await;
+
+ let config = PluginConfig {
+ name: "mock".to_string(),
+ mount_path: "/mock".to_string(),
+ params: HashMap::new(),
+ };
+
+ mfs.mount(config).await.unwrap();
+
+ // Spawn multiple concurrent read operations
+ let mut handles = vec![];
+ for i in 0..10 {
+ let mfs_clone = Arc::clone(&mfs);
+ let handle = task::spawn(async move {
+ let path = format!("/mock/file{}.txt", i);
+ mfs_clone.read(&path, 0, 0).await
+ });
+ handles.push(handle);
+ }
+
+ // Wait for all operations to complete
+ for handle in handles {
+ let result = handle.await.unwrap();
+ assert!(result.is_ok());
+ assert_eq!(result.unwrap(), b"mock");
+ }
+ }
+
+ #[tokio::test]
+ async fn test_concurrent_mount_unmount() {
+ use tokio::task;
+
+ let mfs = Arc::new(MountableFS::new());
+
+ // Register multiple plugins
+ for i in 0..5 {
+ mfs.register_plugin(MockPlugin::new(&format!("mock{}", i))).await;
+ }
+
+ // Spawn concurrent mount operations
+ let mut handles = vec![];
+ for i in 0..5 {
+ let mfs_clone = Arc::clone(&mfs);
+ let handle = task::spawn(async move {
+ let config = PluginConfig {
+ name: format!("mock{}", i),
+ mount_path: format!("/mock{}", i),
+ params: HashMap::new(),
+ };
+ mfs_clone.mount(config).await
+ });
+ handles.push(handle);
+ }
+
+ // Wait for all mounts to complete
+ for handle in handles {
+ let result = handle.await.unwrap();
+ assert!(result.is_ok());
+ }
+
+ // Verify all mounts
+ let mounts = mfs.list_mounts().await;
+ assert_eq!(mounts.len(), 5);
+
+ // Concurrent unmount
+ let mut handles = vec![];
+ for i in 0..5 {
+ let mfs_clone = Arc::clone(&mfs);
+ let handle = task::spawn(async move {
+ mfs_clone.unmount(&format!("/mock{}", i)).await
+ });
+ handles.push(handle);
+ }
+
+ // Wait for all unmounts
+ for handle in handles {
+ let result = handle.await.unwrap();
+ assert!(result.is_ok());
+ }
+
+ // Verify all unmounted
+ let mounts = mfs.list_mounts().await;
+ assert!(mounts.is_empty());
+ }
+}
diff --git a/crates/ragfs/src/core/plugin.rs b/crates/ragfs/src/core/plugin.rs
new file mode 100644
index 000000000..2bbcaf1cc
--- /dev/null
+++ b/crates/ragfs/src/core/plugin.rs
@@ -0,0 +1,276 @@
+//! Plugin system for RAGFS
+//!
+//! This module defines the ServicePlugin trait that all plugins must implement.
+//! Plugins provide filesystem implementations that can be dynamically mounted
+//! at different paths.
+
+use async_trait::async_trait;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use super::errors::Result;
+use super::filesystem::FileSystem;
+use super::types::{ConfigParameter, PluginConfig};
+
+/// Service plugin trait
+///
+/// All filesystem plugins must implement this trait to be registered
+/// and used within RAGFS. The plugin is responsible for validating
+/// configuration and creating filesystem instances.
+#[async_trait]
+pub trait ServicePlugin: Send + Sync {
+ /// Get the unique name of this plugin
+ ///
+ /// This name is used to identify the plugin in configuration
+ /// and mount operations.
+ fn name(&self) -> &str;
+
+ /// Get the plugin version
+ fn version(&self) -> &str {
+ "0.1.0"
+ }
+
+ /// Get a brief description of the plugin
+ fn description(&self) -> &str {
+ ""
+ }
+
+ /// Get the README documentation for this plugin
+ ///
+ /// This should include usage examples, configuration parameters,
+ /// and any special considerations.
+ fn readme(&self) -> &str;
+
+ /// Validate plugin configuration
+ ///
+ /// This is called before initialize() to ensure the configuration
+ /// is valid. Should check for required parameters, valid values, etc.
+ ///
+ /// # Arguments
+ /// * `config` - The configuration to validate
+ ///
+ /// # Errors
+ /// Returns an error if the configuration is invalid
+ async fn validate(&self, config: &PluginConfig) -> Result<()>;
+
+ /// Initialize the plugin and return a filesystem instance
+ ///
+ /// This is called after validate() succeeds. The plugin should
+ /// create and return a new filesystem instance configured according
+ /// to the provided configuration.
+ ///
+ /// # Arguments
+ /// * `config` - The validated configuration
+ ///
+ /// # Returns
+ /// A boxed FileSystem implementation
+ ///
+ /// # Errors
+ /// Returns an error if initialization fails
+ async fn initialize(&self, config: PluginConfig) -> Result>;
+
+ /// Shutdown the plugin
+ ///
+ /// This is called when the plugin is being unmounted or the server
+ /// is shutting down. The plugin should clean up any resources.
+ async fn shutdown(&self) -> Result<()> {
+ Ok(())
+ }
+
+ /// Get the configuration parameters supported by this plugin
+ ///
+ /// Returns a list of parameter definitions that describe what
+ /// configuration this plugin accepts.
+ fn config_params(&self) -> &[ConfigParameter];
+
+ /// Health check for the plugin
+ ///
+ /// Returns whether the plugin is healthy and operational.
+ async fn health_check(&self) -> Result {
+ Ok(HealthStatus::Healthy)
+ }
+}
+
+/// Health status of a plugin
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum HealthStatus {
+ /// Plugin is healthy and operational
+ Healthy,
+
+ /// Plugin is degraded but still functional
+ Degraded(String),
+
+ /// Plugin is unhealthy and not functional
+ Unhealthy(String),
+}
+
+/// Plugin registry
+///
+/// Manages all registered plugins and provides lookup functionality.
+pub struct PluginRegistry {
+ plugins: HashMap>,
+}
+
+impl PluginRegistry {
+ /// Create a new empty plugin registry
+ pub fn new() -> Self {
+ Self {
+ plugins: HashMap::new(),
+ }
+ }
+
+ /// Register a plugin
+ ///
+ /// # Arguments
+ /// * `plugin` - The plugin to register
+ ///
+ /// # Panics
+ /// Panics if a plugin with the same name is already registered
+ pub fn register(&mut self, plugin: P) {
+ let name = plugin.name().to_string();
+ if self.plugins.contains_key(&name) {
+ panic!("Plugin '{}' is already registered", name);
+ }
+ self.plugins.insert(name, Arc::new(plugin));
+ }
+
+ /// Get a plugin by name
+ ///
+ /// # Arguments
+ /// * `name` - The name of the plugin to retrieve
+ ///
+ /// # Returns
+ /// An Arc to the plugin, or None if not found
+ pub fn get(&self, name: &str) -> Option> {
+ self.plugins.get(name).cloned()
+ }
+
+ /// List all registered plugin names
+ pub fn list(&self) -> Vec<&str> {
+ self.plugins.keys().map(|s| s.as_str()).collect()
+ }
+
+ /// Get the number of registered plugins
+ pub fn len(&self) -> usize {
+ self.plugins.len()
+ }
+
+ /// Check if the registry is empty
+ pub fn is_empty(&self) -> bool {
+ self.plugins.is_empty()
+ }
+}
+
+impl Default for PluginRegistry {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Mock plugin for testing
+ struct MockPlugin;
+
+ #[async_trait]
+ impl ServicePlugin for MockPlugin {
+ fn name(&self) -> &str {
+ "mock"
+ }
+
+ fn readme(&self) -> &str {
+ "Mock plugin for testing"
+ }
+
+ async fn validate(&self, _config: &PluginConfig) -> Result<()> {
+ Ok(())
+ }
+
+ async fn initialize(&self, _config: PluginConfig) -> Result> {
+ use crate::core::filesystem::FileSystem;
+ use crate::core::types::{FileInfo, WriteFlag};
+
+ struct MockFS;
+
+ #[async_trait]
+ impl FileSystem for MockFS {
+ async fn create(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+ async fn mkdir(&self, _path: &str, _mode: u32) -> Result<()> {
+ Ok(())
+ }
+ async fn remove(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+ async fn remove_all(&self, _path: &str) -> Result<()> {
+ Ok(())
+ }
+ async fn read(&self, _path: &str, _offset: u64, _size: u64) -> Result> {
+ Ok(vec![])
+ }
+ async fn write(&self, _path: &str, _data: &[u8], _offset: u64, _flags: WriteFlag) -> Result {
+ Ok(_data.len() as u64)
+ }
+ async fn read_dir(&self, _path: &str) -> Result> {
+ Ok(vec![])
+ }
+ async fn stat(&self, _path: &str) -> Result {
+ Ok(FileInfo::new_file("test".to_string(), 0, 0o644))
+ }
+ async fn rename(&self, _old_path: &str, _new_path: &str) -> Result<()> {
+ Ok(())
+ }
+ async fn chmod(&self, _path: &str, _mode: u32) -> Result<()> {
+ Ok(())
+ }
+ }
+
+ Ok(Box::new(MockFS))
+ }
+
+ fn config_params(&self) -> &[ConfigParameter] {
+ &[]
+ }
+ }
+
+ #[test]
+ fn test_plugin_registry() {
+ let mut registry = PluginRegistry::new();
+ assert!(registry.is_empty());
+
+ registry.register(MockPlugin);
+ assert_eq!(registry.len(), 1);
+ assert!(registry.get("mock").is_some());
+ assert!(registry.get("nonexistent").is_none());
+
+ let names = registry.list();
+ assert_eq!(names, vec!["mock"]);
+ }
+
+ #[tokio::test]
+ async fn test_plugin_lifecycle() {
+ let plugin = MockPlugin;
+
+ let config = PluginConfig {
+ name: "mock".to_string(),
+ mount_path: "/mock".to_string(),
+ params: HashMap::new(),
+ };
+
+ assert!(plugin.validate(&config).await.is_ok());
+ assert!(plugin.initialize(config).await.is_ok());
+ assert!(plugin.shutdown().await.is_ok());
+ }
+
+ #[test]
+ fn test_health_status() {
+ let healthy = HealthStatus::Healthy;
+ assert_eq!(healthy, HealthStatus::Healthy);
+
+ let degraded = HealthStatus::Degraded("slow".to_string());
+ assert!(matches!(degraded, HealthStatus::Degraded(_)));
+ }
+}
diff --git a/crates/ragfs/src/core/types.rs b/crates/ragfs/src/core/types.rs
new file mode 100644
index 000000000..175bd8abf
--- /dev/null
+++ b/crates/ragfs/src/core/types.rs
@@ -0,0 +1,259 @@
+//! Core types for RAGFS
+//!
+//! This module defines the fundamental data structures used throughout RAGFS,
+//! including file metadata, write flags, and configuration types.
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::time::SystemTime;
+
+/// File metadata information
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FileInfo {
+ /// File name (without path)
+ pub name: String,
+
+ /// File size in bytes
+ pub size: u64,
+
+ /// File mode/permissions (Unix-style)
+ pub mode: u32,
+
+ /// Last modification time
+ #[serde(with = "systemtime_serde")]
+ pub mod_time: SystemTime,
+
+ /// Whether this is a directory
+ pub is_dir: bool,
+}
+
+impl FileInfo {
+ /// Create a new FileInfo for a file
+ pub fn new_file(name: String, size: u64, mode: u32) -> Self {
+ Self {
+ name,
+ size,
+ mode,
+ mod_time: SystemTime::now(),
+ is_dir: false,
+ }
+ }
+
+ /// Create a new FileInfo for a directory
+ pub fn new_dir(name: String, mode: u32) -> Self {
+ Self {
+ name,
+ size: 0,
+ mode,
+ mod_time: SystemTime::now(),
+ is_dir: true,
+ }
+ }
+
+ /// Create a new FileInfo with all parameters
+ pub fn new(name: String, size: u64, mode: u32, mod_time: SystemTime, is_dir: bool) -> Self {
+ Self {
+ name,
+ size,
+ mode,
+ mod_time,
+ is_dir,
+ }
+ }
+}
+
+/// Write operation flags
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum WriteFlag {
+ /// Create new file or truncate existing
+ Create,
+
+ /// Append to existing file
+ Append,
+
+ /// Truncate file before writing
+ Truncate,
+
+ /// Write at specific offset (default)
+ None,
+}
+
+impl Default for WriteFlag {
+ fn default() -> Self {
+ Self::None
+ }
+}
+
+/// Plugin configuration parameter metadata
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ConfigParameter {
+ /// Parameter name
+ pub name: String,
+
+ /// Parameter type: "string", "int", "bool", "string_list"
+ #[serde(rename = "type")]
+ pub param_type: String,
+
+ /// Whether this parameter is required
+ pub required: bool,
+
+ /// Default value (if not required)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub default: Option,
+
+ /// Human-readable description
+ pub description: String,
+}
+
+impl ConfigParameter {
+ /// Create a required string parameter
+ pub fn required_string(name: impl Into, description: impl Into) -> Self {
+ Self {
+ name: name.into(),
+ param_type: "string".to_string(),
+ required: true,
+ default: None,
+ description: description.into(),
+ }
+ }
+
+ /// Create an optional parameter with default
+ pub fn optional(
+ name: impl Into,
+ param_type: impl Into,
+ default: impl Into,
+ description: impl Into,
+ ) -> Self {
+ Self {
+ name: name.into(),
+ param_type: param_type.into(),
+ required: false,
+ default: Some(default.into()),
+ description: description.into(),
+ }
+ }
+}
+
+/// Plugin configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PluginConfig {
+ /// Plugin name
+ pub name: String,
+
+ /// Mount path
+ pub mount_path: String,
+
+ /// Configuration parameters
+ pub params: HashMap,
+}
+
+/// Configuration value types
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(untagged)]
+pub enum ConfigValue {
+ /// String value
+ String(String),
+
+ /// Integer value
+ Int(i64),
+
+ /// Boolean value
+ Bool(bool),
+
+ /// List of strings
+ StringList(Vec),
+}
+
+impl ConfigValue {
+ /// Try to get as string
+ pub fn as_string(&self) -> Option<&str> {
+ match self {
+ ConfigValue::String(s) => Some(s),
+ _ => None,
+ }
+ }
+
+ /// Try to get as integer
+ pub fn as_int(&self) -> Option {
+ match self {
+ ConfigValue::Int(i) => Some(*i),
+ _ => None,
+ }
+ }
+
+ /// Try to get as boolean
+ pub fn as_bool(&self) -> Option {
+ match self {
+ ConfigValue::Bool(b) => Some(*b),
+ _ => None,
+ }
+ }
+
+ /// Try to get as string list
+ pub fn as_string_list(&self) -> Option<&[String]> {
+ match self {
+ ConfigValue::StringList(list) => Some(list),
+ _ => None,
+ }
+ }
+}
+
+/// Custom serde module for SystemTime
+mod systemtime_serde {
+ use serde::{Deserialize, Deserializer, Serialize, Serializer};
+ use std::time::{SystemTime, UNIX_EPOCH};
+
+ pub fn serialize(time: &SystemTime, serializer: S) -> Result
+ where
+ S: Serializer,
+ {
+ let duration = time
+ .duration_since(UNIX_EPOCH)
+ .map_err(serde::ser::Error::custom)?;
+ duration.as_secs().serialize(serializer)
+ }
+
+ pub fn deserialize<'de, D>(deserializer: D) -> Result
+ where
+ D: Deserializer<'de>,
+ {
+ let secs = u64::deserialize(deserializer)?;
+ Ok(UNIX_EPOCH + std::time::Duration::from_secs(secs))
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_file_info_creation() {
+ let file = FileInfo::new_file("test.txt".to_string(), 1024, 0o644);
+ assert_eq!(file.name, "test.txt");
+ assert_eq!(file.size, 1024);
+ assert!(!file.is_dir);
+
+ let dir = FileInfo::new_dir("testdir".to_string(), 0o755);
+ assert_eq!(dir.name, "testdir");
+ assert!(dir.is_dir);
+ }
+
+ #[test]
+ fn test_config_value() {
+ let val = ConfigValue::String("test".to_string());
+ assert_eq!(val.as_string(), Some("test"));
+ assert_eq!(val.as_int(), None);
+
+ let val = ConfigValue::Int(42);
+ assert_eq!(val.as_int(), Some(42));
+ assert_eq!(val.as_string(), None);
+ }
+
+ #[test]
+ fn test_config_parameter() {
+ let param = ConfigParameter::required_string("host", "Database host");
+ assert_eq!(param.name, "host");
+ assert!(param.required);
+ assert_eq!(param.param_type, "string");
+ }
+}
diff --git a/crates/ragfs/src/lib.rs b/crates/ragfs/src/lib.rs
new file mode 100644
index 000000000..fa3464ad9
--- /dev/null
+++ b/crates/ragfs/src/lib.rs
@@ -0,0 +1,60 @@
+//! RAGFS - Rust implementation of AGFS (Aggregated File System)
+//!
+//! RAGFS provides a unified filesystem abstraction that allows multiple
+//! filesystem implementations (plugins) to be mounted at different paths.
+//! It exposes these filesystems through a REST API, making them accessible
+//! to AI agents and other clients.
+//!
+//! # Architecture
+//!
+//! - **Core**: Fundamental traits and types (FileSystem, ServicePlugin, etc.)
+//! - **Plugins**: Filesystem implementations (MemFS, KVFS, QueueFS, etc.)
+//! - **Server**: HTTP API server for remote access
+//! - **Shell**: Interactive command-line interface
+//!
+//! # Example
+//!
+//! ```rust,no_run
+//! use ragfs::core::{PluginRegistry, FileSystem};
+//!
+//! #[tokio::main]
+//! async fn main() -> ragfs::core::Result<()> {
+//! // Create a plugin registry
+//! let mut registry = PluginRegistry::new();
+//!
+//! // Register plugins
+//! // registry.register(MemFSPlugin);
+//!
+//! Ok(())
+//! }
+//! ```
+
+#![warn(missing_docs)]
+#![warn(clippy::all)]
+
+pub mod core;
+pub mod plugins;
+pub mod server;
+
+// Re-export core types for convenience
+pub use core::{
+ ConfigParameter, ConfigValue, Error, FileInfo, FileSystem, HealthStatus, MountableFS,
+ PluginConfig, PluginRegistry, Result, ServicePlugin, WriteFlag,
+};
+
+/// Version of RAGFS
+pub const VERSION: &str = env!("CARGO_PKG_VERSION");
+
+/// Name of the package
+pub const NAME: &str = env!("CARGO_PKG_NAME");
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_version() {
+ assert!(!VERSION.is_empty());
+ assert_eq!(NAME, "ragfs");
+ }
+}
diff --git a/crates/ragfs/src/plugins/kvfs/mod.rs b/crates/ragfs/src/plugins/kvfs/mod.rs
new file mode 100644
index 000000000..3ced5969c
--- /dev/null
+++ b/crates/ragfs/src/plugins/kvfs/mod.rs
@@ -0,0 +1,565 @@
+//! KVFS - Key-Value File System
+//!
+//! A file system that treats files as key-value pairs. Each file's path
+//! becomes a key, and the file content becomes the value. This is useful
+//! for simple key-value storage scenarios.
+
+use async_trait::async_trait;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::SystemTime;
+use tokio::sync::RwLock;
+
+use crate::core::{
+ ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag,
+};
+
+/// Key-value entry
+#[derive(Clone)]
+struct KVEntry {
+ /// Value (file content)
+ value: Vec,
+ /// Last modification time
+ mod_time: SystemTime,
+}
+
+impl KVEntry {
+ fn new(value: Vec) -> Self {
+ Self {
+ value,
+ mod_time: SystemTime::now(),
+ }
+ }
+
+ fn touch(&mut self) {
+ self.mod_time = SystemTime::now();
+ }
+}
+
+/// Key-Value file system implementation
+pub struct KVFileSystem {
+ /// Storage for key-value pairs
+ store: Arc>>,
+}
+
+impl KVFileSystem {
+ /// Create a new KVFileSystem
+ pub fn new() -> Self {
+ Self {
+ store: Arc::new(RwLock::new(HashMap::new())),
+ }
+ }
+
+ /// Normalize path to key (remove leading /)
+ fn path_to_key(path: &str) -> String {
+ let normalized = if path.starts_with('/') {
+ &path[1..]
+ } else {
+ path
+ };
+
+ if normalized.is_empty() {
+ "/".to_string()
+ } else {
+ normalized.to_string()
+ }
+ }
+
+ /// Get parent directory path
+ fn parent_key(key: &str) -> Option {
+ if key == "/" || !key.contains('/') {
+ return Some("/".to_string());
+ }
+
+ let parts: Vec<&str> = key.split('/').collect();
+ if parts.len() <= 1 {
+ return Some("/".to_string());
+ }
+
+ Some(parts[..parts.len() - 1].join("/"))
+ }
+
+ /// List all keys with a given prefix
+ fn list_keys_with_prefix(&self, store: &HashMap, prefix: &str) -> Vec {
+ let search_prefix = if prefix == "/" {
+ ""
+ } else {
+ prefix
+ };
+
+ store
+ .keys()
+ .filter(|k| {
+ if search_prefix.is_empty() {
+ // Root: only keys without '/'
+ !k.contains('/')
+ } else {
+ // Keys that start with prefix/ and have no further /
+ k.starts_with(&format!("{}/", search_prefix))
+ && !k[search_prefix.len() + 1..].contains('/')
+ }
+ })
+ .cloned()
+ .collect()
+ }
+}
+
+impl Default for KVFileSystem {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+#[async_trait]
+impl FileSystem for KVFileSystem {
+ async fn create(&self, path: &str) -> Result<()> {
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ if store.contains_key(&key) {
+ return Err(Error::already_exists(path));
+ }
+
+ store.insert(key, KVEntry::new(Vec::new()));
+ Ok(())
+ }
+
+ async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> {
+ // KVFS doesn't have real directories, but we accept mkdir for compatibility
+ // We just create an empty entry to mark the "directory"
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ if store.contains_key(&key) {
+ return Err(Error::already_exists(path));
+ }
+
+ // Mark as directory by using empty value
+ store.insert(key, KVEntry::new(Vec::new()));
+ Ok(())
+ }
+
+ async fn remove(&self, path: &str) -> Result<()> {
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ if store.remove(&key).is_none() {
+ return Err(Error::not_found(path));
+ }
+
+ Ok(())
+ }
+
+ async fn remove_all(&self, path: &str) -> Result<()> {
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ // Remove the key itself
+ if !store.contains_key(&key) {
+ return Err(Error::not_found(path));
+ }
+
+ // Remove all keys with this prefix
+ let prefix = if key == "/" { "" } else { &key };
+ let to_remove: Vec = store
+ .keys()
+ .filter(|k| *k == &key || k.starts_with(&format!("{}/", prefix)))
+ .cloned()
+ .collect();
+
+ for k in to_remove {
+ store.remove(&k);
+ }
+
+ Ok(())
+ }
+
+ async fn read(&self, path: &str, offset: u64, size: u64) -> Result> {
+ let key = Self::path_to_key(path);
+ let store = self.store.read().await;
+
+ match store.get(&key) {
+ Some(entry) => {
+ let offset = offset as usize;
+ let data_len = entry.value.len();
+
+ if offset >= data_len {
+ return Ok(Vec::new());
+ }
+
+ let end = if size == 0 {
+ data_len
+ } else {
+ std::cmp::min(offset + size as usize, data_len)
+ };
+
+ Ok(entry.value[offset..end].to_vec())
+ }
+ None => Err(Error::not_found(path)),
+ }
+ }
+
+ async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result {
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ match store.get_mut(&key) {
+ Some(entry) => {
+ entry.touch();
+
+ match flags {
+ WriteFlag::Create | WriteFlag::Truncate => {
+ entry.value = data.to_vec();
+ }
+ WriteFlag::Append => {
+ entry.value.extend_from_slice(data);
+ }
+ WriteFlag::None => {
+ let offset = offset as usize;
+ let end = offset + data.len();
+
+ if end > entry.value.len() {
+ entry.value.resize(end, 0);
+ }
+
+ entry.value[offset..end].copy_from_slice(data);
+ }
+ }
+
+ Ok(data.len() as u64)
+ }
+ None => {
+ if matches!(flags, WriteFlag::Create) {
+ store.insert(key, KVEntry::new(data.to_vec()));
+ Ok(data.len() as u64)
+ } else {
+ Err(Error::not_found(path))
+ }
+ }
+ }
+ }
+
+ async fn read_dir(&self, path: &str) -> Result> {
+ let key = Self::path_to_key(path);
+ let store = self.store.read().await;
+
+ // Check if the directory exists (or root)
+ if key != "/" && !store.contains_key(&key) {
+ return Err(Error::not_found(path));
+ }
+
+ let keys = self.list_keys_with_prefix(&store, &key);
+ let mut result = Vec::new();
+
+ for k in keys {
+ if let Some(entry) = store.get(&k) {
+ let name = k.split('/').last().unwrap_or(&k).to_string();
+ result.push(FileInfo {
+ name,
+ size: entry.value.len() as u64,
+ mode: 0o644,
+ mod_time: entry.mod_time,
+ is_dir: false,
+ });
+ }
+ }
+
+ Ok(result)
+ }
+
+ async fn stat(&self, path: &str) -> Result {
+ let key = Self::path_to_key(path);
+ let store = self.store.read().await;
+
+ match store.get(&key) {
+ Some(entry) => {
+ let name = key.split('/').last().unwrap_or(&key).to_string();
+ Ok(FileInfo {
+ name,
+ size: entry.value.len() as u64,
+ mode: 0o644,
+ mod_time: entry.mod_time,
+ is_dir: false,
+ })
+ }
+ None => Err(Error::not_found(path)),
+ }
+ }
+
+ async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+ let old_key = Self::path_to_key(old_path);
+ let new_key = Self::path_to_key(new_path);
+ let mut store = self.store.write().await;
+
+ // Check old key exists
+ let entry = store
+ .get(&old_key)
+ .ok_or_else(|| Error::not_found(old_path))?
+ .clone();
+
+ // Check new key doesn't exist
+ if store.contains_key(&new_key) {
+ return Err(Error::already_exists(new_path));
+ }
+
+ // Collect all child keys with old prefix
+ let old_prefix = if old_key == "/" {
+ "".to_string()
+ } else {
+ format!("{}/", old_key)
+ };
+ let new_prefix = if new_key == "/" {
+ "".to_string()
+ } else {
+ format!("{}/", new_key)
+ };
+
+ let mut to_move = Vec::new();
+ for key in store.keys() {
+ if key == &old_key {
+ continue;
+ }
+ if !old_prefix.is_empty() && key.starts_with(&old_prefix) {
+ // Check for conflicts with new path
+ let new_child_key = format!("{}{}", new_prefix, &key[old_prefix.len()..]);
+ if store.contains_key(&new_child_key) {
+ // Convert back to path for error message
+ let new_child_path = if new_child_key == "/" {
+ "/".to_string()
+ } else {
+ format!("/{}", new_child_key)
+ };
+ return Err(Error::already_exists(&new_child_path));
+ }
+ to_move.push(key.clone());
+ }
+ }
+
+ // Move the main entry
+ store.remove(&old_key);
+ store.insert(new_key, entry);
+
+ // Move all child entries
+ for old_child_key in to_move {
+ let new_child_key = format!("{}{}", new_prefix, &old_child_key[old_prefix.len()..]);
+ if let Some(child_entry) = store.remove(&old_child_key) {
+ store.insert(new_child_key, child_entry);
+ }
+ }
+
+ Ok(())
+ }
+
+ async fn chmod(&self, path: &str, _mode: u32) -> Result<()> {
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ match store.get_mut(&key) {
+ Some(entry) => {
+ entry.touch();
+ Ok(())
+ }
+ None => Err(Error::not_found(path)),
+ }
+ }
+
+ async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+ let key = Self::path_to_key(path);
+ let mut store = self.store.write().await;
+
+ match store.get_mut(&key) {
+ Some(entry) => {
+ entry.value.resize(size as usize, 0);
+ entry.touch();
+ Ok(())
+ }
+ None => Err(Error::not_found(path)),
+ }
+ }
+}
+
+/// KVFS plugin
+pub struct KVFSPlugin;
+
+#[async_trait]
+impl ServicePlugin for KVFSPlugin {
+ fn name(&self) -> &str {
+ "kvfs"
+ }
+
+ fn version(&self) -> &str {
+ "0.1.0"
+ }
+
+ fn description(&self) -> &str {
+ "Key-value file system for simple storage"
+ }
+
+ fn readme(&self) -> &str {
+ r#"# KVFS - Key-Value File System
+
+A file system that treats files as key-value pairs. Each file's path
+becomes a key, and the file content becomes the value.
+
+## Features
+
+- Simple key-value storage
+- File paths map to keys
+- Fast lookups
+- In-memory storage (no persistence)
+
+## Usage
+
+Mount the filesystem:
+```bash
+curl -X POST http://localhost:8080/api/v1/mount \
+ -H "Content-Type: application/json" \
+ -d '{"plugin": "kvfs", "path": "/kvfs"}'
+```
+
+Store a value:
+```bash
+echo "value123" | curl -X PUT \
+ "http://localhost:8080/api/v1/files?path=/kvfs/mykey" \
+ --data-binary @-
+```
+
+Retrieve a value:
+```bash
+curl "http://localhost:8080/api/v1/files?path=/kvfs/mykey"
+```
+
+List all keys:
+```bash
+curl "http://localhost:8080/api/v1/directories?path=/kvfs"
+```
+
+## Use Cases
+
+- Configuration storage
+- Cache storage
+- Session data
+- Temporary key-value storage
+
+## Configuration
+
+KVFS has no configuration parameters.
+"#
+ }
+
+ async fn validate(&self, _config: &PluginConfig) -> Result<()> {
+ Ok(())
+ }
+
+ async fn initialize(&self, _config: PluginConfig) -> Result> {
+ Ok(Box::new(KVFileSystem::new()))
+ }
+
+ fn config_params(&self) -> &[ConfigParameter] {
+ &[]
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[tokio::test]
+ async fn test_kvfs_basic_operations() {
+ let fs = KVFileSystem::new();
+
+ // Create and write
+ fs.write("/key1", b"value1", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+
+ // Read
+ let data = fs.read("/key1", 0, 0).await.unwrap();
+ assert_eq!(data, b"value1");
+
+ // Update
+ fs.write("/key1", b"value2", 0, WriteFlag::Truncate)
+ .await
+ .unwrap();
+
+ let data = fs.read("/key1", 0, 0).await.unwrap();
+ assert_eq!(data, b"value2");
+ }
+
+ #[tokio::test]
+ async fn test_kvfs_list_keys() {
+ let fs = KVFileSystem::new();
+
+ fs.write("/key1", b"val1", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+ fs.write("/key2", b"val2", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+ fs.write("/key3", b"val3", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+
+ let entries = fs.read_dir("/").await.unwrap();
+ assert_eq!(entries.len(), 3);
+ }
+
+ #[tokio::test]
+ async fn test_kvfs_nested_keys() {
+ let fs = KVFileSystem::new();
+
+ // Create parent "directory" first
+ fs.mkdir("/user", 0o755).await.unwrap();
+
+ fs.write("/user/123", b"alice", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+ fs.write("/user/456", b"bob", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+
+ let entries = fs.read_dir("/user").await.unwrap();
+ assert_eq!(entries.len(), 2);
+ }
+
+ #[tokio::test]
+ async fn test_kvfs_delete() {
+ let fs = KVFileSystem::new();
+
+ fs.write("/key1", b"value1", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+ fs.remove("/key1").await.unwrap();
+
+ assert!(fs.read("/key1", 0, 0).await.is_err());
+ }
+
+ #[tokio::test]
+ async fn test_kvfs_rename() {
+ let fs = KVFileSystem::new();
+
+ fs.write("/oldkey", b"data", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+ fs.rename("/oldkey", "/newkey").await.unwrap();
+
+ assert!(fs.read("/oldkey", 0, 0).await.is_err());
+ let data = fs.read("/newkey", 0, 0).await.unwrap();
+ assert_eq!(data, b"data");
+ }
+
+ #[tokio::test]
+ async fn test_kvfs_plugin() {
+ let plugin = KVFSPlugin;
+ assert_eq!(plugin.name(), "kvfs");
+
+ let config = PluginConfig {
+ name: "kvfs".to_string(),
+ mount_path: "/kvfs".to_string(),
+ params: HashMap::new(),
+ };
+
+ assert!(plugin.validate(&config).await.is_ok());
+ assert!(plugin.initialize(config).await.is_ok());
+ }
+}
diff --git a/crates/ragfs/src/plugins/localfs/mod.rs b/crates/ragfs/src/plugins/localfs/mod.rs
new file mode 100644
index 000000000..7ac32c667
--- /dev/null
+++ b/crates/ragfs/src/plugins/localfs/mod.rs
@@ -0,0 +1,464 @@
+//! LocalFS plugin - Local file system mount
+//!
+//! This plugin mounts a local directory into RAGFS virtual file system,
+//! providing direct access to local files and directories.
+
+use async_trait::async_trait;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use crate::core::errors::{Error, Result};
+use crate::core::filesystem::FileSystem;
+use crate::core::plugin::ServicePlugin;
+use crate::core::types::{ConfigParameter, FileInfo, PluginConfig, WriteFlag};
+
+/// LocalFS - Local file system implementation
+pub struct LocalFileSystem {
+ /// Base path of the mounted directory
+ base_path: PathBuf,
+}
+
+impl LocalFileSystem {
+ /// Create a new LocalFileSystem
+ ///
+ /// # Arguments
+ /// * `base_path` - The local directory path to mount
+ ///
+ /// # Errors
+ /// Returns an error if the base path doesn't exist or is not a directory
+ pub fn new(base_path: &str) -> Result {
+ let path = PathBuf::from(base_path);
+
+ // Check if path exists
+ if !path.exists() {
+ return Err(Error::plugin(format!(
+ "base path does not exist: {}",
+ base_path
+ )));
+ }
+
+ // Check if it's a directory
+ if !path.is_dir() {
+ return Err(Error::plugin(format!(
+ "base path is not a directory: {}",
+ base_path
+ )));
+ }
+
+ Ok(Self { base_path: path })
+ }
+
+ /// Resolve a virtual path to actual local path
+ fn resolve_path(&self, path: &str) -> PathBuf {
+ // Remove leading slash to make it relative
+ let relative = path.strip_prefix('/').unwrap_or(path);
+
+ // Join with base path
+ if relative.is_empty() {
+ self.base_path.clone()
+ } else {
+ self.base_path.join(relative)
+ }
+ }
+}
+
+#[async_trait]
+impl FileSystem for LocalFileSystem {
+ async fn create(&self, path: &str) -> Result<()> {
+ let local_path = self.resolve_path(path);
+
+ // Check if file already exists
+ if local_path.exists() {
+ return Err(Error::AlreadyExists(path.to_string()));
+ }
+
+ // Check if parent directory exists
+ if let Some(parent) = local_path.parent() {
+ if !parent.exists() {
+ return Err(Error::NotFound(parent.to_string_lossy().to_string()));
+ }
+ }
+
+ // Create empty file
+ fs::File::create(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to create file: {}", e)))?;
+
+ Ok(())
+ }
+
+ async fn mkdir(&self, path: &str, _mode: u32) -> Result<()> {
+ let local_path = self.resolve_path(path);
+
+ // Check if directory already exists
+ if local_path.exists() {
+ return Err(Error::AlreadyExists(path.to_string()));
+ }
+
+ // Check if parent directory exists
+ if let Some(parent) = local_path.parent() {
+ if !parent.exists() {
+ return Err(Error::NotFound(parent.to_string_lossy().to_string()));
+ }
+ }
+
+ // Create directory
+ fs::create_dir(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to create directory: {}", e)))?;
+
+ Ok(())
+ }
+
+ async fn remove(&self, path: &str) -> Result<()> {
+ let local_path = self.resolve_path(path);
+
+ // Check if exists
+ if !local_path.exists() {
+ return Err(Error::NotFound(path.to_string()));
+ }
+
+ // If directory, check if empty
+ if local_path.is_dir() {
+ let entries = fs::read_dir(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to read directory: {}", e)))?;
+
+ if entries.count() > 0 {
+ return Err(Error::plugin(format!("directory not empty: {}", path)));
+ }
+ }
+
+ // Remove file or empty directory
+ fs::remove_file(&local_path)
+ .or_else(|_| fs::remove_dir(&local_path))
+ .map_err(|e| Error::plugin(format!("failed to remove: {}", e)))?;
+
+ Ok(())
+ }
+
+ async fn remove_all(&self, path: &str) -> Result<()> {
+ let local_path = self.resolve_path(path);
+
+ // Check if exists
+ if !local_path.exists() {
+ return Err(Error::NotFound(path.to_string()));
+ }
+
+ // Remove recursively
+ fs::remove_dir_all(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to remove: {}", e)))?;
+
+ Ok(())
+ }
+
+ async fn read(&self, path: &str, offset: u64, size: u64) -> Result> {
+ let local_path = self.resolve_path(path);
+
+ // Check if exists and is not a directory
+ let metadata = fs::metadata(&local_path)
+ .map_err(|_| Error::NotFound(path.to_string()))?;
+
+ if metadata.is_dir() {
+ return Err(Error::plugin(format!("is a directory: {}", path)));
+ }
+
+ // Read file
+ let data = fs::read(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to read file: {}", e)))?;
+
+ // Apply offset and size
+ let file_size = data.len() as u64;
+ let start = offset.min(file_size) as usize;
+ let end = if size == 0 {
+ data.len()
+ } else {
+ (offset + size).min(file_size) as usize
+ };
+
+ if start >= data.len() {
+ Ok(vec![])
+ } else {
+ Ok(data[start..end].to_vec())
+ }
+ }
+
+ async fn write(&self, path: &str, data: &[u8], offset: u64, _flags: WriteFlag) -> Result {
+ let local_path = self.resolve_path(path);
+
+ // Check if it's a directory
+ if local_path.exists() && local_path.is_dir() {
+ return Err(Error::plugin(format!("is a directory: {}", path)));
+ }
+
+ // Check if parent directory exists
+ if let Some(parent) = local_path.parent() {
+ if !parent.exists() {
+ return Err(Error::NotFound(parent.to_string_lossy().to_string()));
+ }
+ }
+
+ // Open or create file
+ let mut file = if local_path.exists() {
+ fs::OpenOptions::new()
+ .write(true)
+ .open(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to open file: {}", e)))?
+ } else {
+ fs::OpenOptions::new()
+ .write(true)
+ .create(true)
+ .open(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to create file: {}", e)))?
+ };
+
+ // Write data
+ use std::io::{Seek, SeekFrom, Write};
+
+ if offset > 0 {
+ file.seek(SeekFrom::Start(offset))
+ .map_err(|e| Error::plugin(format!("failed to seek: {}", e)))?;
+ }
+
+ let written = file
+ .write(data)
+ .map_err(|e| Error::plugin(format!("failed to write: {}", e)))?;
+
+ Ok(written as u64)
+ }
+
+ async fn read_dir(&self, path: &str) -> Result> {
+ let local_path = self.resolve_path(path);
+
+ // Check if directory exists
+ if !local_path.exists() {
+ return Err(Error::NotFound(path.to_string()));
+ }
+
+ if !local_path.is_dir() {
+ return Err(Error::plugin(format!("not a directory: {}", path)));
+ }
+
+ // Read directory
+ let entries = fs::read_dir(&local_path)
+ .map_err(|e| Error::plugin(format!("failed to read directory: {}", e)))?;
+
+ let mut files = Vec::new();
+ for entry in entries {
+ let entry = entry.map_err(|e| Error::plugin(format!("failed to read entry: {}", e)))?;
+ let metadata = entry
+ .metadata()
+ .map_err(|e| Error::plugin(format!("failed to get metadata: {}", e)))?;
+
+ let name = entry.file_name().to_string_lossy().to_string();
+ let mode = if metadata.is_dir() { 0o755 } else { 0o644 };
+ let mod_time = metadata
+ .modified()
+ .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
+
+ files.push(FileInfo::new(
+ name,
+ metadata.len(),
+ mode,
+ mod_time,
+ metadata.is_dir(),
+ ));
+ }
+
+ Ok(files)
+ }
+
+ async fn stat(&self, path: &str) -> Result {
+ let local_path = self.resolve_path(path);
+
+ // Get file metadata
+ let metadata = fs::metadata(&local_path)
+ .map_err(|_| Error::NotFound(path.to_string()))?;
+
+ let name = Path::new(path)
+ .file_name()
+ .unwrap_or(path.as_ref())
+ .to_string_lossy()
+ .to_string();
+ let mode = if metadata.is_dir() { 0o755 } else { 0o644 };
+ let mod_time = metadata
+ .modified()
+ .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
+
+ Ok(FileInfo::new(
+ name,
+ metadata.len(),
+ mode,
+ mod_time,
+ metadata.is_dir(),
+ ))
+ }
+
+ async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+ let old_local = self.resolve_path(old_path);
+ let new_local = self.resolve_path(new_path);
+
+ // Check if old path exists
+ if !old_local.exists() {
+ return Err(Error::NotFound(old_path.to_string()));
+ }
+
+ // Check if new path parent directory exists
+ if let Some(parent) = new_local.parent() {
+ if !parent.exists() {
+ return Err(Error::NotFound(parent.to_string_lossy().to_string()));
+ }
+ }
+
+ // Rename/move
+ fs::rename(&old_local, &new_local)
+ .map_err(|e| Error::plugin(format!("failed to rename: {}", e)))?;
+
+ Ok(())
+ }
+
+ async fn chmod(&self, path: &str, _mode: u32) -> Result<()> {
+ let local_path = self.resolve_path(path);
+
+ // Check if exists
+ if !local_path.exists() {
+ return Err(Error::NotFound(path.to_string()));
+ }
+
+ // Note: chmod is not fully implemented on all platforms
+ // For now, just return success
+ Ok(())
+ }
+}
+
+/// LocalFS plugin
+pub struct LocalFSPlugin {
+ config_params: Vec,
+}
+
+impl LocalFSPlugin {
+ /// Create a new LocalFS plugin
+ pub fn new() -> Self {
+ Self {
+ config_params: vec![
+ ConfigParameter {
+ name: "local_dir".to_string(),
+ param_type: "string".to_string(),
+ required: true,
+ default: None,
+ description: "Local directory path to expose (must exist)".to_string(),
+ },
+ ],
+ }
+ }
+}
+
+#[async_trait]
+impl ServicePlugin for LocalFSPlugin {
+ fn name(&self) -> &str {
+ "localfs"
+ }
+
+ fn readme(&self) -> &str {
+ r#"LocalFS Plugin - Local File System Mount
+
+This plugin mounts a local directory into RAGFS virtual file system.
+
+FEATURES:
+ - Mount any local directory into RAGFS
+ - Full POSIX file system operations
+ - Direct access to local files and directories
+ - Preserves file permissions and timestamps
+ - Efficient file operations (no copying)
+
+CONFIGURATION:
+
+ Basic configuration:
+ [plugins.localfs]
+ enabled = true
+ path = "/local"
+
+ [plugins.localfs.config]
+ local_dir = "/path/to/local/directory"
+
+ Multiple local mounts:
+ [plugins.localfs_home]
+ enabled = true
+ path = "/home"
+
+ [plugins.localfs_home.config]
+ local_dir = "/Users/username"
+
+USAGE:
+
+ List directory:
+ agfs ls /local
+
+ Read a file:
+ agfs cat /local/file.txt
+
+ Write to a file:
+ agfs write /local/file.txt "Hello, World!"
+
+ Create a directory:
+ agfs mkdir /local/newdir
+
+ Remove a file:
+ agfs rm /local/file.txt
+
+NOTES:
+ - Changes are directly applied to local file system
+ - File permissions are preserved and can be modified
+ - Be careful with rm -r as it permanently deletes files
+
+VERSION: 1.0.0
+"#
+ }
+
+ async fn validate(&self, config: &PluginConfig) -> Result<()> {
+ // Validate local_dir parameter
+ let local_dir = config
+ .params
+ .get("local_dir")
+ .and_then(|v| match v {
+ crate::core::types::ConfigValue::String(s) => Some(s),
+ _ => None,
+ })
+ .ok_or_else(|| Error::plugin("local_dir is required in configuration".to_string()))?;
+
+ // Check if path exists
+ let path = Path::new(local_dir);
+ if !path.exists() {
+ return Err(Error::plugin(format!(
+ "base path does not exist: {}",
+ local_dir
+ )));
+ }
+
+ // Verify it's a directory
+ if !path.is_dir() {
+ return Err(Error::plugin(format!(
+ "base path is not a directory: {}",
+ local_dir
+ )));
+ }
+
+ Ok(())
+ }
+
+ async fn initialize(&self, config: PluginConfig) -> Result> {
+ // Parse configuration
+ let local_dir = config
+ .params
+ .get("local_dir")
+ .and_then(|v| match v {
+ crate::core::types::ConfigValue::String(s) => Some(s),
+ _ => None,
+ })
+ .ok_or_else(|| Error::plugin("local_dir is required".to_string()))?;
+
+ let fs = LocalFileSystem::new(local_dir)?;
+ Ok(Box::new(fs))
+ }
+
+ fn config_params(&self) -> &[ConfigParameter] {
+ &self.config_params
+ }
+}
diff --git a/crates/ragfs/src/plugins/memfs/mod.rs b/crates/ragfs/src/plugins/memfs/mod.rs
new file mode 100644
index 000000000..3d9757a73
--- /dev/null
+++ b/crates/ragfs/src/plugins/memfs/mod.rs
@@ -0,0 +1,655 @@
+//! MemFS - In-memory File System
+//!
+//! A simple file system that stores all data in memory. All data is lost
+//! when the server restarts. This is useful for temporary storage and testing.
+
+use async_trait::async_trait;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::SystemTime;
+use tokio::sync::RwLock;
+
+use crate::core::{
+ ConfigParameter, Error, FileInfo, FileSystem, PluginConfig, Result, ServicePlugin, WriteFlag,
+};
+
+/// File entry in memory
+#[derive(Clone)]
+struct FileEntry {
+ /// File data
+ data: Vec,
+ /// File mode/permissions
+ mode: u32,
+ /// Last modification time
+ mod_time: SystemTime,
+ /// Whether this is a directory
+ is_dir: bool,
+}
+
+impl FileEntry {
+ /// Create a new file entry
+ fn new_file(mode: u32) -> Self {
+ Self {
+ data: Vec::new(),
+ mode,
+ mod_time: SystemTime::now(),
+ is_dir: false,
+ }
+ }
+
+ /// Create a new directory entry
+ fn new_dir(mode: u32) -> Self {
+ Self {
+ data: Vec::new(),
+ mode,
+ mod_time: SystemTime::now(),
+ is_dir: true,
+ }
+ }
+
+ /// Update modification time
+ fn touch(&mut self) {
+ self.mod_time = SystemTime::now();
+ }
+}
+
+/// In-memory file system implementation
+pub struct MemFileSystem {
+ /// Storage for files and directories
+ entries: Arc>>,
+}
+
+impl MemFileSystem {
+ /// Create a new MemFileSystem
+ pub fn new() -> Self {
+ let mut entries = HashMap::new();
+
+ // Create root directory
+ entries.insert(
+ "/".to_string(),
+ FileEntry::new_dir(0o755),
+ );
+
+ Self {
+ entries: Arc::new(RwLock::new(entries)),
+ }
+ }
+
+ /// Normalize path (ensure it starts with /)
+ fn normalize_path(path: &str) -> String {
+ if path.is_empty() || path == "/" {
+ return "/".to_string();
+ }
+
+ let mut normalized = path.to_string();
+ if !normalized.starts_with('/') {
+ normalized.insert(0, '/');
+ }
+
+ // Remove trailing slash (except for root)
+ if normalized.len() > 1 && normalized.ends_with('/') {
+ normalized.pop();
+ }
+
+ normalized
+ }
+
+ /// Get parent directory path
+ fn parent_path(path: &str) -> Option {
+ if path == "/" {
+ return None;
+ }
+
+ let normalized = Self::normalize_path(path);
+ let parts: Vec<&str> = normalized.split('/').collect();
+
+ if parts.len() <= 2 {
+ return Some("/".to_string());
+ }
+
+ Some(parts[..parts.len() - 1].join("/"))
+ }
+
+ /// Get file name from path
+ fn file_name(path: &str) -> String {
+ if path == "/" {
+ return "/".to_string();
+ }
+
+ let normalized = Self::normalize_path(path);
+ normalized
+ .split('/')
+ .last()
+ .unwrap_or("")
+ .to_string()
+ }
+
+ /// List entries in a directory
+ fn list_entries(&self, entries: &HashMap, dir_path: &str) -> Vec {
+ let normalized_dir = Self::normalize_path(dir_path);
+ let prefix = if normalized_dir == "/" {
+ "/".to_string()
+ } else {
+ format!("{}/", normalized_dir)
+ };
+
+ entries
+ .keys()
+ .filter(|path| {
+ if *path == &normalized_dir {
+ return false;
+ }
+
+ if !path.starts_with(&prefix) {
+ return false;
+ }
+
+ // Only direct children (no nested paths)
+ let relative = &path[prefix.len()..];
+ !relative.contains('/')
+ })
+ .cloned()
+ .collect()
+ }
+}
+
+impl Default for MemFileSystem {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+#[async_trait]
+impl FileSystem for MemFileSystem {
+ async fn create(&self, path: &str) -> Result<()> {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ // Check if already exists
+ if entries.contains_key(&normalized) {
+ return Err(Error::already_exists(&normalized));
+ }
+
+ // Check parent directory exists
+ if let Some(parent) = Self::parent_path(&normalized) {
+ match entries.get(&parent) {
+ Some(entry) if entry.is_dir => {}
+ Some(_) => return Err(Error::NotADirectory(parent)),
+ None => return Err(Error::not_found(&parent)),
+ }
+ }
+
+ // Create file
+ entries.insert(normalized, FileEntry::new_file(0o644));
+ Ok(())
+ }
+
+ async fn mkdir(&self, path: &str, mode: u32) -> Result<()> {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ // Check if already exists
+ if entries.contains_key(&normalized) {
+ return Err(Error::already_exists(&normalized));
+ }
+
+ // Check parent directory exists
+ if let Some(parent) = Self::parent_path(&normalized) {
+ match entries.get(&parent) {
+ Some(entry) if entry.is_dir => {}
+ Some(_) => return Err(Error::NotADirectory(parent)),
+ None => return Err(Error::not_found(&parent)),
+ }
+ }
+
+ // Create directory
+ entries.insert(normalized, FileEntry::new_dir(mode));
+ Ok(())
+ }
+
+ async fn remove(&self, path: &str) -> Result<()> {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ // Check if exists
+ match entries.get(&normalized) {
+ Some(entry) if entry.is_dir => {
+ return Err(Error::IsADirectory(normalized));
+ }
+ Some(_) => {}
+ None => return Err(Error::not_found(&normalized)),
+ }
+
+ // Remove file
+ entries.remove(&normalized);
+ Ok(())
+ }
+
+ async fn remove_all(&self, path: &str) -> Result<()> {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ // Check if exists
+ if !entries.contains_key(&normalized) {
+ return Err(Error::not_found(&normalized));
+ }
+
+ // Remove entry and all children
+ let to_remove: Vec = entries
+ .keys()
+ .filter(|p| *p == &normalized || p.starts_with(&format!("{}/", normalized)))
+ .cloned()
+ .collect();
+
+ for path in to_remove {
+ entries.remove(&path);
+ }
+
+ Ok(())
+ }
+
+ async fn read(&self, path: &str, offset: u64, size: u64) -> Result> {
+ let normalized = Self::normalize_path(path);
+ let entries = self.entries.read().await;
+
+ match entries.get(&normalized) {
+ Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)),
+ Some(entry) => {
+ let offset = offset as usize;
+ let data_len = entry.data.len();
+
+ if offset >= data_len {
+ return Ok(Vec::new());
+ }
+
+ let end = if size == 0 {
+ data_len
+ } else {
+ std::cmp::min(offset + size as usize, data_len)
+ };
+
+ Ok(entry.data[offset..end].to_vec())
+ }
+ None => Err(Error::not_found(&normalized)),
+ }
+ }
+
+ async fn write(&self, path: &str, data: &[u8], offset: u64, flags: WriteFlag) -> Result {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ match entries.get_mut(&normalized) {
+ Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)),
+ Some(entry) => {
+ entry.touch();
+
+ match flags {
+ WriteFlag::Create | WriteFlag::Truncate => {
+ entry.data = data.to_vec();
+ }
+ WriteFlag::Append => {
+ entry.data.extend_from_slice(data);
+ }
+ WriteFlag::None => {
+ let offset = offset as usize;
+ let end = offset + data.len();
+
+ // Extend if necessary
+ if end > entry.data.len() {
+ entry.data.resize(end, 0);
+ }
+
+ entry.data[offset..end].copy_from_slice(data);
+ }
+ }
+
+ Ok(data.len() as u64)
+ }
+ None => {
+ // Create file if Create flag is set
+ if matches!(flags, WriteFlag::Create) {
+ // Check parent exists
+ if let Some(parent) = Self::parent_path(&normalized) {
+ match entries.get(&parent) {
+ Some(entry) if entry.is_dir => {}
+ Some(_) => return Err(Error::NotADirectory(parent)),
+ None => return Err(Error::not_found(&parent)),
+ }
+ }
+
+ let mut entry = FileEntry::new_file(0o644);
+ entry.data = data.to_vec();
+ entries.insert(normalized, entry);
+ Ok(data.len() as u64)
+ } else {
+ Err(Error::not_found(&normalized))
+ }
+ }
+ }
+ }
+
+ async fn read_dir(&self, path: &str) -> Result> {
+ let normalized = Self::normalize_path(path);
+ let entries = self.entries.read().await;
+
+ // Check if directory exists
+ match entries.get(&normalized) {
+ Some(entry) if !entry.is_dir => return Err(Error::NotADirectory(normalized)),
+ Some(_) => {}
+ None => return Err(Error::not_found(&normalized)),
+ }
+
+ // List entries
+ let children = self.list_entries(&entries, &normalized);
+ let mut result = Vec::new();
+
+ for child_path in children {
+ if let Some(entry) = entries.get(&child_path) {
+ let name = Self::file_name(&child_path);
+ result.push(FileInfo {
+ name,
+ size: entry.data.len() as u64,
+ mode: entry.mode,
+ mod_time: entry.mod_time,
+ is_dir: entry.is_dir,
+ });
+ }
+ }
+
+ Ok(result)
+ }
+
+ async fn stat(&self, path: &str) -> Result {
+ let normalized = Self::normalize_path(path);
+ let entries = self.entries.read().await;
+
+ match entries.get(&normalized) {
+ Some(entry) => Ok(FileInfo {
+ name: Self::file_name(&normalized),
+ size: entry.data.len() as u64,
+ mode: entry.mode,
+ mod_time: entry.mod_time,
+ is_dir: entry.is_dir,
+ }),
+ None => Err(Error::not_found(&normalized)),
+ }
+ }
+
+ async fn rename(&self, old_path: &str, new_path: &str) -> Result<()> {
+ let old_normalized = Self::normalize_path(old_path);
+ let new_normalized = Self::normalize_path(new_path);
+ let mut entries = self.entries.write().await;
+
+ // Check old path exists
+ let entry = entries
+ .get(&old_normalized)
+ .ok_or_else(|| Error::not_found(&old_normalized))?
+ .clone();
+
+ // Check new path doesn't exist
+ if entries.contains_key(&new_normalized) {
+ return Err(Error::already_exists(&new_normalized));
+ }
+
+ // Check new parent exists
+ if let Some(parent) = Self::parent_path(&new_normalized) {
+ match entries.get(&parent) {
+ Some(e) if e.is_dir => {}
+ Some(_) => return Err(Error::NotADirectory(parent)),
+ None => return Err(Error::not_found(&parent)),
+ }
+ }
+
+ // Collect all child entries if renaming a directory
+ let old_prefix = if old_normalized == "/" {
+ "/".to_string()
+ } else {
+ format!("{}/", old_normalized)
+ };
+ let new_prefix = if new_normalized == "/" {
+ "/".to_string()
+ } else {
+ format!("{}/", new_normalized)
+ };
+
+ let mut to_move = Vec::new();
+ for (path, _) in entries.iter() {
+ if path == &old_normalized {
+ continue;
+ }
+ if path.starts_with(&old_prefix) {
+ // Check for conflicts with new path
+ let new_child_path = format!("{}{}", new_prefix, &path[old_prefix.len()..]);
+ if entries.contains_key(&new_child_path) {
+ return Err(Error::already_exists(&new_child_path));
+ }
+ to_move.push(path.clone());
+ }
+ }
+
+ // Move the main entry
+ entries.remove(&old_normalized);
+ entries.insert(new_normalized, entry);
+
+ // Move all child entries
+ for old_child_path in to_move {
+ let new_child_path = format!("{}{}", new_prefix, &old_child_path[old_prefix.len()..]);
+ if let Some(child_entry) = entries.remove(&old_child_path) {
+ entries.insert(new_child_path, child_entry);
+ }
+ }
+
+ Ok(())
+ }
+
+ async fn chmod(&self, path: &str, mode: u32) -> Result<()> {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ match entries.get_mut(&normalized) {
+ Some(entry) => {
+ entry.mode = mode;
+ entry.touch();
+ Ok(())
+ }
+ None => Err(Error::not_found(&normalized)),
+ }
+ }
+
+ async fn truncate(&self, path: &str, size: u64) -> Result<()> {
+ let normalized = Self::normalize_path(path);
+ let mut entries = self.entries.write().await;
+
+ match entries.get_mut(&normalized) {
+ Some(entry) if entry.is_dir => Err(Error::IsADirectory(normalized)),
+ Some(entry) => {
+ entry.data.resize(size as usize, 0);
+ entry.touch();
+ Ok(())
+ }
+ None => Err(Error::not_found(&normalized)),
+ }
+ }
+}
+
+/// MemFS plugin
+pub struct MemFSPlugin;
+
+#[async_trait]
+impl ServicePlugin for MemFSPlugin {
+ fn name(&self) -> &str {
+ "memfs"
+ }
+
+ fn version(&self) -> &str {
+ "0.1.0"
+ }
+
+ fn description(&self) -> &str {
+ "In-memory file system for temporary storage"
+ }
+
+ fn readme(&self) -> &str {
+ r#"# MemFS - In-memory File System
+
+A simple file system that stores all data in memory. All data is lost
+when the server restarts.
+
+## Features
+
+- Fast in-memory storage
+- Full POSIX-like file operations
+- Directory support
+- No persistence (data lost on restart)
+
+## Usage
+
+Mount the filesystem:
+```bash
+curl -X POST http://localhost:8080/api/v1/mount \
+ -H "Content-Type: application/json" \
+ -d '{"plugin": "memfs", "path": "/memfs"}'
+```
+
+Create and write to a file:
+```bash
+echo "hello world" | curl -X PUT \
+ "http://localhost:8080/api/v1/files?path=/memfs/test.txt" \
+ --data-binary @-
+```
+
+Read the file:
+```bash
+curl "http://localhost:8080/api/v1/files?path=/memfs/test.txt"
+```
+
+## Configuration
+
+MemFS has no configuration parameters.
+"#
+ }
+
+ async fn validate(&self, _config: &PluginConfig) -> Result<()> {
+ // MemFS has no required configuration
+ Ok(())
+ }
+
+ async fn initialize(&self, _config: PluginConfig) -> Result> {
+ Ok(Box::new(MemFileSystem::new()))
+ }
+
+ fn config_params(&self) -> &[ConfigParameter] {
+ // No configuration parameters
+ &[]
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[tokio::test]
+ async fn test_create_and_read_file() {
+ let fs = MemFileSystem::new();
+
+ // Create file
+ fs.create("/test.txt").await.unwrap();
+
+ // Write data
+ let data = b"hello world";
+ fs.write("/test.txt", data, 0, WriteFlag::None)
+ .await
+ .unwrap();
+
+ // Read data
+ let read_data = fs.read("/test.txt", 0, 0).await.unwrap();
+ assert_eq!(read_data, data);
+ }
+
+ #[tokio::test]
+ async fn test_mkdir_and_list() {
+ let fs = MemFileSystem::new();
+
+ // Create directory
+ fs.mkdir("/testdir", 0o755).await.unwrap();
+
+ // Create files in directory
+ fs.create("/testdir/file1.txt").await.unwrap();
+ fs.create("/testdir/file2.txt").await.unwrap();
+
+ // List directory
+ let entries = fs.read_dir("/testdir").await.unwrap();
+ assert_eq!(entries.len(), 2);
+ }
+
+ #[tokio::test]
+ async fn test_remove_file() {
+ let fs = MemFileSystem::new();
+
+ fs.create("/test.txt").await.unwrap();
+ fs.remove("/test.txt").await.unwrap();
+
+ // Should not exist
+ assert!(fs.stat("/test.txt").await.is_err());
+ }
+
+ #[tokio::test]
+ async fn test_rename() {
+ let fs = MemFileSystem::new();
+
+ fs.create("/old.txt").await.unwrap();
+ fs.write("/old.txt", b"data", 0, WriteFlag::None)
+ .await
+ .unwrap();
+
+ fs.rename("/old.txt", "/new.txt").await.unwrap();
+
+ // Old should not exist
+ assert!(fs.stat("/old.txt").await.is_err());
+
+ // New should exist with same data
+ let data = fs.read("/new.txt", 0, 0).await.unwrap();
+ assert_eq!(data, b"data");
+ }
+
+ #[tokio::test]
+ async fn test_write_flags() {
+ let fs = MemFileSystem::new();
+
+ // Create with data
+ fs.write("/test.txt", b"hello", 0, WriteFlag::Create)
+ .await
+ .unwrap();
+
+ // Append
+ fs.write("/test.txt", b" world", 0, WriteFlag::Append)
+ .await
+ .unwrap();
+
+ let data = fs.read("/test.txt", 0, 0).await.unwrap();
+ assert_eq!(data, b"hello world");
+
+ // Truncate
+ fs.write("/test.txt", b"new", 0, WriteFlag::Truncate)
+ .await
+ .unwrap();
+
+ let data = fs.read("/test.txt", 0, 0).await.unwrap();
+ assert_eq!(data, b"new");
+ }
+
+ #[tokio::test]
+ async fn test_plugin() {
+ let plugin = MemFSPlugin;
+ assert_eq!(plugin.name(), "memfs");
+
+ let config = PluginConfig {
+ name: "memfs".to_string(),
+ mount_path: "/memfs".to_string(),
+ params: HashMap::new(),
+ };
+
+ assert!(plugin.validate(&config).await.is_ok());
+ assert!(plugin.initialize(config).await.is_ok());
+ }
+}
diff --git a/crates/ragfs/src/plugins/mod.rs b/crates/ragfs/src/plugins/mod.rs
new file mode 100644
index 000000000..1fcc0c2b1
--- /dev/null
+++ b/crates/ragfs/src/plugins/mod.rs
@@ -0,0 +1,21 @@
+//! Plugins module
+//!
+//! This module contains all built-in filesystem plugins.
+
+pub mod kvfs;
+pub mod localfs;
+pub mod memfs;
+pub mod queuefs;
+#[cfg(feature = "s3")]
+pub mod s3fs;
+pub mod serverinfofs;
+pub mod sqlfs;
+
+pub use kvfs::{KVFSPlugin, KVFileSystem};
+pub use localfs::{LocalFSPlugin, LocalFileSystem};
+pub use memfs::{MemFSPlugin, MemFileSystem};
+pub use queuefs::{QueueFSPlugin, QueueFileSystem};
+#[cfg(feature = "s3")]
+pub use s3fs::{S3FSPlugin, S3FileSystem};
+pub use serverinfofs::{ServerInfoFSPlugin, ServerInfoFileSystem};
+pub use sqlfs::{SQLFSPlugin, SQLFileSystem};
diff --git a/crates/ragfs/src/plugins/queuefs/backend.rs b/crates/ragfs/src/plugins/queuefs/backend.rs
new file mode 100644
index 000000000..8e6a1d57b
--- /dev/null
+++ b/crates/ragfs/src/plugins/queuefs/backend.rs
@@ -0,0 +1,324 @@
+//! Queue Backend Abstraction
+//!
+//! This module provides a pluggable backend system for QueueFS, allowing different
+//! storage implementations (memory, SQLite, etc.) while maintaining a consistent interface.
+
+use crate::core::errors::{Error, Result};
+use serde::{Deserialize, Serialize};
+use std::collections::{HashMap, VecDeque};
+use std::time::SystemTime;
+use uuid::Uuid;
+
+/// A message in the queue
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Message {
+ /// Unique identifier for the message
+ pub id: String,
+ /// Message data
+ pub data: Vec,
+ /// Timestamp when the message was enqueued
+ pub timestamp: SystemTime,
+}
+
+impl Message {
+ /// Create a new message with the given data
+ pub fn new(data: Vec) -> Self {
+ Self {
+ id: Uuid::new_v4().to_string(),
+ data,
+ timestamp: SystemTime::now(),
+ }
+ }
+}
+
+/// Queue backend trait for pluggable storage implementations
+pub trait QueueBackend: Send + Sync {
+ /// Create a new queue with the given name
+ fn create_queue(&mut self, name: &str) -> Result<()>;
+
+ /// Remove a queue and all its messages
+ fn remove_queue(&mut self, name: &str) -> Result<()>;
+
+ /// Check if a queue exists
+ fn queue_exists(&self, name: &str) -> bool;
+
+ /// List all queues with the given prefix
+ /// If prefix is empty, returns all queues
+ fn list_queues(&self, prefix: &str) -> Vec;
+
+ /// Add a message to the queue
+ fn enqueue(&mut self, queue_name: &str, msg: Message) -> Result<()>;
+
+ /// Remove and return the first message from the queue
+ fn dequeue(&mut self, queue_name: &str) -> Result