sys-intelligence · Acture · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,18 @@
+.git
+.gitignore
+
+.venv
+**/.venv
+.uv-cache
+**/.uv-cache
+
+__pycache__
+**/__pycache__
+*.pyc
+
+dist
+build
+*.egg-info
+
+logs
+outputs
diff --git a/.github/workflows/sdk-package.yml b/.github/workflows/sdk-package.yml
@@ -0,0 +1,49 @@
+name: SDK Package
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'sdk/**'
+      - 'pyproject.toml'
+      - '.github/workflows/sdk-package.yml'
+  pull_request:
+    paths:
+      - 'sdk/**'
+      - 'pyproject.toml'
+      - '.github/workflows/sdk-package.yml'
+  workflow_dispatch:
+
+# Least privilege: the job only checks out the repository and uploads build
+# artifacts, so GITHUB_TOKEN needs nothing beyond read access to contents.
+permissions:
+  contents: read
+
+jobs:
+  build-sdk:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+
+      - name: Build SDK package
+        run: uv build --package system-intelligence-sdk --wheel --sdist
+
+      - name: Verify package metadata
+        run: uvx twine check dist/system_intelligence_sdk-*
+
+      - name: Upload SDK dist artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: sdk-dist
+          path: dist/*
+          retention-days: 14
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,9 @@ __pycache__/
 *.pyc
 .venv/
 venv/
+build/
+dist/
+*.egg-info/
 
 # IDE
 .vscode/

diff --git a/README.md b/README.md
@@ -38,6 +38,9 @@ System Intelligence Benchmark currently includes the following example benchmark
 - **SDK** (`sdk/`) - Software development kit providing evaluators, LLM interfaces, and utility functions
 - **Documentation** (`doc/`) - Guides and documentation for using and contributing to System Intelligence Benchmark
 
+For the canonical repository boundaries and migration direction, see [doc/project_structure.md](doc/project_structure.md).
+For SDK packaging and release flow, see [doc/sdk_packaging.md](doc/sdk_packaging.md).
+
 ### Prerequisites
 
 - Python 3.9+
@@ -145,4 +148,3 @@ trademarks or logos is subject to and must follow
 [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
 Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
 Any use of third-party trademarks or logos are subject to those third-party's policies.
-
diff --git a/benchmarks/arteval_bench/Dockerfile b/benchmarks/arteval_bench/Dockerfile
@@ -1,34 +1,48 @@
-FROM ubuntu:24.04
+FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim AS builder
 
-ARG DEBIAN_FRONTEND=noninteractive
-
-USER root
+WORKDIR /workspace
+COPY . /workspace
+# Build a wheel for every workspace member in a single pass. The SDK is part of
+# the workspace, so --all-packages already produces its wheel; a separate SDK
+# build guarded by `|| true` would only repeat the work and mute its errors.
+RUN mkdir -p /workspace/dist \
+ && uv build --all-packages --wheel -o /workspace/dist
 
-WORKDIR /
-COPY . .
+FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
 
-RUN rm -rf /var/lib/apt/lists/* \
- && apt-get update -o Acquire::Retries=5 \
- && apt-get install -y --no-install-recommends \
-    build-essential \
-    git \
-    wget \
-    python3-pip \
-    python3-venv \
-    pipx \
+ARG DEBIAN_FRONTEND=noninteractive
+USER root
+RUN apt-get update && apt-get install -y --no-install-recommends git \
  && rm -rf /var/lib/apt/lists/*
 
-# SWE-ReX will always attempt to install its server into your docker container
-# however, this takes a couple of seconds. If we already provide it in the image,
-# this is much faster.
-RUN pipx install swe-rex 
-RUN pipx ensurepath
-
-ENV PATH="/root/.local/bin:${PATH}"
-ENV PATH="/usr/local/go/bin:${PATH}"
-
-SHELL ["/bin/bash", "-c"]
-
-RUN chmod +x install.sh test.sh && ./install.sh
-
-CMD ["bash"]
+# Build with repository root as context:
+# docker build -f benchmarks/arteval_bench/Dockerfile .
+WORKDIR /workspace
+COPY . /workspace
+COPY --from=builder /workspace/dist/*.whl /tmp/dist/
+
+WORKDIR /workspace/benchmarks/arteval_bench
+# Install the prebuilt SDK and benchmark wheels into a fresh .venv, failing
+# with a hint when either wheel is missing. --no-cache keeps uv's download
+# cache out of the image layer. The final command fails the build if the
+# sweagent distribution did not get installed.
+RUN set -eux; \
+ SDK_WHEEL="$(ls /tmp/dist/system_intelligence_sdk-*.whl | head -n1 || true)"; \
+ BENCH_WHEEL="$(ls /tmp/dist/arteval_bench-*.whl | head -n1 || true)"; \
+ if [ -z "$SDK_WHEEL" ]; then \
+   echo "Missing SDK wheel in /tmp/dist. Build with repo root context:"; \
+   echo "docker build -t arteval_bench -f benchmarks/arteval_bench/Dockerfile ."; \
+   ls -1 /tmp/dist || true; \
+   exit 1; \
+ fi; \
+ if [ -z "$BENCH_WHEEL" ]; then \
+   echo "Missing arteval_bench wheel in /tmp/dist."; \
+   ls -1 /tmp/dist || true; \
+   exit 1; \
+ fi; \
+ rm -rf .venv; \
+ uv venv .venv; \
+ uv pip install --no-cache --python .venv/bin/python "$SDK_WHEEL" "$BENCH_WHEEL"; \
+ .venv/bin/python -c "import importlib.metadata as m; print(m.version('sweagent'))"
+
+CMD ["bash"]
diff --git a/benchmarks/arteval_bench/install.sh b/benchmarks/arteval_bench/install.sh
@@ -2,28 +2,24 @@
 
 set -e  # Exit immediately on error.
 
+if ! command -v uv >/dev/null 2>&1; then
+    echo "==> uv not found. Installing uv..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
-    echo "==> uv not found. Installing uv..."
-    curl -LsSf https://astral.sh/uv/install.sh | sh
-    export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"
+    echo "==> Error: 'uv' command not found."
+    echo "Please install 'uv' manually before running this script."
+    echo "For installation instructions, see: https://docs.astral.sh/uv/getting-started/installation/"
+    exit 1
+fi
+
+REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
+export UV_CACHE_DIR="${UV_CACHE_DIR:-${REPO_ROOT}/.uv-cache}"
+
 # if .venv does not exist, create it
 if [ -d ".venv" ]; then
     echo "==> .venv already exists, skipping creation."
 else
     echo "==> Creating .venv directory..."
-
-    python3 -m venv .venv
-    source .venv/bin/activate
-
-    if [ ! -d "SWE-agent" ]; then
-        echo "==> Install SWE-agent and its dependencies..."
-        git clone https://github.com/SWE-agent/SWE-agent.git
-        cd SWE-agent
-        git checkout 0c27f286303a939aa868ad2003bc4b6776771791
-        pip install --editable .
-        sweagent --help
-        cd ..
-    else
-        echo "==> SWE-agent repository already exists, skipping clone."
-    fi
-
-    deactivate
+    uv venv .venv
 fi
 
+uv sync --extra dev
+uv run --no-sync sweagent --help >/dev/null
+
 echo "==> ArtEvalBench environment is set up successfully."
diff --git a/benchmarks/arteval_bench/pyproject.toml b/benchmarks/arteval_bench/pyproject.toml
@@ -0,0 +1,28 @@
+[project]
+name = "arteval-bench"
+version = "0.1.0"
+description = "ArtEval benchmark package"
+requires-python = ">=3.9"
+dependencies = [
+  "system-intelligence-sdk>=0.1.0",
+  "requests",
+  "azure-identity",
+  "sweagent @ git+https://github.com/SWE-agent/SWE-agent.git@v1.1.0",
+]
+
+[project.optional-dependencies]
+dev = [
+  "pytest>=8.0.0",
+  "ruff>=0.6.0",
+]
+
+[build-system]
+requires = ["uv_build>=0.10.4,<0.11.0"]
+build-backend = "uv_build"
+
+[tool.uv.build-backend]
+module-name = "src"
+module-root = ""
+
+[tool.uv.sources]
+system-intelligence-sdk = { workspace = true }
diff --git a/benchmarks/arteval_bench/run.sh b/benchmarks/arteval_bench/run.sh
@@ -19,20 +19,22 @@ NEW_MODEL_NAME="${MODEL_NAME//\//_}"
 # export OPENAI_BASE_URL="http://localhost:2327/v1"
 # export OPENAI_API_KEY="EMPTY"
 
-source .venv/bin/activate
+if [ ! -x ".venv/bin/python" ]; then
+    echo "==> .venv is missing. Run ./install.sh first."
+    exit 1
+fi
+
 echo "==> Start to run ArtEvalBench"
 # Note that if you benchmark has multiple tasks, you need to add --task <task> 
 # in your code to enable task selection.
 # sweagent --help
-# python src/main.py \
+# python src/core/main.py \
 #     --task "test"
     # --save_path "./outputs/systemcourseproject__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \
 
-python src/main_setup.py
-    # --model "$MODEL_NAME" \
+uv run --no-sync python src/core/main.py \
+    --model_name "${MODEL_NAME}"
     # --save_path "./outputs/systemcourseproject__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \
 
-# python src/main_setup.py \
+# uv run --no-sync python src/core/main.py \
 #     --input_json "./data/benchmark/course_lab_task_examples.jsonl" 
-
-deactivate
diff --git a/benchmarks/arteval_bench/src/__init__.py b/benchmarks/arteval_bench/src/__init__.py
@@ -0,0 +1 @@
+"""ArtEval benchmark package."""
diff --git a/benchmarks/arteval_bench/src/core/main.py b/benchmarks/arteval_bench/src/core/main.py
@@ -3,11 +3,8 @@
 import argparse
 import json
 import os
-import sys
 from datetime import datetime
 
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
-
 from sdk.logger import logger
 from sdk.utils import set_llm_endpoint_from_config
 

diff --git a/benchmarks/arteval_bench/src/core/main_patch.py b/benchmarks/arteval_bench/src/core/main_patch.py
@@ -3,11 +3,8 @@
 import argparse
 import json
 import os
-import sys
 from datetime import datetime
 
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
-
 from sdk.logger import logger
 from sdk.utils import set_llm_endpoint_from_config
 

diff --git a/benchmarks/arteval_bench/src/core/run_eval_in_env.py b/benchmarks/arteval_bench/src/core/run_eval_in_env.py
@@ -2,9 +2,6 @@
 
 import asyncio
 import os
-import sys
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
 
 from swerex.deployment.docker import DockerDeploymentConfig
 from swerex.runtime.abstract import BashAction, Command, CreateBashSessionRequest, UploadRequest

diff --git a/benchmarks/arteval_bench/src/core/run_eval_sweagent.py b/benchmarks/arteval_bench/src/core/run_eval_sweagent.py
@@ -1,8 +1,7 @@
-import sys
+import json
+import os
 import subprocess
 
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
-
 from patch_evaluator import pacth_eval
-from patch_evaluator import pacth_eval
+from .patch_evaluator import pacth_eval
 
 from sdk.logger import logger

diff --git a/benchmarks/arteval_bench/src/core/utils.py b/benchmarks/arteval_bench/src/core/utils.py
@@ -1,9 +1,6 @@
 """Helper methods for running tests in a deployment."""
 
 import os
-import sys
-
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
 
 from sdk.logger import logger
 
@@ -17,4 +14,4 @@ def get_task(file_path):
         + f" means you can directly proceed with executing the steps in the README"
         + f" without asking for approval or confirmation. Once you rached the end"
         + f" of the README you must exit the Docker image gracefully.")
-    return task
+    return task
diff --git a/benchmarks/arteval_bench/src/evaluator/__init__.py b/benchmarks/arteval_bench/src/evaluator/__init__.py
@@ -0,0 +1 @@
+"""ArtEval evaluator package."""
diff --git a/benchmarks/cache_algo_bench/Dockerfile b/benchmarks/cache_algo_bench/Dockerfile
@@ -1,14 +1,41 @@
-FROM ubuntu:24.04
-
-WORKDIR /usr/src
-COPY . .
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    git \
-    wget \
-    python3-pip \
-    python3-venv
-
-RUN chmod +x install.sh test.sh && ./install.sh
-
+FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim AS builder
+
+WORKDIR /workspace
+COPY . /workspace
+# Build a wheel for every workspace member in a single pass. The SDK is part of
+# the workspace, so --all-packages already produces its wheel; a separate SDK
+# build guarded by `|| true` would only repeat the work and mute its errors.
+RUN mkdir -p /workspace/dist \
+ && uv build --all-packages --wheel -o /workspace/dist
+
+FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
+
+# Build with repository root as context:
+# docker build -f benchmarks/cache_algo_bench/Dockerfile .
+WORKDIR /workspace
+COPY . /workspace
+COPY --from=builder /workspace/dist/*.whl /tmp/dist/
+
+WORKDIR /workspace/benchmarks/cache_algo_bench
+# Install the prebuilt SDK and benchmark wheels into a fresh .venv, failing
+# with a hint when either wheel is missing. --no-cache keeps uv's download
+# cache out of the image layer.
+RUN set -eux; \
+ SDK_WHEEL="$(ls /tmp/dist/system_intelligence_sdk-*.whl | head -n1 || true)"; \
+ BENCH_WHEEL="$(ls /tmp/dist/cache_algo_bench-*.whl | head -n1 || true)"; \
+ if [ -z "$SDK_WHEEL" ]; then \
+   echo "Missing SDK wheel in /tmp/dist. Build with repo root context:"; \
+   echo "docker build -t cache_algo_bench -f benchmarks/cache_algo_bench/Dockerfile ."; \
+   ls -1 /tmp/dist || true; \
+   exit 1; \
+ fi; \
+ if [ -z "$BENCH_WHEEL" ]; then \
+   echo "Missing cache_algo_bench wheel in /tmp/dist."; \
+   ls -1 /tmp/dist || true; \
+   exit 1; \
+ fi; \
+ rm -rf .venv; \
+ uv venv .venv; \
+ uv pip install --no-cache --python .venv/bin/python "$SDK_WHEEL" "$BENCH_WHEEL"
+
 # ENTRYPOINT ["./test.sh"]
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,6 +3,9 @@ __pycache__/ @@
     *.pyc
     .venv/
     venv/
+    build/
+    dist/
+    *.egg-info/
     # IDE
     .vscode/
@@ Expand Down @@