diff --git a/examples/localcowork/.env.example b/examples/localcowork/.env.example
index 16e64bd..ed5eea9 100644
--- a/examples/localcowork/.env.example
+++ b/examples/localcowork/.env.example
@@ -6,6 +6,9 @@
 
 # ─── Model Configuration ─────────────────────────────────────────────────────
 # Directory containing GGUF model files (downloaded from HuggingFace)
+# IMPORTANT: Store models OUTSIDE the project repo (e.g., ~/Projects/_models).
+# The huggingface_hub library creates a .cache/huggingface folder for tracking
+# downloads, which should not be in the project directory.
 # LOCALCOWORK_MODELS_DIR=~/Projects/_models
 
 # Text model API endpoint (OpenAI-compatible). Set by start-model.sh.
diff --git a/examples/localcowork/.gitignore b/examples/localcowork/.gitignore
index 4f4a5d5..281c756 100644
--- a/examples/localcowork/.gitignore
+++ b/examples/localcowork/.gitignore
@@ -17,6 +17,7 @@ src-tauri/gen/
 _models/*.gguf
 _models/*.bin
 _models/*.safetensors
+_models/.cache/
 
 # ─── IDE ──────────────────────────────────────────────────────────────────
 .vscode/
diff --git a/examples/localcowork/_models/config.yaml b/examples/localcowork/_models/config.yaml
index d61d1fb..f827752 100644
--- a/examples/localcowork/_models/config.yaml
+++ b/examples/localcowork/_models/config.yaml
@@ -7,10 +7,20 @@
 # Set LOCALCOWORK_MODELS_DIR env var to override.
 # Ollama-managed models use Ollama's own storage (~/.ollama/models/).
 #
+# IMPORTANT: When downloading models via huggingface_hub, download into your
+# home-directory models folder (e.g., ~/Projects/_models) rather than into this
+# directory. The huggingface_hub library creates a .cache/huggingface folder for
+# tracking downloads, which can grow large and should not be in the project repo.
+#
+# Example of downloading into your models directory:
+#   import os; from huggingface_hub import hf_hub_download
+#   hf_hub_download('LiquidAI/LFM2-24B-A2B', 'LFM2-24B-A2B-Q4_K_M.gguf',
+#                   local_dir=os.path.expanduser('~/Projects/_models'))
+#
 # Model paths below use ${LOCALCOWORK_MODELS_DIR} for interpolation.
 # The config-loader resolves environment variables at load time.
 
-active_model: lfm2-24b-a2b # Sparse MoE: 24B total, 2.3B active, 64 experts top-4 — 80% tool accuracy
+active_model: lfm2-24b-a2b  # Sparse MoE: 24B total, 2.3B active, 64 experts top-4 — 80% tool accuracy
 
 # Default model directory for non-Ollama model files (GGUF, MLX, etc.)
models_dir: "${LOCALCOWORK_MODELS_DIR:-~/Projects/_models}" @@ -76,11 +86,11 @@ models: model_name: "gpt-oss:20b" base_url: "http://localhost:11434/v1" context_window: 32768 - tool_call_format: native_json # Native function calling + structured outputs + tool_call_format: native_json # Native function calling + structured outputs temperature: 0.7 max_tokens: 4096 estimated_vram_gb: 14 - force_json_response: false # Enable after live testing — triggers GBNF grammar enforcement + force_json_response: false # Enable after live testing — triggers GBNF grammar enforcement capabilities: - text - tool_calling @@ -92,7 +102,7 @@ models: model_path: "${LOCALCOWORK_MODELS_DIR:-~/Projects/_models}/lfm25-24b-q4_k_m.gguf" base_url: "http://localhost:8080/v1" context_window: 32768 - tool_call_format: pythonic # LFM2.5 uses Pythonic calls; normalizer converts to JSON + tool_call_format: pythonic # LFM2.5 uses Pythonic calls; normalizer converts to JSON temperature: 0.7 max_tokens: 4096 estimated_vram_gb: 14 @@ -102,20 +112,20 @@ models: # LFM2-24B-A2B — Liquid AI's MoE hybrid model (private preview) # Architecture: 24B total, 2.3B active per token, 64 experts top-4, 40 layers (1:3 attn:conv ratio) - # Download GGUF from: https://huggingface.co/LiquidAI/LFM2-24B-A2B-Preview (gated — request access) + # Download GGUF from: https://huggingface.co/LiquidAI/LFM2-24B-A2B (gated — request access) # Benchmark plan: docs/model-analysis/lfm2-24b-a2b-benchmark.md # Run: llama-server --model --port 8080 --ctx-size 32768 --n-gpu-layers 99 --flash-attn lfm2-24b-a2b: - display_name: "LFM2-24B-A2B-Preview" + display_name: "LFM2-24B-A2B" runtime: llama_cpp - model_path: "${LOCALCOWORK_MODELS_DIR:-~/Projects/_models}/LFM2-24B-A2B-Preview-Q4_K_M.gguf" + model_path: "${LOCALCOWORK_MODELS_DIR:-~/Projects/_models}/LFM2-24B-A2B-Q4_K_M.gguf" base_url: "http://localhost:8080/v1" context_window: 32768 - tool_call_format: bracket # LFM2 bracket format: [server.tool(args)] parsed by tool_call_parser.rs + tool_call_format: bracket # LFM2 bracket format: [server.tool(args)] parsed by tool_call_parser.rs temperature: 0.7 - tool_temperature: 0.1 # Lower temperature for tool-calling turns (ADR-008 Layer 3) + tool_temperature: 0.1 # Lower temperature for tool-calling turns (ADR-008 Layer 3) max_tokens: 4096 - estimated_vram_gb: 16 # Q4_K_M quantization estimate for 24B MoE + estimated_vram_gb: 16 # Q4_K_M quantization estimate for 24B MoE capabilities: - text - tool_calling @@ -130,7 +140,7 @@ models: tool_call_format: native_json temperature: 0.7 max_tokens: 4096 - estimated_vram_gb: 4 # Only ~3B active params + estimated_vram_gb: 4 # Only ~3B active params capabilities: - text - tool_calling @@ -152,7 +162,7 @@ models: tool_call_format: native_json temperature: 0.1 max_tokens: 4096 - estimated_vram_gb: 1.8 # Q8_0 model (1.25 GB) + mmproj (583 MB) + estimated_vram_gb: 1.8 # Q8_0 model (1.25 GB) + mmproj (583 MB) capabilities: - text - vision @@ -170,10 +180,10 @@ models: model_path: "${LOCALCOWORK_MODELS_DIR:-~/Projects/_models}/LFM2.5-1.2B-Router-FT-v2-Q8_0.gguf" base_url: "http://localhost:8082/v1" context_window: 32768 - tool_call_format: bracket # LFM2.5 bracket format: [server.tool(args)] + tool_call_format: bracket # LFM2.5 bracket format: [server.tool(args)] temperature: 0.1 max_tokens: 512 - estimated_vram_gb: 1.5 # Q8_0 quantization (1.2 GB) + estimated_vram_gb: 1.5 # Q8_0 quantization (1.2 GB) role: tool_router fine_tuned: method: lora @@ -198,7 +208,7 @@ models: model_path: 
"${LOCALCOWORK_MODELS_DIR:-~/Projects/_models}/LFM2.5-1.2B-Instruct-F16.gguf" base_url: "http://localhost:8084/v1" context_window: 32768 - tool_call_format: bracket # LFM2.5 bracket format: [server.tool(args)] + tool_call_format: bracket # LFM2.5 bracket format: [server.tool(args)] temperature: 0.1 max_tokens: 512 estimated_vram_gb: 2.3 @@ -238,6 +248,24 @@ models: - text - tool_calling + # LM Studio headless server — any model loaded in LM Studio + # Run `lms server start` or enable "Run LLM server on login" in app settings. + # Default port is 1234. Uses OpenAI-compatible API. + # Note: The model_name here is informational - update to match your loaded model. + lmstudio-default: + display_name: "LM Studio (Default)" + runtime: lmstudio + model_name: "liquid/lfm2-24b-a2b" # Replace with your loaded model ID + base_url: "http://localhost:1234/v1" + context_window: 32768 + tool_call_format: native_json + temperature: 0.7 + max_tokens: 4096 + estimated_vram_gb: null # Varies by loaded model + capabilities: + - text + - tool_calling + # ─── Benchmark comparison models ──────────────────────────────────────── # These models are benchmarked against LFM2-24B-A2B to demonstrate # scaling efficiency of hybrid MoE conv+attn vs dense and standard MoE. @@ -290,7 +318,7 @@ models: tool_temperature: 0.1 max_tokens: 4096 estimated_vram_gb: 20 - deprecated: true # Partial run only (40/100), dropped from active benchmarks + deprecated: true # Partial run only (40/100), dropped from active benchmarks capabilities: - text - tool_calling @@ -358,7 +386,9 @@ models: - text - tool_calling -# Runtime configurations +# Runtime configurations (informational only — not used by the app) +# These describe how to start each runtime for reference. The app +# expects the runtime to already be running when it starts. runtimes: ollama: command: "ollama serve" @@ -371,6 +401,14 @@ runtimes: health_check: "http://localhost:8080/health" startup_timeout_seconds: 60 + lmstudio: + # Use `lms server start` CLI to start headless, or enable "Run LLM server + # on login" in app settings (Cmd/Ctrl+,). Default port is 1234. + command: "lms" + args: ["server", "start"] + health_check: "http://localhost:1234/v1/models" + startup_timeout_seconds: 30 + mlx: command: "mlx_lm.server" args: ["--model", "{model_path}", "--port", "8080"] @@ -380,9 +418,9 @@ runtimes: # Fallback chain — used when the active model is unavailable fallback_chain: - - lfm2-24b-a2b # Primary — 78% single-step, 24% chain completion - - qwen3-30b-moe # Fallback 1 — Ollama-hosted Qwen3 MoE - - static_response # Fallback 2 — hardcoded "model unavailable" message + - lfm2-24b-a2b # Primary — 78% single-step, 24% chain completion + - qwen3-30b-moe # Fallback 1 — Ollama-hosted Qwen3 MoE + - static_response # Fallback 2 — hardcoded "model unavailable" message # Dual-model orchestrator (ADR-009) # When enabled, the planner model decomposes multi-step workflows and @@ -402,9 +440,9 @@ fallback_chain: # to skip the orchestrator entirely and avoid the ~2-3s wasted planner call. # See ADR-009 for full details. orchestrator: - enabled: false # With 20 curated tools, single-model loop is faster. Enable for 67+ tools. + enabled: false # With 20 curated tools, single-model loop is faster. Enable for 67+ tools. 
   planner_model: lfm2-24b-a2b
-  router_model: lfm25-1.2b-router-ft # Fine-tuned V2: 93.0% eval accuracy, 83.7% live (83 tools)
+  router_model: lfm25-1.2b-router-ft  # Fine-tuned V2: 93.0% eval accuracy, 83.7% live (83 tools)
   router_top_k: 15
   max_plan_steps: 10
   step_retries: 3
@@ -414,4 +452,4 @@ orchestrator:
 # ~15 category meta-tools (~1,500 tokens) instead of all 67 tools (~8,670 tokens).
 # The model selects 2-3 categories, then subsequent turns use only those tools.
 # Saves ~7,170 tokens per turn and eliminates cross-server confusion.
-two_pass_tool_selection: true # Active only when >30 tools registered; 21 curated tools use flat mode
+two_pass_tool_selection: true  # Active only when >30 tools registered; 21 curated tools use flat mode
diff --git a/examples/localcowork/scripts/start-model.sh b/examples/localcowork/scripts/start-model.sh
index f282661..2570af3 100755
--- a/examples/localcowork/scripts/start-model.sh
+++ b/examples/localcowork/scripts/start-model.sh
@@ -14,8 +14,14 @@ set -euo pipefail
 
 MODELS_DIR="${LOCALCOWORK_MODELS_DIR:-$HOME/Projects/_models}"
 
+# Ensure HuggingFace cache is in home directory, NOT in project repo
+# This prevents .cache/huggingface from being created in the project
+export HF_HOME="${HF_HOME:-$HOME/.cache/huggingface}"
+export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
+echo "📁 HuggingFace cache: HF_HOME=$HF_HOME"
+
 # Main model (LFM2-24B-A2B)
-MAIN_MODEL="LFM2-24B-A2B-Preview-Q4_K_M.gguf"
+MAIN_MODEL="LFM2-24B-A2B-Q4_K_M.gguf"
 MAIN_PORT=8080
 MAIN_CTX=32768
 
@@ -30,40 +36,40 @@ START_VISION=false
 CHECK_ONLY=false
 
 for arg in "$@"; do
-  case "$arg" in
-    --vision) START_VISION=true ;;
-    --check) CHECK_ONLY=true ;;
-    --help|-h)
-      echo "Usage: $0 [--vision] [--check]"
-      echo ""
-      echo "  --vision   Also start the vision model server (port $VISION_PORT)"
-      echo "  --check    Check if model files exist (don't start servers)"
-      echo ""
-      echo "Environment:"
-      echo "  LOCALCOWORK_MODELS_DIR   Model directory (default: ~/Projects/_models)"
-      exit 0
-      ;;
-    *)
-      echo "Unknown argument: $arg"
-      echo "Run '$0 --help' for usage."
-      exit 1
-      ;;
-  esac
+    case "$arg" in
+    --vision) START_VISION=true ;;
+    --check) CHECK_ONLY=true ;;
+    --help | -h)
+        echo "Usage: $0 [--vision] [--check]"
+        echo ""
+        echo "  --vision   Also start the vision model server (port $VISION_PORT)"
+        echo "  --check    Check if model files exist (don't start servers)"
+        echo ""
+        echo "Environment:"
+        echo "  LOCALCOWORK_MODELS_DIR   Model directory (default: ~/Projects/_models)"
+        exit 0
+        ;;
+    *)
+        echo "Unknown argument: $arg"
+        echo "Run '$0 --help' for usage."
+        exit 1
+        ;;
+    esac
 done
 
 # ── Check llama-server ───────────────────────────────────────────────────────
-if ! command -v llama-server &> /dev/null; then
-  echo "❌ llama-server not found."
-  echo ""
-  echo "Install via Homebrew (macOS):"
-  echo "  brew install llama.cpp"
-  echo ""
-  echo "Or build from source:"
-  echo "  git clone https://github.com/ggml-org/llama.cpp"
-  echo "  cd llama.cpp && cmake -B build && cmake --build build --config Release"
-  echo "  # Binary at: build/bin/llama-server"
-  exit 1
+if ! command -v llama-server &>/dev/null; then
+    echo "❌ llama-server not found."
+ echo "" + echo "Install via Homebrew (macOS):" + echo " brew install llama.cpp" + echo "" + echo "Or build from source:" + echo " git clone https://github.com/ggml-org/llama.cpp" + echo " cd llama.cpp && cmake -B build && cmake --build build --config Release" + echo " # Binary at: build/bin/llama-server" + exit 1 fi echo "✅ llama-server found: $(command -v llama-server)" @@ -79,55 +85,55 @@ VISION_PATH="$MODELS_DIR/$VISION_MODEL" MMPROJ_PATH="$MODELS_DIR/$VISION_MMPROJ" if [ -f "$MAIN_PATH" ]; then - MAIN_SIZE=$(du -h "$MAIN_PATH" | cut -f1) - echo "✅ Main model: $MAIN_MODEL ($MAIN_SIZE)" + MAIN_SIZE=$(du -h "$MAIN_PATH" | cut -f1) + echo "✅ Main model: $MAIN_MODEL ($MAIN_SIZE)" else - echo "❌ Main model not found: $MAIN_PATH" - echo "" - echo " Download LFM2-24B-A2B from HuggingFace (gated — request access first):" - echo " https://huggingface.co/LiquidAI/LFM2-24B-A2B-Preview" - echo "" - echo " pip install huggingface-hub" - echo " python3 -c \"" - echo " from huggingface_hub import hf_hub_download" - echo " hf_hub_download('LiquidAI/LFM2-24B-A2B-Preview'," - echo " 'LFM2-24B-A2B-Preview-Q4_K_M.gguf'," - echo " local_dir='$MODELS_DIR')" - echo " \"" - if [ "$CHECK_ONLY" = true ]; then - echo "" - else - exit 1 - fi + echo "❌ Main model not found: $MAIN_PATH" + echo "" + echo " Download LFM2-24B-A2B from HuggingFace (gated — request access first):" + echo " https://huggingface.co/LiquidAI/LFM2-24B-A2B" + echo "" + echo " pip install huggingface-hub" + echo " python3 -c \"" + echo " from huggingface_hub import hf_hub_download" + echo " hf_hub_download('LiquidAI/LFM2-24B-A2B'," + echo " 'LFM2-24B-A2B-Q4_K_M.gguf'," + echo " local_dir='$MODELS_DIR')" + echo " \"" + if [ "$CHECK_ONLY" = true ]; then + echo "" + else + exit 1 + fi fi if [ -f "$VISION_PATH" ] && [ -f "$MMPROJ_PATH" ]; then - echo "✅ Vision model: $VISION_MODEL + mmproj" + echo "✅ Vision model: $VISION_MODEL + mmproj" else - echo "⚠️ Vision model not found (optional — OCR falls back to Tesseract)" - if [ "$START_VISION" = true ]; then - echo "" - echo " Download from: https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-GGUF" - echo "" - echo " pip install huggingface-hub" - echo " python3 -c \"" - echo " from huggingface_hub import hf_hub_download" - echo " for f in ['$VISION_MODEL', '$VISION_MMPROJ']:" - echo " hf_hub_download('LiquidAI/LFM2.5-VL-1.6B-GGUF', f," - echo " local_dir='$MODELS_DIR')" - echo " \"" - fi + echo "⚠️ Vision model not found (optional — OCR falls back to Tesseract)" + if [ "$START_VISION" = true ]; then + echo "" + echo " Download from: https://huggingface.co/LiquidAI/LFM2.5-VL-1.6B-GGUF" + echo "" + echo " pip install huggingface-hub" + echo " python3 -c \"" + echo " from huggingface_hub import hf_hub_download" + echo " for f in ['$VISION_MODEL', '$VISION_MMPROJ']:" + echo " hf_hub_download('LiquidAI/LFM2.5-VL-1.6B-GGUF', f," + echo " local_dir='$MODELS_DIR')" + echo " \"" + fi fi if [ "$CHECK_ONLY" = true ]; then - exit 0 + exit 0 fi # ── Start main model server ───────────────────────────────────────────────── if [ ! -f "$MAIN_PATH" ]; then - echo "Cannot start server — main model file missing." - exit 1 + echo "Cannot start server — main model file missing." + exit 1 fi echo "" @@ -141,11 +147,11 @@ echo "" # Start main model in background llama-server \ - --model "$MAIN_PATH" \ - --port "$MAIN_PORT" \ - --ctx-size "$MAIN_CTX" \ - --n-gpu-layers 99 \ - --flash-attn & + --model "$MAIN_PATH" \ + --port "$MAIN_PORT" \ + --ctx-size "$MAIN_CTX" \ + --n-gpu-layers 99 \ + --flash-attn & MAIN_PID=$! 
echo " PID: $MAIN_PID" @@ -153,47 +159,47 @@ echo " PID: $MAIN_PID" # Wait for health check echo -n " Waiting for server..." for i in $(seq 1 60); do - if curl -sf "http://localhost:$MAIN_PORT/health" > /dev/null 2>&1; then - echo " ready!" - break - fi - if [ "$i" -eq 60 ]; then - echo " timeout (60s). Check logs above for errors." - exit 1 - fi - sleep 1 - echo -n "." + if curl -sf "http://localhost:$MAIN_PORT/health" >/dev/null 2>&1; then + echo " ready!" + break + fi + if [ "$i" -eq 60 ]; then + echo " timeout (60s). Check logs above for errors." + exit 1 + fi + sleep 1 + echo -n "." done # ── Start vision model server (optional) ───────────────────────────────────── if [ "$START_VISION" = true ] && [ -f "$VISION_PATH" ] && [ -f "$MMPROJ_PATH" ]; then - echo "" - echo "═══════════════════════════════════════════════════" - echo " Starting LFM2.5-VL-1.6B on port $VISION_PORT" - echo "═══════════════════════════════════════════════════" - - llama-server \ - --model "$VISION_PATH" \ - --mmproj "$MMPROJ_PATH" \ - --port "$VISION_PORT" \ - --ctx-size 32768 & - - VISION_PID=$! - echo " PID: $VISION_PID" - - echo -n " Waiting for server..." - for i in $(seq 1 60); do - if curl -sf "http://localhost:$VISION_PORT/health" > /dev/null 2>&1; then - echo " ready!" - break - fi - if [ "$i" -eq 60 ]; then - echo " timeout. Vision OCR will fall back to Tesseract." - fi - sleep 1 - echo -n "." - done + echo "" + echo "═══════════════════════════════════════════════════" + echo " Starting LFM2.5-VL-1.6B on port $VISION_PORT" + echo "═══════════════════════════════════════════════════" + + llama-server \ + --model "$VISION_PATH" \ + --mmproj "$MMPROJ_PATH" \ + --port "$VISION_PORT" \ + --ctx-size 32768 & + + VISION_PID=$! + echo " PID: $VISION_PID" + + echo -n " Waiting for server..." + for i in $(seq 1 60); do + if curl -sf "http://localhost:$VISION_PORT/health" >/dev/null 2>&1; then + echo " ready!" + break + fi + if [ "$i" -eq 60 ]; then + echo " timeout. Vision OCR will fall back to Tesseract." + fi + sleep 1 + echo -n "." + done fi # ── Summary ────────────────────────────────────────────────────────────────── @@ -204,7 +210,7 @@ echo " Model servers running" echo "═══════════════════════════════════════════════════" echo " Main: http://localhost:$MAIN_PORT/v1 (PID $MAIN_PID)" if [ "$START_VISION" = true ] && [ -n "${VISION_PID:-}" ]; then - echo " Vision: http://localhost:$VISION_PORT/v1 (PID $VISION_PID)" + echo " Vision: http://localhost:$VISION_PORT/v1 (PID $VISION_PID)" fi echo "" echo " In another terminal: cargo tauri dev"