Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f068db2
docs(phase-74): plan hexagonal HNSW — 4 plans in 3 waves
TinDang97 Mar 31, 2026
9230b15
feat(74-02): A2 hexagonal lattice codebook for paired-dimension Turbo…
TinDang97 Mar 31, 2026
1e47239
feat(74-01): diversity heuristic neighbor selection (Algorithm 4)
TinDang97 Mar 31, 2026
76acbe4
test(74-03): add failing tests for cell-parallel compaction
TinDang97 Mar 31, 2026
ae09f6f
feat(74-03): implement cell-parallel compaction with spatial partitio…
TinDang97 Mar 31, 2026
80afa90
docs(74-03): update .planning submodule for cell-parallel compaction
TinDang97 Mar 31, 2026
1fbbb1e
feat(74-04): add TQ4A2 config variant and encode_tq_mse_a2 function
TinDang97 Mar 31, 2026
cc8a266
feat(74-04): wire TQ4A2 encoding into compaction and mutable segment
TinDang97 Mar 31, 2026
815ffa7
docs(74-04): update .planning submodule for A2 lattice encoding pipeline
TinDang97 Mar 31, 2026
9dd7e1f
docs(phase-74): benchmark report — Moon vs Redis vs Qdrant at 10K/768d
TinDang97 Mar 31, 2026
9de2fae
fix(74): disable diversity heuristic for TQ-ADC graph construction
TinDang97 Apr 1, 2026
9377c57
fix(74): decoded centroid L2 for Light-mode HNSW build + ef_search tu…
TinDang97 Apr 1, 2026
2a8405d
feat(74): sub-centroid signs at insert time — recall 92.7% → 96.9%
TinDang97 Apr 1, 2026
ba220e5
feat: add mixed insert+search simulation benchmark
TinDang97 Apr 1, 2026
fe2ae3b
fix: global vector IDs across multiple compacted segments
TinDang97 Apr 1, 2026
c18437d
fix: disable compact_parallel — 2-coord partitioning breaks high-d re…
TinDang97 Apr 1, 2026
69b6d61
docs: add hexagonal HNSW architecture diagram
TinDang97 Apr 1, 2026
d4e0c9a
docs: hexagonal HNSW status report — features, bugs, benchmarks, memo…
TinDang97 Apr 1, 2026
3ddb7c5
fix: address all 7 Qodo code review findings
TinDang97 Apr 1, 2026
5f711ed
chore: remove obsolete hexagonal HNSW architecture diagram and status…
TinDang97 Apr 1, 2026
b0211f4
style: cargo fmt
TinDang97 Apr 1, 2026
e279442
Increase timeout for cargo test from 10 to 15 minutes
pilotspacex-byte Apr 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- run: cargo test --no-default-features --features runtime-tokio,jemalloc
timeout-minutes: 10
timeout-minutes: 15
env:
MOON_NO_URING: "1"

Expand Down
2 changes: 1 addition & 1 deletion .planning
364 changes: 364 additions & 0 deletions scripts/bench-mixed-1k-compact.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,364 @@
#!/usr/bin/env python3
"""
Mixed Insert+Search with COMPACT_THRESHOLD=1000

Simulates a realistic workload where vectors arrive continuously and
searches happen between inserts. Compaction triggers every 1K vectors
in the mutable segment, creating multiple immutable HNSW segments.

Timeline (10K total):
- Insert 100 vectors, then search 10 queries → repeat 100 times
- Every ~1000 vectors: compaction fires on next search
- Track: recall, latency, compaction events per 100-vector window

This exposes:
- How recall behaves BETWEEN compaction events (mutable brute-force)
- Compaction latency spikes and their frequency
- Recall across multiple immutable segments (merged search)
- Whether small segments hurt recall vs one large segment
"""

import json
import os
import sys
import time

import numpy as np


def generate_or_load_data():
    """Load the cached MiniLM benchmark dataset.

    Reads ``vectors.npy``, ``queries.npy`` and ``ground_truth.json`` from
    ``target/bench-data-minilm`` (relative to the current working directory).

    Returns:
        A ``(vectors, queries, gt)`` tuple: the two arrays as loaded by
        ``np.load`` and the parsed JSON ground truth.

    Exits the process with status 1 (via ``sys.exit``) when any of the three
    cache files is missing, after printing a hint to stderr.
    """
    cache = "target/bench-data-minilm"
    vectors_path = f"{cache}/vectors.npy"
    queries_path = f"{cache}/queries.npy"
    gt_path = f"{cache}/ground_truth.json"

    # Check every file up front: a partially written cache would otherwise
    # surface as a raw FileNotFoundError instead of the actionable hint below.
    missing = [
        path
        for path in (vectors_path, queries_path, gt_path)
        if not os.path.exists(path)
    ]
    if missing:
        print(
            "ERROR: Run bench-mixed-workload.py first to generate MiniLM data",
            file=sys.stderr,
        )
        print(f"       missing: {', '.join(missing)}", file=sys.stderr)
        sys.exit(1)

    vectors = np.load(vectors_path)
    queries = np.load(queries_path)
    with open(gt_path, encoding="utf-8") as f:
        gt = json.load(f)
    return vectors, queries, gt


def _parse_moon_search_ids(result):
    """Extract integer vector IDs from an FT.SEARCH reply.

    The reply is read as ``[total, key, fields, key, fields, ...]``: every
    second element starting at index 1 is treated as a document key whose
    last ``:``-separated component is the integer vector ID.

    Returns an empty list when the reply is not a list or carries no hits.

    Raises:
        ValueError: if a key cannot be decoded or parsed. Failing loudly keeps
            a changed reply format from masquerading as a recall regression.
    """
    if not isinstance(result, list) or len(result) <= 1:
        return []
    ids = []
    for raw in result[1::2]:
        try:
            key = raw.decode() if isinstance(raw, bytes) else raw
            ids.append(int(key.rsplit(":", 1)[-1]))
        except (AttributeError, ValueError) as exc:
            # UnicodeDecodeError is a ValueError subclass; AttributeError
            # covers non-string items (e.g. a nested list in the key slot).
            raise ValueError(
                f"unparseable FT.SEARCH key {raw!r}; "
                f"reply starts with {repr(result)[:200]}"
            ) from exc
    return ids


def run_moon(port, vectors, queries, gt_final, compact_threshold, namespace=None):
    """Run the mixed insert+search workload against a Moon server.

    Vectors are inserted in batches of 100 via pipelined ``HSET``; after each
    batch, 10 KNN-10 ``FT.SEARCH`` queries are timed and scored against a
    brute-force L2 ground truth over the vectors inserted so far. Once all
    vectors are in, every query is run once more and scored against
    ``gt_final``.

    Args:
        port: TCP port of the Moon server.
        vectors: 2-D array of vectors to insert (``vectors.shape`` is
            unpacked as ``(n, dim)``).
        queries: sequence of query vectors, cycled through during the run.
        gt_final: per-query ground-truth ID collections for the final pass.
            # assumes gt_final[i] holds exactly the top-10 IDs — TODO confirm
        compact_threshold: value passed as ``COMPACT_THRESHOLD`` to FT.CREATE.
        namespace: optional suffix used to build the index name and key
            prefix. ``None`` (default) generates a fresh one per call so that
            reruns against the same server do not collide on FT.CREATE or
            reuse stale keys; pass ``""`` to use the bare ``idx`` / ``doc:``
            names. Earlier namespaces are left on the server untouched.

    Returns:
        A dict with the per-batch ``timeline``, detected
        ``compaction_events``, ``total_compact_time_ms``, final-pass
        ``final_recall`` / ``final_p50`` / ``final_qps``, the raw
        ``all_lats`` (ms), ``steady_state_recall`` and ``num_compactions``.

    Raises:
        ValueError: if an FT.SEARCH reply contains a key that cannot be
            parsed into an integer ID.
    """
    import uuid

    import redis as redis_lib

    top_k = 10
    insert_batch = 100
    search_per_batch = 10
    # A search slower than this is taken as a compaction having fired on it.
    compaction_spike_ms = 100

    if namespace is None:
        namespace = uuid.uuid4().hex[:8]
    index_name = f"idx_{namespace}" if namespace else "idx"
    key_prefix = f"doc:{namespace}:" if namespace else "doc:"
    knn_query = f"*=>[KNN {top_k} @vec $query]"

    r = redis_lib.Redis(port=port, decode_responses=False, socket_timeout=600)
    r.ping()

    n, dim = vectors.shape

    # Create index with specified compact threshold
    r.execute_command(
        "FT.CREATE", index_name, "ON", "HASH",
        "PREFIX", "1", key_prefix,
        "SCHEMA", "vec", "VECTOR", "HNSW", "10",
        "TYPE", "FLOAT32", "DIM", str(dim),
        "DISTANCE_METRIC", "L2", "QUANTIZATION", "TQ4",
        "COMPACT_THRESHOLD", str(compact_threshold),
    )

    def search(q):
        """Run one timed KNN search; return ``(reply, latency_ms)``."""
        t0 = time.perf_counter()
        reply = r.execute_command(
            "FT.SEARCH", index_name,
            knn_query,
            "PARAMS", "2", "query", q.tobytes(),
        )
        return reply, (time.perf_counter() - t0) * 1000

    timeline = []  # per-batch metrics
    all_lats = []
    compaction_events = []
    next_id = 0
    query_idx = 0
    total_compact_time = 0.0

    print(f" Config: {n} vectors, batch={insert_batch}, "
          f"search/batch={search_per_batch}, compact_threshold={compact_threshold}")
    print(f" Expected compactions: ~{n // compact_threshold}")
    print()
    print(f" {'Vectors':>7} │ {'Recall':>7} │ {'p50':>7} │ {'p99':>8} │ {'max':>8} │ Compact")
    print(f" {'':─>7}─┼─{'':─>7}─┼─{'':─>7}─┼─{'':─>8}─┼─{'':─>8}─┼─{'':─>20}")

    # Step by batch start so a trailing partial batch is still inserted;
    # the final pass is scored against ground truth for the full dataset.
    for start in range(0, n, insert_batch):
        stop = min(start + insert_batch, n)

        # Insert batch
        pipe = r.pipeline(transaction=False)
        for vid in range(start, stop):
            pipe.execute_command(
                "HSET", f"{key_prefix}{vid}", "vec", vectors[vid].tobytes()
            )
        pipe.execute()
        next_id = stop

        # Search queries and measure
        batch_lats = []
        batch_recalls = []
        batch_compact = False
        batch_compact_time = 0.0

        for _ in range(search_per_batch):
            q = queries[query_idx % len(queries)]
            query_idx += 1

            result, lat = search(q)
            batch_lats.append(lat)
            all_lats.append(lat)

            # Detect compaction spike; keep the largest one seen in the batch
            # so the recorded latency does not depend on query order.
            if lat > compaction_spike_ms:
                batch_compact = True
                batch_compact_time = max(batch_compact_time, lat)

            ids = _parse_moon_search_ids(result)

            # Recall vs brute-force over ALL vectors inserted so far
            dists = np.sum((vectors[:next_id] - q) ** 2, axis=1)
            local_gt = set(np.argsort(dists)[:top_k].tolist())
            # len(local_gt) equals top_k once at least top_k vectors exist.
            batch_recalls.append(len(set(ids) & local_gt) / len(local_gt))

        avg_recall = np.mean(batch_recalls)
        p50 = np.percentile(batch_lats, 50)
        p99 = np.percentile(batch_lats, 99)
        max_lat = max(batch_lats)

        compact_str = ""
        if batch_compact:
            compact_str = f"← {batch_compact_time:.0f}ms"
            compaction_events.append({
                "at_vectors": next_id,
                "latency_ms": batch_compact_time,
            })
            total_compact_time += batch_compact_time

        timeline.append({
            "vectors": next_id,
            "recall": float(avg_recall),
            "p50_ms": float(p50),
            "p99_ms": float(p99),
            "max_ms": float(max_lat),
            "compact": batch_compact,
        })

        # Print every 500 vectors or on compaction
        if next_id % 500 == 0 or batch_compact:
            print(f" {next_id:>7} │ {avg_recall:>7.4f} │ {p50:>6.1f}ms │ {p99:>7.1f}ms │ {max_lat:>7.0f}ms │ {compact_str}")

    # Final recall against full ground truth
    print()
    print(f" Final recall measurement ({len(queries)} queries, full GT)...")
    final_recalls = []
    final_lats = []
    for i, q in enumerate(queries):
        result, lat = search(q)
        final_lats.append(lat)
        ids = _parse_moon_search_ids(result)
        final_recalls.append(len(set(ids) & set(gt_final[i])) / top_k)

    return {
        "timeline": timeline,
        "compaction_events": compaction_events,
        "total_compact_time_ms": total_compact_time,
        "final_recall": float(np.mean(final_recalls)),
        "final_p50": float(np.percentile(final_lats, 50)),
        "final_qps": float(1000 / np.mean(final_lats)),
        "all_lats": all_lats,
        "steady_state_recall": float(np.mean([t["recall"] for t in timeline])),
        "num_compactions": len(compaction_events),
    }


def _parse_vsim_ids(result):
    """Extract integer vector IDs from a VSIM reply.

    Each item of the reply list is treated as an element name whose last
    ``:``-separated component is the integer vector ID. A non-list reply
    yields an empty list.

    Raises:
        ValueError: if an element name cannot be decoded or parsed, so that a
            changed reply shape aborts the run instead of lowering recall.
    """
    if not isinstance(result, list):
        return []
    ids = []
    for item in result:
        try:
            name = item.decode() if isinstance(item, bytes) else str(item)
            ids.append(int(name.rsplit(":", 1)[-1]))
        except ValueError as exc:  # includes UnicodeDecodeError
            raise ValueError(
                f"unparseable VSIM element {item!r}; "
                f"reply starts with {repr(result)[:200]}"
            ) from exc
    return ids


def run_redis(port, vectors, queries, gt_final):
    """Run the mixed insert+search workload against Redis vector sets.

    Mirrors the Moon workload: vectors are inserted in batches of 100 with
    pipelined ``VADD`` into the ``vecset`` key; after each batch, 10 ``VSIM``
    top-10 queries are timed and scored against a brute-force L2 ground truth
    over the vectors inserted so far. A final pass runs every query once and
    scores it against ``gt_final``.

    Args:
        port: TCP port of the Redis server.
        vectors: 2-D array of vectors to insert.
        queries: sequence of query vectors, cycled through during the run.
        gt_final: per-query ground-truth ID collections for the final pass.

    Returns:
        A dict with the per-batch ``timeline``, ``final_recall``,
        ``final_p50``, ``final_qps``, ``steady_state_recall`` and the raw
        ``all_lats`` (ms).

    Raises:
        ValueError: if a VSIM reply contains an element that cannot be parsed
            into an integer ID.
    """
    import redis as redis_lib

    top_k = 10
    insert_batch = 100
    search_per_batch = 10
    set_name = "vecset"

    r = redis_lib.Redis(port=port, decode_responses=False, socket_timeout=600)
    r.ping()

    # Start from an empty set so elements left by a previous run (possibly
    # with a different dataset size) cannot leak into this run's results.
    r.delete(set_name)

    n, dim = vectors.shape

    def search(q):
        """Run one timed VSIM query; return ``(reply, latency_ms)``."""
        t0 = time.perf_counter()
        reply = r.execute_command(
            "VSIM", set_name, "FP32", q.tobytes(), "COUNT", str(top_k)
        )
        return reply, (time.perf_counter() - t0) * 1000

    timeline = []
    all_lats = []
    next_id = 0
    query_idx = 0

    # Step by batch start so a trailing partial batch is still inserted;
    # the final pass is scored against ground truth for the full dataset.
    for start in range(0, n, insert_batch):
        stop = min(start + insert_batch, n)

        pipe = r.pipeline(transaction=False)
        for vid in range(start, stop):
            pipe.execute_command(
                "VADD", set_name, "FP32", vectors[vid].tobytes(), f"vec:{vid}"
            )
        pipe.execute()
        next_id = stop

        batch_lats = []
        batch_recalls = []
        for _ in range(search_per_batch):
            q = queries[query_idx % len(queries)]
            query_idx += 1

            result, lat = search(q)
            batch_lats.append(lat)
            all_lats.append(lat)

            ids = _parse_vsim_ids(result)

            # Recall vs brute-force over all vectors inserted so far.
            dists = np.sum((vectors[:next_id] - q) ** 2, axis=1)
            local_gt = set(np.argsort(dists)[:top_k].tolist())
            # len(local_gt) equals top_k once at least top_k vectors exist.
            batch_recalls.append(len(set(ids) & local_gt) / len(local_gt))

        timeline.append({
            "vectors": next_id,
            "recall": float(np.mean(batch_recalls)),
            "p50_ms": float(np.percentile(batch_lats, 50)),
        })

    final_recalls = []
    final_lats = []
    for i, q in enumerate(queries):
        result, lat = search(q)
        final_lats.append(lat)
        ids = _parse_vsim_ids(result)
        final_recalls.append(len(set(ids) & set(gt_final[i])) / top_k)

    return {
        "timeline": timeline,
        "final_recall": float(np.mean(final_recalls)),
        "final_p50": float(np.percentile(final_lats, 50)),
        "final_qps": float(1000 / np.mean(final_lats)),
        "steady_state_recall": float(np.mean([t["recall"] for t in timeline])),
        "all_lats": all_lats,
    }


def main():
    """Command-line entry point for the mixed insert+search benchmark.

    Parses the CLI flags, loads the cached dataset, runs the Moon workload
    and (unless ``--skip-redis`` is given) the Redis workload, prints a
    summary, and writes both result dicts to
    ``target/bench-results/mixed-1k-compact.json``. A failure in either
    workload is reported on stdout and recorded as ``None`` in the output.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--moon-port", type=int, default=6379)
    cli.add_argument("--redis-port", type=int, default=6400)
    cli.add_argument("--compact-threshold", type=int, default=1000)
    cli.add_argument("--skip-redis", action="store_true")
    args = cli.parse_args()

    rule = "=" * 65

    def banner(title):
        # Section header: title framed by two horizontal rules.
        print(rule)
        print(title)
        print(rule)

    def print_latency(samples):
        # Overall latency distribution line shared by both summaries.
        print(f" Latency: p50={np.percentile(samples,50):.1f}ms "
              f"p95={np.percentile(samples,95):.1f}ms "
              f"p99={np.percentile(samples,99):.1f}ms "
              f"max={max(samples):.0f}ms")

    vectors, queries, gt = generate_or_load_data()
    n, dim = vectors.shape
    print(f"Mixed Insert+Search (compact_threshold={args.compact_threshold})")
    print(f"Data: {n} MiniLM vectors, {dim}d, {len(queries)} queries")
    print(f"Pattern: insert 100 → search 10 → repeat {n // 100} times")
    print()

    # Moon
    banner(f" Moon (port {args.moon_port}, compact_threshold={args.compact_threshold})")
    moon = None
    try:
        moon = run_moon(args.moon_port, vectors, queries, gt, args.compact_threshold)
    except Exception as err:
        print(f" Moon error: {err}")

    # Redis
    redis_result = None
    if not args.skip_redis:
        print()
        banner(f" Redis (port {args.redis_port})")
        try:
            redis_result = run_redis(args.redis_port, vectors, queries, gt)
        except Exception as err:
            print(f" Redis error: {err}")

    # Report
    print()
    banner(" SUMMARY")
    print()

    if moon:
        print(f" Moon (compact_threshold={args.compact_threshold}):")
        print(f" Steady-state recall (avg over all batches): {moon['steady_state_recall']:.4f}")
        print(f" Final recall@10: {moon['final_recall']:.4f}")
        print(f" Final QPS: {moon['final_qps']:.0f}")
        print(f" Final p50: {moon['final_p50']:.2f}ms")
        print(f" Compaction events: {moon['num_compactions']}")
        print(f" Total compact time: {moon['total_compact_time_ms']:.0f}ms")
        moon_lats = moon['all_lats']
        if moon_lats:
            print_latency(moon_lats)
        events = moon['compaction_events']
        if events:
            print(" Compaction details:")
            for event in events:
                print(f" at {event['at_vectors']:>5} vectors: {event['latency_ms']:.0f}ms")
        print()

    if redis_result:
        print(" Redis:")
        print(f" Steady-state recall: {redis_result['steady_state_recall']:.4f}")
        print(f" Final recall@10: {redis_result['final_recall']:.4f}")
        print(f" Final QPS: {redis_result['final_qps']:.0f}")
        print_latency(redis_result['all_lats'])
        print()

    # Save
    os.makedirs("target/bench-results", exist_ok=True)
    report = {
        "moon": moon,
        "redis": redis_result,
        "compact_threshold": args.compact_threshold,
    }
    with open("target/bench-results/mixed-1k-compact.json", "w") as out_file:
        json.dump(report, out_file, indent=2, default=str)

# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading
Loading