From f8817a366190bad54497407ede011202c3a797aa Mon Sep 17 00:00:00 2001 From: Tin Dang Date: Sun, 24 May 2026 20:53:27 +0700 Subject: [PATCH] fix(tier-2): CodeRabbit follow-ups from PR #100 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second pass on the CodeRabbit review surfaced four findings that the previous fix wave did not address. This commit closes them all. ## Real bugs - `src/cluster/command.rs::format_node_line`: `NodeFlags::Replica { master_id }` was rendered as `"slave "` inside the flags column AND the master-id was emitted again in its own dedicated column. The result was a malformed `CLUSTER NODES` / `CLUSTER REPLICAS` line where the master-id appeared twice and every column right of "flags" shifted by one — breaking whitespace-tokenized parsers (including redis-cli's own splitter). Flags is now `"slave"` / `"myself,slave"` only; master-id stays in its own column. - `src/command/key_extra.rs` `COPY ... DB `: the DB index token was consumed by `i += 1` without parsing. The handler-level intercept normally validates the DB index before reaching dispatch, but any path that bypassed the intercept (or future caller wiring) would silently accept garbage like `COPY src dst DB abc`. Now defensively `parse::()`s the token and returns the canonical `ERR value is not an integer or out of range` on malformed input. ## Test hardening - `src/cluster/command.rs` test `cluster_replicas_lists_replicas`: was loose (only checked >=9 fields, gathered ids). It would not have caught the duplicate-master-id bug above. Now pins the exact replica-line layout: 9 whitespace-separated columns, `fields[2] == "slave"` (not "slave "), `fields[3]` is exactly the 40-char master id, and the master-id appears exactly once on the line. Any future regression that re-embeds master-id in the flags column will flip the column count from 9 → 10 and trip the assertion. ## Script hygiene - `scripts/test-consistency.sh` SWAPDB block: * Seed step previously used `both SELECT 0; both SET swapkey hello; both SELECT 1; both DEL swapkey`. `redis-cli SELECT` does NOT persist across separate invocations, so the trailing `DEL swapkey` ran against db0 and removed the key we had just seeded. Now uses explicit `redis-cli -n ` for each step. * Out-of-range parity check captured `$redis_oor` but only asserted `$rust_oor` contained `ERR`. A divergence (moon errors, Redis silently OKs) would have passed. Now requires both sides to return `ERR` and prints both messages on failure. ## Scoped out (intentionally not in this PR) The remaining CodeRabbit comments need design work, not patches: - `src/shard/coordinator.rs` is over the 1500-LOC cap. Splitting it out (e.g. `coordinator::swapdb`) is mechanical refactor work that belongs with the parallel `handler_{sharded,monoio}/mod.rs` split — they're all over the cap and should land together. - Snapshot-COW interaction with `SwapDb` / `MOVE` in `src/shard/spsc_handler.rs` needs a design pass on snapshot reachability for two-DB commands, not a quick guard. ## Verification - `cargo build --release` PASS - `cargo build --no-default-features --features runtime-tokio,jemalloc` PASS - `cargo clippy --release -- -D warnings` PASS - `cargo clippy --no-default-features --features runtime-tokio,jemalloc -- -D warnings` PASS - `cargo fmt --check` PASS - `cargo test --release --lib` — 3256 PASS / 0 FAIL - `cargo test --no-default-features --features runtime-tokio,jemalloc --lib` — 2649 PASS / 0 FAIL - Functional: `COPY src dst DB abc` returns ERR; `COPY src dst DB 99999` returns ERR (out-of-range intercept still fires); CLUSTER REPLICAS layout verified by the strengthened unit test. author: Tin Dang --- scripts/test-consistency.sh | 25 +++++++---- src/cluster/command.rs | 87 ++++++++++++++++++++++++++++++------- src/command/key_extra.rs | 30 ++++++++++--- 3 files changed, 113 insertions(+), 29 deletions(-) diff --git a/scripts/test-consistency.sh b/scripts/test-consistency.sh index 671407d3..560e804b 100755 --- a/scripts/test-consistency.sh +++ b/scripts/test-consistency.sh @@ -545,11 +545,14 @@ assert_both "TOUCH missing" TOUCH edge:nomiss # =========================================================================== log "=== SWAPDB ===" -# Seed: db0 has swapkey=hello, db1 is empty -both SELECT 0 -both SET swapkey hello -both SELECT 1 -both DEL swapkey +# Seed: db0 has swapkey=hello, db1 is empty. +# Use explicit `-n ` per invocation — `redis-cli SELECT` does NOT persist +# across separate process invocations, so the previous `both SELECT 1; both +# DEL swapkey` deleted from db0 (the just-seeded key) instead of db1. +redis-cli -p "$PORT_REDIS" -n 0 SET swapkey hello >/dev/null +redis-cli -p "$PORT_RUST" -n 0 SET swapkey hello >/dev/null +redis-cli -p "$PORT_REDIS" -n 1 DEL swapkey >/dev/null +redis-cli -p "$PORT_RUST" -n 1 DEL swapkey >/dev/null # SWAPDB 0 1 — swaps databases 0 and 1 assert_both "SWAPDB 0 1" SWAPDB 0 1 @@ -566,13 +569,19 @@ assert_eq "SWAPDB: key absent from db0" "$redis_db0_gone" "$rust_db0_gone" # Same-index SWAPDB is a no-op; must return OK (not error) assert_both "SWAPDB 0 0 (same-index no-op)" SWAPDB 0 0 -# Out-of-range indices must return ERR (not panic) +# Out-of-range indices must return ERR (not panic) — assert parity with Redis, +# not just that moon emits *some* ERR. The previous check ignored $redis_oor, +# so a divergence (e.g. moon ERR + Redis OK, or different error wording class) +# would silently pass. redis_oor=$(redis-cli -p "$PORT_REDIS" SWAPDB 0 9999 2>&1) || true rust_oor=$(redis-cli -p "$PORT_RUST" SWAPDB 0 9999 2>&1) || true -if echo "$rust_oor" | grep -qi "ERR"; then +if echo "$redis_oor" | grep -qi "ERR" && echo "$rust_oor" | grep -qi "ERR"; then PASS=$((PASS + 1)) else - FAIL=$((FAIL + 1)); echo " FAIL: SWAPDB out-of-range should return ERR, got: $rust_oor" + FAIL=$((FAIL + 1)) + echo " FAIL: SWAPDB out-of-range parity" + echo " redis: $redis_oor" + echo " rust: $rust_oor" fi # Swap back to restore state for remaining tests diff --git a/src/cluster/command.rs b/src/cluster/command.rs index 4b491328..d834c4d6 100644 --- a/src/cluster/command.rs +++ b/src/cluster/command.rs @@ -100,20 +100,25 @@ pub fn handle_cluster_myid(cs: &Arc>) -> Frame { /// No trailing newline — callers add `\n` for CLUSTER NODES (bulk-string concat) or /// wrap in `Frame::BulkString` directly for CLUSTER REPLICAS (array of strings). fn format_node_line(node: &ClusterNode, self_node_id: &str) -> String { + // CLUSTER NODES flags field is a comma-separated keyword list — never + // includes the master-id (that goes in its own column below). Including + // master_id here previously produced a malformed line with master_id + // appearing twice (once inside flags, once in the dedicated column), + // which broke whitespace-tokenized parsers (e.g. redis-cli's own line + // splitter). Reference: Redis CLUSTER NODES format spec. let flags_str = if node.node_id == self_node_id { - // self: prepend "myself," match &node.flags { - NodeFlags::Master => "myself,master".to_string(), - NodeFlags::Replica { master_id } => format!("myself,slave {}", master_id), - NodeFlags::Pfail => "myself,pfail".to_string(), - NodeFlags::Fail => "myself,fail".to_string(), + NodeFlags::Master => "myself,master", + NodeFlags::Replica { .. } => "myself,slave", + NodeFlags::Pfail => "myself,pfail", + NodeFlags::Fail => "myself,fail", } } else { match &node.flags { - NodeFlags::Master => "master".to_string(), - NodeFlags::Replica { master_id } => format!("slave {}", master_id), - NodeFlags::Pfail => "pfail".to_string(), - NodeFlags::Fail => "fail".to_string(), + NodeFlags::Master => "master", + NodeFlags::Replica { .. } => "slave", + NodeFlags::Pfail => "pfail", + NodeFlags::Fail => "fail", } }; @@ -922,35 +927,85 @@ mod tests { } } - /// T2.4-b: Array with one BulkString per replica, each containing ≥9 fields. + /// T2.4-b: Array with one BulkString per replica, each containing exactly + /// 9 fields and the correct replica-row layout. + /// + /// Pinned columns (whitespace-tokenized): + /// [0] node-id (40 hex chars) + /// [1] ip:port@bus + /// [2] flags (== "slave" for non-self replica) + /// [3] master-id (40 hex chars, the master being replicated) + /// [4] ping-sent + /// [5] pong-recv + /// [6] epoch + /// [7] link-state + /// [8] slot-ranges ("-" for replicas, never empty) + /// + /// This guards against the previous bug where `format_node_line` embedded + /// the master-id inside the flags field, producing "slave " and + /// shifting every subsequent column right by one. #[test] fn cluster_replicas_lists_replicas() { let (cs, master_id, replica1_id, replica2_id) = make_cs_with_replicas(); - let args = vec![Frame::BulkString(bytes::Bytes::from(master_id))]; + let args = vec![Frame::BulkString(bytes::Bytes::from(master_id.clone()))]; let result = handle_cluster_replicas(&args, &cs); match result { Frame::Array(items) => { assert_eq!(items.len(), 2, "expected 2 replicas"); - // Gather node-ids from the returned lines let mut seen_ids = std::collections::HashSet::new(); for item in &*items { let line = match item { Frame::BulkString(b) => String::from_utf8(b.to_vec()).unwrap(), other => panic!("expected BulkString element, got {:?}", other), }; - // Must not have trailing newline assert!( !line.ends_with('\n'), "line must not end with newline: {:?}", line ); let fields: Vec<&str> = line.split_whitespace().collect(); - assert!( - fields.len() >= 9, - "expected >= 9 fields, got {}: {}", + // Exactly the pinned columns above; slot-ranges renders + // as "-" for replicas (no slot ownership), so we expect + // exactly 9 whitespace-separated tokens. If + // `format_node_line` ever re-embeds master_id inside the + // flags column, this would become 10 — which is the + // signal we want to lock down. + assert_eq!( fields.len(), + 9, + "wrong column count (would silently regress if flags \ + re-embed master_id): {:?}", + line + ); + // flags column must be exactly "slave" — NOT "slave ". + assert_eq!( + fields[2], "slave", + "flags column must be 'slave', got '{}': {:?}", + fields[2], line + ); + // master-id column must be the 40-char master id of the + // node being replicated, NOT empty and NOT a copy of any + // other field. + assert_eq!( + fields[3].len(), + 40, + "master-id column must be 40 hex chars: {:?}", + line + ); + assert_eq!( + fields[3], master_id, + "master-id column must match the master being queried: {:?}", line ); + // master-id must appear exactly once on the line (the bug + // duplicated it: once in flags, once in its own column). + let occurrences = line.matches(master_id.as_str()).count(); + assert_eq!( + occurrences, 1, + "master-id must appear exactly once on the line, found \ + {} occurrences: {:?}", + occurrences, line + ); seen_ids.insert(fields[0].to_string()); } assert!(seen_ids.contains(&replica1_id), "replica1 not in result"); diff --git a/src/command/key_extra.rs b/src/command/key_extra.rs index 617450dd..b195e914 100644 --- a/src/command/key_extra.rs +++ b/src/command/key_extra.rs @@ -39,14 +39,34 @@ pub fn copy(db: &mut Database, args: &[Frame]) -> Frame { if arg.eq_ignore_ascii_case(b"REPLACE") { replace = true; } else if arg.eq_ignore_ascii_case(b"DB") { - // Cross-db COPY is intercepted by handler-level code before dispatch(). - // When this branch is reached, the DB index has already been validated - // as the same database (parse_copy_db_args returned None → same-db path). - // Consume the DB index argument and continue as a same-db copy. - i += 1; // skip the db-index token + // Cross-db COPY is intercepted by handler-level code before + // dispatch(). When this branch is reached, the DB index has + // already been validated as the same database (parse_copy_db_args + // returned None → same-db path). Even so, defensively validate + // the consumed token so a malformed value like `COPY src dst DB + // abc` is rejected rather than silently accepted in any path + // that bypasses the handler-level intercept (e.g. future + // callers, dispatch paths that don't go through parse_copy_db_args). + i += 1; if i >= args.len() { return Frame::Error(Bytes::from_static(b"ERR syntax error")); } + let db_tok = match extract_key(&args[i]) { + Some(k) => k, + None => { + return Frame::Error(Bytes::from_static( + b"ERR value is not an integer or out of range", + )); + } + }; + let parsed: Option = std::str::from_utf8(db_tok) + .ok() + .and_then(|s| s.parse::().ok()); + if parsed.is_none() { + return Frame::Error(Bytes::from_static( + b"ERR value is not an integer or out of range", + )); + } } else { return Frame::Error(Bytes::from_static(b"ERR syntax error")); }