Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ the PR description linked in each section.
### Added

- **`wire unclaim` + relay `DELETE /v1/handle/claim/:nick` — release a claimed handle** (#247 finding 1): a handle claim was FCFS-**permanent** (no expiry, no unclaim), so an abandoned/rotated nick squatted the directory forever. You can now release your persona: `wire unclaim` (owner-gated by your slot token) frees the nick so it stops resolving via `.well-known/wire/agent` and can be re-claimed. (Operator-TTL auto-expiry — the other half of #247.1 — needs persisted slot-activity to avoid evicting quiet-but-live agents on relay restart, and stays tracked.)
- **`wire status --wait-daemon-running [--timeout <secs>]`** (#284.2): a bounded, in-process replacement for fragile external shell loops like `until wire status … | grep -q 'daemon_running":true'; do sleep 3; done`. That external pattern, combined with a never-healthy daemon, piled up 254 stale `wire.exe` processes on Willard's box (each `wire status` invocation hanging on a wedged probe, the loop spawning a fresh one every 3s). The new flag polls the daemon-liveness snapshot every 200ms in-process, exits 0 with the full status when `daemon_running:true`, or bails after `--timeout` (default 30s) with the last-seen `pidfile_pid` / `pgrep_pids` on stderr (omitted under `--json`) so the operator knows what wasn't healthy. Replaces the loop with one bounded subcommand call; no spawn pressure, no orphan accumulation. The pure-logic `wait_step` decision is split out and unit-tested.

### Fixed

Expand Down
20 changes: 19 additions & 1 deletion src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,17 @@ pub enum Command {
peer: Option<String>,
#[arg(long)]
json: bool,
/// Block until `daemon_running:true`, then exit 0. Polls
/// internally every 200ms up to `--timeout` seconds. Exit 1
/// on timeout (with the last seen status to stderr). Replaces
/// fragile external `until wire status … | grep daemon_running:true`
/// shell loops that piled up hundreds of `wire status`
/// invocations on a never-healthy host (#284.2).
#[arg(long)]
wait_daemon_running: bool,
/// Bound for `--wait-daemon-running`. Default 30s.
#[arg(long, default_value_t = 30)]
timeout: u64,
},
/// Publish or inspect auto-responder health for this slot.
Responder {
Expand Down Expand Up @@ -1733,9 +1744,16 @@ pub fn run() -> Result<()> {
offline,
json,
} => cmd_init(relay.as_deref(), offline, json),
Command::Status { peer, json } => {
Command::Status {
peer,
json,
wait_daemon_running,
timeout,
} => {
if let Some(peer) = peer {
status::cmd_status_peer(&peer, json)
} else if wait_daemon_running {
status::cmd_status_wait_daemon_running(json, timeout)
} else {
status::cmd_status(json)
}
Expand Down
108 changes: 108 additions & 0 deletions src/cli/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,80 @@ use crate::config;

// ---------- status ----------

/// Outcome of a single poll iteration in
/// `cmd_status_wait_daemon_running`.
///
/// Modelled as a plain value, separate from the loop that sleeps and
/// prints, so the polling policy can be unit-tested without spinning
/// a real `wire daemon`.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum WaitDecision {
    /// The daemon was reported alive — leave the poll loop and print
    /// the full status.
    Healthy,
    /// The daemon is still down and the deadline has been reached —
    /// give up with a timeout error.
    TimedOut,
    /// The daemon is still down but time remains — sleep, then poll
    /// again.
    Continue,
}

/// Decide what the poll loop should do next.
///
/// - `pidfile_alive`: liveness flag taken from the latest snapshot.
/// - `now`: the instant at which that snapshot is being evaluated.
/// - `deadline`: the instant at or after which waiting stops.
///
/// Liveness is considered first, so an alive daemon yields
/// `Healthy` even when `now` is already at or past `deadline`.
pub(crate) fn wait_step(
    pidfile_alive: bool,
    now: std::time::Instant,
    deadline: std::time::Instant,
) -> WaitDecision {
    let out_of_time = now >= deadline;
    match (pidfile_alive, out_of_time) {
        (true, _) => WaitDecision::Healthy,
        (false, true) => WaitDecision::TimedOut,
        (false, false) => WaitDecision::Continue,
    }
}

/// `wire status --wait-daemon-running [--timeout <secs>]`: poll the
/// local daemon-liveness snapshot until its `pidfile_alive` flag is
/// set, then hand off to `cmd_status`.
///
/// Behavior:
/// - Polls in-process every 200ms; a sleep is shortened when less
///   than 200ms remains so the wait does not run past the deadline.
/// - Bounded by `timeout_secs` (default 30s at the clap layer).
/// - On healthy: returns whatever `cmd_status(as_json)` returns.
/// - On timeout: returns a non-zero `anyhow::Error` so shell wrappers
///   can branch. In non-JSON mode the last-seen `pidfile_pid` /
///   `pgrep_pids` are written to stderr first; with `as_json` set
///   that diagnostic is skipped and only the error is returned.
///
/// # Errors
/// - `timeout_secs` is too large to be added to the current
///   `Instant` (a plain `Instant + Duration` would panic here).
/// - The daemon was not reported alive by the deadline.
/// - Any error produced by `cmd_status` on the healthy path.
///
/// #284.2: replaces fragile external loops like
/// `until wire status … | grep daemon_running:true; do sleep 3; done`,
/// which on a never-healthy host piled up hundreds of `wire status`
/// invocations every few seconds (Willard's 254-`wire.exe` pile-up
/// repro). Each poll cycle here is in-process, so there's no spawn
/// pressure, and timeout guarantees the wrapper exits cleanly.
pub(super) fn cmd_status_wait_daemon_running(as_json: bool, timeout_secs: u64) -> Result<()> {
    const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(200);

    // `Instant + Duration` panics when the sum is not representable,
    // which a huge user-supplied `--timeout` can trigger. Turn that
    // into an ordinary error instead.
    let timeout = std::time::Duration::from_secs(timeout_secs);
    let deadline = match std::time::Instant::now().checked_add(timeout) {
        Some(deadline) => deadline,
        None => bail!("--timeout {timeout_secs}s is too large to schedule a deadline"),
    };

    loop {
        let snap = crate::ensure_up::daemon_liveness();
        let now = std::time::Instant::now();
        match wait_step(snap.pidfile_alive, now, deadline) {
            WaitDecision::Healthy => return cmd_status(as_json),
            WaitDecision::TimedOut => {
                if !as_json {
                    eprintln!(
                        "wire status: daemon not running after {timeout_secs}s. \
                         Last seen: pidfile_pid={:?}, pgrep_pids={:?}. \
                         Run `wire up` to start the daemon.",
                        snap.pidfile_pid, snap.pgrep_pids
                    );
                }
                bail!("daemon_running stayed false through {timeout_secs}s wait window");
            }
            WaitDecision::Continue => {
                // Never sleep beyond the deadline: cap the nap at the
                // time still remaining.
                let remaining = deadline.saturating_duration_since(now);
                std::thread::sleep(POLL_INTERVAL.min(remaining));
            }
        }
    }
}

pub(super) fn cmd_status(as_json: bool) -> Result<()> {
let initialized = config::is_initialized()?;

Expand Down Expand Up @@ -2132,4 +2206,38 @@ mod doctor_tests {
);
});
}

// ---------- #284.2: wait_step pure-logic policy ----------

#[test]
fn wait_step_returns_healthy_when_pidfile_alive() {
    // Daemon alive with 30s still on the clock: the loop should stop
    // and report success.
    let start = std::time::Instant::now();
    let thirty_secs_out = start + std::time::Duration::from_secs(30);
    let decision = wait_step(true, start, thirty_secs_out);
    assert_eq!(decision, WaitDecision::Healthy);
}

#[test]
fn wait_step_returns_timed_out_when_deadline_passed_and_dead() {
    // Deadline is 1s in the past relative to `now`. Built by moving
    // `now` forward rather than the deadline backward:
    // `Instant - Duration` panics when the result is not
    // representable, whereas adding a second to a fresh `Instant`
    // stays in range.
    let deadline = std::time::Instant::now();
    let now = deadline + std::time::Duration::from_secs(1);
    assert_eq!(wait_step(false, now, deadline), WaitDecision::TimedOut);
}

#[test]
fn wait_step_returns_continue_when_deadline_future_and_dead() {
    // Daemon down with 5s left before the deadline: keep polling.
    let start = std::time::Instant::now();
    let five_secs_out = start + std::time::Duration::from_secs(5);
    let decision = wait_step(false, start, five_secs_out);
    assert_eq!(decision, WaitDecision::Continue);
}

#[test]
fn wait_step_healthy_wins_over_timeout() {
    // If both conditions hold (deadline elapsed AND daemon now
    // alive), the success path takes precedence — we don't punish
    // an operator who just barely missed the window.
    //
    // The elapsed deadline is produced by advancing `now` instead of
    // subtracting from it, because `Instant - Duration` panics when
    // the result is not representable.
    let deadline = std::time::Instant::now();
    let now = deadline + std::time::Duration::from_secs(1);
    assert_eq!(wait_step(true, now, deadline), WaitDecision::Healthy);
}
}