Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion src/switchplane/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import click

from switchplane import fmt
from switchplane.config import load_config
from switchplane.daemon import RuntimePaths, start_daemon, stop_daemon
from switchplane.protocol import CliRequest, CliResponse
from switchplane.transport import ControlPlaneClient, is_alive
Expand Down Expand Up @@ -46,6 +47,24 @@ def send_request(method: str, params: dict | None = None) -> CliResponse:
request = CliRequest(method=method, params=params or {})
return client.send(request)

def _load_tui_config():
"""Read TUI tuning knobs from the app's merged config (user
overrides on top of app defaults). Falls back silently to the
defaults if the config can't be parsed for any reason — the
TUI is the wrong place to surface a config validation error,
and the defaults are safe."""
try:
cfg = load_config(
config_path=paths.config_path,
default_config_path=app.default_config_path,
config_class=app.config_class,
)
return cfg.tui
except Exception:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

except Exception here will silently swallow real config errors (TOML syntax errors, type validation failures from a misnamed [tui] field, etc.) and the user gets defaults with zero feedback. The docstring acknowledges this is intentional — "the TUI is the wrong place to surface a config validation error" — but "surface" doesn't have to mean "crash the TUI". Consider at minimum a stderr warning before the swallow, or routing through structlog so the issue ends up in the daemon log. Right now a typo in user config is invisible.

from switchplane.config import TuiConfig
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: load_config is already imported at the top of this module (line 13) — TuiConfig can ride along in the same import line at module scope rather than being deferred into the except branch. There's no circular-import reason to keep it local; this just makes the failure path do extra work and slightly obscures the dependency.


return TuiConfig()

@click.group(invoke_without_command=True)
@click.pass_context
def cli(ctx):
Expand All @@ -55,7 +74,15 @@ def cli(ctx):
if sys.stdin.isatty():
# Interactive: enter TUI dashboard (auto-discovers running tasks)
ensure_daemon()
asyncio.run(run_tui(paths.sock_path, initial_tasks=None))
tui_cfg = _load_tui_config()
asyncio.run(
run_tui(
paths.sock_path,
initial_tasks=None,
max_buffer_lines=tui_cfg.max_buffer_lines,
spinner_interval=tui_cfg.spinner_interval,
)
)
else:
click.echo(ctx.get_help())

Expand Down
35 changes: 35 additions & 0 deletions src/switchplane/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,46 @@ class LoggingConfig(BaseModel):
level: str = "debug" # log level: debug, info, warning, error


class TuiConfig(BaseModel):
"""TUI tuning knobs.

Defaults are conservative — they trade scrollback depth and
spinner liveness for bounded per-frame render cost. The
TUI's main thread renders the **entire** scrollback buffer on
every redraw (prompt_toolkit's `FormattedTextControl.create_content`
is per-frame O(buffer_size), not O(visible-area)), so a buffer
much larger than these defaults can pin the daemon's CPU on
long-running tasks even when the user isn't actively scrolling.
"""

max_buffer_lines: int = 2_000
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doc drift: CLAUDE.md line 103 still says "Event buffers are capped at a configurable max_buffer_lines (default 10,000)". This PR drops the default to 2,000. Worth updating in the same PR so the agent-facing docs don't lie about runtime behavior — a future change here will read CLAUDE.md, not config.py.

"""Maximum lines retained per tab before oldest are trimmed.

Was 10_000; that produced sustained 99% CPU spins on the daemon
main thread for long-running tasks (LLM tool loops with hundreds
of events). The render cost grows linearly with this; halving it
halves baseline render cost while still giving the operator a
deep-enough scrollback for routine debugging.
"""

spinner_interval: float = 1.0
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth calling out in the PR body / commit msg: this is a 5× drop in spinner frame rate (5 Hz → 1 Hz), not just a debounce. A 1 Hz spinner is going to feel sluggish to operators watching for liveness — at 1 fps, a moving spinner reads more like "is it stuck?" than "it's alive." The render-cost rationale in the docstring is sound, but the human-visible effect is also a UX regression on its own. Two options worth considering: (1) keep the spinner at a higher rate (e.g. 0.5s) and rely on the _refresh debounce + smaller buffer to bound cost — those changes alone might be sufficient; (2) drop to 1.0s but call it out explicitly in user-facing release notes so operators know to override via config.

"""How often the active-task spinner redraws, in seconds.

Was 0.2 (5 fps); that pinned a redraw-every-200ms cadence on
every active-task tab regardless of whether content changed.
Combined with a large `max_buffer_lines` it was the load-bearing
contributor to the daemon-CPU pin. 1.0 (1 fps) is plenty to
signal liveness without driving the renderer into a steady-state
loop.
"""


class AppConfig(BaseModel):
"""Top-level configuration."""

llm: LLMConfig = LLMConfig()
logging: LoggingConfig = LoggingConfig()
tui: TuiConfig = TuiConfig()
agents: dict[str, dict[str, Any]] = {}


Expand Down
94 changes: 89 additions & 5 deletions src/switchplane/tui.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,30 @@

_TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
_HEARTBEAT_INTERVAL = 60 # seconds — must be well under daemon's IDLE_TIMEOUT (300s)

# Debounce window for `TUISession._refresh()`. Without this, every
# `_append_line` invalidates the prompt_toolkit Application and the
# renderer can't drain the event queue between redraws — observed in
# production as a sustained 99%-CPU TUI spin in `split_lines /
# create_content / write_to_screen` while attached to a long-running
# task with high event rate (e.g. an LLM tool loop firing tool.invoke
# events back-to-back). py-spy dump pinned the main thread at 99%
# inside that render call chain across the whole 10k-line buffer.
#
# 33ms (~30 fps) is well below human perception threshold and well
# above the few-events-per-second steady-state rate, so visible
# latency is bounded while bursty append-storms collapse to a single
# redraw.
_REFRESH_DEBOUNCE_SECONDS = 0.033
_SYSTEM_TAB_ID = "_system"
_DEFAULT_MAX_BUFFER_LINES = 10_000

# Defaults track the matching fields on `TuiConfig` in
# `switchplane/config.py`. When these are imported directly (e.g.
# tests, ad-hoc TUI launches), the constants here apply. When the
# TUI is started via `run_tui` from a configured Application, the
# AppConfig values override them.
_DEFAULT_MAX_BUFFER_LINES = 2_000
_DEFAULT_SPINNER_INTERVAL = 1.0
_LINE_PREFIX = " " # Left margin for event lines
_LINE_PREFIX_WIDTH = len(_LINE_PREFIX)

Expand Down Expand Up @@ -229,9 +251,15 @@ def _render_diff(diff_text: str, width: int = 0) -> StyleAndTextTuples:
class TUISession:
"""Manages TUI state: event buffers, background streams, and input dispatch."""

def __init__(self, sock_path: Path, max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES) -> None:
def __init__(
self,
sock_path: Path,
max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES,
spinner_interval: float = _DEFAULT_SPINNER_INTERVAL,
) -> None:
self.sock_path = sock_path
self.max_buffer_lines = max_buffer_lines
self.spinner_interval = spinner_interval
self.buffers: dict[str, EventBuffer] = {}
self.task_order: list[str] = [] # ordered task IDs for tab bar (excludes _system)
self.focused_task_id: str | None = _SYSTEM_TAB_ID
Expand All @@ -242,6 +270,10 @@ def __init__(self, sock_path: Path, max_buffer_lines: int = _DEFAULT_MAX_BUFFER_
self._task_window: Window | None = None
self._spinner_frame: int = 0
self._spinner_timer: asyncio.TimerHandle | None = None
# Coalesces multiple `_refresh()` calls within
# `_REFRESH_DEBOUNCE_SECONDS` into a single `Application.invalidate()`.
# See module docstring on `_REFRESH_DEBOUNCE_SECONDS` for why.
self._refresh_timer: asyncio.TimerHandle | None = None

# Create the system tab buffer — always present at logical slot 0
self.buffers[_SYSTEM_TAB_ID] = EventBuffer(
Expand Down Expand Up @@ -418,6 +450,37 @@ def _system_messages(self, msgs: list[str]) -> None:
self._append_line(_SYSTEM_TAB_ID, [], [(_S_SYSTEM, msg)])

def _refresh(self) -> None:
"""Schedule a debounced redraw of the prompt_toolkit Application.

Direct `Application.invalidate()` calls during a high-rate event
burst keep the renderer pinned at 100% CPU re-rendering the full
scrollback (`split_lines` is per-frame O(total-rendered-text),
not O(visible-area)). py-spy dump on a wedged production session
showed the main thread spending 100% CPU in
`prompt_toolkit.layout.controls.create_content → split_lines`
while the agent's event queue piled up unread.

Coalesce: schedule one invalidate on a `_REFRESH_DEBOUNCE_SECONDS`
timer; subsequent `_refresh` calls inside that window are no-ops
(the timer is already armed). Visible latency is bounded by the
debounce constant; throughput on bursty append-storms collapses
from N redraws to one.
"""
if self._app is None:
return
if self._refresh_timer is not None:
return # Redraw already scheduled
try:
loop = asyncio.get_running_loop()
except RuntimeError:
# Called outside an event loop (test path / pre-startup).
# Fall back to immediate invalidate.
self._app.invalidate()
return
self._refresh_timer = loop.call_later(_REFRESH_DEBOUNCE_SECONDS, self._fire_refresh)
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistency with the existing spinner pattern. Two-line down (_tick_spinner at line ~1348) the codebase already has the same arm-a-timer shape, and there it acquires the loop via self._app.loop. Here _refresh uses asyncio.get_running_loop(). They will normally agree, but if anything ever calls _refresh from a coroutine running on a non-app loop (e.g. a future test that drives a session under a separate event loop, or a thread-bridged context), the timer fires on a loop the app's invalidate doesn't observe. Recommend matching the spinner pattern: loop = self._app.loop; if loop is None: self._app.invalidate(); return — same shape, no try/except RuntimeError, and consistent with the established convention in this file.


def _fire_refresh(self) -> None:
self._refresh_timer = None
if self._app is not None:
self._app.invalidate()

Expand Down Expand Up @@ -1268,13 +1331,23 @@ def _stop_spinner(self) -> None:
self._spinner_timer = None

def _tick_spinner(self) -> None:
"""Advance the spinner frame and schedule the next tick."""
"""Advance the spinner frame and schedule the next tick.

Tick interval is `self.spinner_interval` (configurable via
`TuiConfig.spinner_interval`). Was hardcoded to 0.2s; that
was the load-bearing contributor to a daemon-CPU pin observed
on long-running tasks — every tick calls `_refresh()` which
invalidates the prompt_toolkit Application, which re-renders
the entire scrollback (per-frame O(buffer_size)). At a deep
scrollback the per-frame cost can exceed the tick interval
and the renderer never yields back to the IPC reader.
"""
self._spinner_frame += 1
self._refresh()
if self._app is not None and self._has_active_tasks():
loop = self._app.loop
if loop is not None:
self._spinner_timer = loop.call_later(0.2, self._tick_spinner)
self._spinner_timer = loop.call_later(self.spinner_interval, self._tick_spinner)
else:
self._spinner_timer = None
else:
Expand Down Expand Up @@ -1533,6 +1606,7 @@ async def run_tui(
sock_path: Path,
initial_tasks: list[tuple[str, str, str, str]] | None = None,
max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES,
spinner_interval: float = _DEFAULT_SPINNER_INTERVAL,
) -> None:
"""Run the TUI session.

Expand All @@ -1542,8 +1616,15 @@ async def run_tui(
to pre-populate the session with. Pass an empty list to auto-discover
running tasks from the daemon.
max_buffer_lines: Maximum lines retained per tab before oldest are trimmed.
spinner_interval: Active-task spinner tick interval in seconds. Each tick
triggers a full prompt_toolkit redraw, so a low interval combined
with a deep scrollback can pin the daemon's CPU.
"""
session = TUISession(sock_path, max_buffer_lines=max_buffer_lines)
session = TUISession(
sock_path,
max_buffer_lines=max_buffer_lines,
spinner_interval=spinner_interval,
)

if initial_tasks is None:
# Auto-discover running tasks from the daemon
Expand Down Expand Up @@ -1587,6 +1668,9 @@ def _list():
session._system_stream.cancel()
for stream in session.streams.values():
stream.cancel()
if session._refresh_timer is not None:
session._refresh_timer.cancel()
session._refresh_timer = None
await asyncio.gather(
*([session._heartbeat] if session._heartbeat else []),
*([session._system_stream] if session._system_stream else []),
Expand Down
35 changes: 34 additions & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
from switchplane.config import AppConfig, LLMConfig, _deep_merge, get_agent_config, load_config
from switchplane.config import (
AppConfig,
LLMConfig,
TuiConfig,
_deep_merge,
get_agent_config,
load_config,
)


class TestDeepMerge:
Expand Down Expand Up @@ -44,13 +51,39 @@ class TestAppConfig:
def test_defaults(self):
cfg = AppConfig()
assert isinstance(cfg.llm, LLMConfig)
assert isinstance(cfg.tui, TuiConfig)
assert cfg.agents == {}

def test_with_agents(self):
cfg = AppConfig(agents={"worker": {"timeout": 30}})
assert cfg.agents["worker"]["timeout"] == 30


class TestTuiConfig:
"""`TuiConfig` knobs cap per-frame TUI render cost.
Defaults are intentionally conservative — see config.py."""

def test_defaults(self):
cfg = TuiConfig()
# 2_000 (was 10_000) — render cost grows linearly with this.
assert cfg.max_buffer_lines == 2_000
# 1.0s (was 0.2s hardcoded in tui.py) — 5× slower spinner
# tick cuts baseline render rate proportionally.
assert cfg.spinner_interval == 1.0

def test_overrides(self):
cfg = TuiConfig(max_buffer_lines=500, spinner_interval=2.0)
assert cfg.max_buffer_lines == 500
assert cfg.spinner_interval == 2.0

def test_loaded_via_app_config(self):
"""The TuiConfig is reachable as `AppConfig().tui`, which is
how the cli.py TUI launch path reads it."""
cfg = AppConfig(tui={"max_buffer_lines": 1234, "spinner_interval": 0.5})
assert cfg.tui.max_buffer_lines == 1234
assert cfg.tui.spinner_interval == 0.5


class TestLoadConfig:
def test_no_files(self):
cfg = load_config(None, None)
Expand Down
76 changes: 76 additions & 0 deletions tests/test_tui.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ def test_custom_max_buffer_lines(self, tmp_path):
s = TUISession(tmp_path / "s.sock", max_buffer_lines=500)
assert s.max_buffer_lines == 500

def test_default_spinner_interval(self, session):
# Default lives on the TuiConfig but is mirrored as
# `_DEFAULT_SPINNER_INTERVAL` in tui.py for ad-hoc launches.
assert session.spinner_interval == 1.0

def test_custom_spinner_interval(self, tmp_path):
s = TUISession(tmp_path / "s.sock", spinner_interval=0.25)
assert s.spinner_interval == 0.25


# ---------------------------------------------------------------------------
# add_task
Expand Down Expand Up @@ -413,6 +422,73 @@ def test_calls_refresh_with_app_set(self, session):
mock_app.invalidate.assert_called()


class TestRefreshDebounce:
"""`_refresh()` coalesces multiple rapid invalidates into a single
redraw on a `_REFRESH_DEBOUNCE_SECONDS` timer.

Background: high event rate (e.g. an LLM tool loop firing dozens
of `tool.invoke` events per second) was triggering one
`Application.invalidate()` per `_append_line`, pinning the
prompt_toolkit renderer at 100% CPU re-rendering the whole
scrollback. py-spy dump on a wedged session showed the main
thread spending 100% CPU in `split_lines / create_content`. The
debounce caps effective redraw rate so the renderer can drain
the event queue between frames.
"""

async def test_coalesces_burst_into_single_invalidate(self, session):
from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS

mock_app = MagicMock()
session._app = mock_app

# 10 rapid appends within one event-loop iteration. Inside an
# event loop, `_refresh` arms a timer instead of calling
# `invalidate` directly.
for i in range(10):
session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, f"line {i}")])

# Burst phase: timer pending, no invalidate yet.
assert mock_app.invalidate.call_count == 0
assert session._refresh_timer is not None

# Let the timer fire — wait slightly longer than the debounce.
await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)

# Exactly one redraw for the whole burst.
assert mock_app.invalidate.call_count == 1
assert session._refresh_timer is None

async def test_subsequent_burst_after_fire_arms_new_timer(self, session):
from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS

mock_app = MagicMock()
session._app = mock_app

session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "first burst")])
await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
assert mock_app.invalidate.call_count == 1

session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "second burst")])
await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
# Timer re-armed and fired again — debounce doesn't permanently
# gate redraws, just throttles rate.
assert mock_app.invalidate.call_count == 2

def test_refresh_outside_event_loop_falls_back_to_direct_invalidate(self, session):
"""If `_refresh` is called outside a running event loop (test
fixtures, pre-startup paths, the existing
`test_calls_refresh_with_app_set` shape), arm-a-timer can't
work — fall back to a direct `invalidate()` so legacy callers
and tests don't silently lose their redraw."""
mock_app = MagicMock()
session._app = mock_app
# Synchronous (no event loop): direct invalidate, no timer.
session._refresh()
assert mock_app.invalidate.call_count == 1
assert session._refresh_timer is None


# ---------------------------------------------------------------------------
# _focused_buf / _system_message / _append_text
# ---------------------------------------------------------------------------
Expand Down
Loading