salesforce-misc · demianbrecht · Jun 1, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/src/switchplane/cli.py b/src/switchplane/cli.py
@@ -10,6 +10,7 @@
 import click
 
 from switchplane import fmt
+from switchplane.config import load_config
 from switchplane.daemon import RuntimePaths, start_daemon, stop_daemon
 from switchplane.protocol import CliRequest, CliResponse
 from switchplane.transport import ControlPlaneClient, is_alive
@@ -46,6 +47,24 @@ def send_request(method: str, params: dict | None = None) -> CliResponse:
             request = CliRequest(method=method, params=params or {})
             return client.send(request)
 
+    def _load_tui_config():
+        """Read TUI tuning knobs from the app's merged config (user
+        overrides on top of app defaults). Falls back silently to the
+        defaults if the config can't be parsed for any reason — the
+        TUI is the wrong place to surface a config validation error,
+        and the defaults are safe."""
+        try:
+            cfg = load_config(
+                config_path=paths.config_path,
+                default_config_path=app.default_config_path,
+                config_class=app.config_class,
+            )
+            return cfg.tui
+        except Exception:
+            from switchplane.config import TuiConfig
+
+            return TuiConfig()
+
     @click.group(invoke_without_command=True)
     @click.pass_context
     def cli(ctx):
@@ -55,7 +74,15 @@ def cli(ctx):
             if sys.stdin.isatty():
                 # Interactive: enter TUI dashboard (auto-discovers running tasks)
                 ensure_daemon()
-                asyncio.run(run_tui(paths.sock_path, initial_tasks=None))
+                tui_cfg = _load_tui_config()
+                asyncio.run(
+                    run_tui(
+                        paths.sock_path,
+                        initial_tasks=None,
+                        max_buffer_lines=tui_cfg.max_buffer_lines,
+                        spinner_interval=tui_cfg.spinner_interval,
+                    )
+                )
             else:
                 click.echo(ctx.get_help())
 

diff --git a/src/switchplane/config.py b/src/switchplane/config.py
@@ -32,11 +32,46 @@ class LoggingConfig(BaseModel):
     level: str = "debug"  # log level: debug, info, warning, error
 
 
+class TuiConfig(BaseModel):
+    """TUI tuning knobs.
+
+    Defaults are conservative — they trade scrollback depth and
+    spinner liveness for bounded per-frame render cost. The
+    TUI's main thread renders the **entire** scrollback buffer on
+    every redraw (prompt_toolkit's `FormattedTextControl.create_content`
+    is per-frame O(buffer_size), not O(visible-area)), so a buffer
+    much larger than these defaults can pin the daemon's CPU on
+    long-running tasks even when the user isn't actively scrolling.
+    """
+
+    max_buffer_lines: int = 2_000
+    """Maximum lines retained per tab before oldest are trimmed.
+
+    Was 10_000; that produced sustained 99% CPU spins on the daemon
+    main thread for long-running tasks (LLM tool loops with hundreds
+    of events). The render cost grows linearly with this; cutting it
+    5× cuts baseline render cost 5× while still giving the operator a
+    deep-enough scrollback for routine debugging.
+    """
+
+    spinner_interval: float = 1.0
+    """How often the active-task spinner redraws, in seconds.
+
+    Was 0.2 (5 fps); that pinned a redraw-every-200ms cadence on
+    every active-task tab regardless of whether content changed.
+    Combined with a large `max_buffer_lines` it was the load-bearing
+    contributor to the daemon-CPU pin. 1.0 (1 fps) is plenty to
+    signal liveness without driving the renderer into a steady-state
+    loop.
+    """
+
+
 class AppConfig(BaseModel):
     """Top-level configuration."""
 
     llm: LLMConfig = LLMConfig()
     logging: LoggingConfig = LoggingConfig()
+    tui: TuiConfig = TuiConfig()
     agents: dict[str, dict[str, Any]] = {}
 
 

diff --git a/src/switchplane/tui.py b/src/switchplane/tui.py
@@ -101,8 +101,30 @@
 
 _TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
 _HEARTBEAT_INTERVAL = 60  # seconds — must be well under daemon's IDLE_TIMEOUT (300s)
+
+# Debounce window for `TUISession._refresh()`. Without this, every
+# `_append_line` invalidates the prompt_toolkit Application and the
+# renderer can't drain the event queue between redraws — observed in
+# production as a sustained 99%-CPU TUI spin in `split_lines /
+# create_content / write_to_screen` while attached to a long-running
+# task with high event rate (e.g. an LLM tool loop firing tool.invoke
+# events back-to-back). py-spy dump pinned the main thread at 99%
+# inside that render call chain across the whole 10k-line buffer.
+#
+# A 33ms window (~30 redraws/s max) is short enough to be imperceptible
+# and far faster than the few-events-per-second steady-state rate, so
+# visible latency is bounded while bursty append-storms collapse to a
+# single redraw.
+_REFRESH_DEBOUNCE_SECONDS = 0.033
 _SYSTEM_TAB_ID = "_system"
-_DEFAULT_MAX_BUFFER_LINES = 10_000
+
+# Defaults mirror the matching fields on `TuiConfig` in
+# `switchplane/config.py`. When `TUISession` or `run_tui` is called
+# without explicit values (e.g. tests, ad-hoc TUI launches), the
+# constants here apply. When the TUI is launched from `cli.py`, the
+# `AppConfig.tui` values are passed in and override them.
+_DEFAULT_MAX_BUFFER_LINES = 2_000
+_DEFAULT_SPINNER_INTERVAL = 1.0
 _LINE_PREFIX = "  "  # Left margin for event lines
 _LINE_PREFIX_WIDTH = len(_LINE_PREFIX)
 
@@ -229,9 +251,15 @@ def _render_diff(diff_text: str, width: int = 0) -> StyleAndTextTuples:
 class TUISession:
     """Manages TUI state: event buffers, background streams, and input dispatch."""
 
-    def __init__(self, sock_path: Path, max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES) -> None:
+    def __init__(
+        self,
+        sock_path: Path,
+        max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES,
+        spinner_interval: float = _DEFAULT_SPINNER_INTERVAL,
+    ) -> None:
         self.sock_path = sock_path
         self.max_buffer_lines = max_buffer_lines
+        self.spinner_interval = spinner_interval
         self.buffers: dict[str, EventBuffer] = {}
         self.task_order: list[str] = []  # ordered task IDs for tab bar (excludes _system)
         self.focused_task_id: str | None = _SYSTEM_TAB_ID
@@ -242,6 +270,10 @@ def __init__(self, sock_path: Path, max_buffer_lines: int = _DEFAULT_MAX_BUFFER_
         self._task_window: Window | None = None
         self._spinner_frame: int = 0
         self._spinner_timer: asyncio.TimerHandle | None = None
+        # Coalesces multiple `_refresh()` calls within
+        # `_REFRESH_DEBOUNCE_SECONDS` into a single `Application.invalidate()`.
+        # See the module-level comment above `_REFRESH_DEBOUNCE_SECONDS` for why.
+        self._refresh_timer: asyncio.TimerHandle | None = None
 
         # Create the system tab buffer — always present at logical slot 0
         self.buffers[_SYSTEM_TAB_ID] = EventBuffer(
@@ -418,6 +450,37 @@ def _system_messages(self, msgs: list[str]) -> None:
             self._append_line(_SYSTEM_TAB_ID, [], [(_S_SYSTEM, msg)])
 
     def _refresh(self) -> None:
+        """Schedule a debounced redraw of the prompt_toolkit Application.
+
+        Direct `Application.invalidate()` calls during a high-rate event
+        burst keep the renderer pinned at 100% CPU re-rendering the full
+        scrollback (`split_lines` is per-frame O(total-rendered-text),
+        not O(visible-area)). py-spy dump on a wedged production session
+        showed the main thread spending 100% CPU in
+        `prompt_toolkit.layout.controls.create_content → split_lines`
+        while the agent's event queue piled up unread.
+
+        Coalesce: schedule one invalidate on a `_REFRESH_DEBOUNCE_SECONDS`
+        timer; subsequent `_refresh` calls inside that window are no-ops
+        (the timer is already armed). Visible latency is bounded by the
+        debounce constant; a bursty append-storm of N lines collapses
+        from N redraws to one.
+        """
+        if self._app is None:
+            return
+        if self._refresh_timer is not None:
+            return  # Redraw already scheduled
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            # Called outside an event loop (test path / pre-startup).
+            # Fall back to immediate invalidate.
+            self._app.invalidate()
+            return
+        self._refresh_timer = loop.call_later(_REFRESH_DEBOUNCE_SECONDS, self._fire_refresh)
+
+    def _fire_refresh(self) -> None:
+        self._refresh_timer = None
         if self._app is not None:
             self._app.invalidate()
 
@@ -1268,13 +1331,23 @@ def _stop_spinner(self) -> None:
             self._spinner_timer = None
 
     def _tick_spinner(self) -> None:
-        """Advance the spinner frame and schedule the next tick."""
+        """Advance the spinner frame and schedule the next tick.
+
+        Tick interval is `self.spinner_interval` (configurable via
+        `TuiConfig.spinner_interval`). Was hardcoded to 0.2s; that
+        was the load-bearing contributor to a daemon-CPU pin observed
+        on long-running tasks — every tick calls `_refresh()` which
+        invalidates the prompt_toolkit Application, which re-renders
+        the entire scrollback (per-frame O(buffer_size)). At a deep
+        scrollback the per-frame cost can exceed the tick interval
+        and the renderer never yields back to the IPC reader.
+        """
         self._spinner_frame += 1
         self._refresh()
         if self._app is not None and self._has_active_tasks():
             loop = self._app.loop
             if loop is not None:
-                self._spinner_timer = loop.call_later(0.2, self._tick_spinner)
+                self._spinner_timer = loop.call_later(self.spinner_interval, self._tick_spinner)
             else:
                 self._spinner_timer = None
         else:
@@ -1533,6 +1606,7 @@ async def run_tui(
     sock_path: Path,
     initial_tasks: list[tuple[str, str, str, str]] | None = None,
     max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES,
+    spinner_interval: float = _DEFAULT_SPINNER_INTERVAL,
 ) -> None:
     """Run the TUI session.
 
@@ -1542,8 +1616,15 @@ async def run_tui(
             to pre-populate the session with. Pass an empty list to auto-discover
             running tasks from the daemon.
         max_buffer_lines: Maximum lines retained per tab before oldest are trimmed.
+        spinner_interval: Active-task spinner tick interval in seconds. Each tick
+            triggers a full prompt_toolkit redraw, so a low interval combined
+            with a deep scrollback can pin the daemon's CPU.
     """
-    session = TUISession(sock_path, max_buffer_lines=max_buffer_lines)
+    session = TUISession(
+        sock_path,
+        max_buffer_lines=max_buffer_lines,
+        spinner_interval=spinner_interval,
+    )
 
     if initial_tasks is None:
         # Auto-discover running tasks from the daemon
@@ -1587,6 +1668,9 @@ def _list():
             session._system_stream.cancel()
         for stream in session.streams.values():
             stream.cancel()
+        if session._refresh_timer is not None:
+            session._refresh_timer.cancel()
+            session._refresh_timer = None
         await asyncio.gather(
             *([session._heartbeat] if session._heartbeat else []),
             *([session._system_stream] if session._system_stream else []),

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -1,4 +1,11 @@
-from switchplane.config import AppConfig, LLMConfig, _deep_merge, get_agent_config, load_config
+from switchplane.config import (
+    AppConfig,
+    LLMConfig,
+    TuiConfig,
+    _deep_merge,
+    get_agent_config,
+    load_config,
+)
 
 
 class TestDeepMerge:
@@ -44,13 +51,39 @@ class TestAppConfig:
     def test_defaults(self):
         cfg = AppConfig()
         assert isinstance(cfg.llm, LLMConfig)
+        assert isinstance(cfg.tui, TuiConfig)
         assert cfg.agents == {}
 
     def test_with_agents(self):
         cfg = AppConfig(agents={"worker": {"timeout": 30}})
         assert cfg.agents["worker"]["timeout"] == 30
 
 
+class TestTuiConfig:
+    """`TuiConfig` knobs cap per-frame TUI render cost.
+    Defaults are intentionally conservative — see config.py."""
+
+    def test_defaults(self):
+        cfg = TuiConfig()
+        # 2_000 (was 10_000) — render cost grows linearly with this.
+        assert cfg.max_buffer_lines == 2_000
+        # 1.0s (was 0.2s hardcoded in tui.py) — 5× slower spinner
+        # tick cuts baseline render rate proportionally.
+        assert cfg.spinner_interval == 1.0
+
+    def test_overrides(self):
+        cfg = TuiConfig(max_buffer_lines=500, spinner_interval=2.0)
+        assert cfg.max_buffer_lines == 500
+        assert cfg.spinner_interval == 2.0
+
+    def test_loaded_via_app_config(self):
+        """The TuiConfig is reachable as `AppConfig().tui`, which is
+        how the cli.py TUI launch path reads it."""
+        cfg = AppConfig(tui={"max_buffer_lines": 1234, "spinner_interval": 0.5})
+        assert cfg.tui.max_buffer_lines == 1234
+        assert cfg.tui.spinner_interval == 0.5
+
+
 class TestLoadConfig:
     def test_no_files(self):
         cfg = load_config(None, None)

diff --git a/tests/test_tui.py b/tests/test_tui.py
@@ -97,6 +97,15 @@ def test_custom_max_buffer_lines(self, tmp_path):
         s = TUISession(tmp_path / "s.sock", max_buffer_lines=500)
         assert s.max_buffer_lines == 500
 
+    def test_default_spinner_interval(self, session):
+        # Default lives on the TuiConfig but is mirrored as
+        # `_DEFAULT_SPINNER_INTERVAL` in tui.py for ad-hoc launches.
+        assert session.spinner_interval == 1.0
+
+    def test_custom_spinner_interval(self, tmp_path):
+        s = TUISession(tmp_path / "s.sock", spinner_interval=0.25)
+        assert s.spinner_interval == 0.25
+
 
 # ---------------------------------------------------------------------------
 # add_task
@@ -413,6 +422,73 @@ def test_calls_refresh_with_app_set(self, session):
         mock_app.invalidate.assert_called()
 
 
+class TestRefreshDebounce:
+    """`_refresh()` coalesces multiple rapid invalidates into a single
+    redraw on a `_REFRESH_DEBOUNCE_SECONDS` timer.
+
+    Background: high event rate (e.g. an LLM tool loop firing dozens
+    of `tool.invoke` events per second) was triggering one
+    `Application.invalidate()` per `_append_line`, pinning the
+    prompt_toolkit renderer at 100% CPU re-rendering the whole
+    scrollback. py-spy dump on a wedged session showed the main
+    thread spending 100% CPU in `split_lines / create_content`. The
+    debounce caps effective redraw rate so the renderer can drain
+    the event queue between frames.
+    """
+
+    async def test_coalesces_burst_into_single_invalidate(self, session):
+        from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS
+
+        mock_app = MagicMock()
+        session._app = mock_app
+
+        # 10 rapid appends within one event-loop iteration. Inside an
+        # event loop, `_refresh` arms a timer instead of calling
+        # `invalidate` directly.
+        for i in range(10):
+            session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, f"line {i}")])
+
+        # Burst phase: timer pending, no invalidate yet.
+        assert mock_app.invalidate.call_count == 0
+        assert session._refresh_timer is not None
+
+        # Let the timer fire — wait slightly longer than the debounce.
+        await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
+
+        # Exactly one redraw for the whole burst.
+        assert mock_app.invalidate.call_count == 1
+        assert session._refresh_timer is None
+
+    async def test_subsequent_burst_after_fire_arms_new_timer(self, session):
+        from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS
+
+        mock_app = MagicMock()
+        session._app = mock_app
+
+        session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "first burst")])
+        await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
+        assert mock_app.invalidate.call_count == 1
+
+        session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "second burst")])
+        await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
+        # Timer re-armed and fired again — debounce doesn't permanently
+        # gate redraws, just throttles rate.
+        assert mock_app.invalidate.call_count == 2
+
+    def test_refresh_outside_event_loop_falls_back_to_direct_invalidate(self, session):
+        """If `_refresh` is called outside a running event loop (test
+        fixtures, pre-startup paths, the existing
+        `test_calls_refresh_with_app_set` shape), arm-a-timer can't
+        work — fall back to a direct `invalidate()` so legacy callers
+        and tests don't silently lose their redraw."""
+        mock_app = MagicMock()
+        session._app = mock_app
+        # Synchronous (no event loop): direct invalidate, no timer.
+        session._refresh()
+        assert mock_app.invalidate.call_count == 1
+        assert session._refresh_timer is None
+
+
 # ---------------------------------------------------------------------------
 # _focused_buf / _system_message / _append_text
 # ---------------------------------------------------------------------------