From 96e703bf067b793751a8a0f5fcb6d4ddab55253d Mon Sep 17 00:00:00 2001
From: Demian Brecht <demianbrecht@gmail.com>
Date: Mon, 25 May 2026 21:10:12 -0700
Subject: [PATCH 1/3] fix: debounce TUI refresh to cap renderer redraw rate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A long-running calder task wedged at 99% CPU on the daemon process.
py-spy dump pinned the main thread spinning in
`prompt_toolkit.layout.controls.create_content → split_lines →
write_to_screen → render → _redraw` while the agent process
(properly blocked on its IPC channel) was at 0% CPU and 30s of
total CPU time vs. 1h 51m elapsed. The agent's events were piling
up unread because the TUI couldn't drain them between redraws.

Mechanism: every `_append_line` calls `self._refresh()` which
unconditionally calls `Application.invalidate()`. When events
arrive in bursts (an LLM tool loop firing dozens of `tool.invoke`
events back-to-back, each producing 50+ rendered lines via
markdown / diff splitting), the renderer can't keep up — each
redraw runs `split_lines` over the entire 10k-line scrollback,
which is per-frame O(total-rendered-text), not O(visible-area).
The redraw schedules itself (via the next invalidate) before it
finishes processing the prior event, and we converge on a render
loop that never yields back to the IPC reader.

Fix: coalesce `_refresh()` calls within a 33ms (~30 fps) window
into a single `Application.invalidate()`. The first call within a
quiet period arms a `loop.call_later` timer; subsequent calls
inside the window are no-ops; the timer fires once and clears.
Visible latency is bounded (33ms is well below human perception)
while bursty append-storms collapse from N redraws to one,
giving the IPC reader CPU to actually drain events.

Outside an event loop (test fixtures, pre-startup, the existing
`test_calls_refresh_with_app_set` shape) the behavior falls back
to direct `invalidate()` so legacy callers don't silently lose
their redraw.

Three new TUI tests:
- `test_coalesces_burst_into_single_invalidate` — 10 rapid
  appends + sleep past the debounce → exactly one `invalidate`
  call, timer cleared.
- `test_subsequent_burst_after_fire_arms_new_timer` — debounce
  doesn't permanently gate; a second burst after the first fires
  re-arms.
- `test_refresh_outside_event_loop_falls_back_to_direct_invalidate`
  — pins the legacy synchronous-fallback path.

662 unit tests pass (+3). ruff lint + format clean across whole repo.
---
 src/switchplane/tui.py | 53 +++++++++++++++++++++++++++++++++
 tests/test_tui.py      | 67 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/src/switchplane/tui.py b/src/switchplane/tui.py
index aa9428f..99baf99 100644
--- a/src/switchplane/tui.py
+++ b/src/switchplane/tui.py
@@ -101,6 +101,21 @@
 
 _TERMINAL_STATUSES = {"completed", "failed", "cancelled"}
 _HEARTBEAT_INTERVAL = 60  # seconds — must be well under daemon's IDLE_TIMEOUT (300s)
+
+# Debounce window for `TUISession._refresh()`. Without this, every
+# `_append_line` invalidates the prompt_toolkit Application and the
+# renderer can't drain the event queue between redraws — observed in
+# production as a sustained 99%-CPU TUI spin in `split_lines /
+# create_content / write_to_screen` while attached to a long-running
+# task with high event rate (e.g. an LLM tool loop firing tool.invoke
+# events back-to-back). py-spy dump pinned the main thread at 99%
+# inside that render call chain across the whole 10k-line buffer.
+#
+# 33ms (~30 fps) is well below human perception threshold and well
+# above the few-events-per-second steady-state rate, so visible
+# latency is bounded while bursty append-storms collapse to a single
+# redraw.
+_REFRESH_DEBOUNCE_SECONDS = 0.033
 _SYSTEM_TAB_ID = "_system"
 _DEFAULT_MAX_BUFFER_LINES = 10_000
 _LINE_PREFIX = "  "  # Left margin for event lines
@@ -242,6 +257,10 @@ def __init__(self, sock_path: Path, max_buffer_lines: int = _DEFAULT_MAX_BUFFER_
         self._task_window: Window | None = None
         self._spinner_frame: int = 0
         self._spinner_timer: asyncio.TimerHandle | None = None
+        # Coalesces multiple `_refresh()` calls within
+        # `_REFRESH_DEBOUNCE_SECONDS` into a single `Application.invalidate()`.
+        # See module docstring on `_REFRESH_DEBOUNCE_SECONDS` for why.
+        self._refresh_timer: asyncio.TimerHandle | None = None
 
         # Create the system tab buffer — always present at logical slot 0
         self.buffers[_SYSTEM_TAB_ID] = EventBuffer(
@@ -418,6 +437,37 @@ def _system_messages(self, msgs: list[str]) -> None:
             self._append_line(_SYSTEM_TAB_ID, [], [(_S_SYSTEM, msg)])
 
     def _refresh(self) -> None:
+        """Schedule a debounced redraw of the prompt_toolkit Application.
+
+        Direct `Application.invalidate()` calls during a high-rate event
+        burst keep the renderer pinned at 100% CPU re-rendering the full
+        scrollback (`split_lines` is per-frame O(total-rendered-text),
+        not O(visible-area)). py-spy dump on a wedged production session
+        showed the main thread spending 100% CPU in
+        `prompt_toolkit.layout.controls.create_content → split_lines`
+        while the agent's event queue piled up unread.
+
+        Coalesce: schedule one invalidate on a `_REFRESH_DEBOUNCE_SECONDS`
+        timer; subsequent `_refresh` calls inside that window are no-ops
+        (the timer is already armed). Visible latency is bounded by the
+        debounce constant; throughput on bursty append-storms collapses
+        from N redraws to one.
+        """
+        if self._app is None:
+            return
+        if self._refresh_timer is not None:
+            return  # Redraw already scheduled
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            # Called outside an event loop (test path / pre-startup).
+            # Fall back to immediate invalidate.
+            self._app.invalidate()
+            return
+        self._refresh_timer = loop.call_later(_REFRESH_DEBOUNCE_SECONDS, self._fire_refresh)
+
+    def _fire_refresh(self) -> None:
+        self._refresh_timer = None
         if self._app is not None:
             self._app.invalidate()
 
@@ -1587,6 +1637,9 @@ def _list():
             session._system_stream.cancel()
         for stream in session.streams.values():
             stream.cancel()
+        if session._refresh_timer is not None:
+            session._refresh_timer.cancel()
+            session._refresh_timer = None
         await asyncio.gather(
             *([session._heartbeat] if session._heartbeat else []),
             *([session._system_stream] if session._system_stream else []),
diff --git a/tests/test_tui.py b/tests/test_tui.py
index 75243c0..d57ee23 100644
--- a/tests/test_tui.py
+++ b/tests/test_tui.py
@@ -413,6 +413,73 @@ def test_calls_refresh_with_app_set(self, session):
         mock_app.invalidate.assert_called()
 
 
+class TestRefreshDebounce:
+    """`_refresh()` coalesces multiple rapid invalidates into a single
+    redraw on a `_REFRESH_DEBOUNCE_SECONDS` timer.
+
+    Background: high event rate (e.g. an LLM tool loop firing dozens
+    of `tool.invoke` events per second) was triggering one
+    `Application.invalidate()` per `_append_line`, pinning the
+    prompt_toolkit renderer at 100% CPU re-rendering the whole
+    scrollback. py-spy dump on a wedged session showed the main
+    thread spending 100% CPU in `split_lines / create_content`. The
+    debounce caps effective redraw rate so the renderer can drain
+    the event queue between frames.
+    """
+
+    async def test_coalesces_burst_into_single_invalidate(self, session):
+        from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS
+
+        mock_app = MagicMock()
+        session._app = mock_app
+
+        # 10 rapid appends within one event-loop iteration. Inside an
+        # event loop, `_refresh` arms a timer instead of calling
+        # `invalidate` directly.
+        for i in range(10):
+            session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, f"line {i}")])
+
+        # Burst phase: timer pending, no invalidate yet.
+        assert mock_app.invalidate.call_count == 0
+        assert session._refresh_timer is not None
+
+        # Let the timer fire — wait slightly longer than the debounce.
+        await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
+
+        # Exactly one redraw for the whole burst.
+        assert mock_app.invalidate.call_count == 1
+        assert session._refresh_timer is None
+
+    async def test_subsequent_burst_after_fire_arms_new_timer(self, session):
+        from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS
+
+        mock_app = MagicMock()
+        session._app = mock_app
+
+        session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "first burst")])
+        await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
+        assert mock_app.invalidate.call_count == 1
+
+        session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "second burst")])
+        await asyncio.sleep(_REFRESH_DEBOUNCE_SECONDS + 0.01)
+        # Timer re-armed and fired again — debounce doesn't permanently
+        # gate redraws, just throttles rate.
+        assert mock_app.invalidate.call_count == 2
+
+    def test_refresh_outside_event_loop_falls_back_to_direct_invalidate(self, session):
+        """If `_refresh` is called outside a running event loop (test
+        fixtures, pre-startup paths, the existing
+        `test_calls_refresh_with_app_set` shape), arm-a-timer can't
+        work — fall back to a direct `invalidate()` so legacy callers
+        and tests don't silently lose their redraw."""
+        mock_app = MagicMock()
+        session._app = mock_app
+        # Synchronous (no event loop): direct invalidate, no timer.
+        session._refresh()
+        assert mock_app.invalidate.call_count == 1
+        assert session._refresh_timer is None
+
+
 # ---------------------------------------------------------------------------
 # _focused_buf / _system_message / _append_text
 # ---------------------------------------------------------------------------

From f908907ff2581c7c82753c119ea7b83e0d202bd7 Mon Sep 17 00:00:00 2001
From: Demian Brecht <demianbrecht@gmail.com>
Date: Mon, 25 May 2026 21:20:27 -0700
Subject: [PATCH 2/3] fix: smaller default scrollback + slower spinner; expose
 via TuiConfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-on to the `_refresh()` debounce in this PR. The debounce
removes the *additive* invalidate storm during event bursts, but
doesn't help the steady-state pin: an active task drives a 5 fps
spinner that calls `_refresh()` every 200ms regardless of whether
content changed, and prompt_toolkit's `FormattedTextControl.create_content`
walks the entire scrollback on every redraw. With a 10k-line
buffer the per-frame cost can exceed the spinner interval, and the
renderer never yields back to the IPC reader.

Cuts both axes:

- **`max_buffer_lines` default 10_000 → 2_000.** Render cost is
  linear in buffer size; a 5× cut makes the steady-state render
  cheap enough that the spinner can never starve the reader.
- **Spinner tick 0.2s → 1.0s.** Was hardcoded; promoted to a
  per-session field. Humans don't perceive the difference between
  a 5 fps and 1 fps spinner ("task is running"), but the renderer
  pressure drops by 5×.

Both knobs land on a new `TuiConfig` model in `switchplane.config`
so applications (e.g. calder) can override them via their merged
TOML config without code changes:

```toml
[tui]
max_buffer_lines = 5000
spinner_interval = 0.5
```

The CLI's interactive-launch path now reads the merged config and
passes both values into `run_tui` → `TUISession`. Falls back to the
defaults if config parsing fails — a config validation error on
TUI launch is the wrong place to surface.

5 new tests:
- `TestTuiConfig` (3): defaults, overrides, reach via AppConfig.
- `TestTUISessionInit` gains `test_default_spinner_interval` and
  `test_custom_spinner_interval`.

667 tests pass (+5). ruff lint + format clean across the whole repo.
---
 src/switchplane/cli.py    | 29 ++++++++++++++++++++++++++-
 src/switchplane/config.py | 35 +++++++++++++++++++++++++++++++++
 src/switchplane/tui.py    | 41 ++++++++++++++++++++++++++++++++++-----
 tests/test_config.py      | 35 ++++++++++++++++++++++++++++++++-
 tests/test_tui.py         |  9 +++++++++
 5 files changed, 142 insertions(+), 7 deletions(-)

diff --git a/src/switchplane/cli.py b/src/switchplane/cli.py
index 5ca90e0..8380484 100644
--- a/src/switchplane/cli.py
+++ b/src/switchplane/cli.py
@@ -10,6 +10,7 @@
 import click
 
 from switchplane import fmt
+from switchplane.config import load_config
 from switchplane.daemon import RuntimePaths, start_daemon, stop_daemon
 from switchplane.protocol import CliRequest, CliResponse
 from switchplane.transport import ControlPlaneClient, is_alive
@@ -46,6 +47,24 @@ def send_request(method: str, params: dict | None = None) -> CliResponse:
             request = CliRequest(method=method, params=params or {})
             return client.send(request)
 
+    def _load_tui_config():
+        """Read TUI tuning knobs from the app's merged config (user
+        overrides on top of app defaults). Falls back silently to the
+        defaults if the config can't be parsed for any reason — the
+        TUI is the wrong place to surface a config validation error,
+        and the defaults are safe."""
+        try:
+            cfg = load_config(
+                config_path=paths.config_path,
+                default_config_path=app.default_config_path,
+                config_class=app.config_class,
+            )
+            return cfg.tui
+        except Exception:
+            from switchplane.config import TuiConfig
+
+            return TuiConfig()
+
     @click.group(invoke_without_command=True)
     @click.pass_context
     def cli(ctx):
@@ -55,7 +74,15 @@ def cli(ctx):
             if sys.stdin.isatty():
                 # Interactive: enter TUI dashboard (auto-discovers running tasks)
                 ensure_daemon()
-                asyncio.run(run_tui(paths.sock_path, initial_tasks=None))
+                tui_cfg = _load_tui_config()
+                asyncio.run(
+                    run_tui(
+                        paths.sock_path,
+                        initial_tasks=None,
+                        max_buffer_lines=tui_cfg.max_buffer_lines,
+                        spinner_interval=tui_cfg.spinner_interval,
+                    )
+                )
             else:
                 click.echo(ctx.get_help())
 
diff --git a/src/switchplane/config.py b/src/switchplane/config.py
index f97fad1..40936f7 100644
--- a/src/switchplane/config.py
+++ b/src/switchplane/config.py
@@ -32,11 +32,46 @@ class LoggingConfig(BaseModel):
     level: str = "debug"  # log level: debug, info, warning, error
 
 
+class TuiConfig(BaseModel):
+    """TUI tuning knobs.
+
+    Defaults are conservative — they trade scrollback depth and
+    spinner liveness for bounded per-frame render cost. The
+    TUI's main thread renders the **entire** scrollback buffer on
+    every redraw (prompt_toolkit's `FormattedTextControl.create_content`
+    is per-frame O(buffer_size), not O(visible-area)), so a buffer
+    much larger than these defaults can pin the daemon's CPU on
+    long-running tasks even when the user isn't actively scrolling.
+    """
+
+    max_buffer_lines: int = 2_000
+    """Maximum lines retained per tab before oldest are trimmed.
+
+    Was 10_000; that produced sustained 99% CPU spins on the daemon
+    main thread for long-running tasks (LLM tool loops with hundreds
+    of events). The render cost grows linearly with this; halving it
+    halves baseline render cost while still giving the operator a
+    deep-enough scrollback for routine debugging.
+    """
+
+    spinner_interval: float = 1.0
+    """How often the active-task spinner redraws, in seconds.
+
+    Was 0.2 (5 fps); that pinned a redraw-every-200ms cadence on
+    every active-task tab regardless of whether content changed.
+    Combined with a large `max_buffer_lines` it was the load-bearing
+    contributor to the daemon-CPU pin. 1.0 (1 fps) is plenty to
+    signal liveness without driving the renderer into a steady-state
+    loop.
+    """
+
+
 class AppConfig(BaseModel):
     """Top-level configuration."""
 
     llm: LLMConfig = LLMConfig()
     logging: LoggingConfig = LoggingConfig()
+    tui: TuiConfig = TuiConfig()
     agents: dict[str, dict[str, Any]] = {}
 
 
diff --git a/src/switchplane/tui.py b/src/switchplane/tui.py
index 99baf99..2002c05 100644
--- a/src/switchplane/tui.py
+++ b/src/switchplane/tui.py
@@ -117,7 +117,14 @@
 # redraw.
 _REFRESH_DEBOUNCE_SECONDS = 0.033
 _SYSTEM_TAB_ID = "_system"
-_DEFAULT_MAX_BUFFER_LINES = 10_000
+
+# Defaults track the matching fields on `TuiConfig` in
+# `switchplane/config.py`. When these are imported directly (e.g.
+# tests, ad-hoc TUI launches), the constants here apply. When the
+# TUI is started via `run_tui` from a configured Application, the
+# AppConfig values override them.
+_DEFAULT_MAX_BUFFER_LINES = 2_000
+_DEFAULT_SPINNER_INTERVAL = 1.0
 _LINE_PREFIX = "  "  # Left margin for event lines
 _LINE_PREFIX_WIDTH = len(_LINE_PREFIX)
 
@@ -244,9 +251,15 @@ def _render_diff(diff_text: str, width: int = 0) -> StyleAndTextTuples:
 class TUISession:
     """Manages TUI state: event buffers, background streams, and input dispatch."""
 
-    def __init__(self, sock_path: Path, max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES) -> None:
+    def __init__(
+        self,
+        sock_path: Path,
+        max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES,
+        spinner_interval: float = _DEFAULT_SPINNER_INTERVAL,
+    ) -> None:
         self.sock_path = sock_path
         self.max_buffer_lines = max_buffer_lines
+        self.spinner_interval = spinner_interval
         self.buffers: dict[str, EventBuffer] = {}
         self.task_order: list[str] = []  # ordered task IDs for tab bar (excludes _system)
         self.focused_task_id: str | None = _SYSTEM_TAB_ID
@@ -1318,13 +1331,23 @@ def _stop_spinner(self) -> None:
             self._spinner_timer = None
 
     def _tick_spinner(self) -> None:
-        """Advance the spinner frame and schedule the next tick."""
+        """Advance the spinner frame and schedule the next tick.
+
+        Tick interval is `self.spinner_interval` (configurable via
+        `TuiConfig.spinner_interval`). Was hardcoded to 0.2s; that
+        was the load-bearing contributor to a daemon-CPU pin observed
+        on long-running tasks — every tick calls `_refresh()` which
+        invalidates the prompt_toolkit Application, which re-renders
+        the entire scrollback (per-frame O(buffer_size)). At a deep
+        scrollback the per-frame cost can exceed the tick interval
+        and the renderer never yields back to the IPC reader.
+        """
         self._spinner_frame += 1
         self._refresh()
         if self._app is not None and self._has_active_tasks():
             loop = self._app.loop
             if loop is not None:
-                self._spinner_timer = loop.call_later(0.2, self._tick_spinner)
+                self._spinner_timer = loop.call_later(self.spinner_interval, self._tick_spinner)
             else:
                 self._spinner_timer = None
         else:
@@ -1583,6 +1606,7 @@ async def run_tui(
     sock_path: Path,
     initial_tasks: list[tuple[str, str, str, str]] | None = None,
     max_buffer_lines: int = _DEFAULT_MAX_BUFFER_LINES,
+    spinner_interval: float = _DEFAULT_SPINNER_INTERVAL,
 ) -> None:
     """Run the TUI session.
 
@@ -1592,8 +1616,15 @@ async def run_tui(
             to pre-populate the session with. Pass an empty list to auto-discover
             running tasks from the daemon.
         max_buffer_lines: Maximum lines retained per tab before oldest are trimmed.
+        spinner_interval: Active-task spinner tick interval in seconds. Each tick
+            triggers a full prompt_toolkit redraw, so a low interval combined
+            with a deep scrollback can pin the daemon's CPU.
     """
-    session = TUISession(sock_path, max_buffer_lines=max_buffer_lines)
+    session = TUISession(
+        sock_path,
+        max_buffer_lines=max_buffer_lines,
+        spinner_interval=spinner_interval,
+    )
 
     if initial_tasks is None:
         # Auto-discover running tasks from the daemon
diff --git a/tests/test_config.py b/tests/test_config.py
index 3f8a973..0250908 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,4 +1,11 @@
-from switchplane.config import AppConfig, LLMConfig, _deep_merge, get_agent_config, load_config
+from switchplane.config import (
+    AppConfig,
+    LLMConfig,
+    TuiConfig,
+    _deep_merge,
+    get_agent_config,
+    load_config,
+)
 
 
 class TestDeepMerge:
@@ -44,6 +51,7 @@ class TestAppConfig:
     def test_defaults(self):
         cfg = AppConfig()
         assert isinstance(cfg.llm, LLMConfig)
+        assert isinstance(cfg.tui, TuiConfig)
         assert cfg.agents == {}
 
     def test_with_agents(self):
@@ -51,6 +59,31 @@ def test_with_agents(self):
         assert cfg.agents["worker"]["timeout"] == 30
 
 
+class TestTuiConfig:
+    """`TuiConfig` knobs cap per-frame TUI render cost.
+    Defaults are intentionally conservative — see config.py."""
+
+    def test_defaults(self):
+        cfg = TuiConfig()
+        # 2_000 (was 10_000) — render cost grows linearly with this.
+        assert cfg.max_buffer_lines == 2_000
+        # 1.0s (was 0.2s hardcoded in tui.py) — 5× slower spinner
+        # tick cuts baseline render rate proportionally.
+        assert cfg.spinner_interval == 1.0
+
+    def test_overrides(self):
+        cfg = TuiConfig(max_buffer_lines=500, spinner_interval=2.0)
+        assert cfg.max_buffer_lines == 500
+        assert cfg.spinner_interval == 2.0
+
+    def test_loaded_via_app_config(self):
+        """The TuiConfig is reachable as `AppConfig().tui`, which is
+        how the cli.py TUI launch path reads it."""
+        cfg = AppConfig(tui={"max_buffer_lines": 1234, "spinner_interval": 0.5})
+        assert cfg.tui.max_buffer_lines == 1234
+        assert cfg.tui.spinner_interval == 0.5
+
+
 class TestLoadConfig:
     def test_no_files(self):
         cfg = load_config(None, None)
diff --git a/tests/test_tui.py b/tests/test_tui.py
index d57ee23..d1ce510 100644
--- a/tests/test_tui.py
+++ b/tests/test_tui.py
@@ -97,6 +97,15 @@ def test_custom_max_buffer_lines(self, tmp_path):
         s = TUISession(tmp_path / "s.sock", max_buffer_lines=500)
         assert s.max_buffer_lines == 500
 
+    def test_default_spinner_interval(self, session):
+        # Default lives on the TuiConfig but is mirrored as
+        # `_DEFAULT_SPINNER_INTERVAL` in tui.py for ad-hoc launches.
+        assert session.spinner_interval == 1.0
+
+    def test_custom_spinner_interval(self, tmp_path):
+        s = TUISession(tmp_path / "s.sock", spinner_interval=0.25)
+        assert s.spinner_interval == 0.25
+
 
 # ---------------------------------------------------------------------------
 # add_task

From 801cce7e9d04308be08c5af942b124f2a3722bc1 Mon Sep 17 00:00:00 2001
From: Demian Brecht <demianbrecht@gmail.com>
Date: Tue, 26 May 2026 08:41:10 -0700
Subject: [PATCH 3/3] fix: address PR #17 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Five non-blocking findings from the review, all addressed:

1. **CLAUDE.md drift** — `default 10,000` claim was now wrong since
   this PR drops the default to 2,000. Updated to reference the new
   `[tui]` config section and the surrounding TUI tuning knobs.

2. **Loop-acquisition convention** — `_refresh()` was using
   `asyncio.get_running_loop()` while `_tick_spinner()` (same file,
   same arm-a-timer pattern) used `self._app.loop`. Aligned to
   prompt_toolkit's own convention from
   `Application.create_background_task`:
   `self._app.loop or asyncio.get_running_loop()`. This handles the
   pre-`run_async` window correctly (where `self._app.loop` is None
   and we still need to schedule on whatever loop is running) AND
   the post-`run_async` window (where `self._app.loop` is set and
   should be preferred). Preserves the synchronous-test fallback to
   direct `invalidate()`. Added a test pinning the
   "loop set → schedule on app loop, not get_running_loop" branch.

3. **Silent config-error swallow** — `_load_tui_config`'s
   `except Exception` now emits a stderr warning before falling
   back to defaults. A typo in `[tui]` would otherwise silently
   revert to defaults with zero feedback.

4. **Local TuiConfig import** — promoted to a module-level import
   alongside `load_config`. No circular-import reason to keep it
   local; cleaner failure path.

5. **Spinner interval pushback** — reviewer's case for "1 fps reads
   as stuck, not alive" was sound. Settled on 0.5s (2 fps) as the
   compromise: 2.5× cost reduction vs. 0.2s without crossing the
   "is it stuck?" threshold. The buffer cap and refresh debounce
   are doing most of the heavy lifting on render cost; the spinner
   change is polish on top. TuiConfig docstring + test fixtures
   updated.

668 unit tests pass (+1 new for the loop-acquisition branch).
ruff lint + format clean across the whole repo.
---
 CLAUDE.md                 |  2 +-
 src/switchplane/cli.py    | 29 ++++++++++++++++++--------
 src/switchplane/config.py | 20 +++++++++++-------
 src/switchplane/tui.py    | 28 +++++++++++++++++--------
 tests/test_config.py      |  8 +++++---
 tests/test_tui.py         | 43 ++++++++++++++++++++++++++++++++++++++-
 6 files changed, 101 insertions(+), 29 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 3bf5767..9ae6aec 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -100,7 +100,7 @@ weather task <task_id> <command> [--key value ...]
 
 In CLI attached mode (`run`/`follow`/`resume`), only `/` task commands are supported inline. Daemon commands are not available — detach with Ctrl+C and use CLI subcommands directly.
 
-**Event buffers** are capped at a configurable `max_buffer_lines` (default 10,000) per tab to bound memory in long-running sessions.
+**Event buffers** are capped at a configurable `[tui] max_buffer_lines` (default 2,000) per tab to bound both memory and per-frame render cost in long-running sessions. Spinner tick interval and refresh-debounce window are also configurable; see `TuiConfig` in `switchplane/config.py`.
 
 ## Build system
 
diff --git a/src/switchplane/cli.py b/src/switchplane/cli.py
index 8380484..8d56142 100644
--- a/src/switchplane/cli.py
+++ b/src/switchplane/cli.py
@@ -10,7 +10,7 @@
 import click
 
 from switchplane import fmt
-from switchplane.config import load_config
+from switchplane.config import TuiConfig, load_config
 from switchplane.daemon import RuntimePaths, start_daemon, stop_daemon
 from switchplane.protocol import CliRequest, CliResponse
 from switchplane.transport import ControlPlaneClient, is_alive
@@ -47,12 +47,21 @@ def send_request(method: str, params: dict | None = None) -> CliResponse:
             request = CliRequest(method=method, params=params or {})
             return client.send(request)
 
-    def _load_tui_config():
+    def _load_tui_config() -> TuiConfig:
         """Read TUI tuning knobs from the app's merged config (user
-        overrides on top of app defaults). Falls back silently to the
-        defaults if the config can't be parsed for any reason — the
-        TUI is the wrong place to surface a config validation error,
-        and the defaults are safe."""
+        overrides on top of app defaults). Falls back to the defaults
+        if the config can't be parsed for any reason — the TUI itself
+        is the wrong place to crash on a config validation error, and
+        the defaults are safe.
+
+        The fallback is **not silent**: a one-line warning goes to
+        stderr so a typo in `[tui]` (or a stale TOML field after a
+        switchplane upgrade) is at least visible to the operator.
+        Without this, a misnamed knob would silently revert to the
+        default with zero feedback — the kind of bug that takes hours
+        to diagnose because the apparent behavior matches "the config
+        was loaded fine; my override just didn't help".
+        """
         try:
             cfg = load_config(
                 config_path=paths.config_path,
@@ -60,9 +69,11 @@ def _load_tui_config():
                 config_class=app.config_class,
             )
             return cfg.tui
-        except Exception:
-            from switchplane.config import TuiConfig
-
+        except Exception as e:
+            click.echo(
+                f"warning: could not parse TUI config ({e!r}); falling back to defaults",
+                err=True,
+            )
             return TuiConfig()
 
     @click.group(invoke_without_command=True)
diff --git a/src/switchplane/config.py b/src/switchplane/config.py
index 40936f7..d18a8f6 100644
--- a/src/switchplane/config.py
+++ b/src/switchplane/config.py
@@ -54,15 +54,21 @@ class TuiConfig(BaseModel):
     deep-enough scrollback for routine debugging.
     """
 
-    spinner_interval: float = 1.0
+    spinner_interval: float = 0.5
     """How often the active-task spinner redraws, in seconds.
 
-    Was 0.2 (5 fps); that pinned a redraw-every-200ms cadence on
-    every active-task tab regardless of whether content changed.
-    Combined with a large `max_buffer_lines` it was the load-bearing
-    contributor to the daemon-CPU pin. 1.0 (1 fps) is plenty to
-    signal liveness without driving the renderer into a steady-state
-    loop.
+    Was 0.2 (5 fps), raised here to 0.5 (2 fps). The original 5 fps
+    pinned a redraw-every-200ms cadence on every active-task tab
+    regardless of whether content changed; combined with a large
+    `max_buffer_lines` it was the load-bearing contributor to the
+    daemon-CPU pin.
+
+    2 fps is a 2.5× cost reduction without sacrificing liveness —
+    fast enough that operators read it as "alive" rather than "stuck"
+    (1 fps was tested and felt like the latter). The smaller buffer
+    cap and `_REFRESH_DEBOUNCE_SECONDS` are doing most of the
+    heavy lifting on render cost; the spinner change here is the
+    polish on top.
     """
 
 
diff --git a/src/switchplane/tui.py b/src/switchplane/tui.py
index 2002c05..fced111 100644
--- a/src/switchplane/tui.py
+++ b/src/switchplane/tui.py
@@ -124,7 +124,7 @@
 # TUI is started via `run_tui` from a configured Application, the
 # AppConfig values override them.
 _DEFAULT_MAX_BUFFER_LINES = 2_000
-_DEFAULT_SPINNER_INTERVAL = 1.0
+_DEFAULT_SPINNER_INTERVAL = 0.5
 _LINE_PREFIX = "  "  # Left margin for event lines
 _LINE_PREFIX_WIDTH = len(_LINE_PREFIX)
 
@@ -465,18 +465,30 @@ def _refresh(self) -> None:
         (the timer is already armed). Visible latency is bounded by the
         debounce constant; throughput on bursty append-storms collapses
         from N redraws to one.
+
+        Loop acquisition follows prompt_toolkit's own convention
+        (`self.loop or asyncio.get_running_loop()`, see
+        `prompt_toolkit.application.application.Application.create_background_task`):
+        `self._app.loop` is `None` until `run_async()` runs and after it
+        returns, so during pre-startup or test paths we fall through to
+        the running loop, and if even that's unavailable we invalidate
+        directly so the redraw isn't silently lost.
         """
         if self._app is None:
             return
         if self._refresh_timer is not None:
             return  # Redraw already scheduled
-        try:
-            loop = asyncio.get_running_loop()
-        except RuntimeError:
-            # Called outside an event loop (test path / pre-startup).
-            # Fall back to immediate invalidate.
-            self._app.invalidate()
-            return
+        loop = self._app.loop
+        if loop is None:
+            try:
+                loop = asyncio.get_running_loop()
+            except RuntimeError:
+                # No event loop at all (synchronous test path / pre-startup
+                # before the first `run_async`). Direct invalidate so the
+                # redraw isn't lost; legacy callers that don't drive an
+                # event loop still see their refreshes land.
+                self._app.invalidate()
+                return
         self._refresh_timer = loop.call_later(_REFRESH_DEBOUNCE_SECONDS, self._fire_refresh)
 
     def _fire_refresh(self) -> None:
diff --git a/tests/test_config.py b/tests/test_config.py
index 0250908..6251bc9 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -67,9 +67,11 @@ def test_defaults(self):
         cfg = TuiConfig()
         # 2_000 (was 10_000) — render cost grows linearly with this.
         assert cfg.max_buffer_lines == 2_000
-        # 1.0s (was 0.2s hardcoded in tui.py) — 5× slower spinner
-        # tick cuts baseline render rate proportionally.
-        assert cfg.spinner_interval == 1.0
+        # 0.5s (was 0.2s hardcoded in tui.py) — 2.5× slower spinner
+        # tick cuts baseline render rate proportionally without
+        # crossing the threshold where the spinner reads as "stuck"
+        # rather than "alive".
+        assert cfg.spinner_interval == 0.5
 
     def test_overrides(self):
         cfg = TuiConfig(max_buffer_lines=500, spinner_interval=2.0)
diff --git a/tests/test_tui.py b/tests/test_tui.py
index d1ce510..1df782f 100644
--- a/tests/test_tui.py
+++ b/tests/test_tui.py
@@ -100,7 +100,7 @@ def test_custom_max_buffer_lines(self, tmp_path):
     def test_default_spinner_interval(self, session):
         # Default lives on the TuiConfig but is mirrored as
         # `_DEFAULT_SPINNER_INTERVAL` in tui.py for ad-hoc launches.
-        assert session.spinner_interval == 1.0
+        assert session.spinner_interval == 0.5
 
     def test_custom_spinner_interval(self, tmp_path):
         s = TUISession(tmp_path / "s.sock", spinner_interval=0.25)
@@ -417,6 +417,11 @@ def test_unknown_task_id_is_noop(self, session):
 
     def test_calls_refresh_with_app_set(self, session):
         mock_app = MagicMock()
+        # See TestRefreshDebounce / test_refresh_calls_invalidate_when_app_set:
+        # `loop=None` + no running loop steers `_refresh` to the
+        # direct-invalidate fallback (legacy path). The timer-armed
+        # path is covered separately under TestRefreshDebounce.
+        mock_app.loop = None
         session._app = mock_app
         session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "x")])
         mock_app.invalidate.assert_called()
@@ -440,6 +445,14 @@ async def test_coalesces_burst_into_single_invalidate(self, session):
         from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS
 
         mock_app = MagicMock()
+        # `_refresh` follows prompt_toolkit's convention:
+        # `self._app.loop or asyncio.get_running_loop()`. Force the
+        # `or` branch by setting `loop = None` so the test exercises
+        # the path most production callers take pre-`run_async`.
+        # `MagicMock().loop` is itself a (truthy) MagicMock by
+        # default, so without this the test would call
+        # `mock_loop.call_later(...)` and never schedule a real timer.
+        mock_app.loop = None
         session._app = mock_app
 
         # 10 rapid appends within one event-loop iteration. Inside an
@@ -463,6 +476,7 @@ async def test_subsequent_burst_after_fire_arms_new_timer(self, session):
         from switchplane.tui import _REFRESH_DEBOUNCE_SECONDS
 
         mock_app = MagicMock()
+        mock_app.loop = None  # see comment in coalesces test above
         session._app = mock_app
 
         session._append_line(_SYSTEM_TAB_ID, [], [(_S_INFO, "first burst")])
@@ -482,12 +496,34 @@ def test_refresh_outside_event_loop_falls_back_to_direct_invalidate(self, sessio
         work — fall back to a direct `invalidate()` so legacy callers
         and tests don't silently lose their redraw."""
         mock_app = MagicMock()
+        # `loop=None` forces the get_running_loop fallback; combined
+        # with no running loop in this synchronous test, both branches
+        # fail and we land on the direct-invalidate fallback.
+        mock_app.loop = None
         session._app = mock_app
         # Synchronous (no event loop): direct invalidate, no timer.
         session._refresh()
         assert mock_app.invalidate.call_count == 1
         assert session._refresh_timer is None
 
+    def test_refresh_uses_app_loop_when_set(self, session):
+        """When `self._app.loop` is set (i.e. `Application.run_async`
+        is in flight), `_refresh` schedules on that loop directly
+        instead of falling through to `asyncio.get_running_loop()`.
+        This matches prompt_toolkit's own
+        `self.loop or get_running_loop()` convention from
+        `Application.create_background_task`."""
+        mock_app = MagicMock()
+        mock_loop = MagicMock()
+        mock_app.loop = mock_loop
+        session._app = mock_app
+
+        session._refresh()
+        # Timer scheduled on the app's loop, not via get_running_loop.
+        mock_loop.call_later.assert_called_once()
+        # No direct invalidate — the timer is the path.
+        mock_app.invalidate.assert_not_called()
+
 
 # ---------------------------------------------------------------------------
 # _focused_buf / _system_message / _append_text
@@ -526,6 +562,11 @@ def test_refresh_noop_when_app_is_none(self, session):
 
     def test_refresh_calls_invalidate_when_app_set(self, session):
         mock_app = MagicMock()
+        # `loop=None` + no running loop → direct invalidate fallback,
+        # which is what this legacy test exercises. With a real loop
+        # the path arms a timer instead; the dedicated burst tests
+        # in TestRefreshDebounce cover that branch.
+        mock_app.loop = None
         session._app = mock_app
         session._refresh()
         mock_app.invalidate.assert_called_once()