5 changes: 3 additions & 2 deletions examples/audio-car-cockpit/.gitignore
@@ -5,8 +5,9 @@ uv.lock
.ruff_cache

# Local server runtimes and models
llama-server
llama.cpp
llama-server*
llama.cpp-rocm
llama.cpp-cpu
llama-liquid-audio*
runners/
LFM2.5-Audio-1.5B-GGUF
95 changes: 64 additions & 31 deletions examples/audio-car-cockpit/Makefile
@@ -4,7 +4,7 @@
test-search test-quick test-full test-toolcall \
llama-liquid-audio-runner \
LFM2-1.2B-Tool-GGUF \
install-deps
install-deps clean

all: help

@@ -97,7 +97,11 @@ else
$(error Unsupported arch: $(UNAME_M))
endif

HAS_ROCM := $(shell test -d /opt/rocm && echo 1)
ifdef CPU
HAS_ROCM :=
else
HAS_ROCM := $(shell rocm-smi --showproductname 2>/dev/null | grep "Card Series" >/dev/null && echo 1)
endif

LLAMA_CPP_REPO := https://github.com/ggml-org/llama.cpp.git
LLAMA_CPP_PR := 18641
@@ -109,32 +113,58 @@ LLAMA_CPP_COMMIT := d03c45c9c56795af8b0e899762bf266c14fd2028
# └──────────────────────────────────────────────────────────┘

ifdef HAS_ROCM
$(info ROCm detected at /opt/rocm — building with HIP GPU acceleration)
$(info ROCm detected — building with HIP GPU acceleration)

HIP_ARCH ?= gfx1150
_DETECTED_HIP_ARCH := $(shell rocm-smi --showproductname 2>/dev/null | grep -oP 'gfx\w+' | head -1)
HIP_ARCH ?= $(or $(_DETECTED_HIP_ARCH),gfx1150)
$(info Using HIP_ARCH=$(HIP_ARCH))
CMAKE_ARGS := -DGGML_HIP=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_HIP_ARCHITECTURES="$(HIP_ARCH)"

# ROCm: clone PR #18641 which includes liquid-audio tools
llama.cpp:
git clone $(LLAMA_CPP_REPO) && \
cd llama.cpp && \
GPU_FLAGS := --n-gpu-layers 9999
AUDIO_PCM_FORMAT := int16
LLAMA_CPP_DIR := llama.cpp-rocm

# Krackan iGPU (gfx1153, Ryzen AI 7): ROCm <= 7.2 doesn't ship gfx1153
# rocBLAS Tensile kernels, so the audio server's multimodal warmup
# segfaults dispatching GEMMs that have no matching kernel. Reroute via
# gfx1150 (binary-compatible RDNA 3.5) only when needed; the wildcard
# check self-disables this once a future ROCm release adds gfx1153
# kernels. Must NOT be applied to the tool model — it crashes its warmup
# instead — so it is set as a recipe-line prefix on `audioserver` only.
ifeq ($(HIP_ARCH),gfx1153)
ifeq (,$(wildcard /opt/rocm*/lib/rocblas/library/*gfx1153*))
AUDIO_SERVER_ENV := HSA_OVERRIDE_GFX_VERSION=11.5.0
endif
endif

llama.cpp-rocm:
git clone $(LLAMA_CPP_REPO) $@ && \
cd $@ && \
git fetch origin pull/$(LLAMA_CPP_PR)/head:pr-$(LLAMA_CPP_PR) && \
git checkout pr-$(LLAMA_CPP_PR)

LLAMA_SERVER := llama-server-rocm
AUDIO_SERVER := ./llama-liquid-audio-server
AUDIO_SERVER_TARGET := llama-liquid-audio-server

else
$(info ROCm not found — building CPU-only, downloading pre-built audio runner)
ifdef CPU
$(info CPU=1 set — forcing CPU-only build)
else
$(info ROCm not found — building CPU-only, downloading pre-built audio runner)
endif

CMAKE_ARGS := -DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=ON

# No ROCm: clone llama.cpp at a known-good commit on main
llama.cpp:
git clone $(LLAMA_CPP_REPO) && \
cd llama.cpp && \
GPU_FLAGS :=
AUDIO_PCM_FORMAT := float32
LLAMA_CPP_DIR := llama.cpp-cpu
AUDIO_SERVER_ENV :=

llama.cpp-cpu:
git clone $(LLAMA_CPP_REPO) $@ && \
cd $@ && \
git checkout $(LLAMA_CPP_COMMIT)

LLAMA_SERVER := llama-server-cpu
AUDIO_SERVER := ./llama-liquid-audio/llama-liquid-audio-server
AUDIO_SERVER_TARGET := llama-liquid-audio/llama-liquid-audio-server

@@ -179,49 +209,48 @@ else
endif


llama.cpp/build/bin/llama-server: llama.cpp
$(LLAMA_CPP_DIR)/build/bin/llama-server: $(LLAMA_CPP_DIR)
ifeq ($(UNAME_S),Linux)
@dpkg -s libssl-dev >/dev/null 2>&1 || \
(echo "Error: libssl-dev not found — llama-server would build without HTTPS support." \
"Run 'make install-deps' first." && exit 1)
endif
cd llama.cpp && \
cd $(LLAMA_CPP_DIR) && \
cmake -B build $(CMAKE_ARGS) && \
cmake --build build --config Release -t llama-server -j 8

llama-server: ## Build llama-server (auto-detects ROCm)
@# Make doesn't allow non-recursive dependencies, adding the check here instead
test -e $@ || $(MAKE) llama.cpp/build/bin/llama-server && \
cp llama.cpp/build/bin/llama-server $@
touch llama-server
$(LLAMA_SERVER): ## Build llama-server (auto-detects ROCm)
test -e $@ || $(MAKE) $(LLAMA_CPP_DIR)/build/bin/llama-server && \
cp $(LLAMA_CPP_DIR)/build/bin/llama-server $@
touch $@

ifdef HAS_ROCM
llama.cpp/build/bin/llama-liquid-audio-server: llama.cpp
cd llama.cpp && \
$(LLAMA_CPP_DIR)/build/bin/llama-liquid-audio-server: $(LLAMA_CPP_DIR)
cd $(LLAMA_CPP_DIR) && \
cmake -B build $(CMAKE_ARGS) && \
cmake --build build --config Release -t llama-liquid-audio-server -j 8

llama-liquid-audio-server: ## Build llama-liquid-audio-server with ROCm/HIP (from PR #18641)
test -e $@ || $(MAKE) llama.cpp/build/bin/llama-liquid-audio-server && \
cp llama.cpp/build/bin/llama-liquid-audio-server $@
touch llama-liquid-audio-server
test -e $@ || $(MAKE) $(LLAMA_CPP_DIR)/build/bin/llama-liquid-audio-server && \
cp $(LLAMA_CPP_DIR)/build/bin/llama-liquid-audio-server $@
touch $@
endif


# ┌──────────────────────────────────────────────────────────┐
# │ Servers │
# └──────────────────────────────────────────────────────────┘

serve: llama-server ## Start FastAPI server
$(UV) run --frozen server.py
serve: $(LLAMA_SERVER) ## Start FastAPI server
AUDIO_PCM_FORMAT=$(AUDIO_PCM_FORMAT) $(UV) run --frozen server.py

audioserver: $(AUDIO_SERVER_TARGET) LFM2.5-Audio-1.5B-GGUF ## Start audio server
$(AUDIO_SERVER) \
$(AUDIO_SERVER_ENV) $(AUDIO_SERVER) \
-m LFM2.5-Audio-1.5B-GGUF/LFM2.5-Audio-1.5B-Q8_0.gguf \
-mm LFM2.5-Audio-1.5B-GGUF/mmproj-LFM2.5-Audio-1.5B-Q8_0.gguf \
-mv LFM2.5-Audio-1.5B-GGUF/vocoder-LFM2.5-Audio-1.5B-Q8_0.gguf \
--tts-speaker-file LFM2.5-Audio-1.5B-GGUF/tokenizer-LFM2.5-Audio-1.5B-Q8_0.gguf \
-t ${THREADS} --host 127.0.0.1 --port ${AUDIO_SERVER_PORT} &>/dev/null
$(GPU_FLAGS) -t ${THREADS} --host 127.0.0.1 --port ${AUDIO_SERVER_PORT} &>/dev/null


# ┌──────────────────────────────────────────────────────────┐
@@ -251,6 +280,10 @@ test-toolcall: ## Tool call with the string "play the next song"
# │ Utilities │
# └──────────────────────────────────────────────────────────┘

clean: ## Remove build artifacts (llama.cpp, binaries, runners)
rm -rf llama.cpp-rocm llama.cpp-cpu llama-server-rocm llama-server-cpu \
llama-liquid-audio-server llama-liquid-audio runners

UV_FROZEN_DEV = $(UV) run --only-group dev --frozen

lint: ## Lint and format python code
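Usage note: with these Makefile changes the build can be steered from the command line. CPU=1 forces the CPU-only path even when ROCm is present (for example, make CPU=1 serve), HIP_ARCH overrides the architecture auto-detected through rocm-smi (for example, make HIP_ARCH=gfx1150 llama-liquid-audio-server), and make clean removes the per-backend checkouts and binaries introduced above.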
80 changes: 80 additions & 0 deletions examples/audio-car-cockpit/check_system.sh
@@ -0,0 +1,80 @@
#!/usr/bin/env bash
set -euo pipefail

missing=0

check_command() {
local cmd="$1"
local install_hint="$2"
if command -v "$cmd" &>/dev/null; then
echo "[OK] $cmd is installed ($(command -v "$cmd"))"
else
echo "[MISSING] $cmd is not installed"
echo " Install suggestion: $install_hint"
missing=1
fi
}

echo "Checking prerequisites..."
echo

check_command "make" \
"sudo apt-get install -y make # Debian/Ubuntu
brew install make # macOS"

check_command "curl" \
"sudo apt-get install -y curl # Debian/Ubuntu
brew install curl # macOS"

# ── Radeon iGPU / ROCm checks ──

echo
has_radeon=0
if lspci 2>/dev/null | grep -qi 'vga.*amd\|display.*amd\|vga.*radeon\|display.*radeon'; then
has_radeon=1
gpu_name=$(lspci 2>/dev/null | grep -iE 'vga.*amd|display.*amd|vga.*radeon|display.*radeon' | head -1 | sed 's/.*: //')
echo "[OK] AMD Radeon GPU detected: $gpu_name"

# Check kernel driver is loaded (amdgpu)
if grep -q amdgpu /proc/modules 2>/dev/null; then
echo "[OK] amdgpu kernel driver is loaded"
else
echo "[MISSING] amdgpu kernel driver is NOT loaded"
echo " Install suggestion: sudo apt-get install -y linux-modules-extra-\$(uname -r)"
echo " Then reboot and verify with: lsmod | grep amdgpu"
missing=1
fi

# Check ROCm installation
if [ -d /opt/rocm ]; then
rocm_version=$(cat /opt/rocm/.info/version 2>/dev/null || echo "unknown")
echo "[OK] ROCm is installed at /opt/rocm (version: $rocm_version)"
else
echo "[MISSING] ROCm is not installed (no /opt/rocm found)"
echo " Install suggestion: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/"
echo " Also install: sudo apt install -y libstdc++-14-dev"
missing=1
fi

# Check GPU architecture via rocminfo
if command -v rocminfo &>/dev/null; then
gfx_arch=$(rocminfo 2>/dev/null | grep -oP 'gfx\w+' | head -1 || true)
if [ -n "$gfx_arch" ]; then
echo "[OK] GPU architecture: $gfx_arch (pass HIP_ARCH=$gfx_arch to make)"
else
echo "[WARN] Could not determine GPU architecture from rocminfo"
fi
else
echo "[INFO] rocminfo not available — install ROCm to detect GPU architecture"
fi
else
echo "[INFO] No AMD Radeon GPU detected — will build in CPU-only mode"
fi

echo
if [ "$missing" -eq 0 ]; then
echo "All prerequisites are installed."
else
echo "Some prerequisites are missing. Please install them and re-run this script."
exit 1
fi
35 changes: 27 additions & 8 deletions examples/audio-car-cockpit/server.py
@@ -1,4 +1,6 @@
import base64
import os
import time
import webbrowser
from contextlib import asynccontextmanager
from pathlib import Path
@@ -108,6 +110,9 @@ async def websocket_endpoint(websocket: WebSocket):
async def websocket_audio_endpoint(websocket: WebSocket):
await websocket.accept()
audio_client = AsyncOpenAI(base_url=f"http://127.0.0.1:{p_env.AUDIO_SERVER_PORT}/v1", api_key="dummy")
audio_pcm_format = os.environ.get("AUDIO_PCM_FORMAT", "float32")

await websocket.send_json({"type": "config", "audio_pcm_format": audio_pcm_format})

voice = "US female"

@@ -123,6 +128,7 @@ async def websocket_audio_endpoint(websocket: WebSocket):
# Build messages based on mode
if mode == "asr":
print("\n[AUDIO] Starting ASR (Speech-to-Text)...")
t_start = time.perf_counter()
if wav_data is None:
continue
messages = [
@@ -169,10 +175,13 @@ async def websocket_audio_endpoint(websocket: WebSocket):
transcribed_text += _text_content
await websocket.send_json({"type": "text", "data": _text_content})

if hasattr(delta, "audio_chunk") and delta.audio_chunk:
chunk_data = delta.audio_chunk["data"]
# Send audio chunk immediately for low latency
await websocket.send_json({"type": "audio", "data": chunk_data, "sample_rate": 24000})
audio_data = getattr(delta, "audio", None) or getattr(delta, "audio_chunk", None)
if audio_data:
await websocket.send_json({
"type": "audio",
"data": audio_data["data"],
"sample_rate": audio_data.get("sample_rate", 24000),
})

# If ASR mode, process through tool calling and then TTS
if mode == "asr" and transcribed_text:
@@ -243,13 +252,23 @@ async def websocket_audio_endpoint(websocket: WebSocket):
max_tokens=512,
)

tts_first_audio = True
async for chunk in tts_stream:
delta = chunk.choices[0].delta

if hasattr(delta, "audio_chunk") and delta.audio_chunk:
chunk_data = delta.audio_chunk["data"]
# Send audio chunk immediately for low latency
await websocket.send_json({"type": "audio", "data": chunk_data, "sample_rate": 24000})
audio_data = getattr(delta, "audio", None) or getattr(delta, "audio_chunk", None)
if audio_data:
if tts_first_audio:
print(f"[AUDIO] Time to first audio byte: {(time.perf_counter() - t_start)*1000:.0f} ms")
tts_first_audio = False
await websocket.send_json({
"type": "audio",
"data": audio_data["data"],
"sample_rate": audio_data.get("sample_rate", 24000),
})

if mode == "asr" and transcribed_text:
print(f"[AUDIO] End-to-end latency: {(time.perf_counter() - t_start)*1000:.0f} ms")

await websocket.send_json({"type": "done"})

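Since the ROCm build now streams int16 PCM while the CPU build keeps float32 (see AUDIO_PCM_FORMAT in the Makefile), the frontend has to branch on the new "config" message before playing chunks back. A minimal sketch of that decode step, assuming the chunk payload is base64-encoded raw PCM and that numpy is available on the consuming side; the helper name is illustrative, not part of this PR:

    import base64

    import numpy as np  # assumed available where the chunks are decoded

    def decode_pcm_chunk(b64_data: str, pcm_format: str) -> np.ndarray:
        # Turn one websocket audio chunk into float32 samples in [-1.0, 1.0].
        raw = base64.b64decode(b64_data)
        if pcm_format == "int16":
            # ROCm path: 16-bit signed integer PCM, normalize to float
            return np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
        # CPU path: already float32 PCM
        return np.frombuffer(raw, dtype=np.float32)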
4 changes: 3 additions & 1 deletion examples/audio-car-cockpit/src/llamacpp_inference.py
@@ -61,7 +61,9 @@ def spawn_embedding_runtime(

port = find_available_port(preferred_port=8989)
host = "127.0.0.1"
executable = str((Path.cwd() / "llama-server").resolve())
cwd = Path.cwd()
candidates = [cwd / "llama-server-rocm", cwd / "llama-server-cpu", cwd / "llama-server"]
executable = str(next((p for p in candidates if p.exists()), candidates[-1]).resolve())

command = [
executable,