5 changes: 3 additions & 2 deletions examples/audio-car-cockpit/.gitignore
@@ -5,8 +5,9 @@ uv.lock
.ruff_cache

# Local server runtimes and models
llama-server
llama.cpp
llama-server*
llama.cpp-rocm
llama.cpp-cpu
llama-liquid-audio*
runners/
LFM2.5-Audio-1.5B-GGUF
95 changes: 64 additions & 31 deletions examples/audio-car-cockpit/Makefile
@@ -4,7 +4,7 @@
test-search test-quick test-full test-toolcall \
llama-liquid-audio-runner \
LFM2-1.2B-Tool-GGUF \
install-deps
install-deps clean

all: help

@@ -97,7 +97,11 @@ else
$(error Unsupported arch: $(UNAME_M))
endif

HAS_ROCM := $(shell test -d /opt/rocm && echo 1)
ifdef CPU
HAS_ROCM :=
else
HAS_ROCM := $(shell rocm-smi --showproductname 2>/dev/null | grep "Card Series" >/dev/null && echo 1)
endif

LLAMA_CPP_REPO := https://github.com/ggml-org/llama.cpp.git
LLAMA_CPP_PR := 18641
@@ -109,32 +113,58 @@ LLAMA_CPP_COMMIT := d03c45c9c56795af8b0e899762bf266c14fd2028
# └──────────────────────────────────────────────────────────┘

ifdef HAS_ROCM
$(info ROCm detected at /opt/rocm — building with HIP GPU acceleration)
$(info ROCm detected — building with HIP GPU acceleration)

HIP_ARCH ?= gfx1150
_DETECTED_HIP_ARCH := $(shell rocm-smi --showproductname 2>/dev/null | grep -oP 'gfx\w+' | head -1)
HIP_ARCH ?= $(or $(_DETECTED_HIP_ARCH),gfx1150)
$(info Using HIP_ARCH=$(HIP_ARCH))
CMAKE_ARGS := -DGGML_HIP=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_HIP_ARCHITECTURES="$(HIP_ARCH)"

# ROCm: clone PR #18641 which includes liquid-audio tools
llama.cpp:
git clone $(LLAMA_CPP_REPO) && \
cd llama.cpp && \
GPU_FLAGS := --n-gpu-layers 9999
AUDIO_PCM_FORMAT := int16
LLAMA_CPP_DIR := llama.cpp-rocm

# Krackan iGPU (gfx1153, Ryzen AI 7): ROCm <= 7.2 doesn't ship gfx1153
# rocBLAS Tensile kernels, so the audio server's multimodal warmup
# segfaults dispatching GEMMs that have no matching kernel. Reroute via
# gfx1150 (binary-compatible RDNA 3.5) only when needed; the wildcard
# check self-disables this once a future ROCm release adds gfx1153
# kernels. Must NOT be applied to the tool model — it crashes its warmup
# instead — so it is set as a recipe-line prefix on `audioserver` only.
ifeq ($(HIP_ARCH),gfx1153)
ifeq (,$(wildcard /opt/rocm*/lib/rocblas/library/*gfx1153*))
AUDIO_SERVER_ENV := HSA_OVERRIDE_GFX_VERSION=11.5.0
endif
endif

llama.cpp-rocm:
git clone $(LLAMA_CPP_REPO) $@ && \
cd $@ && \
git fetch origin pull/$(LLAMA_CPP_PR)/head:pr-$(LLAMA_CPP_PR) && \
git checkout pr-$(LLAMA_CPP_PR)

LLAMA_SERVER := llama-server-rocm
AUDIO_SERVER := ./llama-liquid-audio-server
AUDIO_SERVER_TARGET := llama-liquid-audio-server

else
$(info ROCm not found — building CPU-only, downloading pre-built audio runner)
ifdef CPU
$(info CPU=1 set — forcing CPU-only build)
else
$(info ROCm not found — building CPU-only, downloading pre-built audio runner)
endif

CMAKE_ARGS := -DBUILD_SHARED_LIBS=OFF -DLLAMA_CURL=ON

# No ROCm: clone llama.cpp at a known-good commit on main
llama.cpp:
git clone $(LLAMA_CPP_REPO) && \
cd llama.cpp && \
GPU_FLAGS :=
AUDIO_PCM_FORMAT := float32
LLAMA_CPP_DIR := llama.cpp-cpu
AUDIO_SERVER_ENV :=

llama.cpp-cpu:
git clone $(LLAMA_CPP_REPO) $@ && \
cd $@ && \
git checkout $(LLAMA_CPP_COMMIT)

LLAMA_SERVER := llama-server-cpu
AUDIO_SERVER := ./llama-liquid-audio/llama-liquid-audio-server
AUDIO_SERVER_TARGET := llama-liquid-audio/llama-liquid-audio-server

@@ -179,49 +209,48 @@ else
endif


llama.cpp/build/bin/llama-server: llama.cpp
$(LLAMA_CPP_DIR)/build/bin/llama-server: $(LLAMA_CPP_DIR)
ifeq ($(UNAME_S),Linux)
@dpkg -s libssl-dev >/dev/null 2>&1 || \
(echo "Error: libssl-dev not found — llama-server would build without HTTPS support." \
"Run 'make install-deps' first." && exit 1)
endif
cd llama.cpp && \
cd $(LLAMA_CPP_DIR) && \
cmake -B build $(CMAKE_ARGS) && \
cmake --build build --config Release -t llama-server -j 8

llama-server: ## Build llama-server (auto-detects ROCm)
@# Make doesn't allow non-recursive dependencies, adding the check here instead
test -e $@ || $(MAKE) llama.cpp/build/bin/llama-server && \
cp llama.cpp/build/bin/llama-server $@
touch llama-server
$(LLAMA_SERVER): ## Build llama-server (auto-detects ROCm)
test -e $@ || $(MAKE) $(LLAMA_CPP_DIR)/build/bin/llama-server && \
cp $(LLAMA_CPP_DIR)/build/bin/llama-server $@
touch $@

ifdef HAS_ROCM
llama.cpp/build/bin/llama-liquid-audio-server: llama.cpp
cd llama.cpp && \
$(LLAMA_CPP_DIR)/build/bin/llama-liquid-audio-server: $(LLAMA_CPP_DIR)
cd $(LLAMA_CPP_DIR) && \
cmake -B build $(CMAKE_ARGS) && \
cmake --build build --config Release -t llama-liquid-audio-server -j 8

llama-liquid-audio-server: ## Build llama-liquid-audio-server with ROCm/HIP (from PR #18641)
test -e $@ || $(MAKE) llama.cpp/build/bin/llama-liquid-audio-server && \
cp llama.cpp/build/bin/llama-liquid-audio-server $@
touch llama-liquid-audio-server
test -e $@ || $(MAKE) $(LLAMA_CPP_DIR)/build/bin/llama-liquid-audio-server && \
cp $(LLAMA_CPP_DIR)/build/bin/llama-liquid-audio-server $@
touch $@
endif


# ┌──────────────────────────────────────────────────────────┐
# │ Servers │
# └──────────────────────────────────────────────────────────┘

serve: llama-server ## Start FastAPI server
$(UV) run --frozen server.py
serve: $(LLAMA_SERVER) ## Start FastAPI server
AUDIO_PCM_FORMAT=$(AUDIO_PCM_FORMAT) $(UV) run --frozen server.py

audioserver: $(AUDIO_SERVER_TARGET) LFM2.5-Audio-1.5B-GGUF ## Start audio server
$(AUDIO_SERVER) \
$(AUDIO_SERVER_ENV) $(AUDIO_SERVER) \
-m LFM2.5-Audio-1.5B-GGUF/LFM2.5-Audio-1.5B-Q8_0.gguf \
-mm LFM2.5-Audio-1.5B-GGUF/mmproj-LFM2.5-Audio-1.5B-Q8_0.gguf \
-mv LFM2.5-Audio-1.5B-GGUF/vocoder-LFM2.5-Audio-1.5B-Q8_0.gguf \
--tts-speaker-file LFM2.5-Audio-1.5B-GGUF/tokenizer-LFM2.5-Audio-1.5B-Q8_0.gguf \
-t ${THREADS} --host 127.0.0.1 --port ${AUDIO_SERVER_PORT} &>/dev/null
$(GPU_FLAGS) -t ${THREADS} --host 127.0.0.1 --port ${AUDIO_SERVER_PORT} &>/dev/null


# ┌──────────────────────────────────────────────────────────┐
@@ -251,6 +280,10 @@ test-toolcall: ## Tool call with the string "play the next song"
# │ Utilities │
# └──────────────────────────────────────────────────────────┘

clean: ## Remove build artifacts (llama.cpp, binaries, runners)
rm -rf llama.cpp-rocm llama.cpp-cpu llama-server-rocm llama-server-cpu \
llama-liquid-audio-server llama-liquid-audio runners

UV_FROZEN_DEV = $(UV) run --only-group dev --frozen

lint: ## Lint and format python code
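Usage note: with these Makefile changes the build can be steered from the command line. CPU=1 forces the CPU-only path even when ROCm is present (for example, make CPU=1 serve), HIP_ARCH overrides the architecture auto-detected through rocm-smi (for example, make HIP_ARCH=gfx1150 llama-liquid-audio-server), and make clean removes the per-backend checkouts and binaries introduced above.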
80 changes: 80 additions & 0 deletions examples/audio-car-cockpit/check_system.sh
@@ -0,0 +1,80 @@
#!/usr/bin/env bash
set -euo pipefail

missing=0

check_command() {
local cmd="$1"
local install_hint="$2"
if command -v "$cmd" &>/dev/null; then
echo "[OK] $cmd is installed ($(command -v "$cmd"))"
else
echo "[MISSING] $cmd is not installed"
echo " Install suggestion: $install_hint"
missing=1
fi
}

echo "Checking prerequisites..."
echo

check_command "make" \
"sudo apt-get install -y make # Debian/Ubuntu
brew install make # macOS"

check_command "curl" \
"sudo apt-get install -y curl # Debian/Ubuntu
brew install curl # macOS"

# ── Radeon iGPU / ROCm checks ──

echo
has_radeon=0
if lspci 2>/dev/null | grep -qi 'vga.*amd\|display.*amd\|vga.*radeon\|display.*radeon'; then
has_radeon=1
gpu_name=$(lspci 2>/dev/null | grep -iE 'vga.*amd|display.*amd|vga.*radeon|display.*radeon' | head -1 | sed 's/.*: //')
echo "[OK] AMD Radeon GPU detected: $gpu_name"

# Check kernel driver is loaded (amdgpu)
if grep -q amdgpu /proc/modules 2>/dev/null; then
echo "[OK] amdgpu kernel driver is loaded"
else
echo "[MISSING] amdgpu kernel driver is NOT loaded"
echo " Install suggestion: sudo apt-get install -y linux-modules-extra-\$(uname -r)"
echo " Then reboot and verify with: lsmod | grep amdgpu"
missing=1
fi

# Check ROCm installation
if [ -d /opt/rocm ]; then
rocm_version=$(cat /opt/rocm/.info/version 2>/dev/null || echo "unknown")
echo "[OK] ROCm is installed at /opt/rocm (version: $rocm_version)"
else
echo "[MISSING] ROCm is not installed (no /opt/rocm found)"
echo " Install suggestion: https://rocm.docs.amd.com/projects/install-on-linux/en/latest/"
echo " Also install: sudo apt install -y libstdc++-14-dev"
missing=1
fi

# Check GPU architecture via rocminfo
if command -v rocminfo &>/dev/null; then
gfx_arch=$(rocminfo 2>/dev/null | grep -oP 'gfx\w+' | head -1 || true)
if [ -n "$gfx_arch" ]; then
echo "[OK] GPU architecture: $gfx_arch (pass HIP_ARCH=$gfx_arch to make)"
else
echo "[WARN] Could not determine GPU architecture from rocminfo"
fi
else
echo "[INFO] rocminfo not available — install ROCm to detect GPU architecture"
fi
else
echo "[INFO] No AMD Radeon GPU detected — will build in CPU-only mode"
fi

echo
if [ "$missing" -eq 0 ]; then
echo "All prerequisites are installed."
else
echo "Some prerequisites are missing. Please install them and re-run this script."
exit 1
fi
35 changes: 27 additions & 8 deletions examples/audio-car-cockpit/server.py
@@ -1,4 +1,6 @@
import base64
import os
import time
import webbrowser
from contextlib import asynccontextmanager
from pathlib import Path
@@ -108,6 +110,9 @@ async def websocket_endpoint(websocket: WebSocket):
async def websocket_audio_endpoint(websocket: WebSocket):
await websocket.accept()
audio_client = AsyncOpenAI(base_url=f"http://127.0.0.1:{p_env.AUDIO_SERVER_PORT}/v1", api_key="dummy")
audio_pcm_format = os.environ.get("AUDIO_PCM_FORMAT", "float32")

await websocket.send_json({"type": "config", "audio_pcm_format": audio_pcm_format})

voice = "US female"

@@ -123,6 +128,7 @@ async def websocket_audio_endpoint(websocket: WebSocket):
# Build messages based on mode
if mode == "asr":
print("\n[AUDIO] Starting ASR (Speech-to-Text)...")
t_start = time.perf_counter()
if wav_data is None:
continue
messages = [
@@ -169,10 +175,13 @@ async def websocket_audio_endpoint(websocket: WebSocket):
transcribed_text += _text_content
await websocket.send_json({"type": "text", "data": _text_content})

if hasattr(delta, "audio_chunk") and delta.audio_chunk:
chunk_data = delta.audio_chunk["data"]
# Send audio chunk immediately for low latency
await websocket.send_json({"type": "audio", "data": chunk_data, "sample_rate": 24000})
audio_data = getattr(delta, "audio", None) or getattr(delta, "audio_chunk", None)
if audio_data:
await websocket.send_json({
"type": "audio",
"data": audio_data["data"],
"sample_rate": audio_data.get("sample_rate", 24000),
})

# If ASR mode, process through tool calling and then TTS
if mode == "asr" and transcribed_text:
@@ -243,13 +252,23 @@ async def websocket_audio_endpoint(websocket: WebSocket):
max_tokens=512,
)

tts_first_audio = True
async for chunk in tts_stream:
delta = chunk.choices[0].delta

if hasattr(delta, "audio_chunk") and delta.audio_chunk:
chunk_data = delta.audio_chunk["data"]
# Send audio chunk immediately for low latency
await websocket.send_json({"type": "audio", "data": chunk_data, "sample_rate": 24000})
audio_data = getattr(delta, "audio", None) or getattr(delta, "audio_chunk", None)
if audio_data:
if tts_first_audio:
print(f"[AUDIO] Time to first audio byte: {(time.perf_counter() - t_start)*1000:.0f} ms")
tts_first_audio = False
await websocket.send_json({
"type": "audio",
"data": audio_data["data"],
"sample_rate": audio_data.get("sample_rate", 24000),
})

if mode == "asr" and transcribed_text:
print(f"[AUDIO] End-to-end latency: {(time.perf_counter() - t_start)*1000:.0f} ms")

await websocket.send_json({"type": "done"})

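Since the ROCm build now streams int16 PCM while the CPU build keeps float32 (see AUDIO_PCM_FORMAT in the Makefile), the frontend has to branch on the new "config" message before playing chunks back. A minimal sketch of that decode step, assuming the chunk payload is base64-encoded raw PCM and that numpy is available on the consuming side; the helper name is illustrative, not part of this PR:

    import base64

    import numpy as np  # assumed available where the chunks are decoded

    def decode_pcm_chunk(b64_data: str, pcm_format: str) -> np.ndarray:
        # Turn one websocket audio chunk into float32 samples in [-1.0, 1.0].
        raw = base64.b64decode(b64_data)
        if pcm_format == "int16":
            # ROCm path: 16-bit signed integer PCM, normalize to float
            return np.frombuffer(raw, dtype=np.int16).astype(np.float32) / 32768.0
        # CPU path: already float32 PCM
        return np.frombuffer(raw, dtype=np.float32)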
4 changes: 3 additions & 1 deletion examples/audio-car-cockpit/src/llamacpp_inference.py
@@ -61,7 +61,9 @@ def spawn_embedding_runtime(

port = find_available_port(preferred_port=8989)
host = "127.0.0.1"
executable = str((Path.cwd() / "llama-server").resolve())
cwd = Path.cwd()
candidates = [cwd / "llama-server-rocm", cwd / "llama-server-cpu", cwd / "llama-server"]
executable = str(next((p for p in candidates if p.exists()), candidates[-1]).resolve())

command = [
executable,