goodroot · manishEMS47 · Jun 8, 2026
@@ -7,7 +7,7 @@
 </p>
 
 <p align="center">
-    instant performance | Cohere / Parakeet / Whisper / Gemini / ElevenLabs / REST API | stylish visuals
+    instant performance | Cohere / Parakeet / Whisper / Gemini / ElevenLabs / 60db / REST API | stylish visuals
 </p>
 
  <p align="center">
@@ -28,7 +28,7 @@ https://github.com/user-attachments/assets/4c223e85-2916-494f-b7b1-766ce1bdc991
 - **GPU memory efficient** - Limit or zero memory usage easily, more for other local models
 - **onnx-asr for wild CPU speeds** - No GPU? Optimized for great speed on any hardware
 - **Translation** - Translate non-English to English with a single config
-- **REST API or websockets** - Secure, fast wires to top clouds like Gemini, ElevenLabs
+- **REST API or websockets** - Secure, fast wires to top clouds like Gemini, ElevenLabs, 60db
 - **Themed visualizer** - Visualizes your voice, will automatch Omarchy theme
 - **Word overides and prompts** - Custom hot keys, common words, and more
 - **Multi-lingual** - Great performance in many languages

@@ -172,7 +172,7 @@ Use a different hotkey for a specific language:
 
 > **Note**: Works with backends that support language parameters:
 > - **REST API**: Works if the endpoint accepts `language` in the request body
-> - **Realtime WebSocket**: Fully supported (OpenAI, Google, ElevenLabs)
+> - **Realtime WebSocket**: Fully supported (OpenAI, Google, ElevenLabs, 60db)
 > - **Local whisper models**: Fully supported (all pywhispercpp models)
 > - **Custom REST endpoints**: May not work if the endpoint doesn't accept a language parameter
 
@@ -289,7 +289,7 @@ For up-to-date accuracy rankings across open-source models, see the [Open ASR Le
 | faster-whisper | Local | NVIDIA or CPU | Fast | 99 | Very good | — |
 | whisper.cpp | Local | NVIDIA, AMD/Intel, CPU | Very fast | 99 | Very good | — |
 | REST API | Cloud | — | Varies | Varies | Varies | Cohere, OpenAI, Groq, Regolo |
-| Realtime WebSocket | Cloud | — | Real-time | Varies | Varies | Google Gemini, OpenAI, ElevenLabs |
+| Realtime WebSocket | Cloud | — | Real-time | Varies | Varies | Google Gemini, OpenAI, ElevenLabs, 60db |
 
 ---
 
@@ -691,6 +691,54 @@ Uses native 16kHz audio (no resampling) and auto-reconnects on connection drops.
 }
 ```
 
+#### 60db Realtime STT
+
+Realtime streaming transcription via [60db](https://60db.ai/), supporting 39 languages with optional speaker diarization.
+
+Bring an API key from your 60db account. The key is sent as the `?apiKey=` query parameter on the WebSocket connection; keys start with `sk_live_` or `sk_test_`.
+
+Uses native 16kHz audio (no resampling) and auto-reconnects on connection drops.
+
+- **transcribe** (default) - speech-to-text
+
+```jsonc
+{
+    "transcription_backend": "realtime-ws",
+    "websocket_provider": "60db",
+    "websocket_model": "60db-stt-realtime",
+    "realtime_timeout": 30,                  // Advanced: seconds to wait after stop for final transcript
+    "realtime_buffer_max_seconds": 5,        // Advanced: max unsent audio backlog (seconds) before dropping old chunks
+    "sixtydb_diarize": false,                // Advanced: enable speaker diarization (adds cost)
+    "sixtydb_utterance_end_ms": 500,         // Advanced: silence (ms) before an utterance is finalized (>=300)
+    "sixtydb_audio_enhancement": "adaptive"  // Advanced: "off", "light", or "adaptive"
+}
+```
+
+#### 60db Text-to-Speech (CLI)
+
+60db also provides text-to-speech. This is a standalone utility — it is **not** wired into the dictation pipeline (hyprwhspr never speaks on its own). It reuses the same 60db API key as the realtime STT setup above.
+
+List the voices available to your account:
+
+```bash
+hyprwhspr 60db voices
+```
+
+Synthesize text to a WAV file:
+
+```bash
+hyprwhspr 60db tts --text "Hello from hyprwhspr" --voice <VOICE_ID> --out hello.wav
+# Options: --sample-rate {8000,16000,24000,48000} (default 24000), --speed 0.5-2.0 (default 1.0)
+```
+
+Set a default voice so `--voice` can be omitted:
+
+```jsonc
+{
+    "sixtydb_tts_voice_id": "<VOICE_ID>"  // Default voice for `hyprwhspr 60db tts`
+}
+```
+
 ## Audio and visual feedback
 
 ### Themed visualizer

@@ -37,6 +37,7 @@
     keyboard_command,
     record_command,
     record_capture_command,
+    sixtydb_command,
 )
 
 
@@ -178,6 +179,22 @@ def main():
                                        help='Language code for transcription (e.g., en, it, de)')
     record_subparsers.add_parser('status', help='Show current recording status')
 
+    # 60db command (text-to-speech utilities)
+    sixtydb_parser = subparsers.add_parser('60db', help='60db text-to-speech utilities')
+    sixtydb_subparsers = sixtydb_parser.add_subparsers(dest='sixtydb_action', help='60db actions')
+    sixtydb_subparsers.add_parser('voices', help='List voices available to your 60db account')
+    sixtydb_tts_parser = sixtydb_subparsers.add_parser('tts', help='Synthesize text to a WAV file')
+    sixtydb_tts_parser.add_argument('--text', required=True, help='Text to synthesize')
+    sixtydb_tts_parser.add_argument('--voice', dest='voice', metavar='VOICE_ID',
+                                    help='Voice ID (default: sixtydb_tts_voice_id from config)')
+    sixtydb_tts_parser.add_argument('--out', metavar='PATH', default='tts-output.wav',
+                                    help='Output WAV path (default: tts-output.wav)')
+    sixtydb_tts_parser.add_argument('--sample-rate', dest='sample_rate', type=int, default=24000,
+                                    choices=[8000, 16000, 24000, 48000],
+                                    help='Output sample rate in Hz (default: 24000)')
+    sixtydb_tts_parser.add_argument('--speed', type=float, default=1.0,
+                                    help='Speech speed multiplier 0.5-2.0 (default: 1.0)')
+
     # backend command
     backend_parser = subparsers.add_parser('backend', help='Backend management')
     backend_subparsers = backend_parser.add_subparsers(dest='backend_action', help='Backend actions')
@@ -312,6 +329,11 @@ def main():
                 record_capture_command(language=getattr(args, 'language', None))
             else:
                 record_command(args.record_action, language=getattr(args, 'language', None))
+        elif args.command == '60db':
+            if not getattr(args, 'sixtydb_action', None):
+                sixtydb_parser.print_help()
+                sys.exit(1)
+            sixtydb_command(args.sixtydb_action, args=args)
         elif args.command == 'uninstall':
             uninstall_command(
                 keep_models=getattr(args, 'keep_models', False),

@@ -5788,3 +5788,124 @@ def record_capture_command(language: str = None):
     except OSError as e:
         log_error(f"Capture socket error: {e}")
         sys.exit(1)
+
+
+def sixtydb_command(action: str, args=None):
+    """
+    60db text-to-speech utilities (independent of the dictation pipeline).
+
+    Actions:
+        voices  - list the voices available to your 60db account
+        tts     - synthesize text to a WAV file
+
+    Args:
+        action: Sub-action ('voices' or 'tts')
+        args:   Parsed argparse namespace (used by 'tts')
+    """
+    api_key = get_credential('60db')
+    if not api_key:
+        log_error("No 60db API key found.")
+        log_info("Add one with the realtime setup, or store it directly:")
+        log_info("  hyprwhspr setup   (choose realtime-ws -> 60db)")
+        sys.exit(1)
+
+    if action == 'voices':
+        _sixtydb_voices(api_key)
+    elif action == 'tts':
+        _sixtydb_tts(api_key, args)
+    else:
+        log_error(f"Unknown 60db action: {action}")
+        sys.exit(1)
+
+
+def _sixtydb_voices(api_key: str):
+    """List the caller's 60db voices in a table."""
+    try:
+        from .sixtydb_tts_client import list_my_voices, SixtyDbTTSError
+    except ImportError:
+        from sixtydb_tts_client import list_my_voices, SixtyDbTTSError
+
+    try:
+        voices = list_my_voices(api_key)
+    except SixtyDbTTSError as e:
+        log_error(f"Failed to list voices: {e}")
+        sys.exit(1)
+    except Exception as e:
+        log_error(f"Failed to list voices: {e}")
+        sys.exit(1)
+
+    if not voices:
+        log_warning("No voices found for this 60db account.")
+        return
+
+    console = Console()
+    table = Table(title="60db Voices")
+    table.add_column("Voice ID", style="cyan", no_wrap=True)
+    table.add_column("Name", style="green")
+    table.add_column("Category")
+    table.add_column("Model")
+    table.add_column("Language")
+    table.add_column("Gender")
+
+    for v in voices:
+        labels = v.get('labels') or {}
+        table.add_row(
+            str(v.get('voice_id', '')),
+            str(v.get('name', '')),
+            str(v.get('category', '')),
+            str(v.get('model', '')),
+            str(labels.get('language_name') or labels.get('language') or ''),
+            str(labels.get('gender', '')),
+        )
+
+    console.print(table)
+
+
+def _sixtydb_tts(api_key: str, args):
+    """Synthesize text to a WAV file via the 60db WebSocket TTS API."""
+    try:
+        from .sixtydb_tts_client import SixtyDbTTSClient, pcm16_to_wav, SixtyDbTTSError
+    except ImportError:
+        from sixtydb_tts_client import SixtyDbTTSClient, pcm16_to_wav, SixtyDbTTSError
+
+    text = getattr(args, 'text', None)
+    voice_id = getattr(args, 'voice', None)
+    out_path = getattr(args, 'out', None) or 'tts-output.wav'
+    sample_rate = int(getattr(args, 'sample_rate', None) or 24000)
+    speed = float(getattr(args, 'speed', None) or 1.0)
+
+    if not text:
+        log_error("--text is required")
+        sys.exit(1)
+
+    # Fall back to a configured default voice if one isn't supplied.
+    if not voice_id:
+        try:
+            voice_id = ConfigManager().get_setting('sixtydb_tts_voice_id', None)
+        except Exception:
+            voice_id = None
+    if not voice_id:
+        log_error("--voice is required (no sixtydb_tts_voice_id configured).")
+        log_info("List available voices with: hyprwhspr 60db voices")
+        sys.exit(1)
+
+    client = SixtyDbTTSClient(api_key)
+    try:
+        log_info(f"Synthesizing {len(text)} chars with voice {voice_id}...")
+        pcm = client.synthesize(text, voice_id, sample_rate=sample_rate, speed=speed)
+    except SixtyDbTTSError as e:
+        log_error(f"TTS failed: {e}")
+        sys.exit(1)
+    except Exception as e:
+        log_error(f"TTS failed: {e}")
+        sys.exit(1)
+
+    wav_bytes = pcm16_to_wav(pcm, sample_rate=sample_rate)
+    try:
+        Path(out_path).write_bytes(wav_bytes)
+    except OSError as e:
+        log_error(f"Failed to write {out_path}: {e}")
+        sys.exit(1)
+
+    duration = len(pcm) / 2 / sample_rate  # 16-bit mono
+    log_success(f"Wrote {out_path} ({len(wav_bytes)} bytes, ~{duration:.1f}s audio)")
@@ -140,6 +140,27 @@
                 'hidden': True
             }
         }
+    },
+    '60db': {
+        'name': '60db',
+        'endpoint': 'https://api.60db.ai/stt',
+        'websocket_endpoint': 'wss://api.60db.ai/ws/stt',
+        # 60db authenticates the WebSocket via an ?apiKey= query param (REST uses
+        # an Authorization: Bearer header). Keys are prefixed 'sk_live_' / 'sk_test_',
+        # but we don't enforce a prefix to stay permissive across key types.
+        'api_key_prefix': None,
+        'api_key_description': '60db API key (from app.60db.ai, starts with sk_live_)',
+        'models': {
+            '60db-stt-realtime': {
+                'name': '60db Realtime STT',
+                'description': 'Realtime streaming transcription, 39 languages, optional diarization',
+                # 60db's STT WebSocket selects models server-side from languages/config,
+                # so there is no model_id to send. Kept for the REST batch endpoint.
+                'body': {},
+                'realtime_model': True,
+                'hidden': True
+            }
+        }
     }
 }