-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdictate.py
More file actions
executable file
·184 lines (151 loc) · 4.69 KB
/
dictate.py
File metadata and controls
executable file
·184 lines (151 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "faster-whisper>=1.0.0",
# "pyaudio>=0.2.13",
# "numpy>=1.24.0",
# ]
# ///
"""Dictate - Speech-to-text with agent processing.
Toggle recording with keyboard shortcut (via GNOME keybinding).
First invocation starts recording, second stops and processes.
"""
import argparse
import os
import signal
import sys
import time
PID_FILE = "/tmp/dictate.pid"
def parse_args() -> argparse.Namespace:
"""Parse command-line arguments."""
parser = argparse.ArgumentParser(
description="Speech-to-text with agent processing"
)
parser.add_argument(
"-l", "--language",
default=None,
help="Language code (e.g., 'de' for German). Default: en",
)
parser.add_argument(
"-r", "--raw",
action="store_true",
help="Skip Claude processing, output raw transcription",
)
parser.add_argument(
"-t", "--type",
action="store_true",
help="Type text at cursor (in addition to clipboard)",
)
return parser.parse_args()
def is_running() -> int | None:
"""Check if another instance is recording.
Returns:
PID of running instance, or None if not running.
"""
if os.path.exists(PID_FILE):
try:
with open(PID_FILE) as f:
pid = int(f.read().strip())
# Check if process exists
os.kill(pid, 0)
return pid
except (OSError, ValueError):
# Process doesn't exist or invalid PID - clean up stale file
try:
os.unlink(PID_FILE)
except OSError:
pass
return None
def cleanup_pid_file() -> None:
    """Delete the PID file, silently ignoring any failure to do so."""
    try:
        os.remove(PID_FILE)
    except OSError:
        # Best effort: the file may already be gone or not be removable.
        return
def main() -> int:
    """Main entry point.

    Acts as a toggle. If another instance is already recording, send it
    SIGUSR1 so it stops, then exit. Otherwise become the recording
    instance: publish our PID, record until SIGUSR1 arrives, transcribe,
    optionally post-process the text with the agent, copy the result to the
    clipboard and optionally type it.

    Returns:
        0 on success or after signalling a running instance; 1 when the
        recording is too short, no speech is detected, or any step raises.
    """
    args = parse_args()

    # Check if another instance is running
    existing_pid = is_running()
    if existing_pid:
        # Signal existing process to stop recording
        try:
            os.kill(existing_pid, signal.SIGUSR1)
        except OSError:
            # The process went away between the check and the signal;
            # there is nothing left to stop.
            pass
        return 0

    # Import here to avoid slow startup when just signaling
    from dictate.agent import Agent
    from dictate.clipboard import copy_to_clipboard, type_text
    from dictate.config import Config
    from dictate.notifier import Notifier
    from dictate.recorder import Recorder
    from dictate.transcriber import Transcriber

    # Track if we should stop
    should_stop = False

    def handle_stop_signal(signum, frame):
        nonlocal should_stop
        should_stop = True

    # Install the handler BEFORE publishing our PID. Once the PID file
    # exists another invocation may send SIGUSR1 at any moment, and without
    # a handler the default action for SIGUSR1 terminates the process.
    signal.signal(signal.SIGUSR1, handle_stop_signal)

    recorder = None
    try:
        # Write PID file inside the try block so the finally clause removes
        # it on every exit path, including a failure while writing it.
        with open(PID_FILE, "w") as f:
            f.write(str(os.getpid()))

        # Load configuration
        config = Config.load()

        # Reset notification ID for fresh session
        Notifier.reset()

        # Start recording
        Notifier.recording()
        recorder = Recorder()
        recorder.start(device_index=config.audio_device)

        # Wait for stop signal; the handler flips should_stop.
        while not should_stop:
            time.sleep(0.1)

        # Stop recording and get audio
        audio = recorder.stop()

        # Check if recording is too short
        if len(audio) < 16000:  # Less than 1 second at 16kHz
            Notifier.error("Recording too short")
            return 1

        # Transcribe
        Notifier.transcribing()
        transcriber = Transcriber(
            model_size=config.model_size,
            device=config.device,
            compute_type=config.compute_type,
        )
        # Command-line language wins; otherwise use the configured one.
        language = args.language or config.language
        text = transcriber.transcribe(audio, language=language)

        if not text.strip():
            Notifier.error("No speech detected")
            return 1

        # Process with Claude (unless --raw)
        if args.raw:
            processed = text
        else:
            Notifier.processing()
            agent = Agent(prompt_template=config.prompt_template)
            try:
                processed = agent.process(text)
            except Exception as e:
                # Fallback to raw transcription if Claude fails
                processed = text
                Notifier.notify(
                    "Warning",
                    f"Claude failed, using raw text: {e}",
                    "dialog-warning",
                )

        # Copy to clipboard
        copy_to_clipboard(processed)
        if args.type:
            type_text(processed)

        Notifier.done(processed)
        return 0
    except Exception as e:
        # Top-level boundary: surface the failure as a notification and a
        # non-zero exit code instead of a traceback.
        Notifier.error(str(e))
        return 1
    finally:
        if recorder:
            recorder.terminate()
        cleanup_pid_file()
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)