Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/llm/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"metro-config": "^0.83.0",
"react": "19.2.5",
"react-native": "0.83.4",
"react-native-audio-api": "0.12.0",
"react-native-audio-api": "0.12.2",
"react-native-device-info": "^15.0.2",
"react-native-executorch": "workspace:*",
"react-native-executorch-expo-resource-fetcher": "workspace:*",
Expand Down
18 changes: 17 additions & 1 deletion apps/speech/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import React, { useState } from 'react';
import { View, Text, StyleSheet, TouchableOpacity } from 'react-native';
import { TextToSpeechScreen } from './screens/TextToSpeechScreen';
import { SpeechToTextScreen } from './screens/SpeechToTextScreen';
import { VoiceActivityDetectionScreen } from './screens/VoiceActivityDetectionScreen';
import ColorPalette from './colors';
import ExecutorchLogo from './assets/executorch.svg';
import { Quiz } from './screens/Quiz';
Expand All @@ -15,7 +16,12 @@ initExecutorch({

export default function App() {
const [currentScreen, setCurrentScreen] = useState<
'menu' | 'speech-to-text' | 'text-to-speech' | 'quiz' | 'text-to-speech-llm'
| 'menu'
| 'speech-to-text'
| 'text-to-speech'
| 'quiz'
| 'text-to-speech-llm'
| 'vad'
>('menu');

const goToMenu = () => setCurrentScreen('menu');
Expand All @@ -28,6 +34,10 @@ export default function App() {
return <SpeechToTextScreen onBack={goToMenu} />;
}

if (currentScreen === 'vad') {
return <VoiceActivityDetectionScreen onBack={goToMenu} />;
}

if (currentScreen === 'quiz') {
return <Quiz onBack={goToMenu} />;
}
Expand All @@ -47,6 +57,12 @@ export default function App() {
>
<Text style={styles.buttonText}>Speech to Text</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.button}
onPress={() => setCurrentScreen('vad')}
>
<Text style={styles.buttonText}>Voice Activity Detection</Text>
</TouchableOpacity>
<TouchableOpacity
style={styles.button}
onPress={() => setCurrentScreen('text-to-speech')}
Expand Down
132 changes: 110 additions & 22 deletions apps/speech/screens/SpeechToTextScreen.tsx
Comment thread
IgorSwat marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
KeyboardAvoidingView,
Platform,
Switch,
Keyboard,
} from 'react-native';
import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
import {
Expand All @@ -21,6 +22,7 @@
WHISPER_SMALL_EN_COREML,
TranscriptionResult,
SpeechToTextProps,
FSMN_VAD,
} from 'react-native-executorch';
import { ModelPicker, ModelOption } from '../components/ModelPicker';

Expand Down Expand Up @@ -50,14 +52,17 @@
const isSimulator = DeviceInfo.isEmulatorSync();

const DEFAULT_MODEL =
Platform.OS === 'ios' ? WHISPER_BASE_EN_COREML : WHISPER_TINY_EN;
Platform.OS === 'ios' && !isSimulator
? WHISPER_BASE_EN_COREML
: WHISPER_TINY_EN;

export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
const [selectedModel, setSelectedModel] =
useState<STTModelSources>(DEFAULT_MODEL);

const model = useSpeechToText({
model: selectedModel,
vad: FSMN_VAD,
});

const [transcription, setTranscription] =
Expand All @@ -72,6 +77,7 @@
} | null>(null);

const [enableTimestamps, setEnableTimestamps] = useState(false);
const [useVAD, setUseVAD] = useState(true);
const [error, setError] = useState<string | null>(null);
const [audioURL, setAudioURL] = useState('');
const [hasMicPermission, setHasMicPermission] = useState(false);
Expand Down Expand Up @@ -111,11 +117,15 @@
}

const handleTranscribeFromURL = async () => {
if (!audioURL.trim()) {
console.warn('Please provide a valid audio file URL');
if (!audioURL.trim() || model.isGenerating) {
if (!audioURL.trim()) {
console.warn('Please provide a valid audio file URL');
}
return;
}

Keyboard.dismiss();

const uri = await getAudioFile(audioURL);
// Reset previous states
setTranscription(null);
Expand All @@ -139,8 +149,10 @@
};

const handleStartTranscribeFromMicrophone = async () => {
if (!hasMicPermission) {
setError('Microphone permission denied. Please enable it in Settings.');
if (!hasMicPermission || model.isGenerating || liveTranscribing) {
if (!hasMicPermission) {
setError('Microphone permission denied. Please enable it in Settings.');
}
return;
}

Expand Down Expand Up @@ -185,7 +197,9 @@
try {
const streamIter = model.stream({
verbose: enableTimestamps,
timeout: 100,
timeout: 200,
useVAD: useVAD,
vadDetectionMargin: 1200,
});

for await (const { committed, nonCommitted } of streamIter) {
Expand Down Expand Up @@ -359,22 +373,60 @@
<Text style={styles.buttonText}> Stop Live Transcription</Text>
</TouchableOpacity>
) : (
<TouchableOpacity
disabled={recordingButtonDisabled}
onPress={handleStartTranscribeFromMicrophone}
style={[
styles.liveTranscriptionButton,
styles.backgroundBlue,
recordingButtonDisabled && styles.disabled,
]}
>
<FontAwesome name="microphone" size={20} color="white" />
<Text style={styles.buttonText}>
{isSimulator
? 'Recording is not available on Simulator'
: 'Start Live Transcription'}
</Text>
</TouchableOpacity>
<View style={styles.buttonRow}>
<TouchableOpacity
disabled={recordingButtonDisabled}
onPress={handleStartTranscribeFromMicrophone}
style={[
styles.liveTranscriptionButton,
styles.backgroundBlue,
styles.flex1,
recordingButtonDisabled && styles.disabled,
]}
>
<FontAwesome name="microphone" size={20} color="white" />
<Text style={styles.buttonText}>
{isSimulator ? 'No Mic' : 'Start Live'}
</Text>
</TouchableOpacity>

<TouchableOpacity
onPress={() => setUseVAD(!useVAD)}
activeOpacity={0.7}
accessibilityRole="switch"
accessibilityState={{ checked: useVAD }}
accessibilityLabel={`Voice Activity Detection ${useVAD ? 'on' : 'off'}`}
style={[
styles.vadButton,
useVAD ? styles.vadActive : styles.vadInactive,
recordingButtonDisabled && styles.disabled,
]}
>
<FontAwesome
name={useVAD ? 'check-circle' : 'circle-o'}
size={18}
color={useVAD ? '#ffffff' : '#94a3b8'}
/>
<View style={styles.vadTextContainer}>
<Text
style={[
styles.vadButtonLabel,
{ color: useVAD ? 'white' : '#64748b' },

// Lint warning (GitHub Actions / lint) on line 414 of apps/speech/screens/SpeechToTextScreen.tsx — Inline style: { color: "useVAD ? 'white' : '#64748b'" }
]}
>
VAD
</Text>
<Text
style={[
styles.vadButtonState,
{ color: useVAD ? '#bbf7d0' : '#94a3b8' },

// Lint warning (GitHub Actions / lint) on line 422 of apps/speech/screens/SpeechToTextScreen.tsx — Inline style: { color: "useVAD ? '#bbf7d0' : '#94a3b8'" }
]}
>
{useVAD ? 'ON' : 'OFF'}
</Text>
</View>
</TouchableOpacity>
</View>
)}
</View>
</KeyboardAvoidingView>
Expand Down Expand Up @@ -498,6 +550,42 @@
backgroundBlue: {
backgroundColor: '#0f186e',
},
buttonRow: {
flexDirection: 'row',
gap: 8,
marginTop: 12,
},
flex1: {
flex: 1,
marginTop: 0,
},
vadButton: {
flexDirection: 'row',
alignItems: 'center',
justifyContent: 'center',
paddingHorizontal: 14,
borderRadius: 12,
gap: 10,
},
vadActive: {
backgroundColor: '#0f186e',
},
vadInactive: {
backgroundColor: '#f1f5f9',
},
vadTextContainer: {
alignItems: 'flex-start',
},
vadButtonLabel: {
fontWeight: '800',
fontSize: 13,
letterSpacing: 0.5,
},
vadButtonState: {
fontWeight: '700',
fontSize: 10,
letterSpacing: 1,
},
disabled: {
opacity: 0.5,
},
Expand Down
2 changes: 2 additions & 0 deletions apps/speech/screens/TextToSpeechScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
TextInput,
KeyboardAvoidingView,
Platform,
Keyboard,
} from 'react-native';
import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
import {
Expand Down Expand Up @@ -141,6 +142,7 @@ export const TextToSpeechScreen = ({ onBack }: { onBack: () => void }) => {
return;
}

Keyboard.dismiss();
setIsPlaying(true);

try {
Expand Down
Loading
Loading