software-mansion · IgorSwat · May 7, 2026 · May 14, 2026 · May 7, 2026 · May 10, 2026
diff --git a/apps/llm/package.json b/apps/llm/package.json
@@ -28,7 +28,7 @@
     "metro-config": "^0.83.0",
     "react": "19.2.5",
     "react-native": "0.83.4",
-    "react-native-audio-api": "0.12.0",
+    "react-native-audio-api": "0.12.2",
     "react-native-device-info": "^15.0.2",
     "react-native-executorch": "workspace:*",
     "react-native-executorch-expo-resource-fetcher": "workspace:*",

diff --git a/apps/speech/App.tsx b/apps/speech/App.tsx
@@ -2,6 +2,7 @@ import React, { useState } from 'react';
 import { View, Text, StyleSheet, TouchableOpacity } from 'react-native';
 import { TextToSpeechScreen } from './screens/TextToSpeechScreen';
 import { SpeechToTextScreen } from './screens/SpeechToTextScreen';
+import { VoiceActivityDetectionScreen } from './screens/VoiceActivityDetectionScreen';
 import ColorPalette from './colors';
 import ExecutorchLogo from './assets/executorch.svg';
 import { Quiz } from './screens/Quiz';
@@ -15,7 +16,12 @@ initExecutorch({
 
 export default function App() {
   const [currentScreen, setCurrentScreen] = useState<
-    'menu' | 'speech-to-text' | 'text-to-speech' | 'quiz' | 'text-to-speech-llm'
+    | 'menu'
+    | 'speech-to-text'
+    | 'text-to-speech'
+    | 'quiz'
+    | 'text-to-speech-llm'
+    | 'vad'
   >('menu');
 
   const goToMenu = () => setCurrentScreen('menu');
@@ -28,6 +34,10 @@ export default function App() {
     return <SpeechToTextScreen onBack={goToMenu} />;
   }
 
+  if (currentScreen === 'vad') {
+    return <VoiceActivityDetectionScreen onBack={goToMenu} />;
+  }
+
   if (currentScreen === 'quiz') {
     return <Quiz onBack={goToMenu} />;
   }
@@ -47,6 +57,12 @@ export default function App() {
         >
           <Text style={styles.buttonText}>Speech to Text</Text>
         </TouchableOpacity>
+        <TouchableOpacity
+          style={styles.button}
+          onPress={() => setCurrentScreen('vad')}
+        >
+          <Text style={styles.buttonText}>Voice Activity Detection</Text>
+        </TouchableOpacity>
         <TouchableOpacity
           style={styles.button}
           onPress={() => setCurrentScreen('text-to-speech')}

diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx
@@ -9,6 +9,7 @@
   KeyboardAvoidingView,
   Platform,
   Switch,
+  Keyboard,
 } from 'react-native';
 import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
 import {
@@ -21,6 +22,7 @@
   WHISPER_SMALL_EN_COREML,
   TranscriptionResult,
   SpeechToTextProps,
+  FSMN_VAD,
 } from 'react-native-executorch';
 import { ModelPicker, ModelOption } from '../components/ModelPicker';
 
@@ -50,14 +52,17 @@
 const isSimulator = DeviceInfo.isEmulatorSync();
 
 const DEFAULT_MODEL =
-  Platform.OS === 'ios' ? WHISPER_BASE_EN_COREML : WHISPER_TINY_EN;
+  Platform.OS === 'ios' && !isSimulator
+    ? WHISPER_BASE_EN_COREML
+    : WHISPER_TINY_EN;
 
 export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
   const [selectedModel, setSelectedModel] =
     useState<STTModelSources>(DEFAULT_MODEL);
 
   const model = useSpeechToText({
     model: selectedModel,
+    vad: FSMN_VAD,
   });
 
   const [transcription, setTranscription] =
@@ -72,6 +77,7 @@
   } | null>(null);
 
   const [enableTimestamps, setEnableTimestamps] = useState(false);
+  const [useVAD, setUseVAD] = useState(true);
   const [error, setError] = useState<string | null>(null);
   const [audioURL, setAudioURL] = useState('');
   const [hasMicPermission, setHasMicPermission] = useState(false);
@@ -111,11 +117,15 @@
   }
 
   const handleTranscribeFromURL = async () => {
-    if (!audioURL.trim()) {
-      console.warn('Please provide a valid audio file URL');
+    if (!audioURL.trim() || model.isGenerating) {
+      if (!audioURL.trim()) {
+        console.warn('Please provide a valid audio file URL');
+      }
       return;
     }
 
+    Keyboard.dismiss();
+
     const uri = await getAudioFile(audioURL);
     // Reset previous states
     setTranscription(null);
@@ -139,8 +149,10 @@
   };
 
   const handleStartTranscribeFromMicrophone = async () => {
-    if (!hasMicPermission) {
-      setError('Microphone permission denied. Please enable it in Settings.');
+    if (!hasMicPermission || model.isGenerating || liveTranscribing) {
+      if (!hasMicPermission) {
+        setError('Microphone permission denied. Please enable it in Settings.');
+      }
       return;
     }
 
@@ -185,7 +197,9 @@
     try {
       const streamIter = model.stream({
         verbose: enableTimestamps,
-        timeout: 100,
+        timeout: 200,
+        useVAD: useVAD,
+        vadDetectionMargin: 1200,
       });
 
       for await (const { committed, nonCommitted } of streamIter) {
@@ -359,22 +373,63 @@
                 <Text style={styles.buttonText}> Stop Live Transcription</Text>
               </TouchableOpacity>
             ) : (
-              <TouchableOpacity
-                disabled={recordingButtonDisabled}
-                onPress={handleStartTranscribeFromMicrophone}
-                style={[
-                  styles.liveTranscriptionButton,
-                  styles.backgroundBlue,
-                  recordingButtonDisabled && styles.disabled,
-                ]}
-              >
-                <FontAwesome name="microphone" size={20} color="white" />
-                <Text style={styles.buttonText}>
-                  {isSimulator
-                    ? 'Recording is not available on Simulator'
-                    : 'Start Live Transcription'}
-                </Text>
-              </TouchableOpacity>
+              <View style={styles.buttonRow}>
+                <TouchableOpacity
+                  disabled={recordingButtonDisabled}
+                  onPress={handleStartTranscribeFromMicrophone}
+                  style={[
+                    styles.liveTranscriptionButton,
+                    styles.backgroundBlue,
+                    styles.flex1,
+                    recordingButtonDisabled && styles.disabled,
+                  ]}
+                >
+                  <FontAwesome name="microphone" size={20} color="white" />
+                  <Text style={styles.buttonText}>
+                    {isSimulator ? 'No Mic' : 'Start Live'}
+                  </Text>
+                </TouchableOpacity>
+
+                {/* VAD toggle. It is disabled together with the record button so
+                    the dimmed styling matches what the control actually does. */}
+                <TouchableOpacity
+                  disabled={recordingButtonDisabled}
+                  onPress={() => setUseVAD((enabled) => !enabled)}
+                  activeOpacity={0.7}
+                  accessibilityRole="switch"
+                  accessibilityState={{ checked: useVAD }}
+                  accessibilityLabel={`Voice Activity Detection ${useVAD ? 'on' : 'off'}`}
+                  style={[
+                    styles.vadButton,
+                    useVAD ? styles.vadActive : styles.vadInactive,
+                    recordingButtonDisabled && styles.disabled,
+                  ]}
+                >
+                  <FontAwesome
+                    name={useVAD ? 'check-circle' : 'circle-o'}
+                    size={18}
+                    color={useVAD ? '#ffffff' : '#94a3b8'}
+                  />
+                  <View style={styles.vadTextContainer}>
+                    <Text
+                      style={[
+                        styles.vadButtonLabel,
+                        { color: useVAD ? 'white' : '#64748b' },
+                      ]}
+                    >
+                      VAD
+                    </Text>
+                    <Text
+                      style={[
+                        styles.vadButtonState,
+                        { color: useVAD ? '#bbf7d0' : '#94a3b8' },
+                      ]}
+                    >
+                      {useVAD ? 'ON' : 'OFF'}
+                    </Text>
+                  </View>
+                </TouchableOpacity>
+              </View>
             )}
           </View>
         </KeyboardAvoidingView>
@@ -498,6 +550,42 @@
   backgroundBlue: {
     backgroundColor: '#0f186e',
   },
+  buttonRow: {
+    flexDirection: 'row',
+    gap: 8,
+    marginTop: 12,
+  },
+  flex1: {
+    flex: 1,
+    marginTop: 0,
+  },
+  vadButton: {
+    flexDirection: 'row',
+    alignItems: 'center',
+    justifyContent: 'center',
+    paddingHorizontal: 14,
+    borderRadius: 12,
+    gap: 10,
+  },
+  vadActive: {
+    backgroundColor: '#0f186e',
+  },
+  vadInactive: {
+    backgroundColor: '#f1f5f9',
+  },
+  vadTextContainer: {
+    alignItems: 'flex-start',
+  },
+  vadButtonLabel: {
+    fontWeight: '800',
+    fontSize: 13,
+    letterSpacing: 0.5,
+  },
+  vadButtonState: {
+    fontWeight: '700',
+    fontSize: 10,
+    letterSpacing: 1,
+  },
   disabled: {
     opacity: 0.5,
   },

diff --git a/apps/speech/screens/TextToSpeechScreen.tsx b/apps/speech/screens/TextToSpeechScreen.tsx
@@ -7,6 +7,7 @@ import {
   TextInput,
   KeyboardAvoidingView,
   Platform,
+  Keyboard,
 } from 'react-native';
 import { SafeAreaProvider, SafeAreaView } from 'react-native-safe-area-context';
 import {
@@ -141,6 +142,7 @@ export const TextToSpeechScreen = ({ onBack }: { onBack: () => void }) => {
       return;
     }
 
+    Keyboard.dismiss();
     setIsPlaying(true);
 
     try {