Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions apps/llm/app/llm/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
View,
} from 'react-native';
import SendIcon from '../../assets/icons/send_icon.svg';
import { useLLM, LLAMA3_2_1B_SPINQUANT } from 'react-native-executorch';
import { useLLM, QWEN3_0_6B_QUANTIZED } from 'react-native-executorch';
import { ModelPicker } from '../../components/ModelPicker';
import { LLM_MODELS, LLMModelSources } from '../../components/llmModels';
import PauseIcon from '../../assets/icons/pause_icon.svg';
Expand Down Expand Up @@ -42,9 +42,8 @@ function LLMScreen() {
const { bottom } = useSafeAreaInsets();
const [isTextInputFocused, setIsTextInputFocused] = useState(false);
const [userInput, setUserInput] = useState('');
const [selectedModel, setSelectedModel] = useState<LLMModelSources>(
LLAMA3_2_1B_SPINQUANT
);
const [selectedModel, setSelectedModel] =
useState<LLMModelSources>(QWEN3_0_6B_QUANTIZED);
const textInputRef = useRef<TextInput>(null);
const { setGlobalGenerating } = useContext(GeneratingContext);

Expand Down Expand Up @@ -76,6 +75,7 @@ function LLMScreen() {
}
};

console.log(llm.messageHistory)
return !llm.isReady && !llm.error ? (
<Spinner
visible={true}
Expand Down
237 changes: 223 additions & 14 deletions apps/llm/app/multimodal_llm/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ import {
View,
} from 'react-native';
import { launchImageLibrary } from 'react-native-image-picker';
import {
AudioManager,
AudioRecorder,
AudioContext,
} from 'react-native-audio-api';
import { useIsFocused } from '@react-navigation/native';
import { useSafeAreaInsets } from 'react-native-safe-area-context';
import { useLLM, LFM2_5_VL_1_6B_QUANTIZED } from 'react-native-executorch';
Expand All @@ -29,6 +34,7 @@ const SUGGESTED_PROMPTS = [
'Describe this scene in detail',
'What objects can you see?',
'What text appears in this image?',
'Transcribe the audio?',
];
import { useLLMStats } from '../../hooks/useLLMStats';
import { StatsBar } from '../../components/StatsBar';
Expand All @@ -46,7 +52,15 @@ function MultimodalLLMScreen() {
const textInputRef = useRef<TextInput>(null);
const { setGlobalGenerating } = useContext(GeneratingContext);

// Pending audio attachment: mono PCM samples (channel 0 only) and the label
// rendered in the attachment strip. Both are null when nothing is attached.
const [audioBuffer, setAudioBuffer] = useState<Float32Array | null>(null);
const [audioLabel, setAudioLabel] = useState<string | null>(null);
// Contents of the "Audio URL" text field and whether a load from it is in flight.
const [audioUrl, setAudioUrl] = useState('');
const [isFetchingAudio, setIsFetchingAudio] = useState(false);
// Microphone capture state: whether a recording is running and whether the
// recording permission request came back as 'Granted'.
const [isRecording, setIsRecording] = useState(false);
const [hasMicPermission, setHasMicPermission] = useState(false);
// `useRef(new AudioRecorder())` would evaluate its argument on every render and
// throw all but the first instance away. A lazy state initializer runs once, so
// exactly one recorder is constructed; it is still exposed as `recorder.current`.
const [recorderInstance] = useState(() => new AudioRecorder());
const recorder = useRef(recorderInstance);
// Chunks delivered by the recorder callback, concatenated when recording stops.
const recordChunks = useRef<Float32Array[]>([]);

// Most recent error message (model, audio and send failures all report here).
const [error, setError] = useState<string | null>(null);

const vlm = useLLM({
Expand All @@ -68,6 +82,87 @@ function MultimodalLLMScreen() {
if (vlm.error) setError(String(vlm.error));
}, [vlm.error]);

// One-time audio setup on mount: configure the audio session options, then ask
// for recording permission and remember whether it was granted.
useEffect(() => {
  AudioManager.setAudioSessionOptions({
    iosCategory: 'playAndRecord',
    iosMode: 'spokenAudio',
    iosOptions: ['allowBluetoothHFP', 'defaultToSpeaker'],
  });

  const requestMicPermission = async () => {
    try {
      const status = await AudioManager.requestRecordingPermissions();
      setHasMicPermission(status === 'Granted');
    } catch (e) {
      // A rejected request used to surface only as an unhandled promise
      // rejection; treat it as "not granted" and report it like other errors.
      setHasMicPermission(false);
      setError(e instanceof Error ? e.message : String(e));
    }
  };

  // Fire-and-forget: effects cannot be async, and failures are handled above.
  void requestMicPermission();
}, []);

/**
 * Decodes the audio referenced by the "Audio URL" field and stages channel 0
 * of the result as the pending audio attachment, together with a label made
 * of the file name and the clip length in seconds.
 *
 * Does nothing when the field is blank. Failures are reported through
 * `setError`; `isFetchingAudio` is true for the duration of the call.
 */
const loadAudioFromUrl = async () => {
  const url = audioUrl.trim();
  if (!url) return;

  const sampleRate = 16000;
  let ctx: AudioContext | null = null;
  setIsFetchingAudio(true);
  try {
    ctx = new AudioContext({ sampleRate });
    const decoded = await ctx.decodeAudioData(url);
    // Copy the samples (as startRecording does) so the staged buffer does not
    // alias storage that belongs to the context closed below.
    const pcm = new Float32Array(decoded.getChannelData(0));
    // Drop any query string / fragment so the label shows just the file name.
    const path = url.split(/[?#]/)[0] ?? url;
    const name = path.split('/').pop() || 'audio';
    setAudioBuffer(pcm);
    setAudioLabel(`${name} · ${(pcm.length / sampleRate).toFixed(1)}s`);
  } catch (e) {
    setError(e instanceof Error ? e.message : String(e));
  } finally {
    setIsFetchingAudio(false);
    // Each load creates its own context; release it so repeated loads do not
    // accumulate open audio contexts.
    try {
      await ctx?.close();
    } catch {
      // Best-effort cleanup: the decode result (or its error) has already been
      // reported, so a failure to close must not overwrite it.
    }
  }
};

/**
 * Starts capturing microphone audio into `recordChunks`.
 *
 * Bails out with a UI error when microphone permission has not been granted,
 * when the audio session cannot be activated, or when the recorder reports an
 * error on start. `isRecording` is only set once the recorder has started.
 */
const startRecording = async () => {
  if (!hasMicPermission) {
    setError('Microphone permission denied. Please enable it in Settings.');
    return;
  }

  const sampleRate = 16000;
  // Begin from an empty chunk list so an earlier take is never mixed in.
  recordChunks.current = [];
  recorder.current.onAudioReady(
    { sampleRate, bufferLength: 0.1 * sampleRate, channelCount: 1 },
    (event) => {
      // Store a copy of channel 0 for each delivered buffer.
      const samples = event.buffer.getChannelData(0);
      recordChunks.current.push(new Float32Array(samples));
    }
  );

  try {
    const sessionActive = await AudioManager.setAudioSessionActivity(true);
    if (!sessionActive) {
      setError('Cannot start audio session');
      return;
    }

    const startResult = recorder.current.start();
    if (startResult.status === 'error') {
      setError(`Recording problems: ${startResult.message}`);
      return;
    }

    setIsRecording(true);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    setError(message);
  }
};

/**
 * Stops the recorder and, if any samples were captured, joins the collected
 * chunks into one Float32Array that becomes the pending audio attachment.
 * When nothing was captured the current attachment is left untouched.
 */
const stopRecording = () => {
  recorder.current.stop();
  setIsRecording(false);

  const chunks = recordChunks.current;
  let sampleCount = 0;
  for (const chunk of chunks) {
    sampleCount += chunk.length;
  }
  if (sampleCount === 0) return;

  // Lay the chunks end to end, threading the write offset through the fold.
  const merged = new Float32Array(sampleCount);
  chunks.reduce((offset, chunk) => {
    merged.set(chunk, offset);
    return offset + chunk.length;
  }, 0);

  recordChunks.current = [];
  setAudioBuffer(merged);
  setAudioLabel(`Recording · ${(merged.length / 16000).toFixed(1)}s`);
};

/** Discards the pending audio attachment: both its label and its samples. */
const clearAudio = () => {
  setAudioLabel(null);
  setAudioBuffer(null);
};

const pickImage = async () => {
try {
const result = await launchImageLibrary({ mediaType: 'photo' });
Expand All @@ -81,19 +176,27 @@ function MultimodalLLMScreen() {
};

const sendMessage = async () => {
if (!userInput.trim() || vlm.isGenerating) return;
if (!(imageUri || audioBuffer || userInput.trim()) || vlm.isGenerating)
return;
onMessageSend();
const text = userInput.trim();
setUserInput('');
textInputRef.current?.clear();
Keyboard.dismiss();
const currentImageUri = imageUri;
const currentAudio = audioBuffer;
setImageUri(null);
setAudioBuffer(null);
setAudioLabel(null);
try {
await vlm.sendMessage(
text,
currentImageUri ? { imagePath: currentImageUri } : undefined
);
const media =
currentImageUri || currentAudio
? {
...(currentImageUri ? { imagePath: currentImageUri } : {}),
...(currentAudio ? { audioBuffer: currentAudio } : {}),
}
: undefined;
await vlm.sendMessage(text, media);
} catch (e) {
// Surface the failure in the UI rather than only logging it to the console.
setError(e instanceof Error ? e.message : String(e));
Expand Down Expand Up @@ -159,6 +262,42 @@ function MultimodalLLMScreen() {
</TouchableOpacity>
)}

{/* Audio URL input */}
<View style={styles.audioUrlRow}>
<TextInput
placeholder="Audio URL (mp3/wav/…)"
placeholderTextColor="#C1C6E5"
style={styles.audioUrlInput}
value={audioUrl}
onChangeText={setAudioUrl}
autoCapitalize="none"
autoCorrect={false}
/>
<TouchableOpacity
style={[
styles.audioUrlButton,
(!audioUrl.trim() || isFetchingAudio || vlm.isGenerating) &&
styles.disabled,
]}
onPress={loadAudioFromUrl}
disabled={!audioUrl.trim() || isFetchingAudio || vlm.isGenerating}
>
<Text style={styles.audioUrlButtonText}>
{isFetchingAudio ? '…' : 'Load'}
</Text>
</TouchableOpacity>
</View>

{/* Audio attachment strip */}
{audioLabel && (
<View style={styles.audioAttachmentContainer}>
<Text style={styles.audioAttachmentText}>🎵 {audioLabel}</Text>
<TouchableOpacity onPress={clearAudio}>
<Text style={styles.audioAttachmentClear}>✕</Text>
</TouchableOpacity>
</View>
)}

<StatsBar stats={stats} />
<View
style={[
Expand All @@ -178,6 +317,17 @@ function MultimodalLLMScreen() {
<Text style={styles.imageButtonText}>📷</Text>
</TouchableOpacity>

{/* Mic record / stop button */}
<TouchableOpacity
style={styles.imageButton}
onPress={isRecording ? stopRecording : startRecording}
disabled={vlm.isGenerating}
>
<Text style={styles.imageButtonText}>
{isRecording ? '⏹️' : '🎤'}
</Text>
</TouchableOpacity>

<TextInput
autoCorrect={false}
ref={textInputRef}
Expand All @@ -198,14 +348,15 @@ function MultimodalLLMScreen() {
onChangeText={setUserInput}
/>

{userInput.trim() && !vlm.isGenerating && (
<TouchableOpacity
style={styles.sendChatTouchable}
onPress={sendMessage}
>
<SendIcon height={24} width={24} padding={4} margin={8} />
</TouchableOpacity>
)}
{(imageUri || audioBuffer || userInput.trim()) &&
!vlm.isGenerating && (
<TouchableOpacity
style={styles.sendChatTouchable}
onPress={sendMessage}
>
<SendIcon height={24} width={24} padding={4} margin={8} />
</TouchableOpacity>
)}
{vlm.isGenerating && (
<TouchableOpacity
style={styles.sendChatTouchable}
Expand Down Expand Up @@ -319,6 +470,64 @@ const styles = StyleSheet.create({
fontFamily: 'regular',
color: ColorPalette.blueDark,
},
audioAttachmentContainer: {
flexDirection: 'row',
alignItems: 'center',
justifyContent: 'space-between',
paddingHorizontal: 16,
paddingVertical: 8,
marginHorizontal: 16,
marginBottom: 4,
borderRadius: 8,
borderWidth: 1,
borderColor: ColorPalette.blueLight,
backgroundColor: '#fafbff',
},
audioAttachmentText: {
fontSize: 13,
fontFamily: 'regular',
color: ColorPalette.blueDark,
},
audioAttachmentClear: {
fontSize: 16,
color: ColorPalette.blueDark,
paddingHorizontal: 8,
},
audioUrlRow: {
flexDirection: 'row',
alignItems: 'center',
marginHorizontal: 16,
marginBottom: 4,
},
audioUrlInput: {
flex: 1,
padding: 10,
borderTopLeftRadius: 8,
borderBottomLeftRadius: 8,
borderWidth: 1,
borderColor: ColorPalette.blueLight,
borderRightWidth: 0,
fontFamily: 'regular',
fontSize: 13,
color: ColorPalette.primary,
},
audioUrlButton: {
paddingVertical: 10,
paddingHorizontal: 16,
backgroundColor: ColorPalette.strongPrimary,
borderTopRightRadius: 8,
borderBottomRightRadius: 8,
justifyContent: 'center',
alignItems: 'center',
},
audioUrlButtonText: {
color: '#fff',
fontFamily: 'medium',
fontSize: 13,
},
disabled: {
opacity: 0.5,
},
bottomContainer: {
height: 100,
width: '100%',
Expand Down
1 change: 1 addition & 0 deletions apps/llm/components/llmModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export const LLM_MODELS: ModelOption<LLMModelSources>[] = [
{ label: 'Qwen3 0.6B', value: QWEN3_0_6B },
{ label: 'Qwen3 0.6B Quantized', value: QWEN3_0_6B_QUANTIZED },
{ label: 'Qwen3 1.7B', value: QWEN3_1_7B },
{ label: 'Gemma4 e2b Quantized', value: GEMMA4_E2B_QUANTIZED },
{ label: 'Qwen3 1.7B Quantized', value: QWEN3_1_7B_QUANTIZED },
{ label: 'Qwen3 4B', value: QWEN3_4B },
{ label: 'Qwen3 4B Quantized', value: QWEN3_4B_QUANTIZED },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ inline std::vector<float> getValue<std::vector<float>>(const jsi::Value &val,
return getArrayAsVector<float>(val, runtime);
}

// Specialization for a JS array whose elements are each read through
// getTypedArrayAsSpan<float>: every element's contents are copied into an
// owned std::vector<float>, preserving the order of the outer array.
// asArray() is applied to the incoming value, so `val` is expected to be a JS
// array.
template <>
inline std::vector<std::vector<float>>
getValue<std::vector<std::vector<float>>>(const jsi::Value &val,
                                          jsi::Runtime &runtime) {
  jsi::Array outer = val.asObject(runtime).asArray(runtime);
  const size_t rowCount = outer.size(runtime);

  std::vector<std::vector<float>> rows;
  rows.reserve(rowCount);

  size_t row = 0;
  while (row < rowCount) {
    // The span only views the typed array's storage; the range constructor
    // below copies the floats into storage owned by `rows`.
    auto samples =
        getTypedArrayAsSpan<float>(outer.getValueAtIndex(runtime, row), runtime);
    rows.emplace_back(samples.begin(), samples.end());
    ++row;
  }
  return rows;
}

template <>
inline std::vector<int64_t>
getValue<std::vector<int64_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
Expand Down
Loading
Loading