Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
811b58c
SpeechToText AutostopSilenceTimeout
VladislavAntonyuk Feb 23, 2026
7f98647
2017
VladislavAntonyuk Feb 23, 2026
36edccb
Update src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechT…
VladislavAntonyuk Feb 23, 2026
fe6d968
Fix too long value
VladislavAntonyuk Feb 23, 2026
96fdd51
fix current state notification
VladislavAntonyuk Feb 23, 2026
31eb967
Fix NRE
VladislavAntonyuk Feb 24, 2026
23646f6
remove using
VladislavAntonyuk Feb 24, 2026
ca9d16c
Merge branch 'main' into speech-to-text-autostop-silence-timeout
VladislavAntonyuk Apr 2, 2026
a12df44
Fix PR comments
VladislavAntonyuk Apr 2, 2026
889f7c8
Merge branch 'main' into speech-to-text-autostop-silence-timeout
TheCodeTraveler Apr 5, 2026
88875da
Update Formatting
TheCodeTraveler Apr 5, 2026
efa7d92
Add `const nuint audioEngineBusTap = 0;`
TheCodeTraveler Apr 5, 2026
0916a5e
Call `StopListenAsync` when `InternalStartListeningAsync` fails
TheCodeTraveler Apr 5, 2026
3493e48
Update src/CommunityToolkit.Maui.Core/Essentials/SpeechToText/SpeechT…
TheCodeTraveler Apr 5, 2026
a62f2b7
Merge branch 'speech-to-text-autostop-silence-timeout' of https://git…
TheCodeTraveler Apr 5, 2026
a449bb1
Add bounds check before casing `double` to `long`
TheCodeTraveler Apr 5, 2026
0a32c83
Update samples/CommunityToolkit.Maui.Sample/ViewModels/Essentials/Spe…
TheCodeTraveler Apr 5, 2026
f9fa744
Merge branch 'speech-to-text-autostop-silence-timeout' of https://git…
TheCodeTraveler Apr 5, 2026
9ff6f51
Add `GC.SuppressFinalize(this);`
TheCodeTraveler Apr 5, 2026
a39f7b8
Subscribe `RecognitionResultUpdated` before `StartListenAsync`, Add `…
TheCodeTraveler Apr 5, 2026
a78461c
Call `OnSpeechToTextStateChanged` after disposing all fields
TheCodeTraveler Apr 5, 2026
4183fa5
Update samples/CommunityToolkit.Maui.Sample/CommunityToolkit.Maui.Sam…
TheCodeTraveler Apr 5, 2026
afafd09
Retrieve IDispatchTimer from MainThread
TheCodeTraveler Apr 5, 2026
f11a32a
Call `OnSpeechToTextStateChanged` After Disposing all IDisposables
TheCodeTraveler Apr 5, 2026
0c3afdd
Call `StopListenAsync` if `InternalStartListening` fails
TheCodeTraveler Apr 5, 2026
50b5217
Use `NSErrorException`
TheCodeTraveler Apr 5, 2026
3575a55
Retrieve `IDispatchTimer` from MainThread
TheCodeTraveler Apr 5, 2026
78941a5
Add `SpeechToTextOptionsDefaults.AutoStopSilenceTimeout`
TheCodeTraveler Apr 5, 2026
a9ee101
Merge branch 'speech-to-text-autostop-silence-timeout' of https://git…
TheCodeTraveler Apr 5, 2026
779b467
Fix SpeechToTextPage Culture Picker
TheCodeTraveler Apr 5, 2026
57675be
use `is not`
TheCodeTraveler Apr 5, 2026
e068e03
Remove invalid `MemberNotNull`
TheCodeTraveler Apr 5, 2026
199d515
Use `const audioEngineBusTap`
TheCodeTraveler Apr 5, 2026
b4bea62
Add `token.ThrowIfCancellationRequested`
TheCodeTraveler Apr 5, 2026
1b2c98b
Add `CanExecute` Properties
TheCodeTraveler Apr 5, 2026
a2b572b
Use `TwoLetterISOLanguageName` for Android compatibility
TheCodeTraveler Apr 5, 2026
2e22eb7
Call `ResetTimer()` in audio bus callback
TheCodeTraveler Apr 5, 2026
72e7ab5
Fix Type Name
TheCodeTraveler Apr 5, 2026
08f53cc
Re-enable `CanStartListenExecute` and disable `CanStopListenExecute` …
TheCodeTraveler Apr 5, 2026
a02d11a
Remove `speechToText.RecognitionResultUpdated -= HandleRecognitionRes…
TheCodeTraveler Apr 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
x:Class="CommunityToolkit.Maui.Sample.Pages.Essentials.SpeechToTextPage"
xmlns:vm="clr-namespace:CommunityToolkit.Maui.Sample.ViewModels.Essentials"
xmlns:essentials="clr-namespace:CommunityToolkit.Maui.Sample.Pages.Essentials"
xmlns:media="clr-namespace:Microsoft.Maui.Media;assembly=Microsoft.Maui.Essentials"
x:TypeArguments="vm:SpeechToTextViewModel"
x:DataType="vm:SpeechToTextViewModel"
Title="SpeechToText">
Expand All @@ -29,7 +30,7 @@
<Picker
ItemsSource="{Binding Locales}"
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., x:DataType={x:Type Picker}, Converter={StaticResource PickerLocaleDisplayConverter}}"/>
ItemDisplayBinding="{Binding ., x:DataType={x:Type media:Locale}, Converter={StaticResource PickerLocaleDisplayConverter}}"/>

<Label
Text="State"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ protected override async void OnAppearing()
{
base.OnAppearing();

await BindingContext.SetLocalesCommand.ExecuteAsync(null);
await BindingContext.SetLocalesCommand.ExecuteAsync(CancellationToken.None);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public OfflineSpeechToTextViewModel()
speechToText = OfflineSpeechToText.Default;

speechToText.StateChanged += HandleSpeechToTextStateChanged;
speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;
speechToText.RecognitionResultCompleted += HandleRecognitionResultCompleted;
}

Expand All @@ -24,49 +25,70 @@ public OfflineSpeechToTextViewModel()
[ObservableProperty]
public partial string? RecognitionText { get; set; } = "Welcome to .NET MAUI Community Toolkit!";

[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StartListenCommand))]
public partial bool CanStartListenExecute { get; set; } = true;

[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StopListenCommand))]
public partial bool CanStopListenExecute { get; set; } = false;

static async Task<bool> ArePermissionsGranted(ISpeechToText speechToText)
{
var microphonePermissionStatus = await Permissions.RequestAsync<Permissions.Microphone>();
var isSpeechToTextRequestPermissionsGranted = await speechToText.RequestPermissions(CancellationToken.None);

return microphonePermissionStatus is PermissionStatus.Granted
&& isSpeechToTextRequestPermissionsGranted;
&& isSpeechToTextRequestPermissionsGranted;
}

[RelayCommand]
async Task StartListen()
[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
async Task StartListen(CancellationToken token)
{
CanStartListenExecute = false;
CanStopListenExecute = true;

var isGranted = await ArePermissionsGranted(speechToText);
if (!isGranted)
{
await Toast.Make("Permission not granted").Show(CancellationToken.None);
await Toast.Make("Permission not granted").Show(token);
CanStartListenExecute = true;
CanStopListenExecute = false;
return;
}

const string beginSpeakingPrompt = "Begin speaking...";

RecognitionText = beginSpeakingPrompt;

speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

await speechToText.StartListenAsync(new SpeechToTextOptions
try
{
Culture = CultureInfo.CurrentCulture,
ShouldReportPartialResults = true
}, CancellationToken.None);

if (RecognitionText is beginSpeakingPrompt)
await speechToText.StartListenAsync(new SpeechToTextOptions
{
AutoStopSilenceTimeout = TimeSpan.FromSeconds(5),
Culture = CultureInfo.CurrentCulture,
ShouldReportPartialResults = true
}, token);

if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
}
catch
{
RecognitionText = string.Empty;
CanStartListenExecute = true;
CanStopListenExecute = false;

throw;
}
}

[RelayCommand]
Task StopListen()
[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
Task StopListen(CancellationToken token)
{
speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;
CanStartListenExecute = true;
CanStopListenExecute = false;

return speechToText.StopListenAsync(CancellationToken.None);
return speechToText.StopListenAsync(token);
}

void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public SpeechToTextViewModel(ITextToSpeech textToSpeech, [FromKeyedServices("Onl

Locales.CollectionChanged += HandleLocalesCollectionChanged;
this.speechToText.StateChanged += HandleSpeechToTextStateChanged;
this.speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;
this.speechToText.RecognitionResultCompleted += HandleRecognitionResultCompleted;
}

Expand All @@ -43,6 +44,12 @@ public SpeechToTextViewModel(ITextToSpeech textToSpeech, [FromKeyedServices("Onl

/// <summary>
/// Detaches this view model's event handlers and then disposes the speech-to-text service.
/// </summary>
/// <returns>A <see cref="ValueTask"/> that completes once the speech-to-text service has been disposed.</returns>
public async ValueTask DisposeAsync()
{
	GC.SuppressFinalize(this);

	// Unhook the handlers that the constructor attached, before the service itself is disposed.
	Locales.CollectionChanged -= HandleLocalesCollectionChanged;
	speechToText.StateChanged -= HandleSpeechToTextStateChanged;
	speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;
	speechToText.RecognitionResultCompleted -= HandleRecognitionResultCompleted;

	await speechToText.DisposeAsync();
}

Expand All @@ -52,22 +59,25 @@ static async Task<bool> ArePermissionsGranted(ISpeechToText speechToText)
var isSpeechToTextPermissionsGranted = await speechToText.RequestPermissions(CancellationToken.None);

return microphonePermissionStatus is PermissionStatus.Granted
&& isSpeechToTextPermissionsGranted;
&& isSpeechToTextPermissionsGranted;
}

/// <summary>
/// Reloads <see cref="Locales"/> from the text-to-speech service and picks the initial <see cref="CurrentLocale"/>.
/// </summary>
/// <remarks>
/// The locale whose language equals the current UI culture name is preferred, then one matching the
/// two-letter ISO language name, then the first locale returned by the service. When the service
/// returns no locales, <see cref="CurrentLocale"/> is set to <see langword="null"/> instead of throwing.
/// </remarks>
/// <param name="token">Cancels waiting for the locale list.</param>
[RelayCommand]
async Task SetLocales(CancellationToken token)
{
	Locales.Clear();

	IReadOnlyList<Locale> locales = [.. await textToSpeech.GetLocalesAsync().WaitAsync(token)];

	foreach (var locale in locales.OrderBy(x => x.Language).ThenBy(x => x.Name))
	{
		Locales.Add(locale);
	}

	var currentLocale = locales.FirstOrDefault(l => l.Language.Equals(CultureInfo.CurrentUICulture.Name, StringComparison.OrdinalIgnoreCase))
		?? locales.FirstOrDefault(l => l.Language.Equals(CultureInfo.CurrentUICulture.TwoLetterISOLanguageName, StringComparison.OrdinalIgnoreCase));

	// FirstOrDefault rather than locales[0]: indexing an empty list would throw when no locales are available.
	CurrentLocale = currentLocale ?? locales.FirstOrDefault();
}

[RelayCommand]
Expand All @@ -94,56 +104,68 @@ async Task Play(CancellationToken cancellationToken)
}

[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
async Task StartListen()
async Task StartListen(CancellationToken cancellationToken)
{
CanStartListenExecute = false;
CanStopListenExecute = true;

var isGranted = await ArePermissionsGranted(speechToText);
if (!isGranted)
{
await Toast.Make("Permission not granted").Show(CancellationToken.None);
await Toast.Make("Permission not granted").Show(cancellationToken);
CanStartListenExecute = true;
CanStopListenExecute = false;
return;
}

if (Connectivity.NetworkAccess != NetworkAccess.Internet)
if (Connectivity.NetworkAccess is not NetworkAccess.Internet)
{
await Toast.Make("Internet connection is required").Show(CancellationToken.None);
await Toast.Make("Internet connection is required").Show(cancellationToken);
CanStartListenExecute = true;
CanStopListenExecute = false;
return;
}

const string beginSpeakingPrompt = "Begin speaking...";

RecognitionText = beginSpeakingPrompt;

speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

await speechToText.StartListenAsync(new SpeechToTextOptions()
try
{
Culture = CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage),
ShouldReportPartialResults = true
}, CancellationToken.None);

if (RecognitionText is beginSpeakingPrompt)
await speechToText.StartListenAsync(new SpeechToTextOptions
{
Culture = CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage),
AutoStopSilenceTimeout = TimeSpan.FromSeconds(5),
ShouldReportPartialResults = true
}, cancellationToken);

if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
}
catch
{
RecognitionText = string.Empty;
CanStartListenExecute = true;
CanStopListenExecute = false;

throw;
}
}

[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
Task StopListen()
Task StopListen(CancellationToken cancellationToken)
{
CanStartListenExecute = true;
CanStopListenExecute = false;

speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;

return speechToText.StopListenAsync(CancellationToken.None);
return speechToText.StopListenAsync(cancellationToken);
}

void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs e)
{
RecognitionText += e.RecognitionResult;
RecognitionText += $" {e.RecognitionResult}";
}

void HandleRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs e)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
using AVFoundation;
using CommunityToolkit.Maui.Core;
using Microsoft.Maui.ApplicationModel;
using Microsoft.Maui.Dispatching;
using Speech;

namespace CommunityToolkit.Maui.Media;

public sealed partial class OfflineSpeechToTextImplementation
{
AVAudioEngine? audioEngine;
const nuint audioEngineBusTap = 0;

readonly AVAudioEngine audioEngine = new();

IDispatcherTimer? silenceTimer;
SFSpeechRecognizer? speechRecognizer;
SFSpeechRecognitionTask? recognitionTask;
SFSpeechAudioBufferRecognitionRequest? liveSpeechRequest;
Expand All @@ -19,12 +26,11 @@ public sealed partial class OfflineSpeechToTextImplementation
/// <inheritdoc />
public ValueTask DisposeAsync()
{
audioEngine?.Dispose();
audioEngine.Dispose();
speechRecognizer?.Dispose();
liveSpeechRequest?.Dispose();
recognitionTask?.Dispose();

audioEngine = null;
speechRecognizer = null;
liveSpeechRequest = null;
recognitionTask = null;
Expand All @@ -41,12 +47,6 @@ public Task<bool> RequestPermissions(CancellationToken cancellationToken = defau
return taskResult.Task.WaitAsync(cancellationToken);
}

static Task<bool> IsSpeechPermissionAuthorized(CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
return Task.FromResult(SFSpeechRecognizer.AuthorizationStatus is SFSpeechRecognizerAuthorizationStatus.Authorized);
}

static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)
{
sharedAvAudioSession = AVAudioSession.SharedInstance();
Expand All @@ -62,10 +62,87 @@ static void InitializeAvAudioSession(out AVAudioSession sharedAvAudioSession)

/// <summary>
/// Tears down the current listening session: stops the silence timer, ends the audio request and
/// recognition task, stops the audio engine, disposes the per-session speech objects, and finally
/// raises a state-changed notification.
/// </summary>
void InternalStopListening()
{
	// Detach the tick handler and stop the silence timer.
	silenceTimer?.Tick -= OnSilenceTimerTick;
	silenceTimer?.Stop();

	liveSpeechRequest?.EndAudio();
	recognitionTask?.Cancel();
	recognitionTask?.Finish();

	// Stop the engine and remove the input tap exactly once, using the shared bus constant.
	audioEngine.Stop();
	audioEngine.InputNode.RemoveTapOnBus(audioEngineBusTap);

	recognitionTask?.Dispose();
	speechRecognizer?.Dispose();
	liveSpeechRequest?.Dispose();

	speechRecognizer = null;
	liveSpeechRequest = null;
	recognitionTask = null;

	// Dispose all IDisposables before calling `OnSpeechToTextStateChanged` to ensure CurrentState == SpeechToTextState.Stopped
	OnSpeechToTextStateChanged(CurrentState);
}

/// <summary>
/// Tick handler for the silence timer; stops the current listening session.
/// </summary>
/// <param name="sender">The event source (unused).</param>
/// <param name="e">The event data (unused).</param>
void OnSilenceTimerTick(object? sender, EventArgs e) => InternalStopListening();

/// <summary>
/// Starts a recognition task for the given request and routes its callbacks to this instance's
/// result and state notifications.
/// </summary>
/// <param name="sfSpeechRecognizer">The recognizer that runs the task.</param>
/// <param name="sfSpeechAudioBufferRecognitionRequest">The audio-buffer request to recognize.</param>
/// <returns>The recognition task returned by the recognizer.</returns>
SFSpeechRecognitionTask CreateSpeechRecognizerTask(SFSpeechRecognizer sfSpeechRecognizer, SFSpeechAudioBufferRecognitionRequest sfSpeechAudioBufferRecognitionRequest)
{
	// Counts the partial results reported so far; reset whenever the session ends.
	var partialResultCount = 0;

	return sfSpeechRecognizer.GetRecognitionTask(sfSpeechAudioBufferRecognitionRequest, (result, err) =>
	{
		// Failure: stop listening and report the error description as a failed result.
		if (err is not null)
		{
			partialResultCount = 0;
			InternalStopListening();
			OnRecognitionResultCompleted(SpeechToTextResult.Failed(new Exception(err.LocalizedDescription)));
			return;
		}

		// Final result: stop listening and report the complete transcription.
		if (result.Final)
		{
			partialResultCount = 0;
			InternalStopListening();
			OnRecognitionResultCompleted(SpeechToTextResult.Success(result.BestTranscription.FormattedString));
			return;
		}

		// Partial result: restart the silence timer.
		RestartTimer();

		// Raise the state notification only for the first partial result of the session.
		if (partialResultCount <= 0)
		{
			OnSpeechToTextStateChanged(CurrentState);
		}

		partialResultCount++;
		OnRecognitionResultUpdated(result.BestTranscription.FormattedString);
	});
}

/// <summary>
/// Creates a dispatcher timer on the main thread and, when an auto-stop timeout shorter than
/// <c>SpeechToTextOptionsDefaults.AutoStopSilenceTimeout</c> is requested, wires it to
/// <see cref="OnSilenceTimerTick"/> and starts it.
/// </summary>
/// <param name="options">Options supplying <c>AutoStopSilenceTimeout</c>.</param>
/// <param name="cancellationToken">Cancels waiting for the timer to be created.</param>
/// <returns>
/// The created timer. It is returned unstarted and without a tick handler when the requested
/// timeout is greater than or equal to the default.
/// </returns>
/// <exception cref="InvalidOperationException">No timer could be obtained from the main thread's dispatcher.</exception>
async Task<IDispatcherTimer> CreateSilenceTimer(SpeechToTextOptions options, CancellationToken cancellationToken)
{
	var dispatcherTimer = await MainThread
		.InvokeOnMainThreadAsync(() =>
			Dispatcher.GetForCurrentThread()?.CreateTimer()
				?? throw new InvalidOperationException($"{nameof(IDispatcherTimer)} must be retrieved from the main UI Thread"))
		.WaitAsync(cancellationToken);

	var requestedTimeout = options.AutoStopSilenceTimeout;

	// Only arm the timer when the caller asked for a timeout below the default.
	if (requestedTimeout < SpeechToTextOptionsDefaults.AutoStopSilenceTimeout)
	{
		dispatcherTimer.Tick += OnSilenceTimerTick;
		dispatcherTimer.Interval = requestedTimeout;
		dispatcherTimer.Start();
	}

	return dispatcherTimer;
}

/// <summary>
/// Restarts the silence timer, if one exists, by stopping it and starting it again.
/// </summary>
void RestartTimer()
{
	if (silenceTimer is null)
	{
		return;
	}

	silenceTimer.Stop();
	silenceTimer.Start();
}
}
Loading
Loading