diff --git a/Makefile b/Makefile index 055ed334383..6d6378748c7 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ libnavigator \ libtrip-notification \ libnavigation-core \ libnavigation-copilot \ +libnavigation-voicefeedback \ CORE_MODULES = $(RELEASED_CORE_MODULES) @@ -137,6 +138,7 @@ core-check-api: assemble-core-release ./gradlew :libtrip-notification:checkApi -PhidePackage=com.mapbox.navigation.trip.notification.internal ./gradlew :libnavigation-core:checkApi -PhidePackage=com.mapbox.navigation.core.internal ./gradlew :libnavigation-copilot:checkApi -PhidePackage=com.mapbox.navigation.copilot.internal + ./gradlew :libnavigation-voicefeedback:checkApi -PhidePackage=com.mapbox.navigation.voicefeedback.internal .PHONY: core-update-api core-update-api: assemble-core-release @@ -148,6 +150,7 @@ core-update-api: assemble-core-release ./gradlew :libtrip-notification:updateApi -PhidePackage=com.mapbox.navigation.trip.notification.internal ./gradlew :libnavigation-core:updateApi -PhidePackage=com.mapbox.navigation.core.internal ./gradlew :libnavigation-copilot:updateApi -PhidePackage=com.mapbox.navigation.copilot.internal + ./gradlew :libnavigation-voicefeedback:updateApi -PhidePackage=com.mapbox.navigation.voicefeedback.internal .PHONY: assemble-ui-debug assemble-ui-debug: diff --git a/build.gradle b/build.gradle index 069f28c88d0..c6797f05e3c 100644 --- a/build.gradle +++ b/build.gradle @@ -21,6 +21,7 @@ buildscript { dependencies { classpath pluginDependencies.gradle classpath pluginDependencies.kotlin + classpath pluginDependencies.kotlinSerialization classpath pluginDependencies.dependencyUpdates classpath pluginDependencies.jacoco classpath pluginDependencies.googleServices diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index ad8653a8349..18d8995bd9f 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -31,7 +31,7 @@ ext { mapboxMapSdk : '10.19.0', mapboxSdkServices : '7.1.0', mapboxNavigator : 
"${mapboxNavigatorVersion}", - mapboxCommonNative : '23.11.4', + mapboxCommonNative : '23.12.0', mapboxCrashMonitor : '2.0.0', mapboxAnnotationPlugin : '0.8.0', mapboxBaseAndroid : '0.8.0', @@ -75,6 +75,8 @@ ext { androidStartup : '1.0.0', viewBinding : '7.2.1', glide : '4.13.2', + kotlinDateTime : '0.3.2', + kotlinSerialization : '1.3.1', ] dependenciesList = [ // mapbox @@ -88,6 +90,7 @@ ext { mapboxNavigator : "com.mapbox.navigator:mapbox-navigation-native$ndkVersionSuffix:${version.mapboxNavigator}", mapboxCommonNative : "com.mapbox.common:common$ndkVersionSuffix:${version.mapboxCommonNative}", mapboxMapsAndroidAuto : "com.mapbox.extension:maps-androidauto$ndkVersionSuffix:${version.mapboxMapsAndroidAuto}", + mapboxMapGptExpAndroid : "com.mapbox.mapgpt.experimental:mapgpt$ndkVersionSuffix:${version.mapboxCommonNative}", /** * explicitly define Mapbox OkHttp dependency so that we are sure it's in sync with the Common SDK version we define @@ -180,7 +183,10 @@ ext { viewBinding : "androidx.databinding:viewbinding:${version.viewBinding}", // Test app crashlytics - firebaseCrashlyticsNdk : "com.google.firebase:firebase-crashlytics-ndk:${version.firebaseCrashlytics}" + firebaseCrashlyticsNdk : "com.google.firebase:firebase-crashlytics-ndk:${version.firebaseCrashlytics}", + + kotlinXDateTime : "org.jetbrains.kotlinx:kotlinx-datetime:${version.kotlinDateTime}", + kotlinSerialization : "org.jetbrains.kotlinx:kotlinx-serialization-json:${version.kotlinSerialization}" ] pluginVersion = [ @@ -208,6 +214,7 @@ ext { pluginDependencies = [ gradle : "com.android.tools.build:gradle:${pluginVersion.gradle}", kotlin : "org.jetbrains.kotlin:kotlin-gradle-plugin:${pluginVersion.kotlin}", + kotlinSerialization : "org.jetbrains.kotlin:kotlin-serialization:${pluginVersion.kotlin}", checkstyle : "com.puppycrawl.tools:checkstyle:${pluginVersion.checkstyle}", license : "com.jaredsburrows:gradle-license-plugin:${pluginVersion.license}", // Used by license. 
Add this dependency explicitly, diff --git a/gradle/publish.gradle b/gradle/publish.gradle index 0a8d441a65a..88366d61fe6 100644 --- a/gradle/publish.gradle +++ b/gradle/publish.gradle @@ -66,6 +66,7 @@ sdkNameMap["libnavigator"] = "mobile-navigation-navigator" sdkNameMap["libtrip-notification"] = "mobile-navigation-notification" sdkNameMap["libnavigation-router"] = "mobile-navigation-router" sdkNameMap["libnavigation-util"] = "mobile-navigation-utils" +sdkNameMap["libnavigation-voicefeedback"] = "mobile-navigation-voicefeedback" sdkNameMap["libnavui-base"] = "mobile-navigation-ui-base" sdkNameMap["libnavui-maps"] = "mobile-navigation-ui-maps" sdkNameMap["libnavui-util"] = "mobile-navigation-ui-utils" diff --git a/libnavigation-voicefeedback/.gitignore b/libnavigation-voicefeedback/.gitignore new file mode 100644 index 00000000000..b32ff15a839 --- /dev/null +++ b/libnavigation-voicefeedback/.gitignore @@ -0,0 +1,3 @@ +/build +src/main/assets/sdk_versions/* +src/androidTest/res/values/mapbox_access_token.xml \ No newline at end of file diff --git a/libnavigation-voicefeedback/README.md b/libnavigation-voicefeedback/README.md new file mode 100644 index 00000000000..f729abc047e --- /dev/null +++ b/libnavigation-voicefeedback/README.md @@ -0,0 +1,73 @@ +# Voice Feedback (for Nav SDK v2) + +An SDK for voice-based feedback collection during navigation sessions in Android, powered by MapGPT. + +The root package contains ASR engine implementations, state management, context providers, and microphone handling. Supports speech recognition for capturing and processing user voice feedback during navigation. + +## Usage + +```kotlin +val mapboxNavigation = .. 
+val feedbackAgent = FeedbackAgentSession.Builder().build() +mapboxNavigation.registerObserver(feedbackAgent) + +fun onConnectButtonClick() { + feedbackAgent.connect() +} + +fun onMicButtonClick() { + feedbackAgent.startListening() +} + +feedbackAgent.asrState + .collect { state -> + when (state) { + is ASRState.Listening -> { + transcript = state.text + } + is ASRState.Result -> { + mapboxNavigation.postVoiceFeedback( + feedbackSubType = state.feedbackType, + description = state.text, + screenshot = "", + ) { + // feedback submitted + } + } + } + } +``` + +#### Key Components: +- **AutomaticSpeechRecognitionEngine**: Interface defining the contract for speech recognition implementations with state flow exposure for reactive recognition updates +- **AsrSessionState**: Sealed class representing connection states including `Disconnected`, `Connecting`, `Connected`. +- **ASRState**: Sealed interface representing recognition states including `Idle`, `Listening`, `Error`, `Result`, `NoResult`, `SpeechFinishedWaitingForResult`, `Interrupted`, and `InterruptedByTimeout` +- **FeedbackAgentContextProvider**: Interface for retrieving voice feedback context data during feedback sessions + +## Key Architecture Decisions + +### Microphone Abstraction as Internal API + +ASR requires an audio source. The module uses the Android AudioRecord by default. E2E tests need to inject audio from files (e.g., InputStreamMicrophone) to circumvent real microphone access in tests. + +Keep the Microphone interface and injection point internal to the module. The public FeedbackAgentOptions.create() exposes only language and endpoint to keep the public API simple. A custom microphone can be passed via the internal constructor for testing. + +- Public API stays minimal; clients use defaults. +- Testing can use InputStreamMicrophone for deterministic E2E tests. +- Microphone configuration (e.g., custom sources, sample rate) may be exposed in the future if customers request it. 
+ +### Manual Connection (connect() Required) + +FeedbackAgentSession registers as a MapboxNavigationObserver/UIComponent but does not connect to the ASR service automatically on onAttached(). + +The client must call connect() explicitly after registering the session. + +This lets the client control the connection state, which reduces resource usage and avoids idle WebSocket connections. + +### Internal FeedbackAgentContextProvider + +Every ASR request sends context (user location, route state, app settings, etc.) to the MapGPT backend. + +FeedbackAgentContextProvider derives the appropriate context from `MapboxNavigation`. + +The context is required by the backend to provide the LLM with the necessary tokens to accurately submit feedback. \ No newline at end of file diff --git a/libnavigation-voicefeedback/api/current.txt b/libnavigation-voicefeedback/api/current.txt new file mode 100644 index 00000000000..862e423530d --- /dev/null +++ b/libnavigation-voicefeedback/api/current.txt @@ -0,0 +1,152 @@ +// Signature format: 3.0 +package com.mapbox.navigation.voicefeedback { + + @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public sealed interface ASRState { + } + + public static final class ASRState.Error implements com.mapbox.navigation.voicefeedback.ASRState { + ctor public ASRState.Error(Throwable error); + method public Throwable getError(); + property public final Throwable error; + } + + public static final class ASRState.Idle implements com.mapbox.navigation.voicefeedback.ASRState { + field public static final com.mapbox.navigation.voicefeedback.ASRState.Idle INSTANCE; + } + + public static final class ASRState.Interrupted implements com.mapbox.navigation.voicefeedback.ASRState { + field public static final com.mapbox.navigation.voicefeedback.ASRState.Interrupted INSTANCE; + } + + public static final class ASRState.InterruptedByTimeout implements com.mapbox.navigation.voicefeedback.ASRState { + field public static final 
com.mapbox.navigation.voicefeedback.ASRState.InterruptedByTimeout INSTANCE; + } + + public static final class ASRState.Listening implements com.mapbox.navigation.voicefeedback.ASRState { + ctor public ASRState.Listening(String text); + method public String getText(); + property public final String text; + } + + public static final class ASRState.NoResult implements com.mapbox.navigation.voicefeedback.ASRState { + field public static final com.mapbox.navigation.voicefeedback.ASRState.NoResult INSTANCE; + } + + public static final class ASRState.Result implements com.mapbox.navigation.voicefeedback.ASRState { + ctor public ASRState.Result(String text, String feedbackType); + method public String getFeedbackType(); + method public String getText(); + property public final String feedbackType; + property public final String text; + } + + public static final class ASRState.SpeechFinishedWaitingForResult implements com.mapbox.navigation.voicefeedback.ASRState { + field public static final com.mapbox.navigation.voicefeedback.ASRState.SpeechFinishedWaitingForResult INSTANCE; + } + + @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public final class FeedbackAgentEndpoint { + method public String getName(); + method public String getStreamingApiHost(); + method public String getStreamingAsrApiHost(); + property public final String name; + property public final String streamingApiHost; + property public final String streamingAsrApiHost; + field public static final com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint.Companion Companion; + field public static final String PRODUCTION = "production"; + field @RestrictTo(androidx.annotation.RestrictTo.Scope.LIBRARY_GROUP) public static final String TESTING = "testing"; + } + + public static final class FeedbackAgentEndpoint.Companion { + method public com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint custom(String name, String streamingApiHost, String streamingAsrApiHost); + method public 
com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint getProduction(); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint getTesting(); + property public final com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint Production; + property public final com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint Testing; + } + + @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public final class FeedbackAgentOptions { + method public com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint getEndpoint(); + method public java.util.Locale getLanguage(); + property public final com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint endpoint; + property public final java.util.Locale language; + } + + public static final class FeedbackAgentOptions.Builder { + ctor public FeedbackAgentOptions.Builder(); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentOptions build(); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentOptions.Builder endpoint(com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint endpoint); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentOptions.Builder language(java.util.Locale language); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentOptions.Builder microphone(com.mapbox.navigation.voicefeedback.Microphone microphone); + } + + @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public final class FeedbackAgentSession implements com.mapbox.navigation.core.lifecycle.MapboxNavigationObserver { + method public void connect(); + method public void disconnect(); + method public kotlinx.coroutines.flow.StateFlow getAsrState(); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentOptions getOptions(); + method public void interruptListening(); + method public void onAttached(com.mapbox.navigation.core.MapboxNavigation mapboxNavigation); + method public void onDetached(com.mapbox.navigation.core.MapboxNavigation 
mapboxNavigation); + method public void startListening(); + method public void stopListening(); + property public final kotlinx.coroutines.flow.StateFlow asrState; + property public final com.mapbox.navigation.voicefeedback.FeedbackAgentOptions options; + } + + public static final class FeedbackAgentSession.Builder { + ctor public FeedbackAgentSession.Builder(); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentSession build(); + method public com.mapbox.navigation.voicefeedback.FeedbackAgentSession.Builder options(com.mapbox.navigation.voicefeedback.FeedbackAgentOptions options); + } + + public final class FeedbackExtKt { + method @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public static void postVoiceFeedback(com.mapbox.navigation.core.MapboxNavigation, @com.mapbox.navigation.core.telemetry.events.FeedbackEvent.SubType String feedbackSubType, String description, String screenshot, com.mapbox.navigation.core.telemetry.events.FeedbackMetadata? feedbackMetadata = null, com.mapbox.navigation.core.internal.telemetry.UserFeedbackCallback userFeedbackCallback); + method @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public static void postVoiceFeedback(com.mapbox.navigation.core.MapboxNavigation, @com.mapbox.navigation.core.telemetry.events.FeedbackEvent.SubType String feedbackSubType, String description, String screenshot, com.mapbox.navigation.core.internal.telemetry.UserFeedbackCallback userFeedbackCallback); + } + + @com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI public interface Microphone extends com.mapbox.navigation.core.lifecycle.MapboxNavigationObserver { + method public com.mapbox.navigation.voicefeedback.Microphone.Config getConfig(); + method public kotlinx.coroutines.flow.StateFlow getState(); + method public void stop(); + method public suspend Object? 
stream(kotlin.jvm.functions.Function1 consumer, kotlin.coroutines.Continuation); + property public abstract com.mapbox.navigation.voicefeedback.Microphone.Config config; + property public abstract kotlinx.coroutines.flow.StateFlow state; + } + + public static final class Microphone.Config { + ctor public Microphone.Config(int sampleRateHz = 16000); + method public int component1(); + method public com.mapbox.navigation.voicefeedback.Microphone.Config copy(int sampleRateHz); + method public int getSampleRateHz(); + property public final int sampleRateHz; + } + + public abstract static sealed class Microphone.State { + } + + public static final class Microphone.State.Disconnected extends com.mapbox.navigation.voicefeedback.Microphone.State { + field public static final com.mapbox.navigation.voicefeedback.Microphone.State.Disconnected INSTANCE; + } + + public static final class Microphone.State.Error extends com.mapbox.navigation.voicefeedback.Microphone.State { + ctor public Microphone.State.Error(String reason); + method public String getReason(); + property public final String reason; + } + + public static final class Microphone.State.Idle extends com.mapbox.navigation.voicefeedback.Microphone.State { + field public static final com.mapbox.navigation.voicefeedback.Microphone.State.Idle INSTANCE; + } + + public static final class Microphone.State.Streaming extends com.mapbox.navigation.voicefeedback.Microphone.State { + ctor public Microphone.State.Streaming(int chunkId, byte[] byteArray, int bytesRead); + method public byte[] getByteArray(); + method public int getBytesRead(); + method public int getChunkId(); + property public final byte[] byteArray; + property public final int bytesRead; + property public final int chunkId; + } + +} + diff --git a/libnavigation-voicefeedback/build.gradle b/libnavigation-voicefeedback/build.gradle new file mode 100644 index 00000000000..f4cd5e42af4 --- /dev/null +++ b/libnavigation-voicefeedback/build.gradle @@ -0,0 +1,84 @@ 
+apply plugin: 'com.android.library' +apply plugin: 'kotlin-android' +apply plugin: 'kotlinx-serialization' +apply plugin: 'kotlin-kapt' +apply plugin: 'kotlin-parcelize' +apply plugin: 'org.jetbrains.dokka' +apply plugin: 'com.jaredsburrows.license' +apply plugin: 'com.mapbox.android.sdk.versions' +apply from: "../gradle/ktlint.gradle" +apply from: file("../gradle/artifact-settings.gradle") +apply from: "../gradle/kdoc-settings.gradle" + +version = project.ext.versionName +group = project.ext.mapboxArtifactGroupId + +android { + compileSdkVersion androidVersions.compileSdkVersion + + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + + defaultConfig { + minSdkVersion androidVersions.minSdkVersion + targetSdkVersion androidVersions.targetSdkVersion + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + consumerProguardFiles 'proguard-rules.pro', "../proguard/proguard-project.pro" + } + + testOptions { + unitTests.returnDefaultValues = true + unitTests.includeAndroidResources = true + } +} + +dependencies { + api project(":libnavigation-core") + implementation project(":libnavigation-util") + implementation dependenciesList.mapboxMapGptExpAndroid + + implementation dependenciesList.kotlinStdLib + implementation dependenciesList.coroutinesAndroid + implementation dependenciesList.kotlinXDateTime + implementation dependenciesList.kotlinSerialization + + implementation dependenciesList.androidXCore + + apply from: "../gradle/unit-testing-dependencies.gradle" + testImplementation(project(':libtesting-utils')) + testImplementation project(':libtesting-navigation-util') + + // Instrumentation tests + androidTestImplementation project(':libtesting-ui') + androidTestImplementation dependenciesList.androidxTestJunit + androidTestImplementation dependenciesList.junit + androidTestImplementation(dependenciesList.barista) { + exclude group: 'org.jetbrains.kotlin' + exclude group: 
'androidx.annotation' + } + androidTestImplementation dependenciesList.mockwebserver + androidTestImplementation dependenciesList.coroutinesAndroid + androidTestImplementation dependenciesList.testRunner + androidTestUtil dependenciesList.testOrchestrator +} + +dokkaHtmlPartial { + outputDirectory.set(kdocPath) + moduleName.set("Feedback Agent") + dokkaSourceSets { + configureEach { + reportUndocumented.set(true) + + perPackageOption { + matchingRegex.set("com.mapbox.navigation.voicefeedback.internal.*") + suppress.set(true) + } + } + } +} + +apply from: "../gradle/track-public-apis.gradle" +apply from: "../gradle/jacoco.gradle" +apply from: "../gradle/publish.gradle" \ No newline at end of file diff --git a/libnavigation-voicefeedback/consumer-rules.pro b/libnavigation-voicefeedback/consumer-rules.pro new file mode 100644 index 00000000000..e69de29bb2d diff --git a/libnavigation-voicefeedback/gradle.properties b/libnavigation-voicefeedback/gradle.properties new file mode 100644 index 00000000000..c7762cc333e --- /dev/null +++ b/libnavigation-voicefeedback/gradle.properties @@ -0,0 +1,3 @@ +POM_ARTIFACT_ID=voicefeedback +POM_ARTIFACT_TITLE=Mapbox Navigation Feedback Agent +POM_DESCRIPTION=Artifact that provides ability to collect feedback using voice \ No newline at end of file diff --git a/libnavigation-voicefeedback/proguard-rules.pro b/libnavigation-voicefeedback/proguard-rules.pro new file mode 100644 index 00000000000..481bb434814 --- /dev/null +++ b/libnavigation-voicefeedback/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/libnavigation-voicefeedback/src/androidTest/assets/test_report.wav b/libnavigation-voicefeedback/src/androidTest/assets/test_report.wav new file mode 100644 index 00000000000..402fd83b146 Binary files /dev/null and b/libnavigation-voicefeedback/src/androidTest/assets/test_report.wav differ diff --git a/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/testing/TestUtils.kt b/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/testing/TestUtils.kt new file mode 100644 index 00000000000..c7e3df2dfa0 --- /dev/null +++ b/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/testing/TestUtils.kt @@ -0,0 +1,19 @@ +@file:Suppress("DEPRECATION") + +package com.mapbox.navigation.testing + +import android.content.Context +import com.mapbox.navigation.base.options.NavigationOptions +import com.mapbox.navigation.core.MapboxNavigation +import com.mapbox.navigation.core.MapboxNavigationProvider + +fun createMapboxNavigation(context: Context): MapboxNavigation { + val navigationOptions = NavigationOptions.Builder(context) + .accessToken(context.getMapboxAccessTokenFromResources()) + .build() + return MapboxNavigationProvider.create(navigationOptions) +} + +fun Context.getMapboxAccessTokenFromResources(): String { + return getString(resources.getIdentifier("mapbox_access_token", 
"string", packageName)) +} diff --git a/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/testing/voicefeedback/InputStreamMicrophone.kt b/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/testing/voicefeedback/InputStreamMicrophone.kt new file mode 100644 index 00000000000..80010689e14 --- /dev/null +++ b/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/testing/voicefeedback/InputStreamMicrophone.kt @@ -0,0 +1,112 @@ +package com.mapbox.navigation.testing.voicefeedback + +import android.media.AudioFormat +import android.media.AudioRecord +import android.util.Log +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.core.MapboxNavigation +import com.mapbox.navigation.voicefeedback.Microphone +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.delay +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.withContext +import java.io.InputStream + +@OptIn(ExperimentalPreviewMapboxNavigationAPI::class) +internal class InputStreamMicrophone( + private val inputStreamProvider: () -> InputStream, +) : Microphone { + override val config: Microphone.Config = Microphone.Config() + override val state = + MutableStateFlow(Microphone.State.Disconnected) + + private val minBufferSizeBytes = AudioRecord.getMinBufferSize( + config.sampleRateHz, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + ) + + private val bufferSizeBytes = minBufferSizeBytes * BUFFER_MULTIPLIER + + override fun onAttached(mapboxNavigation: MapboxNavigation) { + // no-op + } + + override fun onDetached(mapboxNavigation: MapboxNavigation) { + // no-op + } + + override suspend fun stream(consumer: (Microphone.State.Streaming) -> Unit) { + Log.d( + TAG, + "Streaming audio, " + + "minBufferSizeBytes: $minBufferSizeBytes, " + + "bufferSizeBytes: $bufferSizeBytes", + ) + streamAudioBytes(inputStreamProvider, ByteArray(bufferSizeBytes)) { chunk -> + 
+ state.value = chunk + consumer(chunk) + } + + state.value = Microphone.State.Idle + } + + private suspend fun streamAudioBytes( + stream: () -> InputStream, + byteArray: ByteArray, + consumer: (Microphone.State.Streaming) -> Unit, + ) = withContext(Dispatchers.IO) { + consumer.invoke( + Microphone.State.Streaming( + chunkId = 0, + byteArray = byteArray, + bytesRead = 0, + ), + ) + stream().use { inputStream -> + var chunkId = 0 + while (state.value is Microphone.State.Streaming) { + val bytesRead = inputStream.read(byteArray) + if (bytesRead < 0) { + Log.d(TAG, "End of stream reached") + break + } else { + val chunk = Microphone.State.Streaming( + chunkId = chunkId++, + byteArray = byteArray, + bytesRead = bytesRead, + ) + val delayMillis = calculateDelayMillis(bytesRead, DEFAULT_BITRATE) + + Log.d( + TAG, + "Streaming chunk ${chunkId - 1} with size $bytesRead bytes. " + + "Delay: $delayMillis millis", + ) + delay(delayMillis) + consumer.invoke(chunk) + } + } + } + } + + @Suppress("MagicNumber") + private fun calculateDelayMillis(bytesRead: Int, bitrate: Int): Long { + return (bytesRead * 8 * 1000L) / bitrate + } + + override fun stop() { + state.value = Microphone.State.Idle + } + + private companion object { + private const val TAG = "InputStreamMicrophone" + + // Make sure to convert the audio files to PCM 16-bit 44100 Hz + private const val AUDIO_EXTENSION = "wav" + + private const val BUFFER_MULTIPLIER = 4 + + private const val DEFAULT_BITRATE = 700_000 + } +} diff --git a/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/voicefeedback/FeedbackAgentE2ETest.kt b/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/voicefeedback/FeedbackAgentE2ETest.kt new file mode 100644 index 00000000000..fd84cb1b454 --- /dev/null +++ b/libnavigation-voicefeedback/src/androidTest/java/com/mapbox/navigation/voicefeedback/FeedbackAgentE2ETest.kt @@ -0,0 +1,87 @@ +package com.mapbox.navigation.voicefeedback + +import android.content.Context 
+import androidx.test.filters.LargeTest +import androidx.test.platform.app.InstrumentationRegistry +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.testing.createMapboxNavigation +import com.mapbox.navigation.testing.ui.utils.MapboxNavigationRule +import com.mapbox.navigation.testing.ui.utils.coroutines.sdkTest +import com.mapbox.navigation.testing.voicefeedback.InputStreamMicrophone +import kotlinx.coroutines.async +import kotlinx.coroutines.flow.dropWhile +import kotlinx.coroutines.flow.filterIsInstance +import kotlinx.coroutines.flow.first +import kotlinx.coroutines.flow.firstOrNull +import kotlinx.coroutines.flow.lastOrNull +import kotlinx.coroutines.flow.map +import kotlinx.coroutines.flow.takeWhile +import org.junit.Assert.assertEquals +import org.junit.Before +import org.junit.Ignore +import org.junit.Rule +import org.junit.Test +import java.util.Locale +import kotlin.time.ExperimentalTime + +/** + * Use these tests as a faster alternative to perform e2e test instead of integrating changes into + * a test application. 
+ */ +@LargeTest +@Ignore("These tests hit a real server and are not designed to be continuously run in CI.") +@OptIn(ExperimentalPreviewMapboxNavigationAPI::class, ExperimentalTime::class) +class FeedbackAgentE2ETest { + @get:Rule + val mapboxNavigationRule = MapboxNavigationRule() + + val context: Context + get() = InstrumentationRegistry.getInstrumentation().targetContext.applicationContext + + lateinit var options: FeedbackAgentOptions + + @Before + fun setup() { + options = FeedbackAgentOptions.Builder() + .language(Locale.getDefault()) + .endpoint(FeedbackAgentEndpoint.Testing) + .microphone( + InputStreamMicrophone { + context.assets.open("test_report.wav") + }, + ).build() + } + + @Test + fun testFeedbackReport() = sdkTest { + val mapboxNavigation = + createMapboxNavigation(context) + val feedbackAgent = + FeedbackAgentSession.Builder().options(options).build() + feedbackAgent.onAttached(mapboxNavigation) + feedbackAgent.connect() + + // wait for the session to be connected + feedbackAgent.asrState.first { it is ASRState.Idle } + + val transcription = async { + feedbackAgent.asrState + .dropWhile { it !is ASRState.Listening } + .takeWhile { it is ASRState.Listening } + .filterIsInstance() + .map { it.text } + .lastOrNull() + } + feedbackAgent.startListening() + + // wait for the audio input to be processed + feedbackAgent.asrState + .filterIsInstance() + .firstOrNull() + + assertEquals( + "I'm sending a test report there is a car accident right here", + transcription.await(), + ) + } +} diff --git a/libnavigation-voicefeedback/src/main/AndroidManifest.xml b/libnavigation-voicefeedback/src/main/AndroidManifest.xml new file mode 100644 index 00000000000..75559fe1206 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/AndroidManifest.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/ASRState.kt 
b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/ASRState.kt new file mode 100644 index 00000000000..6d34a51e909 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/ASRState.kt @@ -0,0 +1,139 @@ +package com.mapbox.navigation.voicefeedback + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import kotlinx.coroutines.flow.StateFlow +import kotlin.time.Duration +import kotlin.time.DurationUnit +import kotlin.time.ExperimentalTime +import kotlin.time.toDuration + +/** + * Represents the various states of an Automatic Speech Recognition (ASR) engine. + * + * This sealed interface defines a finite set of possible states that describe + * the lifecycle and result of a speech recognition session. It is intended to be + * used with reactive streams (e.g. [StateFlow]) to observe and respond to recognition state changes. + */ +@ExperimentalPreviewMapboxNavigationAPI +sealed interface ASRState { + /** + * Indicates that the ASR engine is idle and not actively listening or processing audio. + */ + object Idle : ASRState + + /** + * Indicates that the ASR engine is currently listening to the user's speech. + * + * @param text The partial or live transcription of the spoken input. + */ + class Listening(val text: String) : ASRState { + private val startNanos = System.nanoTime() + + /** + * Time elapsed since this listening state was created. This is used to measure the + * durations where text is not changing. + */ + @OptIn(ExperimentalTime::class) + internal fun elapsedTime(): Duration { + val elapsedNs = System.nanoTime() - startNanos + return elapsedNs.toDuration(DurationUnit.NANOSECONDS) + } + + /** + * Indicates whether some other object is "equal to" this one. + */ + override fun equals(other: Any?): Boolean = other is Listening && text == other.text + + /** + * Returns a hash code value for the object. 
+ */ + override fun hashCode(): Int = text.hashCode() + + /** + * Returns a string representation of the object. + */ + override fun toString(): String = "ASRState.Listening(text=$text)" + } + + /** + * Indicates that an error has occurred during the speech recognition process. + * + * @param error A [Throwable] describing the cause of the failure. + */ + class Error(val error: Throwable) : ASRState { + /** + * Indicates whether some other object is "equal to" this one. + */ + override fun equals(other: Any?): Boolean = other is Error && error == other.error + + /** + * Returns a hash code value for the object. + */ + override fun hashCode(): Int = error.hashCode() + + /** + * Returns a string representation of the object. + */ + override fun toString(): String = "ASRState.Error(error=$error)" + } + + /** + * Indicates that the user has finished speaking and the engine is + * now waiting for the final recognition result. + */ + object SpeechFinishedWaitingForResult : ASRState + + /** + * Indicates that the final recognition result is available. + * + * @param text The recognized speech converted into text. + * @param feedbackType The category or type of feedback (e.g., bug report, suggestion). + */ + class Result( + val text: String, + val feedbackType: String, + ) : ASRState { + /** + * Indicates whether some other object is "equal to" this one. + */ + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (other !is Result) return false + + if (text != other.text) return false + if (feedbackType != other.feedbackType) return false + + return true + } + + /** + * Returns a hash code value for the object. + */ + override fun hashCode(): Int { + var result = text.hashCode() + result = 31 * result + feedbackType.hashCode() + return result + } + + /** + * Returns a string representation of the object. 
+ */ + override fun toString(): String = "ASRState.Result(text=$text, feedbackType=$feedbackType)" + } + + /** + * Indicates that no recognizable speech was detected during the session. + */ + object NoResult : ASRState + + /** + * Indicates that the recognition was interrupted unexpectedly + * (e.g., by external factors such as app lifecycle events). + */ + object Interrupted : ASRState + + /** + * Indicates that the recognition session was interrupted due to a timeout. + */ + object InterruptedByTimeout : ASRState +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentEndpoint.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentEndpoint.kt new file mode 100644 index 00000000000..7c797a9fb2e --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentEndpoint.kt @@ -0,0 +1,96 @@ +package com.mapbox.navigation.voicefeedback + +import androidx.annotation.RestrictTo +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI + +/** + * Wrapper that defines the properties of endpoints required to be used with Feedback Agent. + * + * @property name describes the flavor of endpoint + * @property streamingApiHost host for streaming data + * @property streamingAsrApiHost host for Automatic Speech Recognition + */ +@ExperimentalPreviewMapboxNavigationAPI +class FeedbackAgentEndpoint private constructor( + val name: String, + val streamingApiHost: String, + val streamingAsrApiHost: String, +) { + companion object { + /** + * Name for production endpoints. + */ + const val PRODUCTION = "production" + + /** + * Name for testing endpoints. + */ + @RestrictTo(RestrictTo.Scope.LIBRARY_GROUP) + const val TESTING = "testing" + + /** + * Standard production endpoints. 
+ */ + val Production = FeedbackAgentEndpoint( + name = PRODUCTION, + streamingApiHost = "wss://mapgpt-production-ws.mapbox.com", + streamingAsrApiHost = "wss://api-navgptasr-production.mapbox.com", + ) + + /** + * Testing endpoints. + */ + @RestrictTo(RestrictTo.Scope.LIBRARY) + val Testing = FeedbackAgentEndpoint( + name = TESTING, + streamingApiHost = "wss://mapgpt-testing-ws.tilestream.net", + streamingAsrApiHost = "wss://api-navgptasr-staging.tilestream.net", + ) + + /** + * Custom endpoint type. + * + * @param name describes the kind of endpoint + * @param streamingApiHost api host to stream to + * @param streamingAsrApiHost api host for Automatic Speech Recognition + */ + fun custom(name: String, streamingApiHost: String, streamingAsrApiHost: String) = + FeedbackAgentEndpoint(name, streamingApiHost, streamingAsrApiHost) + } + + /** + * Returns a string representation of the object. + */ + override fun toString(): String { + return "FeedbackAgentEndpoint(" + + "name=$name, " + + "streamingApiHost=$streamingApiHost," + + "streamingAsrApiHost=$streamingAsrApiHost)" + } + + /** + * Indicates whether some other object is "equal to" this one. + */ + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (javaClass != other?.javaClass) return false + + other as FeedbackAgentEndpoint + + if (name != other.name) return false + if (streamingApiHost != other.streamingApiHost) return false + if (streamingAsrApiHost != other.streamingAsrApiHost) return false + + return true + } + + /** + * Returns a hash code value for the object. 
+ */ + override fun hashCode(): Int { + var result = name.hashCode() + result = 31 * result + streamingApiHost.hashCode() + result = 31 * result + streamingAsrApiHost.hashCode() + return result + } +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentOptions.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentOptions.kt new file mode 100644 index 00000000000..c0969dce1e7 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentOptions.kt @@ -0,0 +1,83 @@ +package com.mapbox.navigation.voicefeedback + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.voicefeedback.internal.audio.microphone.AudioRecordMicrophone +import java.util.Locale + +/** + * Options for configuring the Feedback Agent. + * + * @param language The assigned language for user input. + * @param endpoint Environment configuration. + */ +@ExperimentalPreviewMapboxNavigationAPI +class FeedbackAgentOptions private constructor( + val language: Locale, + val endpoint: FeedbackAgentEndpoint, + internal val microphone: Microphone = AudioRecordMicrophone(), +) { + /** + * Builder for creating a new instance of [FeedbackAgentOptions]. + */ + class Builder { + private var language: Locale = Locale.getDefault() + + private var endpoint: FeedbackAgentEndpoint = FeedbackAgentEndpoint.Production + + private var microphone: Microphone = AudioRecordMicrophone() + + /** + * @param language The assigned language for user input. Default is the device's locale. + */ + fun language(language: Locale): Builder = apply { this.language = language } + + /** + * @param endpoint Environment configuration. Default is production. + */ + fun endpoint(endpoint: FeedbackAgentEndpoint): Builder = apply { this.endpoint = endpoint } + + /** + * @param microphone A custom audio source input. 
+ */ + fun microphone(microphone: Microphone): Builder = + apply { this.microphone = microphone } + + /** + * Build the [FeedbackAgentOptions]. + */ + fun build() = FeedbackAgentOptions(language, endpoint, microphone) + } + + /** + * Indicates whether some other object is "equal to" this one. + */ + override fun equals(other: Any?): Boolean { + if (this === other) return true + if (javaClass != other?.javaClass) return false + + other as FeedbackAgentOptions + + if (language != other.language) return false + if (endpoint != other.endpoint) return false + if (microphone != other.microphone) return false + + return true + } + + /** + * Returns a hash code value for the object. + */ + override fun hashCode(): Int { + var result = language.hashCode() + result = 31 * result + endpoint.hashCode() + result = 31 * result + microphone.hashCode() + return result + } + + /** + * Returns a string representation of the object. + */ + override fun toString(): String { + return "FeedbackAgentOptions(endpoint=$endpoint, language=$language)" + } +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentSession.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentSession.kt new file mode 100644 index 00000000000..ed67e1c18ef --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackAgentSession.kt @@ -0,0 +1,203 @@ +package com.mapbox.navigation.voicefeedback + +import android.os.SystemClock +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.core.MapboxNavigation +import com.mapbox.navigation.core.internal.extensions.flowLocationMatcherResult +import com.mapbox.navigation.core.lifecycle.MapboxNavigationApp +import com.mapbox.navigation.core.lifecycle.MapboxNavigationObserver +import com.mapbox.navigation.core.trip.session.LocationMatcherResult +import com.mapbox.navigation.utils.internal.logD 
+import com.mapbox.navigation.voicefeedback.internal.AsrSessionState +import com.mapbox.navigation.voicefeedback.internal.DefaultContextProvider +import com.mapbox.navigation.voicefeedback.internal.MapboxASRServiceImpl +import com.mapbox.navigation.voicefeedback.internal.MapboxAutomaticSpeechRecognitionEngine +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.cancel +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.flow.collectLatest +import kotlinx.coroutines.flow.filterNotNull +import kotlinx.coroutines.flow.onCompletion +import kotlinx.coroutines.flow.onEach +import kotlinx.coroutines.flow.takeWhile +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import kotlin.time.ExperimentalTime + +/** + * Access point for core functionality of the Feedback Agent SDK, including environment + * configuration, lifecycle management, and surface capabilities. + * + * @param options a set of [FeedbackAgentOptions] used to customize various features + */ +@OptIn(ExperimentalTime::class) +@ExperimentalPreviewMapboxNavigationAPI +class FeedbackAgentSession private constructor( + val options: FeedbackAgentOptions, +) : MapboxNavigationObserver { + /** + * Builder for creating a new instance of [FeedbackAgentSession]. + * + * Default [FeedbackAgentOptions] are used unless [options] is called. + */ + class Builder { + private var options: FeedbackAgentOptions = FeedbackAgentOptions.Builder().build() + + /** + * @param options a set of [FeedbackAgentOptions] used to customize various features + */ + fun options(options: FeedbackAgentOptions): Builder = apply { this.options = options } + + /** + * Build the [FeedbackAgentSession]. 
+ */ + fun build(): FeedbackAgentSession = FeedbackAgentSession(options) + } + + private val microphone = options.microphone + + private var locationMatcherResult: LocationMatcherResult? = null + + private val contextProvider = DefaultContextProvider(options.language) { + locationMatcherResult + } + + private val mapboxASRService = MapboxASRServiceImpl( + language = options.language, + endpoint = options.endpoint, + feedbackAgentContextProvider = contextProvider, + ) + + private val engine = MapboxAutomaticSpeechRecognitionEngine( + mapboxASRService = mapboxASRService, + microphone = microphone, + ) + + /** + * A [StateFlow] representing the current user input state. + * + * Observers can collect this flow to reactively respond to changes in + * the recognition process, such as when it starts listening, detects speech, + * processes results, or encounters errors. A null value means that the ASR service is not + * connected. + */ + val asrState: StateFlow = engine.state + + private var mapboxNavigation: MapboxNavigation? = null + + private val connectAttemptFlow = MutableStateFlow(null) + + private lateinit var coroutineScope: CoroutineScope + + /** + * Signals that the [mapboxNavigation] instance is ready for use. + * + * @param mapboxNavigation instance that is being attached. 
+ * + * @see [MapboxNavigationApp.registerObserver] + */ + override fun onAttached(mapboxNavigation: MapboxNavigation) { + coroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main.immediate) + this.mapboxNavigation = mapboxNavigation + microphone.onAttached(mapboxNavigation) + + coroutineScope.launch(Dispatchers.IO) { + connectAttemptFlow.filterNotNull().collectLatest { + logD(TAG) { "Connect attempt" } + connect(mapboxNavigation.navigationOptions.accessToken ?: "") + } + } + + coroutineScope.launch(Dispatchers.Default) { + mapboxNavigation + .flowLocationMatcherResult() + .collect { locationMatcherResult = it } + } + } + + /** + * Signals that the [mapboxNavigation] instance is being detached. + * + * @param mapboxNavigation instance that is being detached. + * + * @see [MapboxNavigationApp.unregisterObserver] + */ + override fun onDetached(mapboxNavigation: MapboxNavigation) { + stopListening() + disconnect() + microphone.onDetached(mapboxNavigation) + this.mapboxNavigation = null + coroutineScope.cancel() + } + + private suspend fun connect(token: String) = withContext(Dispatchers.IO) { + // Wait for the service to be disconnected if it's not already + if (mapboxASRService.sessionState.value !is AsrSessionState.Disconnected) { + logD(TAG) { "Disconnecting from MapboxASRService before connecting again." } + engine.disconnect() + mapboxASRService.sessionState.takeWhile { it !is AsrSessionState.Disconnected } + .collect() + } + + // Start the connection + engine.connect(token) + + // Wait for the connection to be established. + // Save the session state and emit connection attempts when the session is cleared. + // Disconnect the service whenever the flow is terminated. 
+ mapboxASRService.sessionState.onEach { state -> + if (state is AsrSessionState.Connected) { + logD(TAG) { "onConnected $state" } + } + }.onCompletion { + logD(TAG) { "Connection flow terminated, disconnect from MapboxASRService" } + engine.disconnect() + }.collect() + } + + /** + * Starts listening for user input. + */ + fun startListening() { + engine.startListening() + } + + /** + * Stop user input. If the user mic is open and listening, it should be closed + */ + fun stopListening() { + engine.stopListening() + } + + /** + * Terminate ongoing conversation manually to close microphone and ignore the last user input. + */ + fun interruptListening() { + engine.interruptListening() + } + + /** + * Starts a new session. + * + * Upon a failed connection attempt, or loss of connection, the service will automatically try + * to reconnect. + */ + fun connect() { + connectAttemptFlow.value = SystemClock.elapsedRealtime() + } + + /** + * Closes the current session and disconnects from the host. 
+ */ + fun disconnect() { + engine.disconnect() + } + + private companion object { + private const val TAG = "FeedbackAgentSession" + } +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackExt.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackExt.kt new file mode 100644 index 00000000000..2163eb9cd20 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/FeedbackExt.kt @@ -0,0 +1,54 @@ +package com.mapbox.navigation.voicefeedback + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.core.MapboxNavigation +import com.mapbox.navigation.core.internal.telemetry.UserFeedbackCallback +import com.mapbox.navigation.core.internal.telemetry.postUserFeedback +import com.mapbox.navigation.core.telemetry.events.FeedbackEvent +import com.mapbox.navigation.core.telemetry.events.FeedbackHelper +import com.mapbox.navigation.core.telemetry.events.FeedbackMetadata +import com.mapbox.navigation.core.telemetry.events.FeedbackMetadataWrapper + +/** + * Send user voice feedback about an issue or problem with the Navigation SDK. + * + * Method can be invoked out of the trip session + * (whenever until [MapboxNavigation.onDestroy] is called), + * because a feedback is attached to passed location and time in the past + * when [FeedbackMetadata] was generated (see [MapboxNavigation.provideFeedbackMetadataWrapper]). + * + * @param feedbackSubType feedback subtype [FeedbackEvent.SubType] or custom feedback subtype + * @param description description message + * @param screenshot encoded screenshot + * @param feedbackMetadata use it to attach feedback to a specific passed location. 
+ * See [FeedbackMetadata] and [FeedbackMetadataWrapper] + * @param userFeedbackCallback invoked when the posted feedback has been processed + * + * to retrieve possible feedback subtypes for a given [feedbackSubType] + * @see [com.mapbox.navigation.ui.maps.util.ViewUtils.capture] to capture screenshots + * @see [FeedbackHelper.encodeScreenshot] to encode screenshots + */ +@ExperimentalPreviewMapboxNavigationAPI +@JvmOverloads +fun MapboxNavigation.postVoiceFeedback( + @FeedbackEvent.SubType feedbackSubType: String, + description: String, + screenshot: String, + feedbackMetadata: FeedbackMetadata? = null, + userFeedbackCallback: UserFeedbackCallback, +) { + postUserFeedback( + feedbackType = FeedbackEvent.VOICE_FEEDBACK, + description = description, + feedbackSource = FeedbackEvent.UI, + screenshot = screenshot, + feedbackSubType = arrayOf(feedbackSubType), + feedbackMetadata = feedbackMetadata, + userFeedbackCallback = userFeedbackCallback, + ) +} + +/** + * Feedback type *voice*: the user tapped a voice feedback button and sent a message + */ +private val FeedbackEvent.VOICE_FEEDBACK: String get() = "voice_feedback" diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/Microphone.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/Microphone.kt new file mode 100644 index 00000000000..0b035813ec7 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/Microphone.kt @@ -0,0 +1,122 @@ +package com.mapbox.navigation.voicefeedback + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.core.lifecycle.MapboxNavigationObserver +import kotlinx.coroutines.flow.StateFlow + +/** + * [Microphone] interface allows the platform to define a source for audio. + * The microphone can be used to stream audio data from the platform to the Mapbox SDK. 
+ */ +@ExperimentalPreviewMapboxNavigationAPI +interface Microphone : MapboxNavigationObserver { + + /** + * Configuration setting for the microphone. + */ + val config: Config + + /** + * The current state of the microphone. Implementations should update this as the microphone + * changes state. Example: + * + * ``` + * private val _state = MutableStateFlow(State.Disconnected) + * override val state: StateFlow = _state + * ``` + */ + val state: StateFlow + + /** + * Starts streaming audio from the microphone. The audio is streamed in chunks and surfaced + * through the [consumer] lambda as well as the [state] flow. The consumer lambda will use + * [State.Streaming] to process the audio data. The [state] flow can be used to share the + * streaming bytes for multiple uses. + * + * The stream will block while streaming. The stream will stop when [stop] is called, or when + * some event causes the stream to stop streaming. + */ + suspend fun stream(consumer: (State.Streaming) -> Unit) + + /** + * Stops streaming audio from the microphone and causes [stream] to exit. + * The [state] flow will move to [State.Idle]. + */ + fun stop() + + /** + * Defines the specific details from the microphone. + */ + data class Config( + /** + * The number of audio samples per second. + * Many speech-to-text services perform best with 16000 Hz audio. + */ + val sampleRateHz: Int = 16000, + ) + + /** + * Represents the current microphone state. + */ + sealed class State { + + /** + * Before the [Microphone] has been attached it is in the disconnected state. + * This state is used to indicate that the microphone is not available. + */ + object Disconnected : State() { + + /** + * @return a string representation of the object. + */ + override fun toString(): String = "Disconnected" + } + + /** + * The microphone is not currently available to stream audio. + */ + object Idle : State() { + + /** + * @return a string representation of the object. 
+ */ + override fun toString(): String = "Idle" + } + + /** + * The microphone is currently streaming audio. Collectively, the streaming states + * can be concatenated to form an audio file. The consumer of the audio data is able to + * process this audio data in real-time. + * + * @param chunkId unique number for each chunk of audio. + * @param byteArray An array containing the audio data. + * @param bytesRead the number of bytes the buffer contains of unique audio. + */ + class Streaming( + val chunkId: Int, + val byteArray: ByteArray, + val bytesRead: Int, + ) : State() { + + /** + * @return a string representation of the object. + */ + override fun toString(): String = "Streaming(chunkId=$chunkId, bytesRead=$bytesRead)" + } + + /** + * The microphone has encountered a recoverable error that should be surfaced to the user. + * The error state is used to give the user an opportunity to retry actions with the + * microphone. + * + * @param reason debuggable reason for error. Is not shown to the user. + */ + class Error(val reason: String) : State() { + + /** + * @return a string representation of the object. 
+ */ + override fun toString(): String = "Error(reason=$reason)" + } + } +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AsrData.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AsrData.kt new file mode 100644 index 00000000000..1f636d57c7a --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AsrData.kt @@ -0,0 +1,16 @@ +package com.mapbox.navigation.voicefeedback.internal + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI + +@ExperimentalPreviewMapboxNavigationAPI +internal sealed interface AsrData { + data class Transcript( + val text: String, + val isFinal: Boolean, + ) : AsrData + + data class Result( + val description: String, + val type: String, + ) : AsrData +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AsrSessionState.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AsrSessionState.kt new file mode 100644 index 00000000000..c2f12fdb93c --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AsrSessionState.kt @@ -0,0 +1,41 @@ +package com.mapbox.navigation.voicefeedback.internal + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI + +/** + * Connection state with the ASR service. + * + * There can only be one connected session at a time. + */ +@ExperimentalPreviewMapboxNavigationAPI +internal sealed class AsrSessionState { + + /** + * Represents a state where ASR is connected and can be interacted with. + * + * @param apiHost The host that the service is currently connected to. + * @param sessionId Unique ID of the current session. + * The ID can be stored and used with ASR to reconnect to a previously established session. 
+ */ + data class Connected( + val apiHost: String, + val sessionId: String, + ) : AsrSessionState() + + /** + * Represents a state where ASR is connecting. + * + * @param apiHost The host that the service is connecting to. + * @param reconnectSessionId Unique ID of the session the service is trying to connect to. + * If `null`, a new ID will be generated. + */ + data class Connecting( + val apiHost: String, + val reconnectSessionId: String?, + ) : AsrSessionState() + + /** + * Represents a state where ASR is disconnected. + */ + object Disconnected : AsrSessionState() +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AutomaticSpeechRecognitionEngine.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AutomaticSpeechRecognitionEngine.kt new file mode 100644 index 00000000000..ce3bb2294b4 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/AutomaticSpeechRecognitionEngine.kt @@ -0,0 +1,52 @@ +package com.mapbox.navigation.voicefeedback.internal + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.voicefeedback.ASRState +import kotlinx.coroutines.flow.StateFlow + +/** + * Interface representing an automatic speech recognition (ASR) engine. + * + * This interface provides a contract for components that perform speech recognition + * and expose their current recognition state as a reactive stream. + * + * Implementations are expected to manage the lifecycle of audio listening + * and recognition, including starting and stopping the recognition process. + */ +@ExperimentalPreviewMapboxNavigationAPI +internal interface AutomaticSpeechRecognitionEngine { + /** + * A [StateFlow] that emits updates about the current state of the ASR engine. 
+ * + * Observers can collect this flow to reactively respond to changes in + * the recognition process, such as when it starts listening, detects speech, + * processes results, or encounters errors. A null value means that the engine is not + * connected. + */ + val state: StateFlow + + /** + * Starts the speech recognition process. + * + * This typically activates the microphone and begins listening for audio input. + * Recognition results and state updates should be reflected through [state]. + */ + fun startListening() + + /** + * Stops the speech recognition process. + * + * This typically deactivates the microphone and finalizes any ongoing recognition. + * After calling this, the ASR engine should transition to an idle or stopped state. + */ + fun stopListening() + + fun connect(token: String) + + fun disconnect() + + /** + * Triggered when the user terminate conversation manually. + */ + fun interruptListening() +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackAgentContextDTO.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackAgentContextDTO.kt new file mode 100644 index 00000000000..47c73721f62 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackAgentContextDTO.kt @@ -0,0 +1,64 @@ +package com.mapbox.navigation.voicefeedback.internal + +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import kotlinx.serialization.SerialName +import kotlinx.serialization.Serializable + +/** + * Context required for Feedback Agent service to process the request. 
+ * + * @property userContext required user context + * @property appContext optional app context + */ +@ExperimentalPreviewMapboxNavigationAPI +@Serializable +internal data class FeedbackAgentContextDTO( + @SerialName("user_context") + val userContext: FeedbackAgentUserContextDTO, + @SerialName("app_context") + val appContext: FeedbackAgentAppContextDTO? = null, +) + +/** + * @property locale IETF language tag (based on ISO 639), for example "en-US". + * This locale will be used to influence the language the AI replies in. + * @property temperatureUnits "Fahrenheit" or "Celsius". + * @property distanceUnits "mi" or "km" + * @property clientTime The current time in 'yyyy-MM-dd'T'HH:mm:ss' format. + */ +@ExperimentalPreviewMapboxNavigationAPI +@Serializable +internal data class FeedbackAgentAppContextDTO( + @SerialName("locale") + val locale: String? = null, + @SerialName("temp_units") + val temperatureUnits: String? = null, + @SerialName("distance_units") + val distanceUnits: String? = null, + @SerialName("client_time") + val clientTime: String? = null, +) + +/** + * @property lat Latitude of the current location. + * @property lon Longitude of the current location. + * @property heading Current user heading. + * @property placeName The name of the place where the user currently is. For example: + * - Neighborhood, colloquial sub-city features often referred to in local parlance. + * - Place, cities, villages, municipalities, etc. + * - Locality, sub-city features present in countries where such an additional administrative layer is used in postal addressing. + * - District, smaller than top-level administrative features but typically larger than cities. + * - Region, top-level sub-national administrative features, such as states in the United States or provinces in Canada or China. 
+ * - Country, Generally recognized countries or, in some cases like Hong Kong, an area of quasi-national administrative status that has been given a designated country code under ISO 3166-1. + * + * The provided name should be the most granular available to be determined (for example, a Neighborhood should be preferred over Place, if available). + */ +@ExperimentalPreviewMapboxNavigationAPI +@Serializable +internal data class FeedbackAgentUserContextDTO( + val lat: String, + val lon: String, + val heading: String? = null, + @SerialName("place_name") + val placeName: String, +) diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackAgentContextProvider.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackAgentContextProvider.kt new file mode 100644 index 00000000000..9deb5084146 --- /dev/null +++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackAgentContextProvider.kt @@ -0,0 +1,82 @@ +package com.mapbox.navigation.voicefeedback.internal + +import android.os.Build +import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI +import com.mapbox.navigation.base.formatter.UnitType +import com.mapbox.navigation.base.internal.extensions.LocaleEx.getUnitTypeForLocale +import com.mapbox.navigation.core.trip.session.LocationMatcherResult +import java.text.SimpleDateFormat +import java.time.LocalDateTime +import java.time.format.DateTimeFormatter +import java.util.Calendar +import java.util.Locale + +/** + * Provides a mechanism for retrieving the current Feedback Agent context. + * + * Implementations of this interface supply the context data required for Feedback Agent interactions. + */ +@ExperimentalPreviewMapboxNavigationAPI +internal interface FeedbackAgentContextProvider { + + /** + * Retrieves the current Feedback Agent context. + * + * @return The current [FeedbackAgentContextDTO]. 
+ */ + fun getContext(): FeedbackAgentContextDTO +} + +private const val DATE_TIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ss" + +@ExperimentalPreviewMapboxNavigationAPI +internal class DefaultContextProvider( + private val locale: Locale, + private val locationProvider: () -> LocationMatcherResult?, +) : FeedbackAgentContextProvider { + private val formatter by lazy { + SimpleDateFormat(DATE_TIME_PATTERN, Locale.US) + } + + override fun getContext(): FeedbackAgentContextDTO { + return FeedbackAgentContextDTO( + userContext = getUserContext(), + appContext = getAppContext(), + ) + } + + private fun getUserContext(): FeedbackAgentUserContextDTO { + val location = locationProvider()?.enhancedLocation + ?: return FeedbackAgentUserContextDTO("", "", null, "") + + return FeedbackAgentUserContextDTO( + lat = location.latitude.toString(), + lon = location.longitude.toString(), + heading = location.bearing.toString(), + placeName = "", + ) + } + + private fun getAppContext(): FeedbackAgentAppContextDTO { + val (temperatureUnits, distanceUnits) = when (locale.getUnitTypeForLocale()) { + UnitType.IMPERIAL -> "Fahrenheit" to "mi" + UnitType.METRIC -> "Celsius" to "km" + } + + return FeedbackAgentAppContextDTO( + locale = locale.toLanguageTag(), + temperatureUnits = temperatureUnits, + distanceUnits = distanceUnits, + clientTime = getClientTime(), + ) + } + + private fun getClientTime(): String { + return if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + LocalDateTime.now().format(DateTimeFormatter.ofPattern(DATE_TIME_PATTERN, Locale.US)) + } else { + val time = Calendar.getInstance().time + formatter.format(time) + } + } +} diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackDTO.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackDTO.kt new file mode 100644 index 00000000000..4c34b3b0718 --- /dev/null +++ 
b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/FeedbackDTO.kt
@@ -0,0 +1,16 @@
+package com.mapbox.navigation.voicefeedback.internal
+
+import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+/** Action type string; the ASR observer only decodes actions whose `type` equals this value. */
+internal const val FEEDBACK_ACTION_TYPE = "feedback"
+
+/**
+ * Payload decoded from the JSON data of a feedback action.
+ *
+ * @param feedbackType value of the `feedbackType` JSON field
+ * @param feedbackDescription value of the `feedbackDescription` JSON field
+ */
+@ExperimentalPreviewMapboxNavigationAPI
+@Serializable
+internal data class FeedbackDTO(
+    @SerialName("feedbackType")
+    val feedbackType: String,
+    @SerialName("feedbackDescription")
+    val feedbackDescription: String,
+)
diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/MapboxASRService.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/MapboxASRService.kt
new file mode 100644
index 00000000000..17e54fca2e2
--- /dev/null
+++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/MapboxASRService.kt
@@ -0,0 +1,301 @@
+package com.mapbox.navigation.voicefeedback.internal
+
+import android.annotation.SuppressLint
+import com.mapbox.common.ValueConverter
+import com.mapbox.mapgpt.experimental.MapgptAsrTranscript
+import com.mapbox.mapgpt.experimental.MapgptConfiguration
+import com.mapbox.mapgpt.experimental.MapgptEndpoint
+import com.mapbox.mapgpt.experimental.MapgptEndpointType
+import com.mapbox.mapgpt.experimental.MapgptMessage
+import com.mapbox.mapgpt.experimental.MapgptMessageAction
+import com.mapbox.mapgpt.experimental.MapgptMessageConversation
+import com.mapbox.mapgpt.experimental.MapgptMessageEntity
+import com.mapbox.mapgpt.experimental.MapgptObserver
+import com.mapbox.mapgpt.experimental.MapgptSession
+import com.mapbox.mapgpt.experimental.MapgptSessionError
+import com.mapbox.mapgpt.experimental.MapgptSessionErrorType
+import com.mapbox.mapgpt.experimental.MapgptSessionLanguage
+import com.mapbox.mapgpt.experimental.MapgptSessionMode
+import
com.mapbox.mapgpt.experimental.MapgptSessionOptions
+import com.mapbox.mapgpt.experimental.MapgptSessionReconnecting
+import com.mapbox.mapgpt.experimental.MapgptSessionType
+import com.mapbox.mapgpt.experimental.MapgptStartSession
+import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI
+import com.mapbox.navigation.utils.internal.logD
+import com.mapbox.navigation.utils.internal.logE
+import com.mapbox.navigation.voicefeedback.FeedbackAgentEndpoint
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.Job
+import kotlinx.coroutines.MainScope
+import kotlinx.coroutines.channels.BufferOverflow
+import kotlinx.coroutines.channels.awaitClose
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.MutableSharedFlow
+import kotlinx.coroutines.flow.MutableStateFlow
+import kotlinx.coroutines.flow.callbackFlow
+import kotlinx.coroutines.flow.collect
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.suspendCancellableCoroutine
+import kotlinx.coroutines.withTimeoutOrNull
+import kotlinx.serialization.ExperimentalSerializationApi
+import kotlinx.serialization.SerializationException
+import kotlinx.serialization.encodeToString
+import kotlinx.serialization.json.Json
+import java.util.Locale
+import java.util.UUID
+import kotlin.coroutines.resume
+import kotlin.time.Duration
+import kotlin.time.Duration.Companion.milliseconds
+import kotlin.time.ExperimentalTime
+
+/**
+ * Client-side contract of the speech recognition (ASR) session.
+ *
+ * [sessionState] reports the connection lifecycle; [asrData] carries transcripts and feedback
+ * results, with `null` emitted whenever a request is started or finalized.
+ */
+@ExperimentalPreviewMapboxNavigationAPI
+internal interface MapboxASRService {
+
+    fun connect(token: String)
+    suspend fun disconnect()
+    fun startAsrRequest()
+    fun sendFinalAsrData(abort: Boolean)
+    fun sendAsrData(data: ByteArray)
+
+    val sessionState: Flow<AsrSessionState>
+    val asrData: Flow<AsrData?>
+}
+
+/**
+ * [MapboxASRService] backed by a native [MapgptSession].
+ *
+ * @param profileId profile passed to the session options and to every ASR request
+ * @param language locale whose language code selects the session language
+ * @param endpoint endpoint description converted into a native [MapgptEndpoint] on connect
+ * @param feedbackAgentContextProvider supplies the context that is serialized with each request
+ * @param cancelTimeout upper bound for waiting on the native cancel callback in [disconnect]
+ * @param coroutineScope scope in which the connection collector is launched
+ * @param jsonDecoder decoder used for feedback action payloads
+ */
+@OptIn(ExperimentalTime::class)
+@ExperimentalPreviewMapboxNavigationAPI
+internal class MapboxASRServiceImpl(
+    private val profileId: String = DEFAULT_FEEDBACK_PROFILE_ID,
+    private val language: Locale,
+    private val endpoint: FeedbackAgentEndpoint,
+    private val feedbackAgentContextProvider: FeedbackAgentContextProvider,
+    private val cancelTimeout: Duration = DEFAULT_CANCEL_TIMEOUT,
+    private val coroutineScope: CoroutineScope = MainScope(),
+    private val jsonDecoder: Json = Json { ignoreUnknownKeys = true },
+) : MapboxASRService {
+
+    // Encoder for the outgoing request context.
+    @OptIn(ExperimentalSerializationApi::class)
+    private val jsonParser = Json {
+        ignoreUnknownKeys = true
+        serializersModule = sessionFrameSerializerModule
+        explicitNulls = false
+    }
+
+    private val mapgptSession: MapgptSession = MapgptSession()
+    private var connectionJob: Job? = null
+    override val asrData = MutableSharedFlow<AsrData?>(
+        extraBufferCapacity = 1,
+        onBufferOverflow = BufferOverflow.DROP_OLDEST,
+    )
+    override val sessionState = MutableStateFlow<AsrSessionState>(AsrSessionState.Disconnected)
+
+    // True between startAsrRequest() and either a final transcript or sendFinalAsrData();
+    // transcripts received outside that window are not forwarded to [asrData].
+    private var listeningActive = false
+
+    /**
+     * Starts (or restarts) the session: any previous connection job is cancelled and a new one
+     * mirrors the native session events into [sessionState].
+     */
+    @OptIn(ExperimentalPreviewMapboxNavigationAPI::class)
+    override fun connect(token: String) {
+        connectionJob?.cancel()
+        connectionJob = coroutineScope.launch(Dispatchers.Main) {
+            connect(
+                endpoint = endpoint.toNativeMapGptEndpoint(),
+                token = token,
+            ).collect { state ->
+                sessionState.value = state
+            }
+        }
+    }
+
+    /**
+     * Opens the native session and exposes its lifecycle as a flow of [AsrSessionState].
+     * Transcripts and feedback results are pushed to [asrData] as a side effect. When the
+     * collector goes away the native connection is cancelled.
+     */
+    @SuppressLint("RestrictedApi")
+    @OptIn(ExperimentalPreviewMapboxNavigationAPI::class)
+    private fun connect(
+        token: String,
+        endpoint: MapgptEndpoint,
+    ): Flow<AsrSessionState> = callbackFlow {
+        logD(TAG) { "connect: $endpoint" }
+        MapgptConfiguration.setEndpoint(endpoint)
+
+        logD(TAG) { "Connecting to streamingApiHost ${endpoint.websocketUrlAsr}" }
+        val options = MapgptSessionOptions.Builder()
+            .token(token)
+            .uuid(UUID.randomUUID().toString())
+            .type(MapgptSessionType.ASR)
+            .mode(MapgptSessionMode.ONLINE)
+            .language(sessionLanguage())
+            .profile(profileId)
+            .reconnect(true)
+            .build()
+
+        val mapGptObserver = MapboxASRObserver(
+            onMapGptSessionStartedCallback = { sessionId ->
+                logD(TAG) { "onMapGptSessionStartedCallback: $sessionId" }
+                trySend(AsrSessionState.Connected(endpoint.websocketUrlAsr, sessionId))
+            },
+            onMapGptSessionErrorCallback = { nativeError ->
+                logD(TAG) { "onMapGptSessionErrorCallback: $nativeError" }
+                trySend(AsrSessionState.Disconnected)
+            },
+            onReconnecting = { reconnectionData ->
+                logD(TAG) { "onReconnecting: $reconnectionData" }
+                trySend(
+                    AsrSessionState.Connecting(
+                        apiHost = endpoint.websocketUrlAsr,
+                        reconnectSessionId = reconnectionData,
+                    ),
+                )
+            },
+            onAsrTranscriptReceived = { text: String, isFinal: Boolean ->
+                logD(TAG) { "onAsrTranscriptReceived: $text isFinal: $isFinal" }
+                if (listeningActive) asrData.tryEmit(AsrData.Transcript(text, isFinal))
+                if (isFinal) listeningActive = false
+            },
+            onFeedbackReceived = { feedbackDTO ->
+                logD(TAG) { "onFeedbackReceived: $feedbackDTO" }
+                asrData.tryEmit(
+                    AsrData.Result(
+                        feedbackDTO.feedbackDescription,
+                        feedbackDTO.feedbackType,
+                    ),
+                )
+            },
+        )
+        mapgptSession.connect(options, mapGptObserver)
+
+        awaitClose {
+            mapgptSession.cancelConnection {
+                logD(TAG) { "Connection cancelled: $it" }
+            }
+        }
+    }
+
+    /**
+     * Maps the language code of [language] to a native session language.
+     * Codes without a dedicated entry fall back to English.
+     */
+    @SuppressLint("RestrictedApi")
+    private fun sessionLanguage(): MapgptSessionLanguage =
+        when (language.language.lowercase(Locale.ROOT)) {
+            "zh" -> MapgptSessionLanguage.CHINESE
+            "nl" -> MapgptSessionLanguage.DUTCH
+            "en" -> MapgptSessionLanguage.ENGLISH
+            "fr" -> MapgptSessionLanguage.FRENCH
+            "de" -> MapgptSessionLanguage.GERMAN
+            "he" -> MapgptSessionLanguage.HEBREW
+            "it" -> MapgptSessionLanguage.ITALIAN
+            "ja" -> MapgptSessionLanguage.JAPANESE
+            "ko" -> MapgptSessionLanguage.KOREAN
+            "es" -> MapgptSessionLanguage.SPANISH
+            else -> MapgptSessionLanguage.ENGLISH
+        }
+
+    /**
+     * Tears the session down and waits, for at most [cancelTimeout], until the native layer
+     * confirms the cancellation.
+     */
+    override suspend fun disconnect() {
+        connectionJob?.cancel()
+        // The cancelled job was the only writer of [sessionState]; without this explicit
+        // update observers would keep seeing the last state (e.g. Connected) after teardown.
+        sessionState.value = AsrSessionState.Disconnected
+        withTimeoutOrNull(cancelTimeout) {
+            suspendCancellableCoroutine<Unit> { continuation ->
+                logD(TAG) { "platform cancelConnection start" }
+                mapgptSession.cancelConnection {
+                    logD(TAG) { "platform connection cancelled: $it" }
+                    // Guard against the callback being delivered more than once: resuming an
+                    // already resumed continuation throws.
+                    if (continuation.isActive) continuation.resume(Unit)
+                }
+                logD(TAG) { "platform cancelConnection complete" }
+            }
+        }
+    }
+
+    /**
+     * Clears the last ASR value, marks listening as active and starts a native ASR request
+     * carrying the JSON-encoded context from [feedbackAgentContextProvider].
+     * A context that cannot be converted to a native value is logged and the request is skipped.
+     */
+    override fun startAsrRequest() {
+        asrData.tryEmit(null)
+        listeningActive = true
+        val contextDTO = feedbackAgentContextProvider.getContext()
+        val contextJson = jsonParser.encodeToString(contextDTO)
+        logD(TAG) { "startAsrRequest called with context = $contextJson" }
+        ValueConverter.fromJson(contextJson).onValue { context ->
+            mapgptSession.startAsrRequest(context, emptyList(), profileId)
+        }.onError { error ->
+            logE(TAG) { "Start ASR failed: $error" }
+        }
+    }
+
+    /**
+     * Marks listening as inactive, clears the last ASR value and finalizes the native request.
+     *
+     * @param abort forwarded unchanged to the native `finalizeAsrRequest`
+     */
+    override fun sendFinalAsrData(abort: Boolean) {
+        listeningActive = false
+        asrData.tryEmit(null)
+        mapgptSession.finalizeAsrRequest(abort)
+    }
+
+    /** Forwards a chunk of audio bytes to the native session. */
+    override fun sendAsrData(data: ByteArray) {
+        mapgptSession.sendAsrData(data)
+    }
+
+    /** Case-insensitive lookup of an endpoint type by name; `null` when nothing matches. */
+    @SuppressLint("RestrictedApi")
+    private fun String.toMapgptEndpointType(): MapgptEndpointType? {
+        return MapgptEndpointType.values().firstOrNull { mapgptEndpointType ->
+            mapgptEndpointType.name.equals(this, true)
+        }
+    }
+
+    /**
+     * Adapter from [MapgptObserver] to plain lambdas. Message, conversation and entity events
+     * are only logged; action events are decoded when their type is [FEEDBACK_ACTION_TYPE].
+     */
+    private inner class MapboxASRObserver(
+        private val onMapGptSessionStartedCallback: (sessionId: String) -> Unit,
+        private val onMapGptSessionErrorCallback: (nativeError: MapgptSessionErrorType) -> Unit,
+        private val onAsrTranscriptReceived: (text: String, isFinal: Boolean) -> Unit,
+        private val onFeedbackReceived: (feedbackDTO: FeedbackDTO) -> Unit,
+        private val onReconnecting: (sessionId: String) -> Unit,
+    ) : MapgptObserver {
+
+        override fun onMapgptSessionStarted(message: MapgptStartSession) {
+            logD(TAG) { "onMapgptSessionStarted: $message" }
+            onMapGptSessionStartedCallback(message.sessionId)
+        }
+
+        override fun onMapgptSessionReconnecting(reconnecting: MapgptSessionReconnecting) {
+            logD(TAG) { "onMapgptSessionReconnecting: $reconnecting" }
+            onReconnecting(reconnecting.sessionId)
+        }
+
+        override fun onMapgptSessionError(error: MapgptSessionError) {
+            logD(TAG) { "onMapgptSessionError: $error" }
+            onMapGptSessionErrorCallback(error.type)
+        }
+
+        override fun onMapgptMessageReceived(message: MapgptMessage) {
+            logD(TAG) { "onMapgptMessageReceived: $message" }
+        }
+
+        override fun onMapgptConversationReceived(conversation: MapgptMessageConversation) {
+            logD(TAG) { "onMapgptConversationReceived: $conversation" }
+        }
+
+        override fun onMapgptEntityReceived(entity: MapgptMessageEntity) {
+            logD(TAG) { "onMapgptEntityReceived: $entity" }
+        }
+
+        override fun onMapgptActionReceived(action: MapgptMessageAction) {
+            logD(TAG) { "onMapgptActionReceived: $action" }
+            if (action.type == FEEDBACK_ACTION_TYPE) {
+                // A malformed payload is logged and dropped instead of crashing the observer.
+                try {
+                    val feedbackDTO =
+                        jsonDecoder.decodeFromString(
+                            FeedbackDTO.serializer(),
+                            action.raw.data.toJson(),
+                        )
+                    onFeedbackReceived(feedbackDTO)
+                } catch (se: SerializationException) {
+                    logE(TAG) { "onMapgptActionReceived error: $se" }
+                } catch (ise: IllegalStateException) {
+                    logE(TAG) { "onMapgptActionReceived error: $ise" }
+                }
+            }
+        }
+
+        override fun onMapgptAsrTranscript(transcript: MapgptAsrTranscript) {
+            logD(TAG) { "onMapgptAsrTranscript: $transcript" }
+            onAsrTranscriptReceived(transcript.text, transcript.isFinal)
+        }
+    }
+
+    /**
+     * Converts the public endpoint description into the native one. The endpoint type is
+     * looked up by name and defaults to PRODUCTION; the conversation URL is left empty.
+     */
+    private fun FeedbackAgentEndpoint.toNativeMapGptEndpoint(): MapgptEndpoint {
+        return MapgptEndpoint.Builder()
+            .name(this.name)
+            .type(this.name.toMapgptEndpointType() ?: MapgptEndpointType.PRODUCTION)
+            .conversationUrl("")
+            .websocketUrlAsr(this.streamingAsrApiHost)
+            .websocketUrlText(this.streamingApiHost)
+            .build()
+    }
+
+    companion object {
+
+        private const val TAG = "MapboxASRService"
+        private const val DEFAULT_FEEDBACK_PROFILE_ID = "feedback"
+        private val DEFAULT_CANCEL_TIMEOUT = 300.milliseconds
+    }
+}
diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/MapboxAutomaticSpeechRecognitionEngine.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/MapboxAutomaticSpeechRecognitionEngine.kt
new file mode 100644
index 00000000000..dbf359b4077
--- /dev/null
+++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/MapboxAutomaticSpeechRecognitionEngine.kt
@@ -0,0 +1,199 @@
+package
com.mapbox.navigation.voicefeedback.internal
+
+import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI
+import com.mapbox.navigation.utils.internal.logD
+import com.mapbox.navigation.utils.internal.logI
+import com.mapbox.navigation.voicefeedback.ASRState
+import com.mapbox.navigation.voicefeedback.Microphone
+import kotlinx.coroutines.CoroutineScope
+import kotlinx.coroutines.Dispatchers
+import kotlinx.coroutines.MainScope
+import kotlinx.coroutines.delay
+import kotlinx.coroutines.flow.MutableStateFlow
+import kotlinx.coroutines.flow.collect
+import kotlinx.coroutines.flow.collectLatest
+import kotlinx.coroutines.flow.combine
+import kotlinx.coroutines.flow.filterIsInstance
+import kotlinx.coroutines.flow.filterNotNull
+import kotlinx.coroutines.flow.map
+import kotlinx.coroutines.launch
+import kotlin.time.Duration
+import kotlin.time.DurationUnit
+import kotlin.time.ExperimentalTime
+import kotlin.time.toDuration
+
+/**
+ * Speech recognition engine that connects a [Microphone] to a [MapboxASRService] and exposes
+ * the combined result as a single [state].
+ *
+ * @param mapboxASRService service that receives the audio and reports session/ASR data
+ * @param microphone audio source; its chunks are forwarded to the service while listening
+ * @param scope scope in which all collectors of this engine are launched
+ * @param stoppedSpeakingThreshold elapsed time of a listening state after which the user is
+ * treated as having stopped speaking
+ * @param checkSpeakingInterval delay between two checks of that elapsed time
+ * @param resultTimeout how long to wait for a result after the final transcript
+ */
+@OptIn(ExperimentalTime::class)
+@ExperimentalPreviewMapboxNavigationAPI
+internal class MapboxAutomaticSpeechRecognitionEngine(
+    private val mapboxASRService: MapboxASRService,
+    private val microphone: Microphone,
+    private val scope: CoroutineScope = MainScope(),
+    private val stoppedSpeakingThreshold: Duration = STOPPED_SPEAKING_THRESHOLD,
+    private val checkSpeakingInterval: Duration = CHECK_SPEAKING_INTERVAL,
+    private val resultTimeout: Duration = RESULT_TIMEOUT,
+) : AutomaticSpeechRecognitionEngine {
+
+    // Whether microphone audio should currently be streamed to the service.
+    private val listeningState = MutableStateFlow(value = false)
+    override val state = MutableStateFlow(null)
+
+    // Listening state seen at the previous "stopped speaking" check; see launchAsrTimeoutMonitor.
+    private var lastKnownStopSpeakingState: ASRState? = null
+
+    init {
+        // Session state -> engine state: Connected becomes Idle, anything else clears the state.
+        scope.launch(Dispatchers.Main) {
+            mapboxASRService.sessionState
+                .map {
+                    when (it) {
+                        is AsrSessionState.Connected -> ASRState.Idle
+                        is AsrSessionState.Connecting, AsrSessionState.Disconnected -> null
+                    }
+                }.collect {
+                    state.value = it
+                }
+        }
+        // ASR data -> engine state. Results are always published; transcripts only while
+        // listening. Combinations that produce null are dropped by filterNotNull().
+        scope.launch(Dispatchers.Main) {
+            combine(
+                listeningState,
+                mapboxASRService.asrData,
+            ) { isListening, asrData ->
+                logD(TAG) { "listeningState: $isListening asrData: $asrData" }
+                when {
+                    asrData is AsrData.Result ->
+                        ASRState.Result(asrData.description, asrData.type)
+
+                    isListening && asrData is AsrData.Transcript -> {
+                        if (asrData.isFinal) {
+                            // stopListening() sets the state to Idle first; the value returned
+                            // here is collected afterwards and replaces it.
+                            stopListening()
+                            ASRState.SpeechFinishedWaitingForResult
+                        } else {
+                            ASRState.Listening(asrData.text)
+                        }
+                    }
+
+                    else -> null
+                }
+            }.filterNotNull().collect { asrState ->
+                logD(TAG) { "newAsrState: $asrState" }
+                state.value = asrState
+            }
+        }
+        // Microphone errors -> ASRState.Error carrying the reported reason.
+        // NOTE(review): filterIsInstance() has no type argument in this copy; it looks like it
+        // was lost together with other `<...>` arguments — confirm against the original file.
+        scope.launch(Dispatchers.Main) {
+            microphone.state
+                .filterIsInstance()
+                .collect { microphoneErrorState ->
+                    logD(TAG) { "Microphone error handled: $microphoneErrorState" }
+                    state.value = ASRState.Error(RuntimeException(microphoneErrorState.reason))
+                }
+        }
+
+        // Audio pump: streams microphone chunks to the service while listening, and stops the
+        // microphone when listening is switched off.
+        scope.launch(Dispatchers.IO) {
+            listeningState.collectLatest { isListening ->
+                logD(TAG) { "stream start: $isListening" }
+                if (isListening) {
+                    microphone.stream { streaming ->
+                        // Re-read the flag for every chunk so streaming ends as soon as
+                        // listening has been switched off.
+                        val streamingIsListening = listeningState.value
+                        if (!streamingIsListening) {
+                            logD(TAG) { "Microphone has now stopped listening" }
+                            microphone.stop()
+                            return@stream
+                        }
+                        sendAsrData(streaming)
+                    }
+                } else {
+                    microphone.stop()
+                }
+            }
+        }
+        launchAsrTimeoutMonitor()
+    }
+
+    /**
+     * Watches [state] and applies two timeouts.
+     *
+     * While the state is Listening, its elapsed time is checked every [checkSpeakingInterval].
+     * Once it reaches [stoppedSpeakingThreshold], listening is switched off; the state becomes
+     * InterruptedByTimeout if the transcript is blank or the same state was already seen at the
+     * previous check, otherwise the state is remembered for the next check.
+     *
+     * While the state is SpeechFinishedWaitingForResult, the state becomes InterruptedByTimeout
+     * if it has not changed within [resultTimeout].
+     *
+     * collectLatest cancels the running branch whenever [state] changes.
+     */
+    private fun launchAsrTimeoutMonitor() = scope.launch(Dispatchers.Main) {
+        state.collectLatest { asrState ->
+            logI(TAG) { "launchAsrTimeoutMonitor: $asrState" }
+            when (asrState) {
+                is ASRState.Listening -> {
+                    while (true) {
+                        // If the elapsed time cannot be computed, treat the threshold as reached.
+                        val elapsed = try {
+                            asrState.elapsedTime()
+                        } catch (iae: IllegalArgumentException) {
+                            stoppedSpeakingThreshold
+                        }
+                        if (elapsed >= stoppedSpeakingThreshold) {
+                            val stateValue = state.value
+                            val transcription = (stateValue as? ASRState.Listening)?.text.orEmpty()
+                            logI(TAG) { "User has stopped speaking: $transcription" }
+                            logI(TAG) {
+                                "User has stopped speaking. " +
+                                    "State: $stateValue LastKnownState: $lastKnownStopSpeakingState"
+                            }
+                            listeningState.value = false
+                            if (transcription.isBlank() ||
+                                lastKnownStopSpeakingState == stateValue
+                            ) {
+                                logI(TAG) { "[MapboxASREngine]. InterruptedByTimeout" }
+                                state.value = ASRState.InterruptedByTimeout
+                                lastKnownStopSpeakingState = null
+                            } else {
+                                lastKnownStopSpeakingState = stateValue
+                            }
+                        }
+                        delay(checkSpeakingInterval)
+                    }
+                }
+                is ASRState.SpeechFinishedWaitingForResult -> {
+                    delay(resultTimeout)
+                    logI(TAG) {
+                        "No server response received within $resultTimeout. " +
+                            "Possible causes: speech classified as non-feedback by backend, " +
+                            "or loss of connectivity."
+                    }
+                    state.value = ASRState.InterruptedByTimeout
+                }
+                else -> { /* no-op */ }
+            }
+        }
+    }
+
+    /** Starts an ASR request, publishes an empty Listening state and enables audio streaming. */
+    override fun startListening() {
+        logD(TAG) { "startListening" }
+        mapboxASRService.startAsrRequest()
+        state.value = ASRState.Listening("")
+        listeningState.value = true
+    }
+
+    /** Disables audio streaming, publishes Idle and finalizes the request without aborting it. */
+    override fun stopListening() {
+        logD(TAG) { "stopListening" }
+        listeningState.value = false
+        state.value = ASRState.Idle
+        mapboxASRService.sendFinalAsrData(false)
+    }
+
+    /** Disables audio streaming, publishes Interrupted and finalizes the request with abort. */
+    override fun interruptListening() {
+        logD(TAG) { "interruptListening" }
+        listeningState.value = false
+        state.value = ASRState.Interrupted
+        mapboxASRService.sendFinalAsrData(true)
+    }
+
+    /** Delegates to [MapboxASRService.connect]. */
+    override fun connect(token: String) {
+        mapboxASRService.connect(token)
+    }
+
+    /** Launches the suspending [MapboxASRService.disconnect] in [scope] and returns at once. */
+    override fun disconnect() {
+        scope.launch {
+            mapboxASRService.disconnect()
+        }
+    }
+
+    /** Forwards the bytes of one microphone chunk to the service. */
+    private fun sendAsrData(streaming: Microphone.State.Streaming) {
+        mapboxASRService.sendAsrData(streaming.byteArray)
+    }
+
+    companion object {
+
+        private const val TAG = "MapboxAutomaticSpeechRecognitionEngine"
+
+        private val STOPPED_SPEAKING_THRESHOLD = 6.toDuration(DurationUnit.SECONDS)
+        private val CHECK_SPEAKING_INTERVAL = 1.toDuration(DurationUnit.SECONDS)
+        private val RESULT_TIMEOUT = 5.toDuration(DurationUnit.SECONDS)
+    }
+}
diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/WebSocketDTOs.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/WebSocketDTOs.kt
new file mode 100644
index 00000000000..7e4d4f79e79
--- /dev/null
+++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/WebSocketDTOs.kt
@@ -0,0 +1,385 @@
+@file:OptIn(ExperimentalSerializationApi::class)
+
+package com.mapbox.navigation.voicefeedback.internal
+
+import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI
+import kotlinx.serialization.DeserializationStrategy
+import kotlinx.serialization.ExperimentalSerializationApi
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.decodeFromString
+import kotlinx.serialization.descriptors.SerialDescriptor
+import kotlinx.serialization.descriptors.buildClassSerialDescriptor
+import kotlinx.serialization.descriptors.element
+import kotlinx.serialization.encodeToString
+import kotlinx.serialization.encoding.Decoder
+import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.JsonClassDiscriminator
+import kotlinx.serialization.json.JsonDecoder
+import kotlinx.serialization.json.JsonElement
+import kotlinx.serialization.json.JsonObject
+import kotlinx.serialization.json.contentOrNull
+import kotlinx.serialization.json.encodeToJsonElement
+import kotlinx.serialization.json.jsonObject
+import kotlinx.serialization.json.jsonPrimitive
+import kotlinx.serialization.modules.SerializersModule
+import kotlinx.serialization.modules.polymorphic
+import kotlinx.serialization.modules.subclass
+
+/**
+ * Serializers module that registers the polymorphic session frame hierarchies.
+ */
+@OptIn(ExperimentalPreviewMapboxNavigationAPI::class)
+internal val sessionFrameSerializerModule = SerializersModule {
+
+    // TODO add correct round-tripping of unknown polymorphic types,
+    // right now the unknown types encode non-abstract fields in a separate 'details' JSON object
+    // when they are serialized back to JSON
+
+    // Frames whose discriminator matches no registered subclass are decoded by the
+    // hand-written Unknown strategy instead of failing.
+    polymorphic(baseClass = SessionFrame::class) {
+        subclass(SessionFrame.SendEvent::class)
+        subclass(SessionFrame.StartSession::class)
+        subclass(SessionFrame.Unknown::class)
+        default {
+            SessionFrame.Unknown.UnknownDeserializationStrategy
+        }
+    }
+    // Same fallback for event bodies.
+    polymorphic(baseClass = SessionFrame.SendEvent.Body::class) {
+        subclass(SessionFrame.SendEvent.Body.Conversation::class)
+        subclass(SessionFrame.SendEvent.Body.NoResponse::class)
+        subclass(SessionFrame.SendEvent.Body.StopListening::class)
+        subclass(SessionFrame.SendEvent.Body.Feedback::class)
+        subclass(SessionFrame.SendEvent.Body.Unknown::class)
+        default {
+            SessionFrame.SendEvent.Body.Unknown.UnknownDeserializationStrategy
+        }
+    }
+    polymorphic(baseClass = OutgoingSessionFrame::class) {
+        subclass(OutgoingSessionFrame.GetSessionId::class)
+    }
+}
+
+// Shared Json instance used by the toJsonString()/fromJsonString() helpers in this file.
+@OptIn(ExperimentalSerializationApi::class)
+private val jsonSerialization = Json {
+    serializersModule = sessionFrameSerializerModule
+    explicitNulls = false
+    ignoreUnknownKeys = true
+}
+
+/**
+ * Definition of a frame that carries information across the communication channel with Feedback Agent backend.
+ *
+ * The concrete frame type is selected by the `action` JSON field.
+ */
+@ExperimentalPreviewMapboxNavigationAPI
+@Serializable
+@JsonClassDiscriminator(discriminator = "action")
+internal abstract class SessionFrame {
+
+    /**
+     * Definition of an event that is generated by the server when certain things happen, such as extracting a POI from a user / assistant conversation.
+     *
+     * @param body contents of the event
+     */
+    @Serializable
+    @SerialName("send-event")
+    data class SendEvent(
+        val body: Body,
+    ) : SessionFrame() {
+
+        /**
+         * Defines the event specifics, which vary based on the event type.
+         */
+        @Serializable
+        abstract class Body {
+
+            /**
+             * A unique increasing ID field for the event. This can be used by the client to reference the last received event ID when reconnecting to the service.
+             *
+             * @see [FeedbackAgentService.connect]
+             */
+            abstract val id: Long
+
+            /**
+             * Creation time of the event as a unix timestamp
+             */
+            abstract val timestamp: Long
+
+            /**
+             * Describes which conversational chunk the event is tied to, in format of `{date}@{id}`
+             * where the [chunkPrefix] should be considered as an ID of the AI response
+             * and [chunkOffset] is the logical order in the response stream.
+             */
+            @SerialName("chunk_id")
+            abstract val chunkId: String
+
+            /**
+             * Represents an ID of the AI response: the part of [chunkId] before the first `@`
+             * (the whole string when it contains no `@`).
+             *
+             * See [chunkId].
+             */
+            val chunkPrefix: String by lazy { chunkId.split("@").first() }
+
+            /**
+             * Represents logical order in the response stream: the second `@`-separated part
+             * of [chunkId], parsed as an integer.
+             *
+             * Reading this property throws when [chunkId] contains no `@` or when that part
+             * is not a valid integer.
+             *
+             * See [chunkId].
+             */
+            val chunkOffset: Int by lazy { chunkId.split("@")[1].toInt() }
+
+            /**
+             * Defines events that were not generated in direct response to user query
+             * but retroactively or spontaneously by the assistant.
+             */
+            @SerialName("is_supplement")
+            abstract val isSupplement: Boolean
+
+            /**
+             * Defines a conversation chunk. A single chunk is typically a sentence in a longer AI response.
+             *
+             * Conversation can be in direct response to something the user said or may be sent separately, e.g., after processing a restaurant reservation request.
+             *
+             * @param data conversation contents
+             */
+            @Serializable
+            @SerialName("conversation")
+            data class Conversation(
+                override val id: Long,
+                override val timestamp: Long,
+                @SerialName("chunk_id")
+                override val chunkId: String,
+                @SerialName("is_supplement")
+                override val isSupplement: Boolean = false,
+                val data: Data,
+            ) : Body() {
+
+                /**
+                 * Conversation contents.
+                 *
+                 * @param conversationId a unique string for this conversation
+                 * @param content the sentence in a response
+                 * @param initial whether this is the first chunk in the response stream
+                 * @param confirmation whether this chunk is a confirmation of a user action
+                 * @param final whether this is the last chunk in the response stream
+                 * @param maxTokens whether conversation has hit the profile's response token limit
+                 */
+                @Serializable
+                data class Data(
+                    @SerialName("conversation_id")
+                    val conversationId: String,
+                    val content: String,
+                    val initial: Boolean = false,
+                    val confirmation: Boolean = false,
+                    val final: Boolean = false,
+                    @SerialName("max_tokens")
+                    val maxTokens: Boolean = false,
+                )
+            }
+
+            /**
+             * Defines action containing no verbal response for the user query.
+             */
+            @Serializable
+            @SerialName("no_response")
+            data class NoResponse(
+                override val id: Long,
+                override val timestamp: Long,
+                @SerialName("chunk_id")
+                override val chunkId: String,
+                @SerialName("is_supplement")
+                override val isSupplement: Boolean = false,
+            ) : Body()
+
+            /**
+             * Defines action ordering Feedback Agent to close the microphone and stop capturing input.
+             */
+            @Serializable
+            @SerialName("stop_listening")
+            data class StopListening(
+                override val id: Long,
+                override val timestamp: Long,
+                @SerialName("chunk_id")
+                override val chunkId: String,
+                @SerialName("is_supplement")
+                override val isSupplement: Boolean = false,
+            ) : Body()
+
+            /**
+             * Represents a user feedback event.
+             *
+             * This event stores details about user feedback, including metadata and feedback content.
+             *
+             * @property id The unique identifier of the event.
+             * @property timestamp The timestamp of when the event occurred.
+             * @property chunkId The associated chunk identifier.
+             * @property isSupplement Indicates whether this event is a supplemental entry.
+             * @property data The feedback-specific data.
+             */
+            @Serializable
+            @SerialName("feedback")
+            data class Feedback(
+                override val id: Long,
+                override val timestamp: Long,
+                @SerialName("chunk_id")
+                override val chunkId: String,
+                @SerialName("is_supplement")
+                override val isSupplement: Boolean = false,
+                val data: Data,
+            ) : Body() {
+
+                /**
+                 * Represents the feedback-specific data.
+                 *
+                 * @property feedbackType The category or type of feedback (e.g., bug report, suggestion).
+                 * @property feedbackDescription The detailed description of the feedback.
+                 */
+                @Serializable
+                data class Data(
+                    @SerialName("feedbackType")
+                    val feedbackType: String,
+                    @SerialName("feedbackDescription")
+                    val feedbackDescription: String,
+                )
+            }
+
+            /**
+             * Payload associated with unknown types.
+             *
+             * NOTE(review): the type arguments of `Map`, `DeserializationStrategy` and
+             * `element(...)` below appear to be missing in this copy — confirm against the
+             * original file.
+             *
+             * @param details every JSON field of the body except `id`, `timestamp`, `chunk_id`
+             * and `is_supplement`
+             */
+            @Serializable
+            data class Unknown(
+                override val id: Long,
+                override val timestamp: Long,
+                @SerialName("chunk_id")
+                override val chunkId: String,
+                @SerialName("is_supplement")
+                override val isSupplement: Boolean = false,
+                val details: Map,
+            ) : Body() {
+
+                /**
+                 * Fallback strategy registered as the polymorphic default for [Body]:
+                 * reads the raw JSON object, extracts the common fields and keeps the
+                 * remaining ones in [details]. Works with JSON decoders only.
+                 */
+                internal object UnknownDeserializationStrategy : DeserializationStrategy {
+
+                    override val descriptor: SerialDescriptor =
+                        buildClassSerialDescriptor("Unknown") {
+                            element("id")
+                            element("timestamp")
+                            element("chunk_id")
+                            element("is_supplement")
+                            element("details")
+                        }
+
+                    override fun deserialize(decoder: Decoder): Unknown {
+                        val jsonInput = decoder as? JsonDecoder
+                            ?: error("Can be deserialized only by JSON")
+                        val json = jsonInput.decodeJsonElement().jsonObject
+                        // id, timestamp and chunk_id are mandatory: getValue throws when absent.
+                        val id = json.getValue("id").jsonPrimitive.content.toLong()
+                        val timestamp = json.getValue("timestamp").jsonPrimitive.content.toLong()
+                        val chunkId = json.getValue("chunk_id").jsonPrimitive.content
+                        // is_supplement is optional; a missing or non-boolean value means false.
+                        val isSupplement = json["is_supplement"]
+                            ?.jsonPrimitive?.contentOrNull?.toBooleanStrictOrNull() ?: false
+                        // Whatever is left after removing the common fields becomes `details`.
+                        val details = json.toMutableMap()
+                        details.remove("id")
+                        details.remove("timestamp")
+                        details.remove("chunk_id")
+                        details.remove("is_supplement")
+                        return Unknown(
+                            id = id,
+                            timestamp = timestamp,
+                            chunkId = chunkId,
+                            isSupplement = isSupplement,
+                            details = details,
+                        )
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Called when a connection with the backend service is established and a new session is started.
+     *
+     * @param body content associated with the session
+     */
+    @Serializable
+    @SerialName("start-session")
+    data class StartSession(
+        val body: Body,
+    ) : SessionFrame() {
+
+        /**
+         * Content associated with the session
+         *
+         * @param sessionId session id
+         */
+        @Serializable
+        data class Body(
+            @SerialName("session_id")
+            val sessionId: String,
+        )
+    }
+
+    /**
+     * Payload associated with unknown types.
+     *
+     * @param details all JSON fields of the frame as key value pairs
+     */
+    @Serializable
+    data class Unknown(
+        val details: Map,
+    ) : SessionFrame() {
+
+        /**
+         * Fallback strategy registered as the polymorphic default for [SessionFrame]:
+         * keeps the whole JSON object in [details]. Works with JSON decoders only.
+         */
+        internal object UnknownDeserializationStrategy : DeserializationStrategy {
+
+            override val descriptor: SerialDescriptor =
+                buildClassSerialDescriptor("Unknown") {
+                    element("details")
+                }
+
+            override fun deserialize(decoder: Decoder): Unknown {
+                val jsonInput = decoder as? JsonDecoder
+                    ?: error("Can be deserialized only by JSON")
+                val json = jsonInput.decodeJsonElement().jsonObject
+                val details = json.toMutableMap()
+                return Unknown(
+                    details = details,
+                )
+            }
+        }
+    }
+
+    /**
+     * Encode the object to JSON string.
+     */
+    fun toJsonString(): String {
+        return jsonSerialization.encodeToJsonElement(this).toString()
+    }
+
+    companion object {
+
+        /**
+         * Decode the object from JSON string.
+         */
+        fun fromJsonString(json: String): SessionFrame {
+            return jsonSerialization.decodeFromString(json)
+        }
+    }
+}
+
+/**
+ * Frame sent from the client. As with [SessionFrame], the concrete type is selected by the
+ * `action` JSON field.
+ */
+@Serializable
+@JsonClassDiscriminator(discriminator = "action")
+internal abstract class OutgoingSessionFrame {
+
+    /** Frame with the `get-session-id` action and no further fields. */
+    @Serializable
+    @SerialName("get-session-id")
+    internal class GetSessionId : OutgoingSessionFrame()
+
+    /** Encode the object to JSON string. */
+    fun toJsonString(): String {
+        return jsonSerialization.encodeToString(this)
+    }
+
+    companion object {
+
+        /** Decode the object from JSON string. */
+        fun fromJsonString(json: String): OutgoingSessionFrame {
+            return jsonSerialization.decodeFromString(json)
+        }
+    }
+}
diff --git a/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/audio/AudioInfoRetriever.kt b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/audio/AudioInfoRetriever.kt
new file mode 100644
index 00000000000..88db8cca14c
--- /dev/null
+++ b/libnavigation-voicefeedback/src/main/java/com/mapbox/navigation/voicefeedback/internal/audio/AudioInfoRetriever.kt
@@ -0,0 +1,335 @@
+package com.mapbox.navigation.voicefeedback.internal.audio
+
+import android.content.Context
+import android.media.AudioDeviceCallback
+import android.media.AudioDeviceInfo
+import android.media.AudioFormat
+import android.media.AudioManager
+import android.media.AudioPlaybackConfiguration
+import android.media.AudioRecord
+import android.media.AudioRecordingConfiguration
+import android.media.MediaMetadataRetriever
+import android.media.MediaRecorder
+import android.media.MicrophoneInfo
+import android.os.Build
+import androidx.annotation.RequiresApi
+import androidx.annotation.RestrictTo
+import com.mapbox.navigation.base.ExperimentalPreviewMapboxNavigationAPI
+import com.mapbox.navigation.utils.internal.logD
+import com.mapbox.navigation.utils.internal.logW
+import
/**
 * Diagnostic helpers that describe the Android audio stack as log-friendly strings: the
 * input devices and microphones known to an [AudioManager], the state of an [AudioRecord],
 * and flows of recording-configuration, audio-device and playback-configuration changes.
 *
 * Every API-level dependent call is guarded by a [Build.VERSION.SDK_INT] check; where a
 * feature is unavailable a descriptive placeholder string is produced instead.
 *
 * Last updated for Api 33.
 */
@ExperimentalPreviewMapboxNavigationAPI
@RestrictTo(RestrictTo.Scope.LIBRARY_GROUP_PREFIX)
internal object AudioInfoRetriever {

    private const val TAG = "AudioInfoRetriever"

    /**
     * Looks up the audio system service.
     *
     * @return the [AudioManager], or `null` when the service is missing or of another type
     */
    fun getAudioManager(applicationContext: Context): AudioManager? {
        return applicationContext.getSystemService(
            Context.AUDIO_SERVICE,
        ) as? AudioManager
    }

    /**
     * Logs (debug level) the input devices and, on API 28+, the microphones reported by
     * [audioManager]. Logs a warning and returns on API levels below 23.
     */
    fun logAudioDevices(audioManager: AudioManager) {
        if (Build.VERSION.SDK_INT < Build.VERSION_CODES.M) {
            logW(TAG) { "AudioDevice info unavailable for api ${Build.VERSION.SDK_INT}" }
            return
        }
        logD(TAG) {
            val devices = audioManager.getDevices(AudioManager.GET_DEVICES_INPUTS)
            val deviceProperties = devices.joinToString { it.toLogString() }
            val microphoneProperties = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) {
                try {
                    audioManager.microphones.joinToString { it.toLogString() }
                } catch (e: java.io.IOException) {
                    // getMicrophones() is declared to throw IOException; a failed
                    // diagnostic query must not take the caller down.
                    "unavailable: ${e.message}"
                }
            } else {
                "VERSION.SDK_INT < P"
            }
            "AudioManager(" +
                "AudioDeviceInfo=[$deviceProperties], " +
                "MicrophoneInfo=[$microphoneProperties]" +
                ")"
        }
    }

    /**
     * Renders the initialization state, recording state, channel count, encoding and sample
     * rate of [audioRecord] as a single human-readable string.
     */
    fun stateString(audioRecord: AudioRecord): String {
        val initializedState = when (val state = audioRecord.state) {
            AudioRecord.STATE_INITIALIZED -> "STATE_INITIALIZED"
            AudioRecord.STATE_UNINITIALIZED -> "STATE_UNINITIALIZED"
            else -> "Other state: $state"
        }
        val recordingState = when (val state = audioRecord.recordingState) {
            AudioRecord.RECORDSTATE_RECORDING -> "RECORDSTATE_RECORDING"
            AudioRecord.RECORDSTATE_STOPPED -> "RECORDSTATE_STOPPED"
            else -> "Other recording state: $state"
        }
        val channelCount = when (val count = audioRecord.channelCount) {
            1 -> "mono"
            2 -> "stereo"
            else -> "Other channel count: $count"
        }
        val audioFormat = when (val format = audioRecord.audioFormat) {
            AudioFormat.ENCODING_PCM_16BIT -> "ENCODING_PCM_16BIT"
            AudioFormat.ENCODING_PCM_8BIT -> "ENCODING_PCM_8BIT"
            else -> "Other audio format: $format"
        }
        val superString = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            audioRecord.format.toString()
        } else {
            "VERSION.SDK_INT < M"
        }
        return "AudioRecord(" +
            "initializedState=$initializedState, " +
            "recordingState=$recordingState, " +
            "channelCount=$channelCount, " +
            "audioFormat=$audioFormat, " +
            "sampleRate=${audioRecord.sampleRate}, " +
            "super=$superString)"
    }

    /**
     * Emits the currently active recording configurations once and then every change
     * reported by [AudioManager.AudioRecordingCallback], each rendered as a log string.
     * The callback is unregistered when the collector goes away. Below API 24 a single
     * "unavailable" message is emitted and the flow completes.
     */
    fun observeRecordingConfigurationChanges(audioManager: AudioManager): Flow<String> =
        callbackFlow {
            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.N) {
                val initial = audioManager.activeRecordingConfigurations
                    .joinToString { it.toLogString() }
                trySend("activeRecordingConfigurations[$initial]")
                val callback = object : AudioManager.AudioRecordingCallback() {
                    override fun onRecordingConfigChanged(
                        configs: MutableList<AudioRecordingConfiguration>?,
                    ) {
                        val propertiesString =
                            configs?.joinToString { it.toLogString() } ?: ""
                        trySend("onRecordingConfigChanged[$propertiesString]")
                    }
                }
                audioManager.registerAudioRecordingCallback(callback, null)
                awaitClose { audioManager.unregisterAudioRecordingCallback(callback) }
            } else {
                trySend("AudioRecordingConfiguration unavailable for api ${Build.VERSION.SDK_INT}")
                close()
            }
        }

    /**
     * Emits a log string whenever audio devices are added or removed. The callback is
     * unregistered when the collector goes away. Below API 23 a single "unavailable"
     * message is emitted and the flow completes.
     */
    fun observeAudioDeviceChanges(audioManager: AudioManager): Flow<String> = callbackFlow {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            val callback = object : AudioDeviceCallback() {
                override fun onAudioDevicesAdded(addedDevices: Array<out AudioDeviceInfo>?) {
                    super.onAudioDevicesAdded(addedDevices)
                    val propertiesString = addedDevices?.joinToString { it.toLogString() } ?: ""
                    trySend("onAudioDevicesAdded[$propertiesString]")
                }

                override fun onAudioDevicesRemoved(removedDevices: Array<out AudioDeviceInfo>?) {
                    super.onAudioDevicesRemoved(removedDevices)
                    val propertiesString = removedDevices?.joinToString { it.toLogString() } ?: ""
                    trySend("onAudioDevicesRemoved[$propertiesString]")
                }
            }
            audioManager.registerAudioDeviceCallback(callback, null)
            awaitClose { audioManager.unregisterAudioDeviceCallback(callback) }
        } else {
            trySend("AudioDeviceCallback unavailable for api ${Build.VERSION.SDK_INT}")
            close()
        }
    }

    /**
     * Emits a log string whenever the playback configurations change. The callback is
     * unregistered when the collector goes away. Below API 26 a single "unavailable"
     * message is emitted and the flow completes.
     */
    fun observePlaybackConfigurationChanges(audioManager: AudioManager): Flow<String> =
        callbackFlow {
            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
                val callback = object : AudioManager.AudioPlaybackCallback() {
                    override fun onPlaybackConfigChanged(
                        configs: MutableList<AudioPlaybackConfiguration>?,
                    ) {
                        val propertiesString = configs?.joinToString { it.toLogString() } ?: ""
                        trySend("onPlaybackConfigChanged[$propertiesString]")
                    }
                }
                audioManager.registerAudioPlaybackCallback(callback, null)
                awaitClose { audioManager.unregisterAudioPlaybackCallback(callback) }
            } else {
                // Name the feature that is actually missing (this branch used to reuse the
                // AudioDeviceCallback wording).
                trySend(
                    "AudioPlaybackConfiguration unavailable for api ${Build.VERSION.SDK_INT}",
                )
                close()
            }
        }

    /**
     * This is a helper function to print out the properties of an [AudioDeviceInfo] object.
     * Properties are added incrementally according to the API level that introduced them;
     * entries with empty values are dropped from the result.
     */
    private fun AudioDeviceInfo.toLogString(): String {
        val properties = mutableMapOf<String, String>()
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) {
            properties["id"] = id.toString()
            properties["type"] = audioDeviceInfoTypeString(type)
            properties["isSource"] = isSource.toString()
            properties["isSink"] = isSink.toString()
            productName?.let { properties["productName"] = it.toString() }
            properties["sampleRates"] = sampleRates.joinToString()
            properties["channelCounts"] = channelCounts.joinToString()
            properties["channelMasks"] = channelMasks.joinToString()
            properties["channelIndexMasks"] = channelIndexMasks.joinToString()
            properties["encodings"] = encodings.joinToString()
        }
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.P) {
            properties["address"] = address
        }
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) {
            properties["encapsulationModes"] = encapsulationModes.joinToString()
            properties["encapsulationMetadataTypes"] = encapsulationMetadataTypes.joinToString()
        }
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            properties["audioProfiles"] = audioProfiles.joinToString()
            properties["audioDescriptors"] = audioDescriptors.joinToString()
        }
        return properties.removeEmptyValues().toString()
    }

    /** Maps an [AudioDeviceInfo] `TYPE_*` constant to a short lowercase label. */
    @RequiresApi(Build.VERSION_CODES.M)
    private fun audioDeviceInfoTypeString(type: Int): String = when (type) {
        AudioDeviceInfo.TYPE_AUX_LINE -> "aux_line"
        AudioDeviceInfo.TYPE_BLE_BROADCAST -> "ble_broadcast"
        AudioDeviceInfo.TYPE_BLE_HEADSET -> "ble_headset"
        AudioDeviceInfo.TYPE_BLE_SPEAKER -> "ble_speaker"
        AudioDeviceInfo.TYPE_BLUETOOTH_A2DP -> "bluetooth_a2dp"
        AudioDeviceInfo.TYPE_BLUETOOTH_SCO -> "bluetooth_sco"
        AudioDeviceInfo.TYPE_BUILTIN_EARPIECE -> "builtin_earpiece"
        AudioDeviceInfo.TYPE_BUILTIN_MIC -> "builtin_mic"
        AudioDeviceInfo.TYPE_BUILTIN_SPEAKER -> "builtin_speaker"
        AudioDeviceInfo.TYPE_BUILTIN_SPEAKER_SAFE -> "builtin_speaker_safe"
        AudioDeviceInfo.TYPE_BUS -> "bus"
        AudioDeviceInfo.TYPE_DOCK -> "dock"
        AudioDeviceInfo.TYPE_FM -> "fm"
        AudioDeviceInfo.TYPE_FM_TUNER -> "fm_tuner"
        AudioDeviceInfo.TYPE_HDMI -> "hdmi"
        AudioDeviceInfo.TYPE_HDMI_ARC -> "hdmi_arc"
        AudioDeviceInfo.TYPE_HDMI_EARC -> "hdmi_earc"
        AudioDeviceInfo.TYPE_HEARING_AID -> "hearing_aid"
        AudioDeviceInfo.TYPE_IP -> "ip"
        AudioDeviceInfo.TYPE_LINE_ANALOG -> "line_analog"
        AudioDeviceInfo.TYPE_LINE_DIGITAL -> "line_digital"
        AudioDeviceInfo.TYPE_REMOTE_SUBMIX -> "remote_submix"
        AudioDeviceInfo.TYPE_TELEPHONY -> "telephony"
        AudioDeviceInfo.TYPE_TV_TUNER -> "tv_tuner"
        AudioDeviceInfo.TYPE_UNKNOWN -> "unknown"
        AudioDeviceInfo.TYPE_USB_ACCESSORY -> "usb_accessory"
        AudioDeviceInfo.TYPE_USB_DEVICE -> "usb_device"
        AudioDeviceInfo.TYPE_USB_HEADSET -> "usb_headset"
        AudioDeviceInfo.TYPE_WIRED_HEADPHONES -> "wired_headphones"
        AudioDeviceInfo.TYPE_WIRED_HEADSET -> "wired_headset"
        else -> "unmapped_$type"
    }

    /**
     * Renders an [AudioPlaybackConfiguration] followed by the extra properties collected
     * here. [AudioPlaybackConfiguration] and its `audioAttributes` exist from API 26, so
     * that is the level required of callers.
     */
    @RequiresApi(Build.VERSION_CODES.O)
    private fun AudioPlaybackConfiguration.toLogString(): String {
        val properties = mutableMapOf<String, String>()
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            @Suppress("DEPRECATION")
            audioDeviceInfo?.let { properties["AudioDeviceInfo"] = it.toLogString() }
        }
        properties["audioAttributes"] = audioAttributes.toString()
        // This.toString includes "AudioPlaybackConfiguration" internal values
        return "$this $properties"
    }

    /**
     * Renders the device, audio source, session id and formats of an
     * [AudioRecordingConfiguration]; entries with empty values are dropped.
     */
    @RequiresApi(Build.VERSION_CODES.N)
    private fun AudioRecordingConfiguration.toLogString(): String {
        val properties = mutableMapOf<String, String>()
        audioDevice?.let { properties["AudioDeviceInfo"] = it.toLogString() }
        properties["audioSource"] = when (val value = clientAudioSource) {
            MediaRecorder.AudioSource.DEFAULT -> "DEFAULT"
            MediaRecorder.AudioSource.MIC -> "MIC"
            MediaRecorder.AudioSource.VOICE_UPLINK -> "VOICE_UPLINK"
            MediaRecorder.AudioSource.VOICE_DOWNLINK -> "VOICE_DOWNLINK"
            MediaRecorder.AudioSource.VOICE_CALL -> "VOICE_CALL"
            MediaRecorder.AudioSource.CAMCORDER -> "CAMCORDER"
            MediaRecorder.AudioSource.VOICE_RECOGNITION -> "VOICE_RECOGNITION"
            MediaRecorder.AudioSource.VOICE_COMMUNICATION -> "VOICE_COMMUNICATION"
            MediaRecorder.AudioSource.UNPROCESSED -> "UNPROCESSED"
            MediaRecorder.AudioSource.VOICE_PERFORMANCE -> "VOICE_PERFORMANCE"
            else -> "unmapped_$value"
        }
        properties["audioSessionId"] = clientAudioSessionId.toString()
        properties["hardwareFormat"] = format.toString()
        properties["applicationFormat"] = clientFormat.toString()
        // Check the difference because sampling can impact audio performance.
        properties["appHardwareSampleRateDiff"] =
            (format.sampleRate - clientFormat.sampleRate).toString()
        return properties.removeEmptyValues().toString()
    }

    /**
     * Renders the identifying, positional and acoustic properties of a [MicrophoneInfo];
     * entries with empty values are dropped.
     */
    @RequiresApi(Build.VERSION_CODES.P)
    private fun MicrophoneInfo.toLogString(): String {
        // Sound pressure levels use a sentinel for "unknown"; render it as text.
        fun splString(spl: Float): String =
            if (spl == MicrophoneInfo.SPL_UNKNOWN) "unknown" else spl.toString()

        val properties = mutableMapOf<String, String>()
        properties["id"] = id.toString()
        properties["description"] = description
        properties["type"] = audioDeviceInfoTypeString(type)
        properties["address"] = address
        properties["deviceLocation"] = when (val value = location) {
            MicrophoneInfo.LOCATION_UNKNOWN -> "unknown"
            MicrophoneInfo.LOCATION_MAINBODY -> "mainbody"
            MicrophoneInfo.LOCATION_MAINBODY_MOVABLE -> "mainbody_movable"
            MicrophoneInfo.LOCATION_PERIPHERAL -> "peripheral"
            else -> "unmapped_$value"
        }
        properties["groupId"] = group.toString()
        properties["groupIndex"] = indexInTheGroup.toString()
        properties["position"] = with(position) { "(x=$x,y=$y,z=$z)" }
        properties["orientation"] = with(orientation) { "(x=$x,y=$y,z=$z)" }
        properties["hasFrequencyResponse"] = frequencyResponse.isNotEmpty().toString()
        properties["channelMapping"] = channelMapping.joinToString {
            val value = when (it.second) {
                MicrophoneInfo.CHANNEL_MAPPING_PROCESSED -> "processed"
                MicrophoneInfo.CHANNEL_MAPPING_DIRECT -> "direct"
                else -> "unmapped_${it.second}"
            }
            "(index=${it.first},value=$value)"
        }
        properties["sensitivity"] = sensitivity.let {
            if (it == MicrophoneInfo.SENSITIVITY_UNKNOWN) "unknown" else it.toString()
        }
        properties["soundPressureLevel"] = "[${splString(minSpl)},${splString(maxSpl)}]"
        properties["directionality"] = when (val value = directionality) {
            MicrophoneInfo.DIRECTIONALITY_UNKNOWN -> "unknown"
            MicrophoneInfo.DIRECTIONALITY_OMNI -> "omni"
            MicrophoneInfo.DIRECTIONALITY_BI_DIRECTIONAL -> "bi_directional"
            MicrophoneInfo.DIRECTIONALITY_CARDIOID -> "cardioid"
            MicrophoneInfo.DIRECTIONALITY_HYPER_CARDIOID -> "hyper_cardioid"
            MicrophoneInfo.DIRECTIONALITY_SUPER_CARDIOID -> "super_cardioid"
            else -> "unmapped_$value"
        }
        // Key kept for log compatibility; it previously carried the raw directionality
        // constant instead of the index within the group.
        properties["indexInGroup"] = indexInTheGroup.toString()

        return properties.removeEmptyValues().toString()
    }

    /**
     * Stores the value [retriever] extracts for [metadataKey] under [nameKey]; nothing is
     * stored when the retriever returns `null`.
     */
    private fun MutableMap<String, String>.putMetadata(
        retriever: MediaMetadataRetriever,
        nameKey: String,
        metadataKey: Int,
    ) {
        retriever.extractMetadata(metadataKey)?.let { value ->
            put(nameKey, value)
        }
    }

    /** Removes every entry whose value is the empty string and returns this map. */
    private fun MutableMap<String, String>.removeEmptyValues() = apply {
        entries.removeAll { it.value.isEmpty() }
    }
}
/**
 * A [com.mapbox.navigation.voicefeedback.Microphone] that uses the Android [AudioRecord] API to stream audio from the
 * device's microphone. This implementation is suitable for real-time audio streaming.
 *
 * Requires the `Manifest.permission.RECORD_AUDIO` permission.
 *
 * State handling: an [Microphone.State.Error] stays visible in [state] until the next
 * [stream] or [stop] call, so observers can react to it.
 */
@ExperimentalPreviewMapboxNavigationAPI
internal class AudioRecordMicrophone : Microphone {

    private lateinit var context: Context

    private var _config: Microphone.Config = Microphone.Config()

    /**
     * Current microphone configuration including sample rate and other audio parameters.
     */
    override val config: Microphone.Config get() = _config

    private val _state = MutableStateFlow<Microphone.State>(
        Microphone.State.Disconnected,
    )

    /**
     * Current state of the microphone (Disconnected, Idle, Streaming, or Error).
     */
    override val state: StateFlow<Microphone.State> = _state

    // The recorder together with the byte buffer it reads into; created lazily by stream().
    private var audioRecordBuffer: Pair<AudioRecord, ByteArray>? = null

    private var audioManager: AudioManager? = null

    private lateinit var coroutineScope: CoroutineScope

    /**
     * Captures the application context, moves the state to Idle and starts the diagnostic
     * audio-device logging coroutines.
     */
    override fun onAttached(mapboxNavigation: MapboxNavigation) {
        coroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main.immediate)
        this.context = mapboxNavigation.navigationOptions.applicationContext
        audioManager = AudioInfoRetriever.getAudioManager(context)
        _state.value = Microphone.State.Idle
        launchAudioDeviceLogging(audioManager)
    }

    /**
     * Moves the state to Disconnected, releases the recorder (if one was created) and
     * cancels the logging coroutines.
     */
    override fun onDetached(mapboxNavigation: MapboxNavigation) {
        _state.value = Microphone.State.Disconnected
        audioRecordBuffer?.first?.let { audioRecord ->
            audioRecordBuffer = null
            logD(TAG) { "Calling audioRecord.release()" }
            audioRecord.release()
        }
        // The scope only exists after onAttached; tolerate a detach without an attach.
        if (::coroutineScope.isInitialized) {
            coroutineScope.cancel()
        }
    }

    private fun launchAudioDeviceLogging(audioManager: AudioManager?) {
        if (audioManager == null) {
            logE(TAG) { "AudioManager is null, cannot log audio devices" }
            return
        }

        coroutineScope.launch(Dispatchers.IO) { AudioInfoRetriever.logAudioDevices(audioManager) }
        coroutineScope.launch(Dispatchers.IO) {
            AudioInfoRetriever.observeRecordingConfigurationChanges(audioManager)
                .collect { logString -> logD(TAG) { logString } }
        }
        coroutineScope.launch(Dispatchers.IO) {
            AudioInfoRetriever.observeAudioDeviceChanges(audioManager)
                .collect { logString -> logD(TAG) { logString } }
        }
        coroutineScope.launch(Dispatchers.IO) {
            AudioInfoRetriever.observePlaybackConfigurationChanges(audioManager)
                .collect { logString -> logD(TAG) { logString } }
        }
    }

    /**
     * Streams audio data from the microphone to the provided consumer function.
     * Requires [AudioRecord] to be properly initialized and permission to be granted;
     * otherwise the state becomes [Microphone.State.Error] and the call returns.
     *
     * The read loop blocks the calling thread until recording stops, the coroutine is
     * cancelled or a read fails, then the call returns.
     *
     * @param consumer Function that receives streaming audio data chunks
     */
    override suspend fun stream(consumer: (Microphone.State.Streaming) -> Unit) {
        if (!::context.isInitialized) {
            onError("Cannot stream before the microphone is attached.")
            return
        }
        if (ActivityCompat.checkSelfPermission(
                context,
                Manifest.permission.RECORD_AUDIO,
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            onError("Cannot stream when RECORD_AUDIO permission is not granted.")
            return
        }
        val audioRecordBuffer = audioRecordBuffer ?: createAudioRecordBuffer() ?: run {
            onError("Cannot stream when AudioRecord is unavailable.")
            return
        }
        logI(TAG) { "stream $config" }
        streamAudioBytes(
            audioRecord = audioRecordBuffer.first,
            byteArray = audioRecordBuffer.second,
        ) { chunk ->
            _state.value = chunk
            consumer.invoke(chunk)
        }
    }

    /**
     * Creates the [AudioRecord] and its read buffer and caches them in [audioRecordBuffer].
     *
     * @return the recorder/buffer pair, or `null` when the buffer size cannot be determined
     * or the recorder fails to initialize (the state is set to Error in both cases)
     */
    @RequiresPermission(Manifest.permission.RECORD_AUDIO)
    private fun createAudioRecordBuffer(): Pair<AudioRecord, ByteArray>? {
        val minBufferSizeBytes = AudioRecord.getMinBufferSize(
            config.sampleRateHz,
            CHANNEL_CONFIG,
            AUDIO_FORMAT,
        )
        // getMinBufferSize reports failures as negative error codes; those must not be
        // turned into a buffer size for the AudioRecord constructor.
        if (minBufferSizeBytes <= 0) {
            onError(
                "AudioRecord.getMinBufferSize failed with $minBufferSizeBytes " +
                    "for sample rate ${config.sampleRateHz}.",
            )
            return null
        }
        val bufferSizeBytes = minBufferSizeBytes * BUFFER_MULTIPLIER
        val audioRecord = AudioRecord(
            AUDIO_SOURCE,
            config.sampleRateHz,
            CHANNEL_CONFIG,
            AUDIO_FORMAT,
            bufferSizeBytes,
        )
        if (audioRecord.state == AudioRecord.STATE_INITIALIZED) {
            audioRecordBuffer = audioRecord to ByteArray(bufferSizeBytes)
            if (config.sampleRateHz != audioRecord.sampleRate) {
                logW(TAG) {
                    "Requested sample rate ${config.sampleRateHz} does not match " +
                        "AudioRecord sample rate ${audioRecord.sampleRate}. " +
                        "Updating config.sampleRateHz to ${audioRecord.sampleRate}"
                }
                _config = config.copy(sampleRateHz = audioRecord.sampleRate)
            }
            logD(TAG) {
                "AudioRecord initialized minBufferSizeBytes: $minBufferSizeBytes, " +
                    "bufferSizeBytes: $bufferSizeBytes, " +
                    AudioInfoRetriever.stateString(audioRecord)
            }
        } else {
            onError("AudioRecord initialization failed, releasing AudioRecord.")
            audioRecordBuffer = null
            audioRecord.release()
        }
        return audioRecordBuffer
    }

    /**
     * Starts recording and pushes every non-empty read to [wrappedConsumer] until the
     * recorder stops, the coroutine is cancelled, a read fails, or the state leaves
     * Streaming. The same [byteArray] instance is reused for every chunk.
     */
    @WorkerThread
    private suspend fun streamAudioBytes(
        audioRecord: AudioRecord,
        byteArray: ByteArray,
        wrappedConsumer: (Microphone.State.Streaming) -> Unit,
    ) = suspendCancellableCoroutine<Unit> { cont ->
        // Chunk 0 carries no audio; it marks the transition into the Streaming state.
        wrappedConsumer.invoke(
            Microphone.State.Streaming(
                chunkId = 0,
                byteArray = byteArray,
                bytesRead = 0,
            ),
        )
        logD(TAG) { "Before startRecording ${AudioInfoRetriever.stateString(audioRecord)}" }
        audioRecord.startRecording()
        logD(TAG) { "After startRecording: ${AudioInfoRetriever.stateString(audioRecord)}" }
        cont.invokeOnCancellation {
            logD(TAG) { "stream cancelled, calling audioRecord.stop()" }
            audioRecord.stop()
        }
        while (
            cont.isActive &&
            audioRecord.recordingState == AudioRecord.RECORDSTATE_RECORDING
        ) {
            val previousState = state.value as? Microphone.State.Streaming ?: break
            val bytesRead = audioRecord.read(byteArray, 0, byteArray.size)
            if (bytesRead < 0) {
                onError("AudioRecord read failed $bytesRead")
            } else if (bytesRead > 0) {
                wrappedConsumer.invoke(
                    Microphone.State.Streaming(
                        chunkId = previousState.chunkId + 1,
                        byteArray = byteArray,
                        bytesRead = bytesRead,
                    ),
                )
            }
        }
        // Only a still-streaming session falls back to Idle; an Error or Disconnected
        // state set while the loop was running must stay visible.
        if (_state.value is Microphone.State.Streaming) {
            _state.value = Microphone.State.Idle
        }
        logD(TAG) { "Done streaming" }
        // Without this the caller of stream() would stay suspended forever. Resuming a
        // continuation that was cancelled in the meantime is ignored.
        cont.resumeWith(Result.success(Unit))
    }

    /** Stops the recorder and publishes [reason] as the Error state. */
    private fun onError(reason: String) {
        logI(TAG) { "onError $reason" }
        stopRecording()
        _state.value = Microphone.State.Error(reason)
    }

    /** Stops the recorder, if one exists, without touching [state]. */
    private fun stopRecording() {
        audioRecordBuffer?.first?.let { audioRecord ->
            logD(TAG) { "Calling audioRecord.stop()" }
            audioRecord.stop()
        }
    }

    /**
     * Stops the current audio recording session and transitions state to [Microphone.State.Idle].
     */
    override fun stop() {
        logI(TAG) { "stop" }
        stopRecording()
        _state.value = Microphone.State.Idle
    }

    companion object {
        private const val TAG = "AudioRecordMicrophone"

        private const val AUDIO_SOURCE: Int = MediaRecorder.AudioSource.VOICE_RECOGNITION
        private const val CHANNEL_CONFIG: Int = AudioFormat.CHANNEL_IN_MONO
        private const val AUDIO_FORMAT: Int = AudioFormat.ENCODING_PCM_16BIT

        // Use a larger buffer size for better performance (e.g., 4 times the minimum size)
        private const val BUFFER_MULTIPLIER = 4
    }
}
/**
 * Test double for [Microphone] that needs no Android audio stack: it reads an
 * [InputStream] in fixed-size chunks and publishes each chunk as a Streaming state, pausing
 * between chunks for a delay derived from the chunk size.
 */
@OptIn(ExperimentalPreviewMapboxNavigationAPI::class)
internal class FakeInputStreamMicrophone(
    private val bufferSizeBytes: Int = DEFAULT_BUFFER_SIZE,
    private val inputStreamProvider: () -> InputStream,
) : Microphone {

    override val config: Microphone.Config = Microphone.Config()
    override val state = MutableStateFlow<Microphone.State>(Microphone.State.Disconnected)

    override fun onAttached(mapboxNavigation: MapboxNavigation) {
        // no-op for unit tests
    }

    override fun onDetached(mapboxNavigation: MapboxNavigation) {
        // no-op for unit tests
    }

    /**
     * Publishes an empty chunk 0, then one chunk per successful read until the stream is
     * exhausted or [state] stops being Streaming; finishes in the Idle state.
     */
    override suspend fun stream(consumer: (Microphone.State.Streaming) -> Unit) {
        val buffer = ByteArray(bufferSizeBytes)

        // Publishes to the state flow first, then hands the same chunk to the consumer.
        fun publish(id: Int, count: Int) {
            val chunk = Microphone.State.Streaming(
                chunkId = id,
                byteArray = buffer,
                bytesRead = count,
            )
            state.value = chunk
            consumer(chunk)
        }

        publish(id = 0, count = 0)

        inputStreamProvider().use { source ->
            var nextChunkId = 1
            while (state.value is Microphone.State.Streaming) {
                val count = source.read(buffer)
                if (count < 0) {
                    break
                }
                publish(id = nextChunkId, count = count)
                nextChunkId += 1
                delay(calculateDelayMillis(count))
            }
        }
        state.value = Microphone.State.Idle
    }

    /** Milliseconds that [bytesRead] bytes take at [DEFAULT_BITRATE] bits per second. */
    @Suppress("MagicNumber")
    private fun calculateDelayMillis(bytesRead: Int): Long =
        (bytesRead * 8 * 1000L) / DEFAULT_BITRATE

    override fun stop() {
        state.value = Microphone.State.Idle
    }

    private companion object {
        private const val DEFAULT_BUFFER_SIZE = 4096
        private const val DEFAULT_BITRATE = 700_000
    }
}
/**
 * Unit tests for [DefaultContextProvider]: user-context fields with and without a matched
 * location, and locale-derived app-context fields.
 */
@OptIn(ExperimentalPreviewMapboxNavigationAPI::class)
class DefaultContextProviderTest {

    @Test
    fun `getContext without location uses empty user fields`() {
        val userContext = DefaultContextProvider(Locale.getDefault()) { null }
            .getContext()
            .userContext

        assertEquals("", userContext.lat)
        assertEquals("", userContext.lon)
        assertEquals("", userContext.placeName)
    }

    @Test
    fun `getContext maps enhanced location`() {
        val matcherResult = makeLocationMatcherResult(lon = -74.0, lat = 40.0, bearing = 180f)
        val provider = DefaultContextProvider(Locale.getDefault()) { matcherResult }

        val userContext = provider.getContext().userContext

        assertEquals("40.0", userContext.lat)
        assertEquals("-74.0", userContext.lon)
        assertEquals("180.0", userContext.heading)
    }

    @Test
    fun `getContext app locale uses language tag`() {
        val german = Locale.forLanguageTag("de-DE")

        val appContext = DefaultContextProvider(german) { null }.getContext().appContext

        assertEquals("de-DE", appContext?.locale)
        assertNotNull(appContext?.clientTime)
    }

    @Test
    fun `getContext imperial locale uses Fahrenheit and miles`() {
        val appContext = DefaultContextProvider(Locale.US) { null }.getContext().appContext!!

        assertEquals("Fahrenheit", appContext.temperatureUnits)
        assertEquals("mi", appContext.distanceUnits)
    }

    @Test
    fun `getContext metric locale uses Celsius and km`() {
        val appContext = DefaultContextProvider(Locale.GERMANY) { null }.getContext().appContext!!

        assertEquals("Celsius", appContext.temperatureUnits)
        assertEquals("km", appContext.distanceUnits)
    }

    @Test
    fun `client time is non-empty`() {
        val appContext = DefaultContextProvider(Locale.US) { null }.getContext().appContext!!
        val clientTime = appContext.clientTime!!

        assertTrue(clientTime.isNotBlank())
    }

    /** Builds a mocked [Location] that answers latitude, longitude and bearing. */
    fun makeLocation(lat: Double, lon: Double, heading: Float = 0f): Location {
        val location = mockk<Location>()
        every { location.latitude } returns lat
        every { location.longitude } returns lon
        every { location.bearing } returns heading
        return location
    }

    /**
     * Builds a mocked [LocationMatcherResult] whose enhanced location comes from
     * [makeLocation]. Note the parameter order: longitude first, then latitude.
     */
    fun makeLocationMatcherResult(lon: Double, lat: Double, bearing: Float): LocationMatcherResult {
        val matched = makeLocation(lat = lat, lon = lon, heading = bearing)
        val result = mockk<LocationMatcherResult>()
        every { result.enhancedLocation } returns matched
        return result
    }
}
/**
 * Unit tests for [MapboxAutomaticSpeechRecognitionEngine], driven by a mocked ASR service
 * whose session state and data flows are controlled by the test, and by a
 * [FakeInputStreamMicrophone] reading `test_report.wav` from the test resources.
 *
 * NOTE(review): explicit type arguments (on `mockk`, `MutableStateFlow`,
 * `MutableSharedFlow`) appear to be missing from the text under review — confirm against
 * the repository before relying on this listing.
 */
@OptIn(
    ExperimentalPreviewMapboxNavigationAPI::class,
    ExperimentalCoroutinesApi::class,
    ExperimentalTime::class,
)
class MapboxAutomaticSpeechRecognitionEngineTest {

    @get:Rule
    val logRule = LoggingFrontendTestRule()

    private val testDispatcher = TestCoroutineDispatcher()

    // Scope the test bodies run in (created from testDispatcher in setUp).
    private lateinit var testScope: TestCoroutineScope

    // Scope handed to the engine under test; created separately from testScope in setUp.
    private lateinit var backgroundScope: TestCoroutineScope

    // Session state the mocked service reports; tests assign to it to simulate transitions.
    private val sessionStateFlow = MutableStateFlow(AsrSessionState.Disconnected)

    // Data the mocked service reports; tests emit transcripts and results into it.
    private val asrDataFlow = MutableSharedFlow()
    private val mapboxASRService = mockk(relaxed = true) {
        every { sessionState } returns sessionStateFlow
        every { asrData } returns asrDataFlow
    }

    /** Creates a fake microphone backed by the `test_report.wav` test resource. */
    private fun createMicrophone(): Microphone = FakeInputStreamMicrophone {
        javaClass.classLoader?.getResourceAsStream("test_report.wav")
            ?: error("test_report.wav not found in test resources")
    }

    /**
     * Creates the engine under test with the mocked service and a fresh fake microphone.
     * The timing parameters default to 6 s, 5 s and 1 s and are overridden by the timeout
     * tests.
     */
    private fun createEngine(
        scope: CoroutineScope,
        stoppedSpeakingThreshold: Duration = 6.toDuration(DurationUnit.SECONDS),
        resultTimeout: Duration = 5.toDuration(DurationUnit.SECONDS),
        checkSpeakingInterval: Duration = 1.toDuration(DurationUnit.SECONDS),
    ): MapboxAutomaticSpeechRecognitionEngine = MapboxAutomaticSpeechRecognitionEngine(
        mapboxASRService = mapboxASRService,
        microphone = createMicrophone(),
        scope = scope,
        stoppedSpeakingThreshold = stoppedSpeakingThreshold,
        resultTimeout = resultTimeout,
        checkSpeakingInterval = checkSpeakingInterval,
    )

    @Before
    fun setUp() {
        testScope = TestCoroutineScope(testDispatcher)
        backgroundScope = TestCoroutineScope()
        Dispatchers.setMain(testDispatcher)
        // Every test starts from a disconnected session.
        sessionStateFlow.value = AsrSessionState.Disconnected
    }

    @After
    fun tearDown() {
        Dispatchers.resetMain()
        backgroundScope.cleanupTestCoroutines()
        testScope.cleanupTestCoroutines()
    }

    @Test
    fun `when service is connected then engine is idle`() = testScope.runBlockingTest {
        val engine = createEngine(backgroundScope)
        runCurrent()

        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        runCurrent()

        val state = engine.state.first { it != null }
        Assert.assertTrue(state is ASRState.Idle)
    }

    @Test
    fun `when service is connecting then engine state is null`() = testScope.runBlockingTest {
        val engine = createEngine(backgroundScope)
        runCurrent()

        sessionStateFlow.value = AsrSessionState.Connecting("https://asr.example.com", "sid")
        runCurrent()

        Assert.assertNull(engine.state.value)
    }

    @Test
    fun `when service is disconnected then engine state is null`() = testScope.runBlockingTest {
        val engine = createEngine(backgroundScope)
        runCurrent()

        sessionStateFlow.value = AsrSessionState.Disconnected
        runCurrent()

        Assert.assertNull(engine.state.value)
    }

    @Test
    fun `when startListening invoked then engine emits Listening`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        val state = engine.state.value
        Assert.assertTrue(state is ASRState.Listening)
        Assert.assertEquals("", (state as ASRState.Listening).text)
    }

    @Test
    fun `when transcript received then engine emits text`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        // emit is called from a launched coroutine rather than inline in the test body.
        launch {
            asrDataFlow.emit(AsrData.Transcript("hello world", isFinal = false))
        }
        runCurrent()

        val state = engine.state.value
        Assert.assertTrue(state is ASRState.Listening)
        Assert.assertEquals("hello world", (state as ASRState.Listening).text)
    }

    @Test
    fun `when final transcript received then engine emits SpeechFinishedWaitingForResult`() =
        testScope.runBlockingTest {
            sessionStateFlow.value =
                AsrSessionState.Connected("https://asr.example.com", "session-1")
            val engine = createEngine(backgroundScope)
            runCurrent()

            engine.startListening()
            runCurrent()

            launch {
                asrDataFlow.emit(AsrData.Transcript("final transcription", isFinal = true))
            }
            runCurrent()

            val state = engine.state.value
            Assert.assertTrue(state is ASRState.SpeechFinishedWaitingForResult)
        }

    @Test
    fun `when ASR Result received then engine emits Result`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        launch {
            asrDataFlow.emit(AsrData.Result("Bug report description", "bug_report"))
        }
        runCurrent()

        val state = engine.state.value
        Assert.assertTrue(state is ASRState.Result)
        val resultState = state as ASRState.Result
        Assert.assertEquals("Bug report description", resultState.text)
        Assert.assertEquals("bug_report", resultState.feedbackType)
    }

    @Test
    fun `when microphone Error occurs then engine emits Error state`() = testScope.runBlockingTest {
        // A mocked microphone that is already in the Error state when the engine is built.
        val errorMicrophone = mockk(relaxed = true) {
            every { config } returns Microphone.Config()
            every { state } returns MutableStateFlow(Microphone.State.Error("permission denied"))
        }

        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = MapboxAutomaticSpeechRecognitionEngine(
            mapboxASRService = mapboxASRService,
            microphone = errorMicrophone,
            scope = backgroundScope,
        )
        runCurrent()

        val state = engine.state.value
        Assert.assertTrue(state is ASRState.Error)
        Assert.assertNotNull((state as ASRState.Error).error.message)
        Assert.assertTrue((state).error.message!!.contains("permission denied"))
    }

    @Test
    fun `when stopListening invoked then engine emits Idle`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        engine.stopListening()
        runCurrent()

        val state = engine.state.value
        Assert.assertTrue(state is ASRState.Idle)
    }

    @Test
    fun `when stopListening invoked then engine stops microphone`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        engine.stopListening()
        runCurrent()

        // NOTE(review): the assertion checks the service call, not the microphone itself —
        // confirm the test name matches the intent.
        verify { mapboxASRService.sendFinalAsrData(false) }
    }

    @Test
    fun `when interruptListening invoked then engine emits Interrupted`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        engine.interruptListening()
        runCurrent()

        val state = engine.state.value
        Assert.assertTrue(state is ASRState.Interrupted)
    }

    @Test
    fun `when interruptListening invoked then engine sends abort`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.startListening()
        runCurrent()

        engine.interruptListening()
        runCurrent()

        verify { mapboxASRService.sendFinalAsrData(true) }
    }

    @Test
    fun `when Listening times out with blank transcript then engine emits InterruptedByTimeout`() =
        testScope.runBlockingTest {
            sessionStateFlow.value =
                AsrSessionState.Connected("https://asr.example.com", "session-1")
            // Zero threshold with a 1 ms check interval, so the 10 ms advance below is
            // enough to reach the timeout.
            val engine = createEngine(
                scope = backgroundScope,
                stoppedSpeakingThreshold = 0.milliseconds,
                checkSpeakingInterval = 1.milliseconds,
            )
            runCurrent()

            engine.startListening()
            runCurrent()

            advanceTimeBy(10)
            runCurrent()

            val state = engine.state.value
            Assert.assertTrue(state is ASRState.InterruptedByTimeout)
        }

    @Test
    fun `when SpeechFinishedWaitingForResult times out then engine emits InterruptedByTimeout`() =
        testScope.runBlockingTest {
            sessionStateFlow.value =
                AsrSessionState.Connected("https://asr.example.com", "session-1")
            val engine = createEngine(
                scope = backgroundScope,
                resultTimeout = 100.milliseconds,
                checkSpeakingInterval = 1.milliseconds,
            )
            runCurrent()

            engine.startListening()
            runCurrent()

            launch {
                asrDataFlow.emit(AsrData.Transcript("speech", isFinal = true))
            }
            runCurrent()

            Assert.assertTrue(engine.state.value is ASRState.SpeechFinishedWaitingForResult)

            // Advance past the 100 ms result timeout configured above.
            advanceTimeBy(150)
            runCurrent()

            val state = engine.state.value
            Assert.assertTrue(state is ASRState.InterruptedByTimeout)
        }

    @Test
    fun `when connect invoked then delegate to mapboxASRService`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.connect("test-token")

        verify { mapboxASRService.connect("test-token") }
    }

    @Test
    fun `when disconnect invoked then delegate to mapboxASRService`() = testScope.runBlockingTest {
        sessionStateFlow.value = AsrSessionState.Connected("https://asr.example.com", "session-1")
        val engine = createEngine(backgroundScope)
        runCurrent()

        engine.disconnect()
        advanceTimeBy(10)
        runCurrent()

        // coVerify rather than verify: used for the disconnect call on the mocked service.
        coVerify { mapboxASRService.disconnect() }
    }
}