2 changes: 2 additions & 0 deletions ai-logic/firebase-ai/CHANGELOG.md
@@ -1,5 +1,7 @@
# Unreleased

- [feature] Added support for `LiveRealtimeInputConfig` and `LiveActivityDetection` to configure voice activity detection in the Live API. Added `sendStartActivityRealtime` and `sendStopActivityRealtime` to `LiveSession` for manual activity control (see the sketch after this changelog excerpt). (#8080)

- [feature] Added support for `ImageConfig` and `finishMessage`. (#8020)

- [feature] Added a Java-friendly wrapper for TemplateChat interactions (`TemplateChatFutures`).
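Taken together, the first entry describes a manual-turn flow: disable automatic voice activity detection, then bracket each user turn yourself. A minimal sketch of that flow, assuming an already-connected `LiveSession` (session construction elided) and using the builder DSLs added in `api.txt` below; the prompt is illustrative only:

```kotlin
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.ResponseModality
import com.google.firebase.ai.type.liveActivityDetection
import com.google.firebase.ai.type.liveGenerationConfig
import com.google.firebase.ai.type.liveRealtimeInputConfig

// Config that turns off automatic voice activity detection; the client
// now signals the start and end of each user turn explicitly.
val manualVadConfig = liveGenerationConfig {
  responseModality = ResponseModality.AUDIO
  realtimeInputConfig = liveRealtimeInputConfig {
    automaticActivityDetection = liveActivityDetection { disabled = true }
  }
}

// Given a session opened from a model built with manualVadConfig:
suspend fun askOnce(session: LiveSession) {
  session.sendStartActivityRealtime() // start of the user's turn
  session.sendTextRealtime("Does five plus five equal ten?")
  session.sendStopActivityRealtime() // end of turn; the model may now respond
  // ...collect the model's responses from session.receive() as usual.
}
```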
85 changes: 85 additions & 0 deletions ai-logic/firebase-ai/api.txt
@@ -261,6 +261,8 @@ package com.google.firebase.ai.java {
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendAudioRealtime(com.google.firebase.ai.type.InlineData audio);
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList);
method @Deprecated public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendStartActivityRealtime();
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendStopActivityRealtime();
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendTextRealtime(String text);
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
@@ -1306,6 +1308,43 @@ package com.google.firebase.ai.type {
property public final double longitude;
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveActivityDetection {
method public static com.google.firebase.ai.type.LiveActivityDetection.Builder builder();
field public static final com.google.firebase.ai.type.LiveActivityDetection.Companion Companion;
}

public static final class LiveActivityDetection.Builder {
ctor public LiveActivityDetection.Builder();
method public com.google.firebase.ai.type.LiveActivityDetection build();
method public com.google.firebase.ai.type.LiveActivityDetection.Builder setDisabled(boolean disabled);
method public com.google.firebase.ai.type.LiveActivityDetection.Builder setEndSensitivity(com.google.firebase.ai.type.LiveActivityDetection.Sensitivity sensitivity);
method public com.google.firebase.ai.type.LiveActivityDetection.Builder setPrefixPaddingMS(int paddingMs);
method public com.google.firebase.ai.type.LiveActivityDetection.Builder setSilenceDurationMS(int durationMs);
method public com.google.firebase.ai.type.LiveActivityDetection.Builder setStartSensitivity(com.google.firebase.ai.type.LiveActivityDetection.Sensitivity sensitivity);
field public Boolean? disabled;
field public com.google.firebase.ai.type.LiveActivityDetection.Sensitivity? endSensitivity;
field public Integer? prefixPaddingMS;
field public Integer? silenceDurationMS;
field public com.google.firebase.ai.type.LiveActivityDetection.Sensitivity? startSensitivity;
}

public static final class LiveActivityDetection.Companion {
method public com.google.firebase.ai.type.LiveActivityDetection.Builder builder();
}

public static final class LiveActivityDetection.Sensitivity {
field public static final com.google.firebase.ai.type.LiveActivityDetection.Sensitivity.Companion Companion;
field public static final com.google.firebase.ai.type.LiveActivityDetection.Sensitivity HIGH;
field public static final com.google.firebase.ai.type.LiveActivityDetection.Sensitivity LOW;
}

public static final class LiveActivityDetection.Sensitivity.Companion {
}

public final class LiveActivityDetectionKt {
method public static com.google.firebase.ai.type.LiveActivityDetection liveActivityDetection(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.LiveActivityDetection.Builder,kotlin.Unit> init);
}
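When detection stays enabled, the builder above admits per-field tuning instead. A minimal sketch mirroring the integration test later in this diff; the values are illustrative:

```kotlin
import com.google.firebase.ai.type.LiveActivityDetection
import com.google.firebase.ai.type.liveActivityDetection

// Keep automatic detection on, but bias it toward quick turn starts
// (HIGH start sensitivity) and patient turn ends (LOW end sensitivity).
val detection = liveActivityDetection {
  startSensitivity = LiveActivityDetection.Sensitivity.HIGH
  endSensitivity = LiveActivityDetection.Sensitivity.LOW
  prefixPaddingMS = 100    // audio retained before detected speech, in ms
  silenceDurationMS = 500  // silence required to close a turn, in ms
  disabled = false
}
```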

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveAudioConversationConfig {
field public static final com.google.firebase.ai.type.LiveAudioConversationConfig.Companion Companion;
}
@@ -1346,6 +1385,7 @@ package com.google.firebase.ai.type {
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setOutputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setPresencePenalty(Float? presencePenalty);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setRealtimeInputConfig(com.google.firebase.ai.type.LiveRealtimeInputConfig? config);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setResponseModality(com.google.firebase.ai.type.ResponseModality? responseModality);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setSpeechConfig(com.google.firebase.ai.type.SpeechConfig? speechConfig);
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTemperature(Float? temperature);
@@ -1357,6 +1397,7 @@ package com.google.firebase.ai.type {
field public Integer? maxOutputTokens;
field public com.google.firebase.ai.type.AudioTranscriptionConfig? outputAudioTranscription;
field public Float? presencePenalty;
field public com.google.firebase.ai.type.LiveRealtimeInputConfig? realtimeInputConfig;
field public com.google.firebase.ai.type.ResponseModality? responseModality;
field public com.google.firebase.ai.type.SpeechConfig? speechConfig;
field public Float? temperature;
@@ -1372,6 +1413,48 @@ package com.google.firebase.ai.type {
method public static com.google.firebase.ai.type.LiveGenerationConfig liveGenerationConfig(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.LiveGenerationConfig.Builder,kotlin.Unit> init);
}

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveRealtimeInputConfig {
method public static com.google.firebase.ai.type.LiveRealtimeInputConfig.Builder builder();
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.Companion Companion;
}

public static final class LiveRealtimeInputConfig.ActivityHandling {
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.ActivityHandling.Companion Companion;
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.ActivityHandling INTERRUPT;
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.ActivityHandling NO_INTERRUPT;
}

public static final class LiveRealtimeInputConfig.ActivityHandling.Companion {
}

public static final class LiveRealtimeInputConfig.Builder {
ctor public LiveRealtimeInputConfig.Builder();
method public com.google.firebase.ai.type.LiveRealtimeInputConfig build();
method public com.google.firebase.ai.type.LiveRealtimeInputConfig.Builder setActivityHandling(com.google.firebase.ai.type.LiveRealtimeInputConfig.ActivityHandling handling);
method public com.google.firebase.ai.type.LiveRealtimeInputConfig.Builder setAutomaticActivityDetection(com.google.firebase.ai.type.LiveActivityDetection config);
method public com.google.firebase.ai.type.LiveRealtimeInputConfig.Builder setTurnCoverage(com.google.firebase.ai.type.LiveRealtimeInputConfig.TurnCoverage coverage);
field public com.google.firebase.ai.type.LiveRealtimeInputConfig.ActivityHandling? activityHandling;
field public com.google.firebase.ai.type.LiveActivityDetection? automaticActivityDetection;
field public com.google.firebase.ai.type.LiveRealtimeInputConfig.TurnCoverage? turnCoverage;
}

public static final class LiveRealtimeInputConfig.Companion {
method public com.google.firebase.ai.type.LiveRealtimeInputConfig.Builder builder();
}

public static final class LiveRealtimeInputConfig.TurnCoverage {
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.TurnCoverage ALL_INPUT;
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.TurnCoverage.Companion Companion;
field public static final com.google.firebase.ai.type.LiveRealtimeInputConfig.TurnCoverage ONLY_ACTIVITY;
}

public static final class LiveRealtimeInputConfig.TurnCoverage.Companion {
}

public final class LiveRealtimeInputConfigKt {
method public static com.google.firebase.ai.type.LiveRealtimeInputConfig liveRealtimeInputConfig(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.LiveRealtimeInputConfig.Builder,kotlin.Unit> init);
}
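`liveRealtimeInputConfig` is where the pieces compose: it wires activity handling, turn coverage, and the detection settings into the single object that `LiveGenerationConfig.Builder.setRealtimeInputConfig` accepts. A minimal, self-contained sketch; the comments are a plain reading of the mode names, not normative documentation:

```kotlin
import com.google.firebase.ai.type.LiveActivityDetection
import com.google.firebase.ai.type.LiveRealtimeInputConfig
import com.google.firebase.ai.type.liveActivityDetection
import com.google.firebase.ai.type.liveGenerationConfig
import com.google.firebase.ai.type.liveRealtimeInputConfig

val generationConfig = liveGenerationConfig {
  realtimeInputConfig = liveRealtimeInputConfig {
    // New user activity does not cut off an in-progress model response.
    activityHandling = LiveRealtimeInputConfig.ActivityHandling.NO_INTERRUPT
    // Only input inside a detected activity window belongs to the turn.
    turnCoverage = LiveRealtimeInputConfig.TurnCoverage.ONLY_ACTIVITY
    automaticActivityDetection = liveActivityDetection {
      startSensitivity = LiveActivityDetection.Sensitivity.HIGH
      endSensitivity = LiveActivityDetection.Sensitivity.LOW
    }
  }
}
```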

@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveServerContent implements com.google.firebase.ai.type.LiveServerMessage {
ctor @Deprecated public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete, com.google.firebase.ai.type.Transcription? inputTranscription, com.google.firebase.ai.type.Transcription? outputTranscription);
method public com.google.firebase.ai.type.Content? getContent();
@@ -1430,6 +1513,8 @@ package com.google.firebase.ai.type {
method public suspend Object? sendAudioRealtime(com.google.firebase.ai.type.InlineData audio, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendStartActivityRealtime(kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendStopActivityRealtime(kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig, kotlin.coroutines.Continuation<? super kotlin.Unit>);
@@ -25,7 +25,9 @@ import com.google.firebase.ai.type.Content
import com.google.firebase.ai.type.FunctionResponsePart
import com.google.firebase.ai.type.GenerativeBackend
import com.google.firebase.ai.type.InlineData
import com.google.firebase.ai.type.LiveActivityDetection
import com.google.firebase.ai.type.LiveGenerationConfig
import com.google.firebase.ai.type.LiveRealtimeInputConfig
import com.google.firebase.ai.type.LiveServerContent
import com.google.firebase.ai.type.LiveServerToolCall
import com.google.firebase.ai.type.LiveSession
@@ -36,7 +38,9 @@ import com.google.firebase.ai.type.Schema
import com.google.firebase.ai.type.SessionResumptionConfig
import com.google.firebase.ai.type.Tool
import com.google.firebase.ai.type.content
import com.google.firebase.ai.type.liveActivityDetection
import com.google.firebase.ai.type.liveGenerationConfig
import com.google.firebase.ai.type.liveRealtimeInputConfig
import io.kotest.matchers.ints.shouldBeGreaterThan
import io.kotest.matchers.longs.shouldBeGreaterThan
import io.kotest.matchers.nulls.shouldNotBeNull
@@ -318,4 +322,65 @@ class LiveSessionTests {
.collect {}
return transcriptBuilder.toString()
}

@Test
fun testRealtimeInputConfig_manualActivity(): Unit = runBlocking {
val config = liveGenerationConfig {
responseModality = ResponseModality.AUDIO
outputAudioTranscription = AudioTranscriptionConfig()
realtimeInputConfig = liveRealtimeInputConfig {
automaticActivityDetection = liveActivityDetection { disabled = true }
}
}
val liveModel =
getLiveModel(
modelName = modelName,
config = config,
systemInstruction = SystemInstructions.yesOrNo
)
val session = liveModel.connect()
try {
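// Automatic VAD is disabled above, so the user turn is bracketed manually.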
session.sendStartActivityRealtime()
session.sendTextRealtime("Does five plus five equal ten?")
session.sendStopActivityRealtime()

val text = withTimeoutOrNull(15.seconds) { session.collectNextAudioOutputTranscript() } ?: ""
text.toLowerCasePreservingASCIIRules() shouldContain "yes"
} finally {
session.close()
}
}

@Test
fun testRealtimeInputConfig_fullConfiguration(): Unit = runBlocking {
val config = liveGenerationConfig {
responseModality = ResponseModality.AUDIO
outputAudioTranscription = AudioTranscriptionConfig()
realtimeInputConfig = liveRealtimeInputConfig {
activityHandling = LiveRealtimeInputConfig.ActivityHandling.NO_INTERRUPT
turnCoverage = LiveRealtimeInputConfig.TurnCoverage.ONLY_ACTIVITY
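// Detection stays enabled here; the fields below tune its behavior.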
automaticActivityDetection = liveActivityDetection {
startSensitivity = LiveActivityDetection.Sensitivity.HIGH
endSensitivity = LiveActivityDetection.Sensitivity.LOW
prefixPaddingMS = 100
silenceDurationMS = 500
disabled = false
}
}
}
val liveModel =
getLiveModel(
modelName = modelName,
config = config,
systemInstruction = SystemInstructions.yesOrNo
)
val session = liveModel.connect()
try {
session.sendTextRealtime("Is the sky blue?")
val text = withTimeoutOrNull(15.seconds) { session.collectNextAudioOutputTranscript() } ?: ""
text.toLowerCasePreservingASCIIRules() shouldContain "yes"
} finally {
session.close()
}
}
}
@@ -118,7 +118,8 @@ internal constructor(
config?.inputAudioTranscription?.toInternal(),
config?.outputAudioTranscription?.toInternal(),
resumption?.toInternal(),
config?.contextWindowCompression?.toInternal()
config?.contextWindowCompression?.toInternal(),
config?.realtimeInputConfig?.toInternal()
)
.toInternal()
val data: String = JSON.encodeToString(clientMessage)
@@ -220,6 +220,18 @@ public abstract class LiveSessionFutures internal constructor() {
*/
public abstract fun sendTextRealtime(text: String): ListenableFuture<Unit>

/**
* Manually marks the start of user activity. Required only when automatic activity detection is
* disabled.
*/
public abstract fun sendStartActivityRealtime(): ListenableFuture<Unit>

/**
* Manually marks the end of user activity. Required only when automatic activity detection is
* disabled.
*/
public abstract fun sendStopActivityRealtime(): ListenableFuture<Unit>

/**
* Streams client data to the model.
*
@@ -294,6 +306,12 @@
override fun sendTextRealtime(text: String): ListenableFuture<Unit> =
SuspendToFutureAdapter.launchFuture { session.sendTextRealtime(text) }

override fun sendStartActivityRealtime(): ListenableFuture<Unit> =
SuspendToFutureAdapter.launchFuture { session.sendStartActivityRealtime() }

override fun sendStopActivityRealtime(): ListenableFuture<Unit> =
SuspendToFutureAdapter.launchFuture { session.sendStopActivityRealtime() }

override fun sendMediaStream(mediaChunks: List<MediaData>) =
SuspendToFutureAdapter.launchFuture { session.sendMediaStream(mediaChunks) }
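The futures wrapper makes the same manual controls usable without coroutines. A minimal sketch (written in Kotlin for consistency with the rest of this diff, though the wrapper targets Java callers), assuming an already-connected `LiveSessionFutures` instance:

```kotlin
import com.google.firebase.ai.java.LiveSessionFutures

fun runTurnBlocking(session: LiveSessionFutures) {
  // ListenableFuture.get() blocks until each frame is written; call this
  // from a worker thread, never the main thread.
  session.sendStartActivityRealtime().get()
  session.sendTextRealtime("Is the sky blue?").get()
  session.sendStopActivityRealtime().get()
}
```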
