Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 44 additions & 7 deletions src/google/adk/models/gemini_llm_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from google.genai import types

from ..utils import model_name_utils
from ..utils.content_utils import filter_audio_parts
from ..utils.context_utils import Aclosing
from ..utils.variant_utils import GoogleLLMVariant
Expand Down Expand Up @@ -99,7 +100,6 @@ async def send_content(self, content: types.Content):
Args:
content: The content to send to the model.
"""

assert content.parts
if content.parts[0].function_response:
# All parts have to be function responses.
Expand All @@ -112,12 +112,30 @@ async def send_content(self, content: types.Content):
)
else:
logger.debug('Sending LLM new content %s', content)
await self._gemini_session.send(
input=types.LiveClientContent(
turns=[content],
turn_complete=True,
)
is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
self._model_version
)
is_gemini_api = self._api_backend == GoogleLLMVariant.GEMINI_API

# As of now, Gemini 3.1 Flash Live is only available in Gemini API, not
# Vertex AI.
if (
is_gemini_31
and is_gemini_api
and len(content.parts) == 1
and content.parts[0].text
):
logger.debug('Using send_realtime_input for Gemini 3.1 text input')
await self._gemini_session.send_realtime_input(
text=content.parts[0].text
)
else:
await self._gemini_session.send(
input=types.LiveClientContent(
turns=[content],
turn_complete=True,
)
)

async def send_realtime(self, input: RealtimeInput):
"""Sends a chunk of audio or a frame of video to the model in realtime.
Expand All @@ -128,7 +146,26 @@ async def send_realtime(self, input: RealtimeInput):
if isinstance(input, types.Blob):
# The blob is binary and is very large. So let's not log it.
logger.debug('Sending LLM Blob.')
await self._gemini_session.send_realtime_input(media=input)
is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
self._model_version
)
is_gemini_api = self._api_backend == GoogleLLMVariant.GEMINI_API

# As of now, Gemini 3.1 Flash Live is only available in Gemini API, not
# Vertex AI.
if is_gemini_31 and is_gemini_api:
if input.mime_type and input.mime_type.startswith('audio/'):
await self._gemini_session.send_realtime_input(audio=input)
elif input.mime_type and input.mime_type.startswith('image/'):
await self._gemini_session.send_realtime_input(video=input)
else:
logger.warning(
'Blob not sent. Unknown or empty mime type for'
' send_realtime_input: %s',
input.mime_type,
)
else:
await self._gemini_session.send_realtime_input(media=input)

elif isinstance(input, types.ActivityStart):
logger.debug('Sending LLM activity start signal.')
Expand Down
18 changes: 18 additions & 0 deletions src/google/adk/utils/model_name_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,21 @@ def is_gemini_2_or_above(model_string: Optional[str]) -> bool:
return False

return parsed_version.major >= 2


def is_gemini_3_1_flash_live(model_string: Optional[str]) -> bool:
  """Check whether the model name is the Gemini 3.1 Flash Live model.

  Note: Live bidi streaming uses this one very specific model name, so an
  exact string comparison is intentional here (no prefix/pattern matching).

  Args:
    model_string: The model name to check, or None.

  Returns:
    True only for the exact Gemini 3.1 Flash Live model name, False for
    None, empty strings, and every other model name.
  """
  # None and '' both fail the equality test, so no separate guard is needed.
  return model_string == 'gemini-3.1-flash-live-preview'
Loading