Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ __pycache__

.env
config.debug.yaml
data/
data/

tests/
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,35 @@ gemini:
- model_name: "gemini-3.0-pro"
model_header:
x-goog-ext-525001261-jspb: '[1,null,null,null,"9d8ca3786ebdfbea",null,null,0,[4],null,null,1]'
gems:
# Disabled by default to avoid accidental creation/update/deletion of gems.
enabled: false
# Policy mode:
# - off: disabled
# - fetch_only: load existing server-managed gems only
# - create_on_demand: create missing managed gems when needed (rate-limited)
# - privacy: reserved for ephemeral request-time flow (startup sync skipped)
policy: "off"
create_rate_limit_per_minute: 4
managed_gems_max_total: 200
cleanup:
enabled: false
unused_days: 7
touch_interval_minutes: 60
dry_run: false
max_deletes_per_run: 5
require_managed_marker: true
fetch_on_init: true
include_hidden_on_fetch: false
policies:
enabled: false
prefix: "fastapi_policy_"
default_policy:
enabled: false
key: "general_capability_guardrail"
# If `prompt` is null (or omitted), the implementation's built-in
# base system prompt will be used instead.
prompt: null
```

#### Environment Variables
Expand Down
139 changes: 132 additions & 7 deletions app/server/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,28 @@ def _prepare_messages_for_model(
return prepared


def _extract_leading_system_prompt(messages: list[Message]) -> tuple[str | None, list[Message]]:
    """Split off the run of leading system messages from *messages*.

    Returns the blank-line-joined text of the leading system messages together
    with the remaining conversation. Only the leading run is considered so the
    ordinary conversation flow is preserved; when no leading system message
    carries non-empty text, ``(None, messages)`` is returned with the input
    list untouched.
    """
    # Find where the leading run of system-role messages ends.
    boundary = 0
    for msg in messages:
        if msg.role != "system":
            break
        boundary += 1

    # Collect only the non-empty text fragments from that leading run.
    collected = [
        stripped
        for stripped in (text_from_message(m).strip() for m in messages[:boundary])
        if stripped
    ]

    if not collected:
        # Nothing usable was found — hand back the original list unchanged.
        return None, messages

    return "\n\n".join(collected), messages[boundary:]


def _response_items_to_messages(
items: str | list[ResponseInputItem],
) -> tuple[list[Message], str | list[ResponseInputItem]]:
Expand Down Expand Up @@ -1773,18 +1795,70 @@ async def create_chat_completion(
structured_requirement = _build_structured_requirement(request.response_format)
extra_instr = [structured_requirement.instruction] if structured_requirement else None

# This ensures that server-injected system instructions are part of the history
msgs = _prepare_messages_for_model(
# Split leading user-provided system prompt so we can attach it as a managed gem
# when create_on_demand is enabled.
system_prompt_text, non_system_messages = _extract_leading_system_prompt(request.messages)
system_only_request = bool(system_prompt_text) and not non_system_messages

if not system_prompt_text:
non_system_messages = request.messages

# Prepared messages with system prompt removed (candidate gem path).
msgs_without_system = _prepare_messages_for_model(
[] if system_only_request else non_system_messages,
request.tools,
request.tool_choice,
extra_instr,
)

# Prepared messages with full system prompt retained (fallback path).
msgs_with_system = _prepare_messages_for_model(
request.messages,
request.tools,
request.tool_choice,
extra_instr,
)

# Prefer searching reusable sessions against system-stripped history because
# gem-based sessions persist that history shape.
msgs = msgs_without_system if (system_prompt_text and not system_only_request) else msgs_with_system

session, client, remain = await _find_reusable_session(db, pool, model, msgs)
reused_session = session is not None
use_google_temporary_mode = g_config.gemini.chat_mode == ChatMode.TEMPORARY

# Fallback search for legacy sessions that still contain explicit system messages.
if (
session is None
and system_prompt_text
and not system_only_request
and msgs_with_system != msgs_without_system
):
session, client, remain = await _find_reusable_session(db, pool, model, msgs_with_system)
if session is not None:
msgs = msgs_with_system

managed_system_gem_id: str | None = None
if system_prompt_text and not system_only_request:
target_client = client
if target_client is None:
target_client = await pool.acquire()
client = target_client

managed_system_gem_id = await target_client.system_prompt_gem_id_or_create(system_prompt_text)
if managed_system_gem_id:
# When gem is available, keep system text out of the prompt payload.
msgs = msgs_without_system
if session is not None:
session.gem = managed_system_gem_id
else:
# Fall back to explicit system-text path.
msgs = msgs_with_system

# If we changed message mode after initial reuse lookup, re-check reuse quickly.
if session is None and msgs in (msgs_without_system, msgs_with_system):
session, client, remain = await _find_reusable_session(db, pool, model, msgs)

if session:
if not remain:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="No new messages.")
Expand All @@ -1810,8 +1884,9 @@ async def create_chat_completion(
)
else:
try:
client = await pool.acquire()
session = client.start_chat(model=model)
if client is None:
client = await pool.acquire()
session = client.start_chat(model=model, gem=managed_system_gem_id)
# Use the already prepared 'msgs' for a fresh session
m_input, files = await _process_conversation_with_compaction(
msgs,
Expand Down Expand Up @@ -1972,12 +2047,31 @@ async def create_response(
request.tool_choice if isinstance(request.tool_choice, (str, ToolChoiceFunction)) else None
)

messages = _prepare_messages_for_model(
# Split leading system/instruction content so it can be mapped to a managed
# gem when create_on_demand is enabled.
system_prompt_text, conv_without_system = _extract_leading_system_prompt(conv_messages)
system_only_conversation = bool(system_prompt_text) and not conv_without_system
if not system_prompt_text:
conv_without_system = conv_messages

messages_without_system = _prepare_messages_for_model(
[] if system_only_conversation else conv_without_system,
standard_tools or None,
model_tool_choice,
extra_instr or None,
)
messages_with_system = _prepare_messages_for_model(
conv_messages,
standard_tools or None,
model_tool_choice,
extra_instr or None,
)
messages = (
messages_without_system
if (system_prompt_text and not system_only_conversation)
else messages_with_system
)

pool, db = GeminiClientPool(), LMDBConversationStore()
try:
model = _get_model_by_name(request.model)
Expand All @@ -1987,6 +2081,36 @@ async def create_response(
session, client, remain = await _find_reusable_session(db, pool, model, messages)
reused_session = session is not None
use_google_temporary_mode = g_config.gemini.chat_mode == ChatMode.TEMPORARY

# Fallback reuse search for legacy sessions that still included explicit system text.
if (
session is None
and system_prompt_text
and not system_only_conversation
and messages_with_system != messages_without_system
):
session, client, remain = await _find_reusable_session(db, pool, model, messages_with_system)
if session is not None:
messages = messages_with_system

managed_system_gem_id: str | None = None
if system_prompt_text and not system_only_conversation:
target_client = client
if target_client is None:
target_client = await pool.acquire()
client = target_client

managed_system_gem_id = await target_client.system_prompt_gem_id_or_create(system_prompt_text)
if managed_system_gem_id:
messages = messages_without_system
if session is not None:
session.gem = managed_system_gem_id
else:
messages = messages_with_system

# If message shape changed after gem resolution, search reusable session again.
if session is None and messages in (messages_without_system, messages_with_system):
session, client, remain = await _find_reusable_session(db, pool, model, messages)
if session:
msgs = _prepare_messages_for_model(
remain,
Expand All @@ -2008,8 +2132,9 @@ async def create_response(
)
else:
try:
client = await pool.acquire()
session = client.start_chat(model=model)
if client is None:
client = await pool.acquire()
session = client.start_chat(model=model, gem=managed_system_gem_id)
m_input, files = await _process_conversation_with_compaction(
messages,
tmp_dir,
Expand Down
Loading