Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,9 @@ dmypy.json

# PyCharm
.idea/

# pnpm
pnpm-lock.yaml

.yarn/*
!.yarn/patches
2 changes: 2 additions & 0 deletions backend/app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from redis import asyncio as aioredis

from . import auth, messages
from .ai_helpers.routers import router as ai_helpers_router
from .config import BACKEND_ROOT_PATH, DOMAIN, REDIS_HOST
from .experiments.routers import router as experiments_router
from .users.routers import (
Expand Down Expand Up @@ -62,5 +63,6 @@ def create_app() -> FastAPI:
app.include_router(users_router)
app.include_router(messages.router)
app.include_router(workspaces_router)
app.include_router(ai_helpers_router)

return app
110 changes: 110 additions & 0 deletions backend/app/ai_helpers/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""This module contains prompts for the AI Helper LLM tasks."""

import textwrap

# System prompt for suggesting Bayesian A/B test arms. It asks the model to
# reply with a bare JSON array of objects with keys name, description,
# mu_init and sigma_init. dedent()/strip() normalise the literal's common
# indentation and its leading/trailing newlines.
SUGGEST_BAYS_AB_ARMS = textwrap.dedent(
"""
You are an assistant for a tool that helps social sector organizations run
digital experiments. Given the experiment details below, suggest concise
names and descriptions for each arm (variant) of the experiment.
For Bayesian A/B tests, there are usually two arms: a control
(existing/baseline) and a treatment (new/changed feature).
For each arm, also suggest reasonable initial values for mu_init (mean prior,
between 0 and 1 for rates) and sigma_init (standard deviation).
Respond ONLY with a valid JSON array. Each array element should be an object
with keys: name, description, mu_init, sigma_init.
No explanation, no markdown.
"""
).strip()

# System prompt for suggesting multi-armed bandit arms. The prior parameters
# requested per arm depend on prior_type: alpha_init/beta_init for a beta
# prior, mu_init/sigma_init for a normal prior. The reply must be a bare JSON
# array.
SUGGEST_MAB_ARMS = textwrap.dedent(
"""
You are an assistant for a tool that helps social sector organizations run
digital experiments. Given the experiment details below, suggest concise
names and descriptions for each arm (variant) of the experiment.
For multi-armed bandit (MAB) experiments, use the number of variants provided.
For each arm, also suggest reasonable initial values for alpha_init and
beta_init (for beta prior) or mu_init and sigma_init (for normal prior),
depending on the prior_type.
Respond ONLY with a valid JSON array. Each array element should be an object
with keys: name, description, and the appropriate prior parameters.
No explanation, no markdown.
"""
).strip()

# System prompt for suggesting user contexts for a contextual bandit. Each
# suggested context carries a name, a description and a value_type of either
# 'binary' or 'real-valued'. The reply must be a bare JSON array.
SUGGEST_CMAB_CONTEXTS = textwrap.dedent(
"""
You are an assistant for a tool that helps social sector organizations run
digital experiments. Given the experiment details below, suggest relevant
user contexts for a Contextual Bandit (CMAB) experiment.
Contexts are user attributes (e.g., age, location, engagement level) that
might influence how they respond to different variants.
For each context, provide a concise 'name', a 'description', and a
'value_type' ('binary' or 'real-valued').
Respond ONLY with a valid JSON array. Each array element should be an object
with keys: name, description, value_type.
No explanation, no markdown.
"""
).strip()

# System prompt for suggesting contextual bandit arms, given the experiment
# details together with the user contexts. Each arm carries name, description,
# mu_init and sigma_init. The reply must be a bare JSON array.
SUGGEST_CMAB_ARMS = textwrap.dedent(
"""
You are an assistant for a tool that helps social sector organizations run
digital experiments. Given the experiment details and user contexts below,
suggest concise names and descriptions for each arm (variant) of a
Contextual Bandit (CMAB) experiment.
The arms should be distinct variations of a feature that is being tested.
For each arm, also suggest reasonable initial values for mu_init (mean prior)
and sigma_init (standard deviation prior).
Respond ONLY with a valid JSON array. Each array element should be an object
with keys: name, description, mu_init, sigma_init.
No explanation, no markdown.
"""
).strip()

# System prompt for generating the top-level experiment fields. It embeds
# short documentation excerpts for the three supported experiment types so
# the model can pick one, and asks for a bare JSON object with keys name,
# description and experiment_type ('mab', 'bayes_ab' or 'cmab').
GENERATE_EXPERIMENT_FIELDS = textwrap.dedent(
"""
You are an assistant for a tool that helps social sector organizations
run digital experiments. Below are documentation excerpts about
different experiment types. Use this context to choose the most
appropriate experiment type and generate relevant names and
descriptions.
-----
# Bayesian A/B Testing
Bayesian A/B testing compares two variants: treatment (e.g. a new feature)
and control (e.g. an existing feature). This is a useful experiment when
you need intuitive probability statements about which arm is better for
making downstream decisions, and have the resources to balance how your
arms are allocated to your experimental cohort. Choose this over the bandit
algorithms when you're trying to make a 'permanent' decision about which
variant is better, as opposed to trying to dynamically pick the
best performing variant as data comes in.
-----
# Contextual Bandits (CMABs)
Contextual bandits (CMABs), similarly to multi-armed bandits (MABs), are
useful for running experiments where you have multiple variants of a feature
/ implementation that you want to test. However, the key difference is that
contextual bandits take information about the end-user (e.g. gender, age,
engagement history) into account while converging to the best-performing
variant. Thus, rather than having a single best-performing variant at the
end of an experiment, you instead have the best-performing variant that
depends on the user context.
-----
# Multi-Armed Bandits (MABs)
Multi-armed Bandits (MABs) are useful for running experiments where you have
multiple variants of a feature / implementation that you want to test, and
want to automatically converge to the variant that produces the best
results. Since we update the probabilities for the variants with every
result observation, at any given time you can observe the updated
probability of success for every arm. The best-performing variant at the
end of the experiment is the one with the highest probability.
-----
Given a goal, outcome, and number of variants, generate:
1. A concise and descriptive experiment name (max 8 words).
2. A detailed description of the experiment.
3. The most appropriate experiment type: 'mab' (multi-armed bandit),
'bayes_ab' (Bayesian A/B test), or 'cmab' (contextual bandit).
Respond ONLY with a valid JSON object with keys: name, description,
experiment_type. No explanation, no markdown.
"""
).strip()
235 changes: 235 additions & 0 deletions backend/app/ai_helpers/routers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
import json
import os
from typing import Any, Optional

from fastapi import APIRouter, HTTPException
from google import api_core, genai
from google.api_core import exceptions as api_core_exceptions
from google.genai import errors as genai_errors
from google.genai import types

from . import prompts
from .schemas import (
    CMABArmContext,
    CMABArmsSuggestionRequest,
    CMABContextSuggestionRequest,
    CompleteExperimentResponse,
    ExperimentAIGenerateRequest,
    ExperimentAIGenerateResponse,
    MABArmsSuggestionRequest,
    baysABArmsSuggestionRequest,
)

# Every endpoint declared on this router is served under /ai_helpers.
router = APIRouter(prefix="/ai_helpers", tags=["AI Helpers"])

# The Gemini key is read once at import time; a missing or empty value aborts
# the import so the application cannot start without it.
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable not set.")

# Module-level Gemini client used by the request handlers in this module.
client = genai.Client(api_key=api_key)


@router.post("/suggestBaysAB-arms")
async def suggest_arms(
    request: baysABArmsSuggestionRequest,
) -> list[dict[str, Any]]:
    """Suggest arms for a Bayesian A/B test.

    The request is serialised to JSON and sent to Gemini as the user prompt,
    with ``prompts.SUGGEST_BAYS_AB_ARMS`` as the system instruction.

    Args:
        request: Experiment details used to build the prompt.

    Returns:
        The JSON array returned by the model, one object per suggested arm.

    Raises:
        HTTPException: 500 if the Gemini call fails or the model's reply is
            not a JSON array.
    """
    user_prompt = request.model_dump_json()

    try:
        # Await the SDK's async client: the synchronous call would block the
        # event loop for the whole network round-trip.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash",
            contents=[user_prompt],
            config=types.GenerateContentConfig(
                system_instruction=prompts.SUGGEST_BAYS_AB_ARMS,
                max_output_tokens=500,
                temperature=0.1,
                response_mime_type="application/json",
            ),
        )
    # google-genai reports failures through genai_errors.APIError; the
    # api_core exception is still caught for backward compatibility.
    except (api_core_exceptions.GoogleAPICallError, genai_errors.APIError) as e:
        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e

    try:
        # response.text may be None or JSON cut short by max_output_tokens.
        suggestions = json.loads(response.text)
    except (TypeError, ValueError) as e:
        raise HTTPException(
            status_code=500, detail=f"Gemini returned invalid JSON: {e}"
        ) from e

    if not isinstance(suggestions, list):
        raise HTTPException(
            status_code=500, detail="Gemini returned JSON that is not an array."
        )
    return suggestions


@router.post("/suggestMAB-arms")
async def suggest_mab_arms(
    request: MABArmsSuggestionRequest,
) -> list[dict[str, Any]]:
    """Suggest arms for a Multi-Armed Bandit experiment.

    The request is serialised to JSON and sent to Gemini as the user prompt,
    with ``prompts.SUGGEST_MAB_ARMS`` as the system instruction.

    Args:
        request: Experiment details used to build the prompt.

    Returns:
        The JSON array returned by the model, one object per suggested arm.

    Raises:
        HTTPException: 500 if the Gemini call fails or the model's reply is
            not a JSON array.
    """
    user_prompt = request.model_dump_json()

    try:
        # Await the SDK's async client: the synchronous call would block the
        # event loop for the whole network round-trip.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash",
            contents=[user_prompt],
            config=types.GenerateContentConfig(
                system_instruction=prompts.SUGGEST_MAB_ARMS,
                max_output_tokens=500,
                temperature=0.9,
                response_mime_type="application/json",
            ),
        )
    # google-genai reports failures through genai_errors.APIError; the
    # api_core exception is still caught for backward compatibility.
    except (api_core_exceptions.GoogleAPICallError, genai_errors.APIError) as e:
        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e

    try:
        # response.text may be None or JSON cut short by max_output_tokens.
        suggestions = json.loads(response.text)
    except (TypeError, ValueError) as e:
        raise HTTPException(
            status_code=500, detail=f"Gemini returned invalid JSON: {e}"
        ) from e

    if not isinstance(suggestions, list):
        raise HTTPException(
            status_code=500, detail="Gemini returned JSON that is not an array."
        )
    return suggestions


@router.post("/suggestCMAB-contexts")
async def suggest_cmab_contexts(
    request: CMABContextSuggestionRequest,
) -> list[dict[str, Any]]:
    """Suggest contexts for a Contextual Multi-Armed Bandit experiment.

    The request is serialised to JSON and sent to Gemini as the user prompt,
    with ``prompts.SUGGEST_CMAB_CONTEXTS`` as the system instruction.

    Args:
        request: Experiment details used to build the prompt.

    Returns:
        The JSON array returned by the model, one object per suggested
        context.

    Raises:
        HTTPException: 500 if the Gemini call fails or the model's reply is
            not a JSON array.
    """
    user_prompt = request.model_dump_json()

    try:
        # Await the SDK's async client: the synchronous call would block the
        # event loop for the whole network round-trip.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash",
            contents=[user_prompt],
            config=types.GenerateContentConfig(
                system_instruction=prompts.SUGGEST_CMAB_CONTEXTS,
                max_output_tokens=500,
                temperature=0.7,
                response_mime_type="application/json",
            ),
        )
    # google-genai reports failures through genai_errors.APIError; the
    # api_core exception is still caught for backward compatibility.
    except (api_core_exceptions.GoogleAPICallError, genai_errors.APIError) as e:
        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e

    try:
        # response.text may be None or JSON cut short by max_output_tokens.
        suggestions = json.loads(response.text)
    except (TypeError, ValueError) as e:
        raise HTTPException(
            status_code=500, detail=f"Gemini returned invalid JSON: {e}"
        ) from e

    if not isinstance(suggestions, list):
        raise HTTPException(
            status_code=500, detail="Gemini returned JSON that is not an array."
        )
    return suggestions


@router.post("/suggestCMAB-arms")
async def suggest_cmab_arms(
    request: CMABArmsSuggestionRequest,
) -> list[dict[str, Any]]:
    """Suggest arms for a Contextual Multi-Armed Bandit experiment.

    The request (experiment details plus contexts) is serialised to JSON and
    sent to Gemini as the user prompt, with ``prompts.SUGGEST_CMAB_ARMS`` as
    the system instruction.

    Args:
        request: Experiment details and contexts used to build the prompt.

    Returns:
        The JSON array returned by the model, one object per suggested arm.

    Raises:
        HTTPException: 500 if the Gemini call fails or the model's reply is
            not a JSON array.
    """
    user_prompt = request.model_dump_json()

    try:
        # Await the SDK's async client: the synchronous call would block the
        # event loop for the whole network round-trip.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash",
            contents=[user_prompt],
            config=types.GenerateContentConfig(
                system_instruction=prompts.SUGGEST_CMAB_ARMS,
                max_output_tokens=500,
                temperature=0.7,
                response_mime_type="application/json",
            ),
        )
    # google-genai reports failures through genai_errors.APIError; the
    # api_core exception is still caught for backward compatibility.
    except (api_core_exceptions.GoogleAPICallError, genai_errors.APIError) as e:
        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e

    try:
        # response.text may be None or JSON cut short by max_output_tokens.
        suggestions = json.loads(response.text)
    except (TypeError, ValueError) as e:
        raise HTTPException(
            status_code=500, detail=f"Gemini returned invalid JSON: {e}"
        ) from e

    if not isinstance(suggestions, list):
        raise HTTPException(
            status_code=500, detail="Gemini returned JSON that is not an array."
        )
    return suggestions


async def generate_experiment_fields_logic(
    data: ExperimentAIGenerateRequest,
) -> ExperimentAIGenerateResponse:
    """Generate experiment name, description, and type based on user inputs.

    The request is serialised to JSON and sent to Gemini as the user prompt,
    with ``prompts.GENERATE_EXPERIMENT_FIELDS`` as the system instruction and
    ``ExperimentAIGenerateResponse`` as the requested response schema.

    Args:
        data: User inputs used to build the prompt.

    Returns:
        The model's reply parsed into an ``ExperimentAIGenerateResponse``.

    Raises:
        RuntimeError: If the Gemini call fails, or if the reply is not valid
            JSON or cannot be loaded into ``ExperimentAIGenerateResponse``.
    """
    user_prompt = data.model_dump_json()

    try:
        # Await the SDK's async client: the synchronous call would block the
        # event loop for the whole network round-trip.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash",
            contents=[user_prompt],
            config=types.GenerateContentConfig(
                system_instruction=prompts.GENERATE_EXPERIMENT_FIELDS,
                max_output_tokens=500,
                temperature=0.1,
                response_mime_type="application/json",
                response_schema=ExperimentAIGenerateResponse,
            ),
        )
    # google-genai reports failures through genai_errors.APIError; the
    # api_core exception is still caught for backward compatibility.
    except (api_core_exceptions.GoogleAPICallError, genai_errors.APIError) as e:
        raise RuntimeError(f"Gemini API error: {e}") from e

    try:
        # response.text may be None or truncated JSON, and the parsed payload
        # may not be a mapping or may fail model validation. All of these
        # surface as TypeError/ValueError and are reported as RuntimeError so
        # callers have a single failure type to handle.
        response_data = json.loads(response.text)
        return ExperimentAIGenerateResponse(**response_data)
    except (TypeError, ValueError) as e:
        raise RuntimeError(f"Gemini returned an invalid response: {e}") from e


@router.post("/generate-whole-experiment", response_model=CompleteExperimentResponse)
async def generate_whole_experiment(
    data: ExperimentAIGenerateRequest,
) -> CompleteExperimentResponse:
    """Generate a complete experiment configuration.

    First generates the experiment name, description and type, then asks for
    arm suggestions using the handler that matches the chosen type. For
    ``"cmab"`` experiments, contexts are suggested first and passed on to the
    arm suggestion request.

    Args:
        data: Goal, outcome and number of variants supplied by the user.

    Returns:
        The generated experiment fields together with the suggested arms and,
        for contextual bandits, the suggested contexts (``None`` otherwise).

    Raises:
        HTTPException: 500 if any generation step fails or the suggested
            contexts cannot be loaded into ``CMABArmContext``. HTTPExceptions
            raised by the arm/context handlers propagate unchanged.
    """
    try:
        experiment_fields = await generate_experiment_fields_logic(data)

        arms: list[dict[str, Any]] = []
        contexts: Optional[list[dict[str, Any]]] = None

        if experiment_fields.experiment_type == "bayes_ab":
            # Generate arms for Bayesian A/B test
            bayes_request = baysABArmsSuggestionRequest(
                name=experiment_fields.name,
                description=experiment_fields.description,
                methodType=experiment_fields.experiment_type,
                goal=data.goal,
                outcome=data.outcome,
                numVariants=data.num_variants,
                reward_type="binary",  # Default for A/B tests
            )
            arms = await suggest_arms(bayes_request)

        elif experiment_fields.experiment_type == "mab":
            # Generate arms for Multi-Armed Bandit
            mab_request = MABArmsSuggestionRequest(
                name=experiment_fields.name,
                description=experiment_fields.description,
                methodType=experiment_fields.experiment_type,
                goal=data.goal,
                outcome=data.outcome,
                numVariants=data.num_variants,
                prior_type="beta",  # Default prior type
                reward_type="binary",  # Default reward type
            )
            arms = await suggest_mab_arms(mab_request)

        elif experiment_fields.experiment_type == "cmab":
            # Generate contexts first for Contextual Bandit
            context_request = CMABContextSuggestionRequest(
                name=experiment_fields.name,
                description=experiment_fields.description,
                methodType=experiment_fields.experiment_type,
                goal=data.goal,
                outcome=data.outcome,
                numVariants=data.num_variants,
                prior_type="normal",  # Default prior type
                reward_type="binary",  # Default reward type
            )
            contexts = await suggest_cmab_contexts(context_request)

            # The contexts are unvalidated model output: an entry may not be
            # a mapping or may lack required fields, so report that as an
            # HTTP error instead of letting it escape unhandled.
            try:
                cmab_contexts = [CMABArmContext(**ctx) for ctx in contexts]
            except (TypeError, ValueError) as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Error generating complete experiment: {e}",
                ) from e

            # Generate arms for Contextual Bandit
            cmab_request = CMABArmsSuggestionRequest(
                name=experiment_fields.name,
                description=experiment_fields.description,
                methodType=experiment_fields.experiment_type,
                goal=data.goal,
                outcome=data.outcome,
                numVariants=data.num_variants,
                prior_type="normal",  # Default prior type
                reward_type="binary",  # Default reward type
                contexts=cmab_contexts,
            )
            arms = await suggest_cmab_arms(cmab_request)

        return CompleteExperimentResponse(
            name=experiment_fields.name,
            description=experiment_fields.description,
            experiment_type=experiment_fields.experiment_type,
            arms=arms,
            contexts=contexts,
        )

    # RuntimeError comes from generate_experiment_fields_logic; the SDK
    # exception types are a safety net for errors that escape the helpers.
    except (
        RuntimeError,
        api_core_exceptions.GoogleAPICallError,
        genai_errors.APIError,
    ) as e:
        raise HTTPException(
            status_code=500, detail=f"Error generating complete experiment: {e}"
        ) from e
Loading
Loading