diff --git a/.gitignore b/.gitignore index c803427..16bd050 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,9 @@ dmypy.json # PyCharm .idea/ + +# pnpm +pnpm-lock.yaml + +.yarn/* +!.yarn/patches diff --git a/backend/app/__init__.py b/backend/app/__init__.py index 88e9dfd..1e3688b 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -6,6 +6,7 @@ from redis import asyncio as aioredis from . import auth, messages +from .ai_helpers.routers import router as ai_helpers_router from .config import BACKEND_ROOT_PATH, DOMAIN, REDIS_HOST from .experiments.routers import router as experiments_router from .users.routers import ( @@ -62,5 +63,6 @@ def create_app() -> FastAPI: app.include_router(users_router) app.include_router(messages.router) app.include_router(workspaces_router) + app.include_router(ai_helpers_router) return app
diff --git a/backend/app/ai_helpers/prompts.py b/backend/app/ai_helpers/prompts.py
new file mode 100644
index 0000000..6f4fb39
--- /dev/null
+++ b/backend/app/ai_helpers/prompts.py
@@ -0,0 +1,123 @@
+"""This module contains prompts for the AI Helper LLM tasks."""
+
+import textwrap
+
+# Every prompt is written as an indented triple-quoted block; dedent() and
+# strip() remove the source indentation and the surrounding whitespace.
+
+# Bayesian A/B arms: asks for a JSON array of objects with keys name,
+# description, mu_init and sigma_init.
+SUGGEST_BAYS_AB_ARMS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details below, suggest concise
+    names and descriptions for each arm (variant) of the experiment.
+    For Bayesian A/B tests, there are usually two arms: a control
+    (existing/baseline) and a treatment (new/changed feature).
+    For each arm, also suggest reasonable initial values for mu_init (mean prior,
+    between 0 and 1 for rates) and sigma_init (standard deviation).
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, mu_init, sigma_init.
+    No explanation, no markdown.
+    """
+).strip()
+
+# Multi-armed bandit arms: asks for a JSON array of objects with name,
+# description and the prior parameters that match the request's prior_type.
+SUGGEST_MAB_ARMS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details below, suggest concise
+    names and descriptions for each arm (variant) of the experiment.
+    For multi-armed bandit (MAB) experiments, use the number of variants provided.
+    For each arm, also suggest reasonable initial values for alpha_init and
+    beta_init (for beta prior) or mu_init and sigma_init (for normal prior),
+    depending on the prior_type.
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, and the appropriate prior parameters.
+    No explanation, no markdown.
+    """
+).strip()
+
+# Contextual bandit contexts: asks for a JSON array of objects with keys name,
+# description and value_type ('binary' or 'real-valued').
+SUGGEST_CMAB_CONTEXTS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details below, suggest relevant
+    user contexts for a Contextual Bandit (CMAB) experiment.
+    Contexts are user attributes (e.g., age, location, engagement level) that
+    might influence how they respond to different variants.
+    For each context, provide a concise 'name', a 'description', and a
+    'value_type' ('binary' or 'real-valued').
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, value_type.
+    No explanation, no markdown.
+    """
+).strip()
+
+# Contextual bandit arms: asks for a JSON array of objects with keys name,
+# description, mu_init and sigma_init.
+SUGGEST_CMAB_ARMS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details and user contexts below,
+    suggest concise names and descriptions for each arm (variant) of a
+    Contextual Bandit (CMAB) experiment.
+    The arms should be distinct variations of a feature that is being tested.
+    For each arm, also suggest reasonable initial values for mu_init (mean prior)
+    and sigma_init (standard deviation prior).
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, mu_init, sigma_init.
+    No explanation, no markdown.
+    """
+).strip()
+
+# Experiment fields: embeds short descriptions of the three experiment types
+# and asks for a JSON object with keys name, description and experiment_type.
+GENERATE_EXPERIMENT_FIELDS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations
+    run digital experiments. Below are documentation excerpts about
+    different experiment types. Use this context to choose the most
+    appropriate experiment type and generate relevant names and
+    descriptions.
+    -----
+    # Bayesian A/B Testing
+    Bayesian A/B testing compares two variants: treatment (e.g. a new feature)
+    and control (e.g. an existing feature). This is a useful experiment when
+    you need intuitive probability statements about which arm is better for
+    making downstream decisions, and have the resources to balance how your
+    arms are allocated to your experimental cohort. Choose this over the bandit
+    algorithms when you're trying to make a 'permanent' decision about which
+    variant is better, as opposed to trying to dynamically pick the
+    best performing variant as data comes in.
+    -----
+    # Contextual Bandits (CMABs)
+    Contextual bandits (CMABs), similarly to multi-armed bandits (MABs), are
+    useful for running experiments where you have multiple variants of a feature
+    / implementation that you want to test. However, the key difference is that
+    contextual bandits take information about the end-user (e.g. gender, age,
+    engagement history) into account while converging to the best-performing
+    variant. Thus, rather than having a single best-performing variant at the
+    end of an experiment, you instead have the best-performing variant that
+    depends on the user context.
+    -----
+    # Multi-Armed Bandits (MABs)
+    Multi-armed Bandits (MABs) are useful for running experiments where you have
+    multiple variants of a feature / implementation that you want to test, and
+    want to automatically converge to the variant that produces the best
+    results. Since we update the probabilities for the variants with every
+    result observation, at any given time you can observe the updated
+    probability of success for every arm. The best-performing variant at the
+    end of the experiment is the one with the highest probability.
+    -----
+    Given a goal, outcome, and number of variants, generate:
+    1. A concise and descriptive experiment name (max 8 words).
+    2. A detailed description of the experiment.
+    3. The most appropriate experiment type: 'mab' (multi-armed bandit),
+    'bayes_ab' (Bayesian A/B test), or 'cmab' (contextual bandit).
+    Respond ONLY with a valid JSON object with keys: name, description,
+    experiment_type. No explanation, no markdown.
+    """
+).strip()
diff --git a/backend/app/ai_helpers/routers.py b/backend/app/ai_helpers/routers.py
new file mode 100644
index 0000000..1a25647
--- /dev/null
+++ b/backend/app/ai_helpers/routers.py
@@ -0,0 +1,235 @@
+import json
+import os
+from typing import Any, Optional
+
+from fastapi import APIRouter, HTTPException
+from google import api_core, genai
+from google.genai import types
+
+from . 
import prompts
+from .schemas import (
+    CMABArmContext,
+    CMABArmsSuggestionRequest,
+    CMABContextSuggestionRequest,
+    CompleteExperimentResponse,
+    ExperimentAIGenerateRequest,
+    ExperimentAIGenerateResponse,
+    MABArmsSuggestionRequest,
+    baysABArmsSuggestionRequest,
+)
+
+router = APIRouter(prefix="/ai_helpers", tags=["AI Helpers"])
+
+# Model name and output-token cap shared by every Gemini call in this module.
+_GEMINI_MODEL = "gemini-2.0-flash"
+_MAX_OUTPUT_TOKENS = 500
+
+# The key is read once at import time; importing this module without
+# GEMINI_API_KEY set raises ValueError.
+api_key = os.environ.get("GEMINI_API_KEY")
+if not api_key:
+    raise ValueError("GEMINI_API_KEY environment variable not set.")
+
+client = genai.Client(api_key=api_key)
+
+
+async def _generate_json(
+    system_instruction: str,
+    user_prompt: str,
+    temperature: float,
+    response_schema: Any = None,
+) -> Any:
+    """Send one prompt to Gemini and return the decoded JSON payload.
+
+    The call goes through the SDK's async client (``client.aio``) so the
+    event loop is not blocked while waiting for the model.
+
+    Args:
+        system_instruction: Task prompt from the ``prompts`` module.
+        user_prompt: JSON-serialized request describing the experiment.
+        temperature: Sampling temperature for this call.
+        response_schema: Optional schema forwarded to Gemini as-is.
+
+    Raises:
+        genai.errors.APIError: The Gemini API call failed.
+        ValueError: The reply had no text or was not valid JSON
+            (``json.JSONDecodeError`` subclasses ``ValueError``).
+    """
+    response = await client.aio.models.generate_content(
+        model=_GEMINI_MODEL,
+        contents=[user_prompt],
+        config=types.GenerateContentConfig(
+            system_instruction=system_instruction,
+            max_output_tokens=_MAX_OUTPUT_TOKENS,
+            temperature=temperature,
+            response_mime_type="application/json",
+            response_schema=response_schema,
+        ),
+    )
+    # json.loads(None) would raise TypeError, so reject a missing body here.
+    if not response.text:
+        raise ValueError("Gemini returned no text.")
+    return json.loads(response.text)
+
+
+async def _suggest(
+    system_instruction: str, user_prompt: str, temperature: float
+) -> list[dict[str, Any]]:
+    """Run a suggestion prompt, mapping failures to ``HTTPException``.
+
+    Shared by the four ``/suggest*`` endpoints, which differ only in prompt
+    and temperature.
+
+    Raises:
+        HTTPException: 500 when the Gemini API call fails; 502 when the
+            reply is not a JSON array.
+    """
+    try:
+        payload = await _generate_json(system_instruction, user_prompt, temperature)
+    except genai.errors.APIError as e:
+        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e
+    except ValueError as e:
+        raise HTTPException(
+            status_code=502, detail=f"Invalid Gemini response: {e}"
+        ) from e
+    if not isinstance(payload, list):
+        raise HTTPException(
+            status_code=502, detail="Invalid Gemini response: expected a JSON array."
+        )
+    return payload
+
+
+@router.post("/suggestBaysAB-arms")
+async def suggest_arms(
+    request: baysABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for Bayesian A/B test."""
+    return await _suggest(
+        prompts.SUGGEST_BAYS_AB_ARMS, request.model_dump_json(), temperature=0.1
+    )
+
+
+@router.post("/suggestMAB-arms")
+async def suggest_mab_arms(
+    request: MABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for Multi-Armed Bandit experiment."""
+    return await _suggest(
+        prompts.SUGGEST_MAB_ARMS, request.model_dump_json(), temperature=0.9
+    )
+
+
+@router.post("/suggestCMAB-contexts")
+async def suggest_cmab_contexts(
+    request: CMABContextSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest contexts for Contextual Multi-Armed Bandit experiment."""
+    return await _suggest(
+        prompts.SUGGEST_CMAB_CONTEXTS, request.model_dump_json(), temperature=0.7
+    )
+
+
+@router.post("/suggestCMAB-arms")
+async def suggest_cmab_arms(
+    request: CMABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for Contextual Multi-Armed Bandit experiment."""
+    return await _suggest(
+        prompts.SUGGEST_CMAB_ARMS, request.model_dump_json(), temperature=0.7
+    )
+
+
+async def generate_experiment_fields_logic(
+    data: ExperimentAIGenerateRequest,
+) -> ExperimentAIGenerateResponse:
+    """Generate experiment name, description, and type based on user inputs.
+
+    Raises:
+        RuntimeError: The Gemini API call failed, or its reply could not be
+            turned into an ``ExperimentAIGenerateResponse``.
+    """
+    try:
+        response_data = await _generate_json(
+            prompts.GENERATE_EXPERIMENT_FIELDS,
+            data.model_dump_json(),
+            temperature=0.1,
+            response_schema=ExperimentAIGenerateResponse,
+        )
+        return ExperimentAIGenerateResponse(**response_data)
+    except genai.errors.APIError as e:
+        raise RuntimeError(f"Gemini API error: {e}") from e
+    except (TypeError, ValueError) as e:
+        # TypeError: the payload is not a JSON object. ValueError: invalid JSON
+        # or a failed pydantic validation.
+        raise RuntimeError(f"Invalid Gemini response: {e}") from e
+
+
+@router.post("/generate-whole-experiment", response_model=CompleteExperimentResponse)
+async def generate_whole_experiment(
+    data: ExperimentAIGenerateRequest,
+) -> CompleteExperimentResponse:
+    """Generate a complete experiment configuration.
+
+    Asks Gemini for the experiment name, description and type, then for the
+    arms (and, for ``cmab``, the contexts) that go with that type.
+
+    Raises:
+        HTTPException: 500 when a Gemini API call or the experiment-fields
+            reply fails; 502 when an arms/contexts reply is unusable.
+    """
+    try:
+        experiment_fields = await generate_experiment_fields_logic(data)
+    except (RuntimeError, genai.errors.APIError) as e:
+        raise HTTPException(
+            status_code=500, detail=f"Error generating complete experiment: {e}"
+        ) from e
+
+    # Fields common to every suggestion request built below.
+    shared: dict[str, Any] = {
+        "name": experiment_fields.name,
+        "description": experiment_fields.description,
+        "methodType": experiment_fields.experiment_type,
+        "goal": data.goal,
+        "outcome": data.outcome,
+        "numVariants": data.num_variants,
+        "reward_type": "binary",  # Default reward type
+    }
+
+    arms: list[dict[str, Any]] = []
+    contexts: Optional[list[dict[str, Any]]] = None
+
+    if experiment_fields.experiment_type == "bayes_ab":
+        arms = await suggest_arms(baysABArmsSuggestionRequest(**shared))
+
+    elif experiment_fields.experiment_type == "mab":
+        # MAB defaults to a beta prior.
+        arms = await suggest_mab_arms(
+            MABArmsSuggestionRequest(**shared, prior_type="beta")
+        )
+
+    elif experiment_fields.experiment_type == "cmab":
+        # CMAB defaults to a normal prior. Contexts are generated first because
+        # the arms request takes them as input.
+        contexts = await suggest_cmab_contexts(
+            CMABContextSuggestionRequest(**shared, prior_type="normal")
+        )
+        try:
+            cmab_contexts = [CMABArmContext(**ctx) for ctx in contexts]
+        except (TypeError, ValueError) as e:
+            # The model's contexts did not match the CMABArmContext schema.
+            raise HTTPException(
+                status_code=502, detail=f"Invalid Gemini response: {e}"
+            ) from e
+        arms = await suggest_cmab_arms(
+            CMABArmsSuggestionRequest(
+                **shared, prior_type="normal", contexts=cmab_contexts
+            )
+        )
+
+    return CompleteExperimentResponse(
+        name=experiment_fields.name,
+        description=experiment_fields.description,
+        experiment_type=experiment_fields.experiment_type,
+        arms=arms,
+        contexts=contexts,
+    )
diff --git a/backend/app/ai_helpers/schemas.py b/backend/app/ai_helpers/schemas.py
new file mode 100644
index 0000000..853aa76
--- /dev/null
+++ b/backend/app/ai_helpers/schemas.py
@@ -0,0 +1,91 @@
+from typing import Any, Optional
+
+from pydantic import BaseModel
+
+from ..experiments.schemas import ExperimentsEnum
+
+
+class ExperimentAIGenerateRequest(BaseModel):
+    """Request model for experiment generation."""
+
+    goal: str
+    outcome: str
+    num_variants: int
+
+
+class ExperimentAIGenerateResponse(BaseModel):
+    """Response model for experiment generation."""
+
+    name: str
+    description: str
+    experiment_type: str
+
+
+class baysABArmsSuggestionRequest(BaseModel):
+    """Request model for Bayesian A/B arms suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    reward_type: str
+
+
+class CompleteExperimentResponse(BaseModel):
+    """Response model for complete experiment generation."""
+
+    name: str
+    description: str
+    experiment_type: str
+    arms: 
list[dict[str, Any]]
+    contexts: Optional[list[dict[str, Any]]] = None
+
+
+class MABArmsSuggestionRequest(BaseModel):
+    """Request model for MAB arms suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    prior_type: str
+    reward_type: str
+
+
+class CMABContextSuggestionRequest(BaseModel):
+    """Request model for CMAB context suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    prior_type: str
+    reward_type: str
+
+
+class CMABArmContext(BaseModel):
+    """Model for CMAB arm context."""
+
+    name: str
+    description: str
+    value_type: str
+
+
+class CMABArmsSuggestionRequest(BaseModel):
+    """Request model for CMAB arms suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    prior_type: str
+    reward_type: str
+    contexts: list[CMABArmContext]
diff --git a/backend/requirements.txt b/backend/requirements.txt
index b762ddb..7c6fa22 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -15,3 +15,6 @@ sqlalchemy[asyncio]==2.0.20
 uvicorn==0.23.2
 boto3==1.37.25
 pydantic[email]==2.11.3
+# google-api-core provides the google.api_core package imported by ai_helpers.
+google-api-core==2.24.2
+google-genai==1.20.0
diff --git a/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx b/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx
new file mode 100644
index 0000000..cbf84b2
--- /dev/null
+++ b/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx
@@ -0,0 +1,296 @@
+"use client";
+import { useState, useEffect } from "react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Textarea } from "@/components/ui/textarea";
+import { useRouter } from "next/navigation";
+import { useExperimentStore } from "../../store/useExperimentStore";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { HelpCircle, 
Sparkles, ArrowRight, Wand2, FlaskConical } from "lucide-react"; +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; +import { Separator } from "@/components/ui/separator"; +import { + Breadcrumb, + BreadcrumbItem, + BreadcrumbLink, + BreadcrumbList, + BreadcrumbPage, + BreadcrumbSeparator, +} from "@/components/ui/breadcrumb"; + +export default function AIWizardPage() { + const router = useRouter(); + const { + updateName, + updateDescription, + updateMethodType, + updateArms, + updateContexts, + aiWizardState, + updateAIGoal, + updateAIOutcome, + updateAINumVariants, + resetState + } = useExperimentStore(); + + + useEffect(() => { + resetState(); + }, []); + + const [isGenerating, setIsGenerating] = useState(false); + + const handleGenerate = async () => { + setIsGenerating(true); + try { + const response = await fetch('http://localhost:8000/ai_helpers/generate-whole-experiment', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + goal: aiWizardState.goal, + outcome: aiWizardState.outcome, + num_variants: aiWizardState.numVariants, + }), + }); + + if (response.ok) { + const aiData = await response.json(); + + updateName(aiData.name); + updateDescription(aiData.description); + updateMethodType(aiData.experiment_type); + + if (aiData.arms && Array.isArray(aiData.arms)) { + updateArms(aiData.arms); + } + + if (aiData.contexts && Array.isArray(aiData.contexts)) { + updateContexts(aiData.contexts); + } + + router.push("/experiments/add"); + } else { + console.error("AI generation failed with status:", response.status); + } + } catch (error) { + console.error("AI generation failed:", error); + } finally { + setIsGenerating(false); + } + }; + + const isFormValid = aiWizardState.goal.trim() && aiWizardState.outcome.trim() && aiWizardState.numVariants >= 2; + + return ( + +
+ + + + Experiments + + + + Create + + + AI Wizard + + + +
+
+

AI Experiment Wizard

+
+

+ Answer a few questions and we'll design your experiment +

+
+ +
+
+ + +
+ + Experiment Details +
+ + Answer these questions to help AI design your experiment + +
+ +
+
+ + + + + + +

+ Examples: Increase sign-ups by testing a new onboarding flow; reduce drop-off by changing message timing; improve engagement by using a casual tone for youth users. +

+
+
+
+