diff --git a/.gitignore b/.gitignore
index c803427..16bd050 100644
--- a/.gitignore
+++ b/.gitignore
@@ -158,3 +158,9 @@ dmypy.json
# PyCharm
.idea/
+
+# pnpm
+pnpm-lock.yaml
+
+.yarn/*
+!.yarn/patches
diff --git a/backend/app/__init__.py b/backend/app/__init__.py
index 88e9dfd..1e3688b 100644
--- a/backend/app/__init__.py
+++ b/backend/app/__init__.py
@@ -6,6 +6,7 @@
from redis import asyncio as aioredis
from . import auth, messages
+from .ai_helpers.routers import router as ai_helpers_router
from .config import BACKEND_ROOT_PATH, DOMAIN, REDIS_HOST
from .experiments.routers import router as experiments_router
from .users.routers import (
@@ -62,5 +63,6 @@ def create_app() -> FastAPI:
app.include_router(users_router)
app.include_router(messages.router)
app.include_router(workspaces_router)
+ app.include_router(ai_helpers_router)
return app
diff --git a/backend/app/ai_helpers/prompts.py b/backend/app/ai_helpers/prompts.py
new file mode 100644
index 0000000..6f4fb39
--- /dev/null
+++ b/backend/app/ai_helpers/prompts.py
@@ -0,0 +1,118 @@
+"""This module contains prompts for the AI Helper LLM tasks."""
+
+import textwrap
+
+__all__ = [
+    "SUGGEST_BAYS_AB_ARMS",
+    "SUGGEST_MAB_ARMS",
+    "SUGGEST_CMAB_CONTEXTS",
+    "SUGGEST_CMAB_ARMS",
+    "GENERATE_EXPERIMENT_FIELDS",
+]
+
+SUGGEST_BAYS_AB_ARMS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details below, suggest concise
+    names and descriptions for each arm (variant) of the experiment.
+    For Bayesian A/B tests, there are usually two arms: a control
+    (existing/baseline) and a treatment (new/changed feature).
+    For each arm, also suggest reasonable initial values for mu_init (mean prior,
+    between 0 and 1 for rates) and sigma_init (standard deviation).
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, mu_init, sigma_init.
+    No explanation, no markdown.
+    """
+).strip()
+
+SUGGEST_MAB_ARMS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details below, suggest concise
+    names and descriptions for each arm (variant) of the experiment.
+    For multi-armed bandit (MAB) experiments, use the number of variants provided.
+    For each arm, also suggest reasonable initial values for alpha_init and
+    beta_init (for beta prior) or mu_init and sigma_init (for normal prior),
+    depending on the prior_type.
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, and the appropriate prior parameters.
+    No explanation, no markdown.
+    """
+).strip()
+
+SUGGEST_CMAB_CONTEXTS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details below, suggest relevant
+    user contexts for a Contextual Bandit (CMAB) experiment.
+    Contexts are user attributes (e.g., age, location, engagement level) that
+    might influence how they respond to different variants.
+    For each context, provide a concise 'name', a 'description', and a
+    'value_type' ('binary' or 'real-valued').
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, value_type.
+    No explanation, no markdown.
+    """
+).strip()
+
+SUGGEST_CMAB_ARMS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations run
+    digital experiments. Given the experiment details and user contexts below,
+    suggest concise names and descriptions for each arm (variant) of a
+    Contextual Bandit (CMAB) experiment.
+    The arms should be distinct variations of a feature that is being tested.
+    For each arm, also suggest reasonable initial values for mu_init (mean prior)
+    and sigma_init (standard deviation prior).
+    Respond ONLY with a valid JSON array. Each array element should be an object
+    with keys: name, description, mu_init, sigma_init.
+    No explanation, no markdown.
+    """
+).strip()
+
+GENERATE_EXPERIMENT_FIELDS = textwrap.dedent(
+    """
+    You are an assistant for a tool that helps social sector organizations
+    run digital experiments. Below are documentation excerpts about
+    different experiment types. Use this context to choose the most
+    appropriate experiment type and generate relevant names and
+    descriptions.
+    -----
+    # Bayesian A/B Testing
+    Bayesian A/B testing compares two variants: treatment (e.g. a new feature)
+    and control (e.g. an existing feature). This is a useful experiment when
+    you need intuitive probability statements about which arm is better for
+    making downstream decisions, and have the resources to balance how your
+    arms are allocated to your experimental cohort. Choose this over the bandit
+    algorithms when you're trying to make a 'permanent' decision about which
+    variant is better, as opposed to trying to dynamically pick the
+    best performing variant as data comes in.
+    -----
+    # Contextual Bandits (CMABs)
+    Contextual bandits (CMABs), similarly to multi-armed bandits (MABs), are
+    useful for running experiments where you have multiple variants of a feature
+    / implementation that you want to test. However, the key difference is that
+    contextual bandits take information about the end-user (e.g. gender, age,
+    engagement history) into account while converging to the best-performing
+    variant. Thus, rather than having a single best-performing variant at the
+    end of an experiment, you instead have the best-performing variant that
+    depends on the user context.
+    -----
+    # Multi-Armed Bandits (MABs)
+    Multi-armed Bandits (MABs) are useful for running experiments where you have
+    multiple variants of a feature / implementation that you want to test, and
+    want to automatically converge to the variant that produces the best
+    results. Since we update the probabilities for the variants with every
+    result observation, at any given time you can observe the updated
+    probability of success for every arm. The best-performing variant at the
+    end of the experiment is the one with the highest probability.
+    -----
+    Given a goal, outcome, and number of variants, generate:
+    1. A concise and descriptive experiment name (max 8 words).
+    2. A detailed description of the experiment.
+    3. The most appropriate experiment type: 'mab' (multi-armed bandit),
+    'bayes_ab' (Bayesian A/B test), or 'cmab' (contextual bandit).
+    Respond ONLY with a valid JSON object with keys: name, description,
+    experiment_type. No explanation, no markdown.
+    """
+).strip()
diff --git a/backend/app/ai_helpers/routers.py b/backend/app/ai_helpers/routers.py
new file mode 100644
index 0000000..1a25647
--- /dev/null
+++ b/backend/app/ai_helpers/routers.py
@@ -0,0 +1,204 @@
+"""FastAPI routes that use Gemini to suggest experiment configurations."""
+
+import json
+import os
+from typing import Any, Optional
+
+from fastapi import APIRouter, HTTPException
+from google import genai
+# `google.api_core.__init__` does not re-export its submodules, so the
+# exceptions module must be imported explicitly (documented google-api-core
+# usage); `from google import api_core` alone would raise AttributeError on
+# `api_core.exceptions` at request time.
+from google.api_core import exceptions as api_exceptions
+from google.genai import types
+
+from . import prompts
+from .schemas import (
+    CMABArmContext,
+    CMABArmsSuggestionRequest,
+    CMABContextSuggestionRequest,
+    CompleteExperimentResponse,
+    ExperimentAIGenerateRequest,
+    ExperimentAIGenerateResponse,
+    MABArmsSuggestionRequest,
+    baysABArmsSuggestionRequest,
+)
+
+router = APIRouter(prefix="/ai_helpers", tags=["AI Helpers"])
+
+# Single Gemini model used by every endpoint below.
+GEMINI_MODEL = "gemini-2.0-flash"
+
+api_key = os.environ.get("GEMINI_API_KEY")
+if not api_key:
+    raise ValueError("GEMINI_API_KEY environment variable not set.")
+
+client = genai.Client(api_key=api_key)
+
+
+def _generate_json_list(
+    system_instruction: str, user_prompt: str, temperature: float
+) -> list[dict[str, Any]]:
+    """Call Gemini with a JSON-only system prompt and parse the array reply.
+
+    Raises HTTPException (500) when the Gemini API call fails.
+    """
+    try:
+        response = client.models.generate_content(
+            model=GEMINI_MODEL,
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                max_output_tokens=500,
+                temperature=temperature,
+                response_mime_type="application/json",
+            ),
+        )
+        return json.loads(response.text)
+    except api_exceptions.GoogleAPICallError as e:
+        raise HTTPException(status_code=500, detail=f"Gemini API error: {e}") from e
+
+
+@router.post("/suggestBaysAB-arms")
+async def suggest_arms(
+    request: baysABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for a Bayesian A/B test."""
+    return _generate_json_list(
+        prompts.SUGGEST_BAYS_AB_ARMS, request.model_dump_json(), temperature=0.1
+    )
+
+
+@router.post("/suggestMAB-arms")
+async def suggest_mab_arms(
+    request: MABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for a Multi-Armed Bandit experiment."""
+    return _generate_json_list(
+        prompts.SUGGEST_MAB_ARMS, request.model_dump_json(), temperature=0.9
+    )
+
+
+@router.post("/suggestCMAB-contexts")
+async def suggest_cmab_contexts(
+    request: CMABContextSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest contexts for a Contextual Multi-Armed Bandit experiment."""
+    return _generate_json_list(
+        prompts.SUGGEST_CMAB_CONTEXTS, request.model_dump_json(), temperature=0.7
+    )
+
+
+@router.post("/suggestCMAB-arms")
+async def suggest_cmab_arms(
+    request: CMABArmsSuggestionRequest,
+) -> list[dict[str, Any]]:
+    """Suggest arms for a Contextual Multi-Armed Bandit experiment."""
+    return _generate_json_list(
+        prompts.SUGGEST_CMAB_ARMS, request.model_dump_json(), temperature=0.7
+    )
+
+
+async def generate_experiment_fields_logic(
+    data: ExperimentAIGenerateRequest,
+) -> ExperimentAIGenerateResponse:
+    """Generate experiment name, description, and type based on user inputs.
+
+    Raises RuntimeError when the Gemini API call fails.
+    """
+    try:
+        user_prompt = data.model_dump_json()
+        response = client.models.generate_content(
+            model=GEMINI_MODEL,
+            contents=[user_prompt],
+            config=types.GenerateContentConfig(
+                system_instruction=prompts.GENERATE_EXPERIMENT_FIELDS,
+                max_output_tokens=500,
+                temperature=0.1,
+                response_mime_type="application/json",
+                # Constrain Gemini's output to the response model's schema.
+                response_schema=ExperimentAIGenerateResponse,
+            ),
+        )
+        return ExperimentAIGenerateResponse(**json.loads(response.text))
+    except api_exceptions.GoogleAPICallError as e:
+        raise RuntimeError(f"Gemini API error: {e}") from e
+
+
+@router.post("/generate-whole-experiment", response_model=CompleteExperimentResponse)
+async def generate_whole_experiment(
+    data: ExperimentAIGenerateRequest,
+) -> CompleteExperimentResponse:
+    """Generate a complete experiment configuration (fields, arms, contexts)."""
+    try:
+        experiment_fields = await generate_experiment_fields_logic(data)
+
+        arms: list[dict[str, Any]] = []
+        contexts: Optional[list[dict[str, Any]]] = None
+
+        if experiment_fields.experiment_type == "bayes_ab":
+            # Generate arms for Bayesian A/B test
+            bayes_request = baysABArmsSuggestionRequest(
+                name=experiment_fields.name,
+                description=experiment_fields.description,
+                methodType=experiment_fields.experiment_type,
+                goal=data.goal,
+                outcome=data.outcome,
+                numVariants=data.num_variants,
+                reward_type="binary",  # Default for A/B tests
+            )
+            arms = await suggest_arms(bayes_request)
+
+        elif experiment_fields.experiment_type == "mab":
+            # Generate arms for Multi-Armed Bandit
+            mab_request = MABArmsSuggestionRequest(
+                name=experiment_fields.name,
+                description=experiment_fields.description,
+                methodType=experiment_fields.experiment_type,
+                goal=data.goal,
+                outcome=data.outcome,
+                numVariants=data.num_variants,
+                prior_type="beta",  # Default prior type
+                reward_type="binary",  # Default reward type
+            )
+            arms = await suggest_mab_arms(mab_request)
+
+        elif experiment_fields.experiment_type == "cmab":
+            # Generate contexts first for Contextual Bandit
+            context_request = CMABContextSuggestionRequest(
+                name=experiment_fields.name,
+                description=experiment_fields.description,
+                methodType=experiment_fields.experiment_type,
+                goal=data.goal,
+                outcome=data.outcome,
+                numVariants=data.num_variants,
+                prior_type="normal",  # Default prior type
+                reward_type="binary",  # Default reward type
+            )
+            contexts = await suggest_cmab_contexts(context_request)
+
+            # Generate arms for Contextual Bandit
+            if contexts is not None:
+                cmab_contexts = [CMABArmContext(**ctx) for ctx in contexts]
+                cmab_request = CMABArmsSuggestionRequest(
+                    name=experiment_fields.name,
+                    description=experiment_fields.description,
+                    methodType=experiment_fields.experiment_type,
+                    goal=data.goal,
+                    outcome=data.outcome,
+                    numVariants=data.num_variants,
+                    prior_type="normal",  # Default prior type
+                    reward_type="binary",  # Default reward type
+                    contexts=cmab_contexts,
+                )
+                arms = await suggest_cmab_arms(cmab_request)
+
+        return CompleteExperimentResponse(
+            name=experiment_fields.name,
+            description=experiment_fields.description,
+            experiment_type=experiment_fields.experiment_type,
+            arms=arms,
+            contexts=contexts,
+        )
+
+    except (RuntimeError, api_exceptions.GoogleAPICallError) as e:
+        raise HTTPException(
+            status_code=500, detail=f"Error generating complete experiment: {e}"
+        ) from e
diff --git a/backend/app/ai_helpers/schemas.py b/backend/app/ai_helpers/schemas.py
new file mode 100644
index 0000000..853aa76
--- /dev/null
+++ b/backend/app/ai_helpers/schemas.py
@@ -0,0 +1,98 @@
+"""Pydantic schemas for the AI Helper request/response payloads."""
+
+from typing import Any, Optional
+
+from pydantic import BaseModel
+
+from ..experiments.schemas import ExperimentsEnum
+
+
+class ExperimentAIGenerateRequest(BaseModel):
+    """Request model for experiment generation."""
+
+    goal: str
+    outcome: str
+    num_variants: int
+
+
+class ExperimentAIGenerateResponse(BaseModel):
+    """Response model for experiment generation."""
+
+    name: str
+    description: str
+    experiment_type: str
+
+
+class baysABArmsSuggestionRequest(BaseModel):
+    """Request model for Bayesian A/B arms suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    reward_type: str
+
+
+# PEP 8-compliant alias for the lowercase class name above; the original name
+# is kept so existing imports keep working. Prefer this name in new code.
+BayesABArmsSuggestionRequest = baysABArmsSuggestionRequest
+
+
+class CompleteExperimentResponse(BaseModel):
+    """Response model for complete experiment generation."""
+
+    name: str
+    description: str
+    experiment_type: str
+    arms: list[dict[str, Any]]
+    contexts: Optional[list[dict[str, Any]]] = None
+
+
+class MABArmsSuggestionRequest(BaseModel):
+    """Request model for MAB arms suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    prior_type: str
+    reward_type: str
+
+
+class CMABContextSuggestionRequest(BaseModel):
+    """Request model for CMAB context suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    prior_type: str
+    reward_type: str
+
+
+class CMABArmContext(BaseModel):
+    """Model for CMAB arm context."""
+
+    name: str
+    description: str
+    value_type: str
+
+
+class CMABArmsSuggestionRequest(BaseModel):
+    """Request model for CMAB arms suggestion."""
+
+    name: str
+    description: str
+    methodType: ExperimentsEnum
+    goal: str
+    outcome: str
+    numVariants: int
+    prior_type: str
+    reward_type: str
+    contexts: list[CMABArmContext]
diff --git a/backend/requirements.txt b/backend/requirements.txt
index b762ddb..7c6fa22 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -15,3 +15,5 @@ sqlalchemy[asyncio]==2.0.20
uvicorn==0.23.2
boto3==1.37.25
pydantic[email]==2.11.3
+google-api-core==2.24.2
+google-genai==1.20.0
diff --git a/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx b/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx
new file mode 100644
index 0000000..cbf84b2
--- /dev/null
+++ b/frontend/src/app/(protected)/experiments/add/ai-wizard/page.tsx
@@ -0,0 +1,296 @@
+"use client";
+import { useState, useEffect } from "react";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Textarea } from "@/components/ui/textarea";
+import { useRouter } from "next/navigation";
+import { useExperimentStore } from "../../store/useExperimentStore";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card";
+import { HelpCircle, Sparkles, ArrowRight, Wand2, FlaskConical } from "lucide-react";
+import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip";
+import { Separator } from "@/components/ui/separator";
+import {
+ Breadcrumb,
+ BreadcrumbItem,
+ BreadcrumbLink,
+ BreadcrumbList,
+ BreadcrumbPage,
+ BreadcrumbSeparator,
+} from "@/components/ui/breadcrumb";
+
+export default function AIWizardPage() {
+ const router = useRouter();
+ const {
+ updateName,
+ updateDescription,
+ updateMethodType,
+ updateArms,
+ updateContexts,
+ aiWizardState,
+ updateAIGoal,
+ updateAIOutcome,
+ updateAINumVariants,
+ resetState
+ } = useExperimentStore();
+
+
+ useEffect(() => {
+ resetState();
+ }, []);
+
+ const [isGenerating, setIsGenerating] = useState(false);
+
+ const handleGenerate = async () => {
+ setIsGenerating(true);
+ try {
+ const response = await fetch('http://localhost:8000/ai_helpers/generate-whole-experiment', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ goal: aiWizardState.goal,
+ outcome: aiWizardState.outcome,
+ num_variants: aiWizardState.numVariants,
+ }),
+ });
+
+ if (response.ok) {
+ const aiData = await response.json();
+
+ updateName(aiData.name);
+ updateDescription(aiData.description);
+ updateMethodType(aiData.experiment_type);
+
+ if (aiData.arms && Array.isArray(aiData.arms)) {
+ updateArms(aiData.arms);
+ }
+
+ if (aiData.contexts && Array.isArray(aiData.contexts)) {
+ updateContexts(aiData.contexts);
+ }
+
+ router.push("/experiments/add");
+ } else {
+ console.error("AI generation failed with status:", response.status);
+ }
+ } catch (error) {
+ console.error("AI generation failed:", error);
+ } finally {
+ setIsGenerating(false);
+ }
+ };
+
+ const isFormValid = aiWizardState.goal.trim() && aiWizardState.outcome.trim() && aiWizardState.numVariants >= 2;
+
+ return (
+
+
+ Answer a few questions and we'll design your experiment
+
+
+
+
+
+
+
+
+
+ Experiment Details
+
+
+ Answer these questions to help AI design your experiment
+
+
+
+
+
+
+
+
+
+
+
+
+ Examples: Increase sign-ups by testing a new onboarding flow; reduce drop-off by changing message timing; improve engagement by using a casual tone for youth users.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Examples: Completion rate (% who finish), click rate (% who click), time spent, engagement score, drop-off rate. What will you do if the outcome improves?
+
+
+
+
+ updateAIOutcome(e.target.value)}
+ placeholder="E.g., Completion rate, engagement score, time spent, drop-off rate, clicks. If completion rate improves, roll out new onboarding to all users."
+ className="text-base"
+ />
+
+ This should be something you can measure on your platform.
+
+
+
+ Required field
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Variants are the different versions you want to compare. For example: Variant 1 - current button, Variant 2 - new red button, Variant 3 - new blue button.
+
+
+
+
+ updateAINumVariants(Number(e.target.value))}
+ className="w-24 text-base text-center"
+ placeholder="E.g., 2 for A/B test, 3 for multivariate"
+ />
+
+ Usually 2-4 variants work best. More variants need more users to get reliable results.
+
+ Please review all the details below before creating your experiment.
+
+
+
+ {!isFormValid && (
+
+
+
+ Some required information is missing. Please go back to previous steps to complete the setup.
+
+
+ )}
+
+
+
+
+
+
+ Basic Information
+
+
+
+