Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 33 additions & 40 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@

from typing import List, Dict, Any
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, MIPRO, MIPROv2, COPRO, BootstrapFinetune
from pydantic import create_model
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, MIPROv2, COPRO, GEPA, BootstrapFinetune

# List of supported Groq models
SUPPORTED_GROQ_MODELS = [
Expand All @@ -35,26 +34,18 @@ def create_custom_signature(input_fields: List[str], output_fields: List[str], i
fields = {}
for i, field in enumerate(input_fields):
if i < len(input_descs) and input_descs[i]:
fields[field] = (str, dspy.InputField(default=..., desc=input_descs[i], json_schema_extra={"__dspy_field_type": "input"}))
fields[field] = (str, dspy.InputField(desc=input_descs[i]))
else:
fields[field] = (str, dspy.InputField(default=..., json_schema_extra={"__dspy_field_type": "input"}))
fields[field] = (str, dspy.InputField())

for i, field in enumerate(output_fields):
if i < len(output_descs) and output_descs[i]:
fields[field] = (str, dspy.OutputField(default=..., desc=output_descs[i], json_schema_extra={"__dspy_field_type": "output"}))
fields[field] = (str, dspy.OutputField(desc=output_descs[i]))
else:
fields[field] = (str, dspy.OutputField(default=..., json_schema_extra={"__dspy_field_type": "output"}))

CustomSignatureModel = create_model('CustomSignatureModel', **fields)

class CustomSignature(dspy.Signature, CustomSignatureModel):
"""
{instructions}
"""

CustomSignature.__doc__ = CustomSignature.__doc__.format(instructions=instructions)

return CustomSignature
fields[field] = (str, dspy.OutputField())

# DSPy supports building signatures directly from a field dict
return dspy.Signature(fields, instructions)

def generate_human_readable_id(input_fields: List[str], output_fields: List[str], dspy_module: str, llm_model: str, teacher_model: str, optimizer: str, instructions: str) -> str:
# Create a signature-based name
Expand Down Expand Up @@ -100,9 +91,12 @@ def forward(self, **kwargs):
class CustomChainOfThoughtWithHintModule(dspy.Module):
def __init__(self):
super().__init__()
self.cot_with_hint = dspy.ChainOfThought(CustomSignature)
# dspy.ChainOfThoughtWithHint was removed in DSPy 3.x, and extra kwargs not in the
# signature are now ignored, so add the hint to the signature as an input field
signature_with_hint = CustomSignature.append("hint", dspy.InputField(desc="A hint to guide the reasoning"), str)
self.cot_with_hint = dspy.ChainOfThought(signature_with_hint)
self.hint = hint

def forward(self, **kwargs):
# Inject the hint into the kwargs
kwargs['hint'] = self.hint
Expand All @@ -121,7 +115,7 @@ def compile_program(input_fields: List[str], output_fields: List[str], dspy_modu
elif llm_model in SUPPORTED_GROQ_MODELS:
lm = dspy.LM(f'groq/{llm_model}', api_key=os.environ.get("GROQ_API_KEY"))
elif llm_model in SUPPORTED_GOOGLE_MODELS:
lm = dspy.LM(f'google/{llm_model}', api_key=os.environ.get("GOOGLE_API_KEY"))
lm = dspy.LM(f'gemini/{llm_model}', api_key=os.environ.get("GOOGLE_API_KEY"))
else:
raise ValueError(f"Unsupported LLM model: {llm_model}")

Expand All @@ -139,7 +133,7 @@ def compile_program(input_fields: List[str], output_fields: List[str], dspy_modu
elif teacher_model in SUPPORTED_GROQ_MODELS:
teacher_lm = dspy.LM(f'groq/{teacher_model}', api_key=os.environ.get("GROQ_API_KEY"))
elif teacher_model in SUPPORTED_GOOGLE_MODELS:
teacher_lm = dspy.LM(f'google/{teacher_model}', api_key=os.environ.get("GOOGLE_API_KEY"))
teacher_lm = dspy.LM(f'gemini/{teacher_model}', api_key=os.environ.get("GOOGLE_API_KEY"))
else:
raise ValueError(f"Unsupported teacher model: {teacher_model}")

Expand Down Expand Up @@ -326,8 +320,9 @@ def metric(gold, pred, trace=None):
kwargs = dict(num_threads=1, display_progress=True, display_table=1)

# Evaluate the module to establish a baseline
# Evaluate now returns an EvaluationResult object, so extract the score
baseline_evaluate = Evaluate(metric=metric, devset=devset, num_threads=1)
baseline_score = baseline_evaluate(module)
baseline_score = baseline_evaluate(module).score

# Set up the optimizer
if optimizer == "BootstrapFewShot":
Expand All @@ -337,39 +332,37 @@ def metric(gold, pred, trace=None):
teleprompter = BootstrapFewShotWithRandomSearch(metric=metric, teacher_settings=dict(lm=teacher_lm), num_threads=1)
compiled_program = teleprompter.compile(module, trainset=trainset, valset=devset)
elif optimizer == "COPRO":
teleprompter = COPRO(metric=metric, teacher_settings=dict(lm=teacher_lm))
teleprompter = COPRO(metric=metric, prompt_model=teacher_lm)
compiled_program = teleprompter.compile(module, trainset=trainset, eval_kwargs=kwargs)
elif optimizer == "MIPRO":
teleprompter = MIPRO(metric=metric, teacher_settings=dict(lm=teacher_lm), prompt_model=teacher_lm, task_model=lm)
num_trials = 10 # Adjust this value as needed
max_bootstrapped_demos = 5 # Adjust this value as needed
max_labeled_demos = 5 # Adjust this value as needed
compiled_program = teleprompter.compile(module, trainset=trainset, num_trials=num_trials,
max_bootstrapped_demos=max_bootstrapped_demos,
max_labeled_demos=max_labeled_demos,
eval_kwargs=kwargs, requires_permission_to_run=False)
elif optimizer == "MIPROv2":
teleprompter = MIPROv2(metric=metric, prompt_model=lm, task_model=teacher_lm, num_candidates=10, init_temperature=1.0)
# MIPRO was removed in DSPy 3.x; MIPROv2 now sizes its search via the 'auto' setting
teleprompter = MIPROv2(metric=metric, prompt_model=teacher_lm, task_model=lm, teacher_settings=dict(lm=teacher_lm), auto="medium", init_temperature=1.0)

num_batches = 30
max_bootstrapped_demos = 8
max_labeled_demos = 16
compiled_program = teleprompter.compile(
module,
trainset=trainset,
valset=devset,
num_batches=num_batches,
max_bootstrapped_demos=max_bootstrapped_demos,
max_labeled_demos=max_labeled_demos,
eval_kwargs=kwargs,
requires_permission_to_run=False
max_labeled_demos=max_labeled_demos
)
elif optimizer == "GEPA":
# GEPA metrics receive extra arguments (pred_name, pred_trace) and can return
# textual feedback alongside the score, so wrap the selected metric accordingly
def gepa_metric(gold, pred, trace=None, pred_name=None, pred_trace=None):
    """Adapt the selected ``metric`` to the five-argument form passed to GEPA.

    ``pred_name`` and ``pred_trace`` are accepted but not used; only ``gold``,
    ``pred`` and ``trace`` are forwarded to ``metric``.

    Returns:
        A ``dspy.Prediction`` carrying the metric result coerced to ``float``
        as ``score`` and a one-sentence ``feedback`` string quoting that score.
    """
    # Coerce once so the score field and the feedback text show the same value.
    numeric_score = float(metric(gold, pred, trace))
    feedback_text = f"The score for this prediction was {numeric_score}."
    return dspy.Prediction(score=numeric_score, feedback=feedback_text)

# GEPA reflects on prompts in natural language, so use the stronger teacher model as the reflection LM
teleprompter = GEPA(metric=gepa_metric, auto="light", reflection_lm=teacher_lm, num_threads=1)
compiled_program = teleprompter.compile(module, trainset=trainset, valset=devset)
else:
raise ValueError(f"Unsupported optimizer: {optimizer}")

# Evaluate the compiled program
evaluate = Evaluate(metric=metric, devset=devset, num_threads=1)
score = evaluate(compiled_program)
score = evaluate(compiled_program).score

print("Evaluation Score:")
print(score)
Expand Down
4 changes: 2 additions & 2 deletions interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,10 +461,10 @@ def generate_response(human_readable_id, row_selector, df):

with gr.Row():
optimizer = gr.Dropdown(
["BootstrapFewShot", "BootstrapFewShotWithRandomSearch", "MIPRO", "MIPROv2", "COPRO"],
["BootstrapFewShot", "BootstrapFewShotWithRandomSearch", "MIPROv2", "COPRO", "GEPA"],
label="Optimizer",
value="BootstrapFewShot",
info="Choose optimization strategy: None (no optimization), BootstrapFewShot (small datasets, ~10 examples) uses few-shot learning; BootstrapFewShotWithRandomSearch (medium, ~50) adds randomized search; MIPRO, MIPROv2, and COPRO (large, 300+) also optimize the prompt instructions.",
info="Choose optimization strategy: None (no optimization), BootstrapFewShot (small datasets, ~10 examples) uses few-shot learning; BootstrapFewShotWithRandomSearch (medium, ~50) adds randomized search; MIPROv2 and COPRO (large, 300+) also optimize the prompt instructions; GEPA uses the teacher model to reflectively evolve the prompt instructions.",
interactive=True # Add this line
)
with gr.Column():
Expand Down
7 changes: 4 additions & 3 deletions webui.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ else
echo "No .env file found. Make sure to set any necessary environment variables manually."
fi

# Install required packages if not already installed
if ! pip freeze | grep -q "gradio\|dspy\|pandas\|openai\|anthropic\|groq\|sklearn|google-generativeai"; then
# Install required packages if not already installed (the 'dspy-ai' package was renamed to 'dspy' in 3.x)
if ! pip freeze | grep -q "^dspy==3"; then
echo "Installing required packages..."
pip install gradio dspy-ai pandas openai anthropic groq scikit-learn google-generativeai
pip uninstall -y dspy-ai 2>/dev/null || true
pip install gradio "dspy>=3.2.1" pandas openai anthropic groq scikit-learn google-generativeai
else
echo "Required packages are already installed."
fi
Expand Down