diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index ad06953..3cfaccc 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -5,7 +5,7 @@ Auto-generated from `leaderboards/all.csv` by `scripts/update_contributors.py`. Thanks to everyone who has contributed benchmark runs: -- **LearningCircuit** — 6 submissions +- **LearningCircuit** — 8 submissions - **Daniel Petti** — 1 submission - **kwhyte7** — 1 submission diff --git a/README.md b/README.md index a4cfd08..ab42427 100644 --- a/README.md +++ b/README.md @@ -220,7 +220,7 @@ add `contributor: your-handle` to the YAML. Thanks to everyone who has contributed benchmark runs: -- **LearningCircuit** — 6 submissions +- **LearningCircuit** — 8 submissions - **Daniel Petti** — 1 submission - **kwhyte7** — 1 submission diff --git a/hf_README.md b/hf_README.md index d0bfc8c..00ea8db 100644 --- a/hf_README.md +++ b/hf_README.md @@ -125,7 +125,7 @@ is prohibited. Thanks to everyone who has contributed benchmark runs: -- **LearningCircuit** — 6 submissions +- **LearningCircuit** — 8 submissions - **Daniel Petti** — 1 submission - **kwhyte7** — 1 submission diff --git a/leaderboards/all.csv b/leaderboards/all.csv index 217cf2a..a6c2731 100644 --- a/leaderboards/all.csv +++ b/leaderboards/all.csv @@ -1,6 +1,8 @@ dataset,model,model_provider,quantization,strategy,search_engine,accuracy_pct,accuracy_raw,correct,total,iterations,questions_per_iteration,avg_time_per_question,total_tokens_used,temperature,context_window,max_tokens,hardware_gpu,hardware_ram,hardware_cpu,evaluator_model,evaluator_provider,ldr_version,date_tested,contributor,contributor_source,notes,source_file SimpleQA,qwen3.6:latest,OLLAMA,,langgraph_agent,serper,95.7,95.7% (287/300),287,300,10,1,3m 19s,,0.7,36352,30000,,,,qwen3.6:latest,ollama,1.5.6,2026-04-18,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/qwen3.6-latest_2026-04-18.yaml +SimpleQA,qwen3.6:27b,OLLAMA,,langgraph-agent,serper,95.7,95.7% (287/300),287,300,10,1,1m 54s,,0.7,20480,30000,,,,qwen3.6:27b,ollama,1.6.7,2026-05-01,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/qwen3.6-27b_2026-05-01.yaml SimpleQA,qwen3.5:9b,OLLAMA,,langgraph_agent,serper,91.2,91.2% (182/200),182,200,10,1,1m 18s,,0.7,36352,30000,,,,qwen3.5:9b,ollama,1.5.6,2026-04-06,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-06.yaml +SimpleQA,gemma4:31b,OLLAMA,,langgraph-agent,serper,90.3,90.3% (271/300),271,300,10,1,6m 25s,,0.7,36352,30000,,,,gemma4:31b,ollama,1.5.6,2026-04-22,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/gemma4-31b_2026-04-22.yaml SimpleQA,gpt-oss-120b,openai_endpoint,,langgraph-agent,serper,86.7,86.7% (26/30),26,30,3,1,1m 3s,,0.7,,30000,,,,gpt-oss-120b,openai_endpoint,1.5.6,2026-04-19,Daniel Petti,git,,results/simpleqa/langgraph-agent/serper/gpt-oss-120b_2026-04-19.yaml SimpleQA,gpt-oss:20b,OLLAMA,,langgraph_agent,serper,85.4,85.4% (295/346),295,346,10,1,30.6s,,0.7,36352,30000,,,,gpt-oss:20b,ollama,1.5.6,2026-04-12,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/gpt-oss-20b_2026-04-12.yaml xbench_deepsearch,qwen3.6:latest,OLLAMA,,langgraph_agent,serper,77.0,77.0% (77/100),77,100,10,1,8m 53s,,0.7,36352,30000,,,,qwen3.6:latest,ollama,1.5.6,2026-04-19,LearningCircuit,git,,results/xbench-deepsearch/langgraph-agent/serper/qwen3.6-latest_2026-04-19.yaml diff --git a/leaderboards/simpleqa.csv b/leaderboards/simpleqa.csv index 85c4e27..5314dc3 100644 --- a/leaderboards/simpleqa.csv +++ b/leaderboards/simpleqa.csv @@ -1,6 +1,8 @@ dataset,model,model_provider,quantization,strategy,search_engine,accuracy_pct,accuracy_raw,correct,total,iterations,questions_per_iteration,avg_time_per_question,total_tokens_used,temperature,context_window,max_tokens,hardware_gpu,hardware_ram,hardware_cpu,evaluator_model,evaluator_provider,ldr_version,date_tested,contributor,contributor_source,notes,source_file SimpleQA,qwen3.6:latest,OLLAMA,,langgraph_agent,serper,95.7,95.7% (287/300),287,300,10,1,3m 19s,,0.7,36352,30000,,,,qwen3.6:latest,ollama,1.5.6,2026-04-18,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/qwen3.6-latest_2026-04-18.yaml +SimpleQA,qwen3.6:27b,OLLAMA,,langgraph-agent,serper,95.7,95.7% (287/300),287,300,10,1,1m 54s,,0.7,20480,30000,,,,qwen3.6:27b,ollama,1.6.7,2026-05-01,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/qwen3.6-27b_2026-05-01.yaml SimpleQA,qwen3.5:9b,OLLAMA,,langgraph_agent,serper,91.2,91.2% (182/200),182,200,10,1,1m 18s,,0.7,36352,30000,,,,qwen3.5:9b,ollama,1.5.6,2026-04-06,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/qwen3.5-9b_2026-04-06.yaml +SimpleQA,gemma4:31b,OLLAMA,,langgraph-agent,serper,90.3,90.3% (271/300),271,300,10,1,6m 25s,,0.7,36352,30000,,,,gemma4:31b,ollama,1.5.6,2026-04-22,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/gemma4-31b_2026-04-22.yaml SimpleQA,gpt-oss-120b,openai_endpoint,,langgraph-agent,serper,86.7,86.7% (26/30),26,30,3,1,1m 3s,,0.7,,30000,,,,gpt-oss-120b,openai_endpoint,1.5.6,2026-04-19,Daniel Petti,git,,results/simpleqa/langgraph-agent/serper/gpt-oss-120b_2026-04-19.yaml SimpleQA,gpt-oss:20b,OLLAMA,,langgraph_agent,serper,85.4,85.4% (295/346),295,346,10,1,30.6s,,0.7,36352,30000,,,,gpt-oss:20b,ollama,1.5.6,2026-04-12,LearningCircuit,git,,results/simpleqa/langgraph-agent/serper/gpt-oss-20b_2026-04-12.yaml SimpleQA,qwen3:4b,OLLAMA,,source_based,serper,74.0,74.0% (37/50),37,50,2,2,1m 3s,,0.7,4096,30000,NVIDIA GeForce RTX 4090 [Discrete],32GB,AMD Ryzen 7 7800X3D (16) @ 5.02 GHz,,,1.3.50,2026-02-18,kwhyte7,yaml,"# Add any observations, errors, or insights here",results/simpleqa/source-based/serper/qwen3-4b_2026-02-18.yaml