From eace75cf875abe47fb9562aaa51a4ced7525bc9b Mon Sep 17 00:00:00 2001
From: Bartolomej Kozorog <bartolomej.kozorog@gmail.com>
Date: Thu, 12 Mar 2026 15:44:14 +0100
Subject: [PATCH] use foresight-v3 in model consensus notebook

---
 notebooks/e2e/model_consensus.ipynb | 423 +++++++++++++++-------------
 src/lightningrod/__init__.py        |   3 +-
 src/lightningrod/utils/models.py    |   9 +
 3 files changed, 246 insertions(+), 189 deletions(-)

diff --git a/notebooks/e2e/model_consensus.ipynb b/notebooks/e2e/model_consensus.ipynb
index 9f5ef13..e978e0c 100644
--- a/notebooks/e2e/model_consensus.ipynb
+++ b/notebooks/e2e/model_consensus.ipynb
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -43,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -77,10 +77,70 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
-   "source": "from datetime import datetime\nfrom lightningrod import (\n    NewsSeedGenerator,\n    ForwardLookingQuestionGenerator,\n    WebSearchLabeler,\n    QuestionPipeline,\n    NewsContextGenerator,\n    QuestionRenderer,\n    RolloutGenerator,\n    RolloutScorer,\n    BinaryAnswerType,\n    open_router_model,\n)\n\n# Date range — adjust these to a period ~2-3 months in the past\nSTART_DATE = datetime(2025, 11, 1)\nEND_DATE = datetime(2025, 12, 1)\n\nseed_generator = NewsSeedGenerator(\n    start_date=START_DATE,\n    end_date=END_DATE,\n    search_query=\"technology announcements\",\n)\n\nanswer_type = BinaryAnswerType()\n\nquestion_generator = ForwardLookingQuestionGenerator(\n    instructions=\"Generate forward-looking yes/no questions about technology announcements. \"\n    \"Questions should be clearly resolvable within 1-2 months.\",\n    answer_type=answer_type,\n)\n\nlabeler = WebSearchLabeler(answer_type=answer_type)\n\nrenderer = QuestionRenderer(answer_type=answer_type)\n\nmodels = [\n    open_router_model(\"openai/gpt-4.1-mini\"),\n    open_router_model(\"anthropic/claude-sonnet-4\"),\n    open_router_model(\"google/gemini-2.5-flash\"),\n]\n\ncontext_generator = NewsContextGenerator()\n\nrollout_generator = RolloutGenerator(models=models)\n\nscorer = RolloutScorer(answer_type=answer_type)\n\npipeline = QuestionPipeline(\n    seed_generator=seed_generator,\n    question_generator=question_generator,\n    context_generators=[context_generator],\n    labeler=labeler,\n    renderer=renderer,\n    rollout_generator=rollout_generator,\n    scorer=scorer,\n)"
+   "source": [
+    "from datetime import datetime\n",
+    "from lightningrod import (\n",
+    "    NewsSeedGenerator,\n",
+    "    ForwardLookingQuestionGenerator,\n",
+    "    WebSearchLabeler,\n",
+    "    QuestionPipeline,\n",
+    "    NewsContextGenerator,\n",
+    "    QuestionRenderer,\n",
+    "    RolloutGenerator,\n",
+    "    RolloutScorer,\n",
+    "    BinaryAnswerType,\n",
+    "    open_router_model,\n",
+    "    lightningrod_model,\n",
+    ")\n",
+    "\n",
+    "# Date range — adjust these to a period ~2-3 months in the past\n",
+    "START_DATE = datetime(2025, 11, 6)\n",
+    "END_DATE = datetime(2026, 3, 1)\n",
+    "\n",
+    "seed_generator = NewsSeedGenerator(\n",
+    "    start_date=START_DATE,\n",
+    "    end_date=END_DATE,\n",
+    "    search_query=\"technology announcements\",\n",
+    ")\n",
+    "\n",
+    "answer_type = BinaryAnswerType()\n",
+    "\n",
+    "question_generator = ForwardLookingQuestionGenerator(\n",
+    "    instructions=\"Generate forward-looking yes/no questions about tech announcements. \"\n",
+    "    \"Questions should be clearly resolvable within 1-2 months.\",\n",
+    "    answer_type=answer_type,\n",
+    ")\n",
+    "\n",
+    "labeler = WebSearchLabeler(answer_type=answer_type)\n",
+    "\n",
+    "renderer = QuestionRenderer(answer_type=answer_type)\n",
+    "\n",
+    "models = [\n",
+    "    open_router_model(\"openai/gpt-5.2\"),\n",
+    "    open_router_model(\"anthropic/claude-sonnet-4.6\"),\n",
+    "    open_router_model(\"google/gemini-3.1-pro-preview\"),\n",
+    "    lightningrod_model(\"foresight-v3\"),\n",
+    "]\n",
+    "\n",
+    "context_generator = NewsContextGenerator()\n",
+    "\n",
+    "rollout_generator = RolloutGenerator(models=models)\n",
+    "\n",
+    "scorer = RolloutScorer(answer_type=answer_type)\n",
+    "\n",
+    "pipeline = QuestionPipeline(\n",
+    "    seed_generator=seed_generator,\n",
+    "    question_generator=question_generator,\n",
+    "    context_generators=[context_generator],\n",
+    "    labeler=labeler,\n",
+    "    renderer=renderer,\n",
+    "    rollout_generator=rollout_generator,\n",
+    "    scorer=scorer,\n",
+    ")"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -93,13 +153,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #808000; text-decoration-color: #808000\">╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮</span>\n",
+       "<span style=\"color: #808000; text-decoration-color: #808000\">│</span>                                                                                                                 <span style=\"color: #808000; text-decoration-color: #808000\">│</span>\n",
+       "<span style=\"color: #808000; text-decoration-color: #808000\">│</span>  <span style=\"color: #808000; text-decoration-color: #808000; font-weight: bold\">&gt;&gt; Warning</span>                                                                                                     <span style=\"color: #808000; text-decoration-color: #808000\">│</span>\n",
+       "<span style=\"color: #808000; text-decoration-color: #808000\">│</span>                                                                                                                 <span style=\"color: #808000; text-decoration-color: #808000\">│</span>\n",
+       "<span style=\"color: #808000; text-decoration-color: #808000\">│</span>  Estimated cost ($60.33) exceeds current balance ($10.31). Consider adding credits before running this job.     <span style=\"color: #808000; text-decoration-color: #808000\">│</span>\n",
+       "<span style=\"color: #808000; text-decoration-color: #808000\">│</span>                                                                                                                 <span style=\"color: #808000; text-decoration-color: #808000\">│</span>\n",
+       "<span style=\"color: #808000; text-decoration-color: #808000\">╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯</span>\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "\u001b[33m╭─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮\u001b[0m\n",
+       "\u001b[33m│\u001b[0m                                                                                                                 \u001b[33m│\u001b[0m\n",
+       "\u001b[33m│\u001b[0m  \u001b[1;33m>> Warning\u001b[0m                                                                                                     \u001b[33m│\u001b[0m\n",
+       "\u001b[33m│\u001b[0m                                                                                                                 \u001b[33m│\u001b[0m\n",
+       "\u001b[33m│\u001b[0m  Estimated cost ($60.33) exceeds current balance ($10.31). Consider adding credits before running this job.     \u001b[33m│\u001b[0m\n",
+       "\u001b[33m│\u001b[0m                                                                                                                 \u001b[33m│\u001b[0m\n",
+       "\u001b[33m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ebd1c29a14fc48669015ce5ef0c0e2e9",
+       "model_id": "b4e3e81c1ead4708902f6792c55ab49b",
        "version_major": 2,
        "version_minor": 0
       },
@@ -122,7 +207,7 @@
     }
    ],
    "source": [
-    "dataset = lr.transforms.run(pipeline, max_questions=20, name=\"News Forecasting Benchmark\")"
+    "dataset = lr.transforms.run(pipeline, max_questions=600, name=\"News Forecasting Benchmark\")"
    ]
   },
   {
@@ -136,14 +221,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Generated 20 samples (80.0% valid)\n",
+      "Generated 120 samples (79.2% valid)\n",
       "\n"
      ]
     }
@@ -167,16 +252,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Consensus: 6/16 questions have full agreement (38%)\n",
-      "Disagreement: 10/16 questions have models on opposite sides of 0.5\n",
-      "Mean spread: 0.400\n",
+      "Consensus: 58/95 questions have full agreement (61%)\n",
+      "Disagreement: 37/95 questions have models on opposite sides of 0.5\n",
+      "Mean spread: 0.308\n",
       "\n"
      ]
     },
@@ -205,215 +290,170 @@
        "      <th>Label</th>\n",
        "      <th>Spread</th>\n",
        "      <th>Agree</th>\n",
-       "      <th>gpt-4.1-mini</th>\n",
-       "      <th>claude-sonnet-4</th>\n",
-       "      <th>gemini-2.5-flash</th>\n",
+       "      <th>gpt-5.2</th>\n",
+       "      <th>claude-sonnet-4.6</th>\n",
+       "      <th>gemini-3.1-pro-preview</th>\n",
+       "      <th>foresight-v3</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>Will the New Frontiers in Research Fund (NFRF)...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.719</td>\n",
+       "      <td>Will Apple Inc. officially announce a new gene...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.93</td>\n",
        "      <td>False</td>\n",
-       "      <td>0.001</td>\n",
-       "      <td>0.72</td>\n",
-       "      <td>0.01</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>0.95</td>\n",
+       "      <td>0.02</td>\n",
+       "      <td>0.17</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>Will Autozi Internet Technology (AZI) complete...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.670</td>\n",
+       "      <td>Will Apple Inc. announce or release a new gene...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.87</td>\n",
        "      <td>False</td>\n",
-       "      <td>0.050</td>\n",
-       "      <td>0.72</td>\n",
-       "      <td>0.50</td>\n",
+       "      <td>0.90</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.03</td>\n",
+       "      <td>0.12</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Is the 13th Seoul Mediacity Biennale catalogue...</td>\n",
+       "      <td>Will the 'motorola signature' smartphone, anno...</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.670</td>\n",
+       "      <td>0.76</td>\n",
        "      <td>False</td>\n",
-       "      <td>0.050</td>\n",
-       "      <td>0.72</td>\n",
-       "      <td>0.50</td>\n",
+       "      <td>0.55</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.85</td>\n",
+       "      <td>0.12</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>Will Mercury Ev-Tech Limited hold its 39th Ann...</td>\n",
+       "      <td>Will the European Commission or European Parli...</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.620</td>\n",
+       "      <td>0.70</td>\n",
        "      <td>False</td>\n",
-       "      <td>0.600</td>\n",
-       "      <td>0.72</td>\n",
-       "      <td>0.10</td>\n",
+       "      <td>0.18</td>\n",
+       "      <td>0.35</td>\n",
+       "      <td>0.88</td>\n",
+       "      <td>0.22</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Will Supermicro announce or list at least one ...</td>\n",
+       "      <td>By January 15, 2026, will Supermicro appear in...</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.500</td>\n",
+       "      <td>0.67</td>\n",
        "      <td>False</td>\n",
-       "      <td>0.150</td>\n",
+       "      <td>0.35</td>\n",
        "      <td>0.65</td>\n",
-       "      <td>0.60</td>\n",
+       "      <td>0.76</td>\n",
+       "      <td>0.09</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>Will the Commonwealth Bank of Australia (CBA) ...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.450</td>\n",
-       "      <td>False</td>\n",
-       "      <td>0.200</td>\n",
-       "      <td>0.65</td>\n",
-       "      <td>0.60</td>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>By January 31, 2026, will Trimble have officia...</td>\n",
+       "      <th>90</th>\n",
+       "      <td>Will the Rwandan Ministry of ICT and Innovatio...</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.420</td>\n",
-       "      <td>False</td>\n",
-       "      <td>0.450</td>\n",
-       "      <td>0.72</td>\n",
-       "      <td>0.30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>Will the application period for Innovate UK's ...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.400</td>\n",
+       "      <td>0.05</td>\n",
        "      <td>True</td>\n",
-       "      <td>0.900</td>\n",
-       "      <td>0.65</td>\n",
-       "      <td>0.50</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>Will the weekly U.S. initial jobless claims (s...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.400</td>\n",
-       "      <td>False</td>\n",
-       "      <td>0.200</td>\n",
-       "      <td>0.35</td>\n",
-       "      <td>0.60</td>\n",
+       "      <td>0.25</td>\n",
+       "      <td>0.20</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.22</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>By December 31, 2025, will Google officially r...</td>\n",
+       "      <th>91</th>\n",
+       "      <td>Will the solo exhibition of 'Calculating Empir...</td>\n",
        "      <td>1</td>\n",
-       "      <td>0.400</td>\n",
-       "      <td>False</td>\n",
-       "      <td>0.200</td>\n",
-       "      <td>0.35</td>\n",
-       "      <td>0.60</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>Will Straumann Holding (SWX:STMN) report a yea...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.250</td>\n",
+       "      <td>0.05</td>\n",
        "      <td>True</td>\n",
-       "      <td>0.200</td>\n",
-       "      <td>0.25</td>\n",
-       "      <td>0.45</td>\n",
+       "      <td>0.93</td>\n",
+       "      <td>0.90</td>\n",
+       "      <td>0.95</td>\n",
+       "      <td>0.93</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>Will the Northern Virginia Technology Council ...</td>\n",
+       "      <th>92</th>\n",
+       "      <td>Will the U.S. Attorney's Office for the Distri...</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.250</td>\n",
-       "      <td>True</td>\n",
-       "      <td>0.600</td>\n",
-       "      <td>0.72</td>\n",
-       "      <td>0.85</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>Will Amazon Web Services (AWS) officially anno...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.250</td>\n",
-       "      <td>False</td>\n",
-       "      <td>0.600</td>\n",
-       "      <td>0.35</td>\n",
-       "      <td>0.60</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>By December 31, 2025, will IBM officially anno...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.200</td>\n",
+       "      <td>0.04</td>\n",
        "      <td>True</td>\n",
-       "      <td>0.700</td>\n",
-       "      <td>0.75</td>\n",
-       "      <td>0.90</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.03</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>Will the United States and South Korea formall...</td>\n",
+       "      <th>93</th>\n",
+       "      <td>Will IBM publicly announce the deployment of a...</td>\n",
        "      <td>0</td>\n",
-       "      <td>0.100</td>\n",
+       "      <td>0.04</td>\n",
        "      <td>True</td>\n",
-       "      <td>0.100</td>\n",
-       "      <td>0.15</td>\n",
-       "      <td>0.20</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>0.03</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.04</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>By December 1, 2025, will AI-generated voice a...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.100</td>\n",
+       "      <th>94</th>\n",
+       "      <td>Will Robin Mooldijk remain an active employee ...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.03</td>\n",
        "      <td>True</td>\n",
-       "      <td>0.150</td>\n",
-       "      <td>0.05</td>\n",
-       "      <td>0.05</td>\n",
+       "      <td>0.03</td>\n",
+       "      <td>0.04</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>0.03</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
+       "<p>95 rows × 8 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
        "                                             Question Label  Spread  Agree  \\\n",
-       "0   Will the New Frontiers in Research Fund (NFRF)...     1   0.719  False   \n",
-       "1   Will Autozi Internet Technology (AZI) complete...     1   0.670  False   \n",
-       "2   Is the 13th Seoul Mediacity Biennale catalogue...     1   0.670  False   \n",
-       "3   Will Mercury Ev-Tech Limited hold its 39th Ann...     0   0.620  False   \n",
-       "4   Will Supermicro announce or list at least one ...     1   0.500  False   \n",
-       "5   Will the Commonwealth Bank of Australia (CBA) ...     0   0.450  False   \n",
-       "6   By January 31, 2026, will Trimble have officia...     1   0.420  False   \n",
-       "7   Will the application period for Innovate UK's ...     0   0.400   True   \n",
-       "8   Will the weekly U.S. initial jobless claims (s...     0   0.400  False   \n",
-       "9   By December 31, 2025, will Google officially r...     1   0.400  False   \n",
-       "10  Will Straumann Holding (SWX:STMN) report a yea...     0   0.250   True   \n",
-       "11  Will the Northern Virginia Technology Council ...     0   0.250   True   \n",
-       "12  Will Amazon Web Services (AWS) officially anno...     1   0.250  False   \n",
-       "13  By December 31, 2025, will IBM officially anno...     1   0.200   True   \n",
-       "14  Will the United States and South Korea formall...     0   0.100   True   \n",
-       "15  By December 1, 2025, will AI-generated voice a...     1   0.100   True   \n",
+       "0   Will Apple Inc. officially announce a new gene...     0    0.93  False   \n",
+       "1   Will Apple Inc. announce or release a new gene...     0    0.87  False   \n",
+       "2   Will the 'motorola signature' smartphone, anno...     1    0.76  False   \n",
+       "3   Will the European Commission or European Parli...     0    0.70  False   \n",
+       "4   By January 15, 2026, will Supermicro appear in...     1    0.67  False   \n",
+       "..                                                ...   ...     ...    ...   \n",
+       "90  Will the Rwandan Ministry of ICT and Innovatio...     1    0.05   True   \n",
+       "91  Will the solo exhibition of 'Calculating Empir...     1    0.05   True   \n",
+       "92  Will the U.S. Attorney's Office for the Distri...     0    0.04   True   \n",
+       "93  Will IBM publicly announce the deployment of a...     0    0.04   True   \n",
+       "94  Will Robin Mooldijk remain an active employee ...     0    0.03   True   \n",
+       "\n",
+       "    gpt-5.2  claude-sonnet-4.6  gemini-3.1-pro-preview  foresight-v3  \n",
+       "0      0.08               0.95                    0.02          0.17  \n",
+       "1      0.90               0.04                    0.03          0.12  \n",
+       "2      0.55               0.88                    0.85          0.12  \n",
+       "3      0.18               0.35                    0.88          0.22  \n",
+       "4      0.35               0.65                    0.76          0.09  \n",
+       "..      ...                ...                     ...           ...  \n",
+       "90     0.25               0.20                     NaN          0.22  \n",
+       "91     0.93               0.90                    0.95          0.93  \n",
+       "92     0.01               0.05                    0.01          0.03  \n",
+       "93     0.05               0.03                    0.01          0.04  \n",
+       "94     0.03               0.04                    0.01          0.03  \n",
        "\n",
-       "    gpt-4.1-mini  claude-sonnet-4  gemini-2.5-flash  \n",
-       "0          0.001             0.72              0.01  \n",
-       "1          0.050             0.72              0.50  \n",
-       "2          0.050             0.72              0.50  \n",
-       "3          0.600             0.72              0.10  \n",
-       "4          0.150             0.65              0.60  \n",
-       "5          0.200             0.65              0.60  \n",
-       "6          0.450             0.72              0.30  \n",
-       "7          0.900             0.65              0.50  \n",
-       "8          0.200             0.35              0.60  \n",
-       "9          0.200             0.35              0.60  \n",
-       "10         0.200             0.25              0.45  \n",
-       "11         0.600             0.72              0.85  \n",
-       "12         0.600             0.35              0.60  \n",
-       "13         0.700             0.75              0.90  \n",
-       "14         0.100             0.15              0.20  \n",
-       "15         0.150             0.05              0.05  "
+       "[95 rows x 8 columns]"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -455,7 +495,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -492,36 +532,43 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>openai/gpt-4.1-mini</th>\n",
-       "      <td>-0.443781</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>16</td>\n",
+       "      <th>openai/gpt-5.2</th>\n",
+       "      <td>-0.230492</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>95</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>anthropic/claude-sonnet-4.6</th>\n",
+       "      <td>-0.222194</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>95</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>anthropic/claude-sonnet-4</th>\n",
-       "      <td>-0.270962</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>16</td>\n",
+       "      <th>google/gemini-3.1-pro-preview</th>\n",
+       "      <td>-0.279008</td>\n",
+       "      <td>0.978947</td>\n",
+       "      <td>95</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>google/gemini-2.5-flash</th>\n",
-       "      <td>-0.331725</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>16</td>\n",
+       "      <th>LightningRodLabs/foresight-v3</th>\n",
+       "      <td>-0.298932</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>95</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "                           mean_reward  parse_rate  n_total\n",
-       "model                                                      \n",
-       "openai/gpt-4.1-mini          -0.443781         1.0       16\n",
-       "anthropic/claude-sonnet-4    -0.270962         1.0       16\n",
-       "google/gemini-2.5-flash      -0.331725         1.0       16"
+       "                               mean_reward  parse_rate  n_total\n",
+       "model                                                          \n",
+       "openai/gpt-5.2                   -0.230492    1.000000       95\n",
+       "anthropic/claude-sonnet-4.6      -0.222194    1.000000       95\n",
+       "google/gemini-3.1-pro-preview    -0.279008    0.978947       95\n",
+       "LightningRodLabs/foresight-v3    -0.298932    1.000000       95"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -550,9 +597,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "Python (lightningrod-sdk)",
    "language": "python",
-   "name": "python3"
+   "name": "lightningrod-sdk"
   },
   "language_info": {
    "codemirror_mode": {
@@ -564,9 +611,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.18"
+   "version": "3.11.2"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/src/lightningrod/__init__.py b/src/lightningrod/__init__.py
index f7e1a45..201f4ae 100644
--- a/src/lightningrod/__init__.py
+++ b/src/lightningrod/__init__.py
@@ -8,7 +8,7 @@
 from lightningrod.datasets.dataset import Dataset
 from lightningrod import preprocessing, utils
 from lightningrod.utils.sample import create_sample
-from lightningrod.utils.models import open_router_model
+from lightningrod.utils.models import open_router_model, lightningrod_model
 from lightningrod import preprocessing, training, utils
 from lightningrod.training import to_messages
 from lightningrod._generated.models import (
@@ -81,6 +81,7 @@
     "QuestionRenderer",
     "create_sample",
     "open_router_model",
+    "lightningrod_model",
     "render_sample",
     "Rollout",
     "RolloutScorer",
diff --git a/src/lightningrod/utils/models.py b/src/lightningrod/utils/models.py
index 9c1d61f..16a96fd 100644
--- a/src/lightningrod/utils/models.py
+++ b/src/lightningrod/utils/models.py
@@ -11,3 +11,12 @@ def open_router_model(model_name: str) -> ModelConfig:
         model_source=ModelSourceType.OPEN_ROUTER,
         use_pipeline_key=True,
     )
+
+def lightningrod_model(model_name = "foresight-v3") -> ModelConfig:  # NOTE(review): model_name is unannotated here, while open_router_model declares it as str
+    """Create a ModelConfig for a Lightning Rod-hosted model."""
+    return ModelConfig(
+        model_name=f"LightningRodLabs/{model_name}",  # caller passes the bare name; the "LightningRodLabs/" prefix is added here
+        model_source=ModelSourceType.VLLM,  # differs from open_router_model, which uses ModelSourceType.OPEN_ROUTER
+        reasoning_effort="high",  # fixed value; not configurable through this helper
+        is_lightningrod_model=True,
+    )
\ No newline at end of file