From e2263b9e7449ee2eea5ec31950c3d4555119b0ca Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Wed, 20 May 2026 23:51:59 +0100
Subject: [PATCH 01/16] add unique inference

---
 every_eval_ever/helpers/eee_stats.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/every_eval_ever/helpers/eee_stats.py b/every_eval_ever/helpers/eee_stats.py
index f4556c1b8..1879cdd9a 100644
--- a/every_eval_ever/helpers/eee_stats.py
+++ b/every_eval_ever/helpers/eee_stats.py
@@ -192,9 +192,10 @@ def analyze_data(con, schema_table, instance_table, csv_path) -> None:
                 WHEN eval_library.name IN ('unknown', 'custom') THEN 'unknown/custom'
                 ELSE 'named harness'
             END AS harness_status,
-            COUNT(DISTINCT evaluation_id) AS n_evaluation_runs,
-            ROUND(100.0 * COUNT(DISTINCT evaluation_id) / SUM(COUNT(DISTINCT evaluation_id)) OVER (), 1) AS pct
-        FROM {schema_table}
+            COUNT(*) AS n_evaluation_runs,
+            ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) AS pct
+        FROM {schema_table},
+        LATERAL UNNEST(evaluation_results) AS t(er)
         GROUP BY 1
         ORDER BY n_evaluation_runs DESC;
         """,
@@ -208,6 +209,17 @@ def analyze_data(con, schema_table, instance_table, csv_path) -> None:
     )
     section(f'eval harness percentage saved to {csv_path}')
 
+    unique_inference = execute_query(
+        con,
+        f"""
+        SELECT DISTINCT
+            -- COALESCE(model_info.inference_platform, 'unreported') AS platform_inference --
+            COUNT(DISTINCT model_info.inference_platform) AS platform_inference
+        FROM {schema_table}
+        """
+    )
+    section(f'unique inference platforms {unique_inference}')
+
     count_inference_platform = execute_query(
         con,
         f"""
@@ -820,7 +832,7 @@ def main():
             sys.exit(1)
 
         analyze_data(con, schema_table, instance_table, csv_path)
-        create_visualisations(con, schema_table, instance_table, csv_path)
+        # create_visualisations(con, schema_table, instance_table, csv_path)
 
 
 if __name__ == '__main__':

From 093770f32679fa9030cb61f491801e448db1cb1c Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Thu, 21 May 2026 18:30:46 +0100
Subject: [PATCH 02/16] feat: add docs first pass

---
 .github/workflows/pages.yml       |  0
 .gitignore                        | 12 +++-
 Gemfile                           |  2 +
 Gemfile.lock                      | 91 +++++++++++++++++++++++++++++++
 _config.yml                       | 22 ++++++++
 docs/contributing/index.md        | 26 +++++++++
 docs/data-structure/index.md      | 15 +++++
 docs/data-structure/schema.md     | 24 ++++++++
 docs/data-structure/validation.md | 24 ++++++++
 docs/eval-converters/index.md     | 27 +++++++++
 docs/getting-started/index.md     | 33 +++++++++++
 docs/index.md                     | 37 +++++++++++++
 12 files changed, 312 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/pages.yml
 create mode 100644 Gemfile
 create mode 100644 Gemfile.lock
 create mode 100644 _config.yml
 create mode 100644 docs/contributing/index.md
 create mode 100644 docs/data-structure/index.md
 create mode 100644 docs/data-structure/schema.md
 create mode 100644 docs/data-structure/validation.md
 create mode 100644 docs/eval-converters/index.md
 create mode 100644 docs/getting-started/index.md
 create mode 100644 docs/index.md

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
new file mode 100644
index 000000000..e69de29bb
diff --git a/.gitignore b/.gitignore
index 5a5d93546..b925edb9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -213,4 +213,14 @@ __marimo__/
 *.DS_Store*
 None/
 global-mmlu-lite/
-/data/
\ No newline at end of file
+/data/
+
+# Ignore folders generated by Bundler
+.bundle/
+vendor/
+
+# Ignore the default location of the built site, and caches and metadata generated by Jekyll
+_site/
+.sass-cache/
+.jekyll-cache/
+.jekyll-metadata
\ No newline at end of file
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 000000000..f10963c73
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,2 @@
+source "https://rubygems.org"
+gem "just-the-docs"
\ No newline at end of file
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 000000000..3af5d9ff2
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,91 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    addressable (2.9.0)
+      public_suffix (>= 2.0.2, < 8.0)
+    base64 (0.3.0)
+    bigdecimal (4.1.2)
+    colorator (1.1.0)
+    concurrent-ruby (1.3.6)
+    csv (3.3.5)
+    em-websocket (0.5.3)
+      eventmachine (>= 0.12.9)
+      http_parser.rb (~> 0)
+    eventmachine (1.2.7)
+    ffi (1.17.4-x86_64-linux-gnu)
+    forwardable-extended (2.6.0)
+    google-protobuf (4.35.0-x86_64-linux-gnu)
+      bigdecimal
+      rake (~> 13.3)
+    http_parser.rb (0.8.1)
+    i18n (1.14.8)
+      concurrent-ruby (~> 1.0)
+    jekyll (4.4.1)
+      addressable (~> 2.4)
+      base64 (~> 0.2)
+      colorator (~> 1.0)
+      csv (~> 3.0)
+      em-websocket (~> 0.5)
+      i18n (~> 1.0)
+      jekyll-sass-converter (>= 2.0, < 4.0)
+      jekyll-watch (~> 2.0)
+      json (~> 2.6)
+      kramdown (~> 2.3, >= 2.3.1)
+      kramdown-parser-gfm (~> 1.0)
+      liquid (~> 4.0)
+      mercenary (~> 0.3, >= 0.3.6)
+      pathutil (~> 0.9)
+      rouge (>= 3.0, < 5.0)
+      safe_yaml (~> 1.0)
+      terminal-table (>= 1.8, < 4.0)
+      webrick (~> 1.7)
+    jekyll-include-cache (0.2.1)
+      jekyll (>= 3.7, < 5.0)
+    jekyll-sass-converter (3.1.0)
+      sass-embedded (~> 1.75)
+    jekyll-seo-tag (2.9.0)
+      jekyll (>= 3.8, < 5.0)
+    jekyll-watch (2.2.1)
+      listen (~> 3.0)
+    json (2.19.5)
+    just-the-docs (0.12.0)
+      jekyll (>= 3.8.5)
+      jekyll-include-cache
+      jekyll-seo-tag (>= 2.0)
+      rake (>= 12.3.1)
+    kramdown (2.5.2)
+      rexml (>= 3.4.4)
+    kramdown-parser-gfm (1.1.0)
+      kramdown (~> 2.0)
+    liquid (4.0.4)
+    listen (3.10.0)
+      logger
+      rb-fsevent (~> 0.10, >= 0.10.3)
+      rb-inotify (~> 0.9, >= 0.9.10)
+    logger (1.7.0)
+    mercenary (0.4.0)
+    pathutil (0.16.2)
+      forwardable-extended (~> 2.6)
+    public_suffix (7.0.5)
+    rake (13.4.2)
+    rb-fsevent (0.11.2)
+    rb-inotify (0.11.1)
+      ffi (~> 1.0)
+    rexml (3.4.4)
+    rouge (4.7.0)
+    safe_yaml (1.0.5)
+    sass-embedded (1.99.0-x86_64-linux-gnu)
+      google-protobuf (~> 4.31)
+    terminal-table (3.0.2)
+      unicode-display_width (>= 1.1.1, < 3)
+    unicode-display_width (2.6.0)
+    webrick (1.9.2)
+
+PLATFORMS
+  x86_64-linux-gnu
+
+DEPENDENCIES
+  just-the-docs
+
+BUNDLED WITH
+   2.4.20
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 000000000..bc2e9898d
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,22 @@
+title: Every Eval Ever
+description: Documentation for the Every Eval Ever schema, CLI, and converters
+theme: just-the-docs
+color_scheme: light
+
+source: docs
+
+url: https://evaleval.github.io
+baseurl: /every_eval_ever
+
+search_enabled: true
+heading_anchors: true
+
+aux_links:
+  GitHub:
+    - https://github.com/evaleval/every_eval_ever
+
+defaults:
+  - scope:
+      path: ""
+    values:
+      layout: default
\ No newline at end of file
diff --git a/docs/contributing/index.md b/docs/contributing/index.md
new file mode 100644
index 000000000..9fb2e43f8
--- /dev/null
+++ b/docs/contributing/index.md
@@ -0,0 +1,26 @@
+---
+layout: default
+title: Contributing
+nav_order: 5
+---
+
+# Contributing
+
+Data contributions land in the datastore, while validation gates run through the validator/EvalEvalBot workflow.
+
+To contribute evaluation data:
+
+1. Add files under `data/{benchmark}/{developer}/{model}/`
+2. Name aggregate files as `{uuid}.json`
+3. Optionally add instance-level `{uuid}_samples.jsonl`
+4. Validate before submission
+
+Datastore: https://huggingface.co/datasets/evaleval/EEE_datastore
+
+The validator checks datastore pull requests using core checks from this repository and additional checks that are being upstreamed.
+
+Before submitting, run:
+
+```bash
+uv run python -m every_eval_ever validate data/
+```
diff --git a/docs/data-structure/index.md b/docs/data-structure/index.md
new file mode 100644
index 000000000..87ce1145e
--- /dev/null
+++ b/docs/data-structure/index.md
@@ -0,0 +1,15 @@
+---
+layout: default
+title: Data Structure
+nav_order: 3
+has_children: true
+---
+
+# Data Structure
+
+Evaluation data is represented in two layers:
+
+- Aggregate JSON records (`{uuid}.json`)
+- Instance-level JSONL records (`{uuid}_samples.jsonl`)
+
+Use the child pages in this section for schema and validation details.
diff --git a/docs/data-structure/schema.md b/docs/data-structure/schema.md
new file mode 100644
index 000000000..c1720fa89
--- /dev/null
+++ b/docs/data-structure/schema.md
@@ -0,0 +1,24 @@
+---
+layout: default
+title: Schema
+parent: Data Structure
+nav_order: 1
+---
+
+# Schema
+
+The canonical schemas are:
+
+- [Aggregate schema](../../eval.schema.json)
+- [Instance-level schema](../../instance_level_eval.schema.json)
+
+Both schema definitions are currently version `0.2.2`.
+
+The repository enforces schema compatibility by generating Pydantic models from JSON Schema and applying post-generation patches (`post_codegen.py`). This generation flow is automated in CI and can also be run manually.
+
+For aggregate records, keep these conventions:
+
+1. `evaluation_id` uses `{benchmark_name}/{model_id}/{retrieved_timestamp}`
+2. `source_metadata.source_type` is `documentation` or `evaluation_run`
+3. `source_data` is set per result (`url`, `hf_dataset`, or `other`)
+4. Level-based metrics use integer values plus `level_names`
diff --git a/docs/data-structure/validation.md b/docs/data-structure/validation.md
new file mode 100644
index 000000000..56cdd7048
--- /dev/null
+++ b/docs/data-structure/validation.md
@@ -0,0 +1,24 @@
+---
+layout: default
+title: Validation
+parent: Data Structure
+nav_order: 2
+---
+
+# Validation
+
+Validate aggregate `.json` files and instance-level `.jsonl` files:
+
+```bash
+uv run python -m every_eval_ever validate data/
+```
+
+Output formats:
+
+```bash
+uv run python -m every_eval_ever validate --format rich data/
+uv run python -m every_eval_ever validate --format json data/
+uv run python -m every_eval_ever validate --format github data/
+```
+
+Exit code is `0` when all files pass and `1` when any file fails.
diff --git a/docs/eval-converters/index.md b/docs/eval-converters/index.md
new file mode 100644
index 000000000..8388a8af0
--- /dev/null
+++ b/docs/eval-converters/index.md
@@ -0,0 +1,27 @@
+---
+layout: default
+title: Eval Converters
+nav_order: 4
+---
+
+# Eval Converters
+
+Supported conversion targets:
+
+- Inspect AI
+- HELM
+- lm-evaluation-harness
+
+These are the three main general-purpose converters expected to be supported in the core package.
+
+Example commands:
+
+```bash
+uv run python -m every_eval_ever convert inspect --log_path <path>
+uv run python -m every_eval_ever convert helm --log_path <path>
+uv run python -m every_eval_ever convert lm_eval --log_path <path>
+```
+
+Adapter source code lives under [every_eval_ever/converters](../../every_eval_ever/converters/).
+
+One-off adapters also exist under [utils](../../utils/) for source-specific parsing and business logic.
diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md
new file mode 100644
index 000000000..136c14409
--- /dev/null
+++ b/docs/getting-started/index.md
@@ -0,0 +1,33 @@
+---
+layout: default
+title: Getting Started
+nav_order: 2
+---
+
+# Getting Started
+
+Install the package:
+
+```bash
+pip install every-eval-ever
+```
+
+Optional converter dependencies:
+
+```bash
+pip install 'every-eval-ever[inspect]'
+pip install 'every-eval-ever[helm]'
+pip install 'every-eval-ever[all]'
+```
+
+## Run the CLI
+
+```bash
+uv run python -m every_eval_ever --help
+```
+
+## Continue
+
+- See [Data Structure](../data-structure/)
+- See [Eval Converters](../eval-converters/)
+- See [Contributing](../contributing/)
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 000000000..468f0f87f
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,37 @@
+---
+layout: default
+title: Home
+nav_order: 1
+---
+
+# Every Eval Ever
+
+> [EvalEval Coalition](https://evalevalai.com) — "We are a researcher community developing scientifically grounded research outputs and robust deployment infrastructure for broader impact evaluations."
+
+**Every Eval Ever** is a shared schema and crowdsourced eval database. It defines a standardized metadata format for storing AI evaluation results — from leaderboard scrapes and research papers to local evaluation runs — so that results from different frameworks can be compared, reproduced, and reused. The three components that make it work:
+
+- 📋 **A metadata schema** ([eval.schema.json](../eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](../instance_level_eval.schema.json)
+- 🔧 **Validation** that checks data against the schema before it enters the repository
+- 🔌 **Converters** for [Inspect AI](../every_eval_ever/converters/inspect/), [HELM](../every_eval_ever/converters/helm/), and [lm-eval-harness](../every_eval_ever/converters/lm_eval/), so you can transform your existing evaluation logs into the standard format
+
+## Project Components
+
+Every Eval Ever is maintained across three connected components:
+
+- [GitHub repository](https://github.com/evaleval/every_eval_ever): the `every_eval_ever` Python package with schema definitions, converters/adapters, tests, and core tooling.
+- [EEE Datastore](https://huggingface.co/datasets/evaleval/EEE_datastore): the Hugging Face datastore that stores normalized Every Eval Ever evaluation data.
+- [EEE Validator](https://huggingface.co/spaces/evaleval/eee_validator): validator and EvalEvalBot checks used on datastore pull requests, built from repository logic plus additional checks that are being upstreamed.
+
+Install the package:
+
+```bash
+pip install every-eval-ever
+```
+
+Optional converter dependencies:
+
+```bash
+pip install 'every-eval-ever[inspect]'
+pip install 'every-eval-ever[helm]'
+pip install 'every-eval-ever[all]'
+```
\ No newline at end of file

From ea9794d807a3b04d35661cf543b2415b6072dea2 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 13:09:14 +0100
Subject: [PATCH 03/16] feat: add pages.yml github actions jekyll deployment

---
 .github/workflows/pages.yml | 51 +++++++++++++++++++++++++++++++++++++
 docs/index.md               |  4 +--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index e69de29bb..9db4c15a1 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -0,0 +1,51 @@
+# Sample workflow for building and deploying a Jekyll site to GitHub Pages
+name: Deploy Jekyll with GitHub Pages dependencies preinstalled
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches: ["add-read-the-docs"]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Build job
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Build with Jekyll
+        uses: actions/jekyll-build-pages@v1
+        with:
+          source: ./
+          destination: ./_site
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+
+  # Deployment job
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v5
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index 468f0f87f..e16385303 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -10,9 +10,9 @@ nav_order: 1
 
 **Every Eval Ever** is a shared schema and crowdsourced eval database. It defines a standardized metadata format for storing AI evaluation results — from leaderboard scrapes and research papers to local evaluation runs — so that results from different frameworks can be compared, reproduced, and reused. The three components that make it work:
 
-- 📋 **A metadata schema** ([eval.schema.json](../eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](../instance_level_eval.schema.json)
+- 📋 **A metadata schema** ([eval.schema.json](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/instance_level_eval.schema.json)
 - 🔧 **Validation** that checks data against the schema before it enters the repository
-- 🔌 **Converters** for [Inspect AI](../every_eval_ever/converters/inspect/), [HELM](../every_eval_ever/converters/helm/), and [lm-eval-harness](../every_eval_ever/converters/lm_eval/), so you can transform your existing evaluation logs into the standard format
+- 🔌 **Converters** for [Inspect AI](https://github.com/gbemike/every_eval_ever/tree/add-read-the-docs/every_eval_ever/converters/inspect), [HELM](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/every_eval_ever/converters/helm), and [lm-eval-harness](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/every_eval_ever/converters/lm_eval), so you can transform your existing evaluation logs into the standard format
 
 ## Project Components
 

From 6cc1a9fbf7bf42ade8cb7d8099b14f7d2c70b993 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 13:22:43 +0100
Subject: [PATCH 04/16] fix: refactor config.yml fields

---
 _config.yml | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/_config.yml b/_config.yml
index bc2e9898d..dd06788be 100644
--- a/_config.yml
+++ b/_config.yml
@@ -1,22 +1,28 @@
 title: Every Eval Ever
 description: Documentation for the Every Eval Ever schema, CLI, and converters
-theme: just-the-docs
-color_scheme: light
+color_scheme: nil
 
-source: docs
+baseurl: ""
+url: https://evalevalai.com/
+repository: every_eval_ever/every_eval_ever
 
-url: https://evaleval.github.io
-baseurl: /every_eval_ever
+permalink: pretty
 
 search_enabled: true
 heading_anchors: true
 
 aux_links:
-  GitHub:
+  "Every Eval Ever on GitHub":
     - https://github.com/evaleval/every_eval_ever
 
 defaults:
   - scope:
       path: ""
     values:
-      layout: default
\ No newline at end of file
+      layout: default
+
+nav_sort: case_sensitive
+
+# Back to top link
+back_to_top: true
+back_to_top_text: "Back to top"
\ No newline at end of file

From bd1aa6f9941b540bf5d56f673572fdfa985f2101 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 13:28:10 +0100
Subject: [PATCH 05/16] add source docs as source

---
 _config.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/_config.yml b/_config.yml
index dd06788be..605567d89 100644
--- a/_config.yml
+++ b/_config.yml
@@ -2,6 +2,8 @@ title: Every Eval Ever
 description: Documentation for the Every Eval Ever schema, CLI, and converters
 color_scheme: nil
 
+source: docs
+
 baseurl: ""
 url: https://evalevalai.com/
 repository: every_eval_ever/every_eval_ever

From 417cc6fb58c83d3ec707607037bef66b3bbdd9ff Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 13:42:02 +0100
Subject: [PATCH 06/16] add just-the-docs as value for theme field

---
 _config.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/_config.yml b/_config.yml
index 605567d89..2841b0377 100644
--- a/_config.yml
+++ b/_config.yml
@@ -1,6 +1,7 @@
 title: Every Eval Ever
 description: Documentation for the Every Eval Ever schema, CLI, and converters
-color_scheme: nil
+theme: just-the-docs
+color_scheme: light
 
 source: docs
 

From 726c6595678bd7bb36f1335e4932123ea2ac7300 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 13:46:35 +0100
Subject: [PATCH 07/16] ci: build pages with builder

---
 .github/workflows/pages.yml | 24 ++++++++++++++----------
 _config.yml                 |  2 +-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 9db4c15a1..5d5613b1f 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -1,5 +1,4 @@
-# Sample workflow for building and deploying a Jekyll site to GitHub Pages
-name: Deploy Jekyll with GitHub Pages dependencies preinstalled
+name: Deploy Docs to GitHub Pages
 
 on:
   # Runs on pushes targeting the default branch
@@ -22,23 +21,28 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  # Build job
   build:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+
+      - name: Setup Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          bundler-cache: true
+
       - name: Setup Pages
         uses: actions/configure-pages@v5
-      - name: Build with Jekyll
-        uses: actions/jekyll-build-pages@v1
-        with:
-          source: ./
-          destination: ./_site
+
+      - name: Build site
+        run: bundle exec jekyll build
+
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
+        with:
+          path: ./_site
 
-  # Deployment job
   deploy:
     environment:
       name: github-pages
@@ -46,6 +50,6 @@ jobs:
     runs-on: ubuntu-latest
     needs: build
     steps:
-      - name: Deploy to GitHub Pages
+      - name: Deploy
         id: deployment
         uses: actions/deploy-pages@v5
\ No newline at end of file
diff --git a/_config.yml b/_config.yml
index 2841b0377..a909c26e1 100644
--- a/_config.yml
+++ b/_config.yml
@@ -7,7 +7,7 @@ source: docs
 
 baseurl: ""
 url: https://evalevalai.com/
-repository: every_eval_ever/every_eval_ever
+repository: /every_eval_ever
 
 permalink: pretty
 

From 8366fc8592bfb5a6c61ee0483980b58a9764f505 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 13:55:46 +0100
Subject: [PATCH 08/16] fix pages baseurl/url

---
 _config.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/_config.yml b/_config.yml
index a909c26e1..e880bea1d 100644
--- a/_config.yml
+++ b/_config.yml
@@ -5,9 +5,9 @@ color_scheme: light
 
 source: docs
 
-baseurl: ""
-url: https://evalevalai.com/
-repository: /every_eval_ever
+baseurl: "/every_eval_ever"
+url: "https://gbemike.github.io"
+repository: gbemike/every_eval_ever
 
 permalink: pretty
 

From 686644ea57780e595355a13ce4734170244c5d18 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Fri, 22 May 2026 14:02:15 +0100
Subject: [PATCH 09/16] fix: specify ruby-version in github actions workflow

---
 .github/workflows/pages.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 5d5613b1f..2f178dc4d 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -30,6 +30,7 @@ jobs:
       - name: Setup Ruby
         uses: ruby/setup-ruby@v1
         with:
+          ruby-version: '3.2'
           bundler-cache: true
 
       - name: Setup Pages

From 44ea36864d640de86d7c9d8f4176d98295af31ea Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Sat, 23 May 2026 15:00:28 +0100
Subject: [PATCH 10/16] docs: configure pages for docs.evalevalai.com

---
 _config.yml | 6 +++---
 docs/CNAME  | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)
 create mode 100644 docs/CNAME

diff --git a/_config.yml b/_config.yml
index e880bea1d..523389765 100644
--- a/_config.yml
+++ b/_config.yml
@@ -5,9 +5,9 @@ color_scheme: light
 
 source: docs
 
-baseurl: "/every_eval_ever"
-url: "https://gbemike.github.io"
-repository: gbemike/every_eval_ever
+baseurl: ""
+url: "https://docs.evalevalai.com"
+repository: evaleval/every_eval_ever
 
 permalink: pretty
 
diff --git a/docs/CNAME b/docs/CNAME
new file mode 100644
index 000000000..d1695a523
--- /dev/null
+++ b/docs/CNAME
@@ -0,0 +1 @@
+docs.evalevalai.com

From d0eb386ce27be137fb8da2d6694098cacc6b47a8 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Sat, 23 May 2026 15:13:26 +0100
Subject: [PATCH 11/16] feat: ready pages.yml for main branch merge

---
 .github/workflows/pages.yml | 7 +------
 docs/CNAME                  | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)
 delete mode 100644 docs/CNAME

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 2f178dc4d..df2f0ab3c 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -1,21 +1,16 @@
 name: Deploy Docs to GitHub Pages
 
 on:
-  # Runs on pushes targeting the default branch
   push:
-    branches: ["add-read-the-docs"]
+    branches: [main]
 
-  # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 
-# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
 permissions:
   contents: read
   pages: write
   id-token: write
 
-# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
-# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
 concurrency:
   group: "pages"
   cancel-in-progress: false
diff --git a/docs/CNAME b/docs/CNAME
deleted file mode 100644
index d1695a523..000000000
--- a/docs/CNAME
+++ /dev/null
@@ -1 +0,0 @@
-docs.evalevalai.com

From 27febff087e406b469f54d8c3f46a715d5b49fd7 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Sun, 24 May 2026 22:42:25 +0100
Subject: [PATCH 12/16] refactor: fix links and add documentation link to
 README

---
 .github/workflows/pages.yml       |  5 +++-
 README.md                         |  2 ++
 docs/contributing/index.md        | 47 +++++++++++++++++++++++++------
 docs/data-structure/schema.md     | 42 +++++++++++++++++++++------
 docs/data-structure/validation.md | 40 +++++++++++++++++++++-----
 docs/eval-converters/index.md     |  6 ++--
 docs/index.md                     |  4 +--
 7 files changed, 117 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index df2f0ab3c..5b3f109ae 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -3,6 +3,8 @@ name: Deploy Docs to GitHub Pages
 on:
   push:
     branches: [main]
+  pull_request:
+    branches: [main]
 
   workflow_dispatch:
 
@@ -45,7 +47,8 @@ jobs:
       url: ${{ steps.deployment.outputs.page_url }}
     runs-on: ubuntu-latest
     needs: build
+    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
     steps:
       - name: Deploy
         id: deployment
-        uses: actions/deploy-pages@v5
\ No newline at end of file
+        uses: actions/deploy-pages@v5
diff --git a/README.md b/README.md
index 262a2ca11..23b277b14 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # Every Eval Ever
 
+📖 **[Documentation](https://docs.evalevalai.com)**
+
 > [EvalEval Coalition](https://evalevalai.com) — "We are a researcher community developing scientifically grounded research outputs and robust deployment infrastructure for broader impact evaluations."
 
 **Every Eval Ever** is a shared schema and crowdsourced eval database. It defines a standardized metadata format for storing AI evaluation results — from leaderboard scrapes and research papers to local evaluation runs — so that results from different frameworks can be compared, reproduced, and reused. The three components that make it work:
diff --git a/docs/contributing/index.md b/docs/contributing/index.md
index 9fb2e43f8..46b5992e7 100644
--- a/docs/contributing/index.md
+++ b/docs/contributing/index.md
@@ -6,18 +6,49 @@ nav_order: 5
 
 # Contributing
 
-Data contributions land in the datastore, while validation gates run through the validator/EvalEvalBot workflow.
+New data can be contributed to the [Hugging Face Dataset](https://huggingface.co/datasets/evaleval/EEE_datastore) using the following process:
 
-To contribute evaluation data:
+Leaderboard/evaluation data is split up into files by individual model, and data for each model is stored using [eval.schema.json](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json). The repository is structured into folders as `data/{benchmark_name}/{developer_name}/{model_name}/`.
 
-1. Add files under `data/{benchmark}/{developer}/{model}/`
-2. Name aggregate files as `{uuid}.json`
-3. Optionally add instance-level `{uuid}_samples.jsonl`
-4. Validate before submission
+## TL;DR How to successfully submit
 
-Datastore: https://huggingface.co/datasets/evaleval/EEE_datastore
+1. Data must conform to [eval.schema.json](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json) (current version is defined in the schema file)
+2. The validation pipeline will automatically verify the data submitted in the pull request, but can also be manually triggered by typing `/eee validate changed` in a comment on the HF PR.
+3. An EvalEval member will review and merge your submission
 
-The validator checks datastore pull requests using core checks from this repository and additional checks that are being upstreamed.
+## PR Naming Convention
+
+Use these prefixes in your pull request titles:
+
+- `[Submission]` - New evaluation data
+- `[Issue #N]` - Fix for a specific GitHub issue
+- `[Feature]` - New functionality not tied to an issue
+- `[Docs]` - Documentation changes
+- `[ACL Shared Task]` - Shared task submissions (priority review)
+
+## UUID Naming Convention
+
+Each JSON file is named with a **UUID (Universally Unique Identifier)** in the format `{uuid}.json`. The UUID is automatically generated (using standard UUID v4) when creating a new evaluation result file. This ensures that:
+- **Multiple evaluations** of the same model can exist without conflicts (each gets a unique UUID)
+- **Different timestamps** are stored as separate files with different UUIDs (not as separate folders)
+- A model may have multiple result files, with each file representing different iterations or runs of the leaderboard/evaluation
+- UUIDs can be generated using Python's `uuid.uuid4()` function.
+
+**Example**: The model `openai/gpt-4o-2024-11-20` might have multiple files like:
+- `e70acf51-30ef-4c20-b7cc-51704d114d70.json` (evaluation run #1)
+- `a1b2c3d4-5678-90ab-cdef-1234567890ab.json` (evaluation run #2)
+
+Note: Each file can contain multiple individual results related to one model. See [examples in the datastore](https://huggingface.co/datasets/evaleval/EEE_datastore/tree/main/data).
+
+## How to add new eval
+
+1. Add a new folder under [data/](https://huggingface.co/datasets/evaleval/EEE_datastore/tree/main/data) on the Hugging Face datastore with a codename for your eval.
+2. For each model, use the Hugging Face (`developer_name/model_name`) naming convention to create a 2-tier folder structure.
+3. Add a JSON file with results for each model and name it `{uuid}.json`.
+4. [Optional] Include a [utils/](https://github.com/evaleval/every_eval_ever/tree/main/utils) folder in your benchmark name folder with any scripts used to generate the data (see e.g. [utils/global-mmlu-lite/adapter.py](https://github.com/evaleval/every_eval_ever/blob/main/utils/global-mmlu-lite/adapter.py)).
+5. [Submit] Two ways to submit your evaluation data:
+	- **Option A: Drag & drop via Hugging Face** - Go to [evaleval/EEE_datastore](https://huggingface.co/datasets/evaleval/EEE_datastore) -> click "Files and versions" -> "Contribute" -> "Upload files" -> drag and drop your data -> select "Open as a pull request to the main branch". See [step-by-step screenshots](https://docs.google.com/document/d/1dxTQF8ncGCzaAOIj0RX7E9Hg4THmUBzezDOYUp_XdCY/edit?usp=sharing).
+	- **Option B: Clone & PR** - Clone the [Hugging Face repository](https://huggingface.co/datasets/evaleval/EEE_datastore), add your data under `data/`, and open a pull request
 
 Before submitting, run:
 
diff --git a/docs/data-structure/schema.md b/docs/data-structure/schema.md
index c1720fa89..b73000b1b 100644
--- a/docs/data-structure/schema.md
+++ b/docs/data-structure/schema.md
@@ -9,16 +9,42 @@ nav_order: 1
 
 The canonical schemas are:
 
-- [Aggregate schema](../../eval.schema.json)
-- [Instance-level schema](../../instance_level_eval.schema.json)
+- [Aggregate schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
+- [Instance-level schema](https://github.com/evaleval/every_eval_ever/blob/main/instance_level_eval.schema.json)
 
-Both schema definitions are currently version `0.2.2`.
+Schema versions are defined in the canonical JSON Schema files linked above.
 
 The repository enforces schema compatibility by generating Pydantic models from JSON Schema and applying post-generation patches (`post_codegen.py`). This generation flow is automated in CI and can also be run manually.
 
-For aggregate records, keep these conventions:
+## Schema Instructions
 
-1. `evaluation_id` uses `{benchmark_name}/{model_id}/{retrieved_timestamp}`
-2. `source_metadata.source_type` is `documentation` or `evaluation_run`
-3. `source_data` is set per result (`url`, `hf_dataset`, or `other`)
-4. Level-based metrics use integer values plus `level_names`
+1. **`model_info`**: Use Hugging Face formatting (`developer_name/model_name`). If a model does not come from Hugging Face, use the exact API reference. Check [examples in data/livecodebenchpro](https://huggingface.co/datasets/evaleval/EEE_datastore/tree/main/data/livecodebenchpro). Note that some providers **include a date in the model name**, while others **do not**. For example:
+- OpenAI: `gpt-4o-2024-11-20`, `gpt-5-2025-08-07`, `o3-2025-04-16`
+- Anthropic: `claude-3-7-sonnet-20250219`, `claude-3-sonnet-20240229`
+- Google: `gemini-2.5-pro`, `gemini-2.5-flash`
+- xAI (Grok): `grok-2-2024-08-13`, `grok-3-2025-01-15`
+
+2. **`evaluation_id`**: Use the `{benchmark_name}/{model_id}/{retrieved_timestamp}` format (e.g. `livecodebenchpro/qwen3-235b-a22b-thinking-2507/1760492095.8105888`).
+
+3. **`inference_platform`** vs **`inference_engine`**: Where possible, specify where the evaluation was run using one of these two fields.
+- `inference_platform`: Use this field when the evaluation was run through a remote API (e.g., `openai`, `huggingface`, `openrouter`, `anthropic`, `xai`).
+- `inference_engine`: Use this field when the evaluation was run locally. This is now an object with `name` and `version` (e.g. `{"name": "vllm", "version": "0.6.0"}`).
+
+4. The `source_type` on `source_metadata` has two options: `documentation` and `evaluation_run`. Use `documentation` when results are scraped from a leaderboard or paper. Use `evaluation_run` when the evaluation was run locally (e.g. via an eval converter).
+
+5. **`source_data`** is specified per evaluation result (inside `evaluation_results`), with three variants:
+- `source_type: "url"` - link to a web source (e.g. leaderboard API)
+- `source_type: "hf_dataset"` - reference to a Hugging Face dataset (e.g. `{"hf_repo": "google/IFEval"}`)
+- `source_type: "other"` - for private or proprietary datasets
+
+6. The schema is designed to accommodate both numeric and level-based (e.g. Low, Medium, High) metrics. For level-based metrics, the actual `value` should be converted to an integer (e.g. Low = 1, Medium = 2, High = 3), and the `level_names` property should be used to specify the mapping of levels to integers.
+
+7. **Timestamps**: The schema has three timestamp fields - use them as follows:
+- `retrieved_timestamp` (required) - when this record was created, in Unix epoch format (e.g. `1760492095.8105888`)
+- `evaluation_timestamp` (top-level, optional) - when the evaluation was run
+- `evaluation_results[].evaluation_timestamp` (per-result, optional) - when a specific evaluation result was produced, if different results were run at different times
+
+8. Additional details can be provided in several places in the schema. They are not required, but can be useful for detailed analysis.
+- `model_info.additional_details`: Use this field to provide any additional information about the model itself (e.g. number of parameters)
+- `evaluation_results[].generation_config.generation_args`: Specify additional arguments used to generate outputs from the model
+- `evaluation_results[].generation_config.additional_details`: Use this field to provide any additional information about the evaluation process that is not captured elsewhere
\ No newline at end of file
diff --git a/docs/data-structure/validation.md b/docs/data-structure/validation.md
index 56cdd7048..6234f1258 100644
--- a/docs/data-structure/validation.md
+++ b/docs/data-structure/validation.md
@@ -7,18 +7,44 @@ nav_order: 2
 
 # Validation
 
-Validate aggregate `.json` files and instance-level `.jsonl` files:
+Validation uses Pydantic models generated from the JSON schemas. This validates aggregate `.json` files against `EvaluationLog` and instance-level `_samples.jsonl` files line-by-line against `InstanceLevelEvaluationLog`. Requires [uv](https://docs.astral.sh/uv/).
 
-```bash
-uv run python -m every_eval_ever validate data/
+## Validate files with the package CLI
+
+```sh
+# Single aggregate file
+uv run python -m every_eval_ever validate data/benchmark/dev/model/uuid.json
+
+# Instance-level JSONL
+uv run python -m every_eval_ever validate data/benchmark/dev/model/uuid_samples.jsonl
+
+# Entire directory (recurses into subdirectories)
+uv run python -m every_eval_ever validate data/benchmark/dev/model/
+
+# Multiple paths
+uv run python -m every_eval_ever validate file1.json file2_samples.jsonl data/
 ```
 
-Output formats:
+File type is determined by extension: `.json` validates against `EvaluationLog`, `.jsonl` validates each line against `InstanceLevelEvaluationLog`.
+
+### Output formats
 
-```bash
-uv run python -m every_eval_ever validate --format rich data/
+```sh
+# Rich terminal output (default)
+uv run python -m every_eval_ever validate data/
+
+# Machine-readable JSON
 uv run python -m every_eval_ever validate --format json data/
+
+# GitHub Actions annotations
 uv run python -m every_eval_ever validate --format github data/
 ```
 
-Exit code is `0` when all files pass and `1` when any file fails.
+### Options
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--format {rich,json,github}` | `rich` | Output format |
+| `--max-errors N` | `50` | Maximum errors reported per JSONL file |
+
+Exit code is `0` if all files pass and `1` if any fail.
diff --git a/docs/eval-converters/index.md b/docs/eval-converters/index.md
index 8388a8af0..444d75762 100644
--- a/docs/eval-converters/index.md
+++ b/docs/eval-converters/index.md
@@ -10,7 +10,7 @@ Supported conversion targets:
 
 - Inspect AI
 - HELM
-- lm-evaluation-harness
+- lm-eval-harness
 
 These are the three main general-purpose converters expected to be supported in the core package.
 
@@ -22,6 +22,6 @@ uv run python -m every_eval_ever convert helm --log_path <path>
 uv run python -m every_eval_ever convert lm_eval --log_path <path>
 ```
 
-Adapter source code lives under [every_eval_ever/converters](../../every_eval_ever/converters/).
+Adapter source code lives under [every_eval_ever/converters](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters).
 
-One-off adapters also exist under [utils](../../utils/) for source-specific parsing and business logic.
+One-off adapters also exist under [utils](https://github.com/evaleval/every_eval_ever/tree/main/utils) for source-specific parsing and business logic.
diff --git a/docs/index.md b/docs/index.md
index e16385303..76f334041 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -10,9 +10,9 @@ nav_order: 1
 
 **Every Eval Ever** is a shared schema and crowdsourced eval database. It defines a standardized metadata format for storing AI evaluation results — from leaderboard scrapes and research papers to local evaluation runs — so that results from different frameworks can be compared, reproduced, and reused. The three components that make it work:
 
-- 📋 **A metadata schema** ([eval.schema.json](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/instance_level_eval.schema.json)
+- 📋 **A metadata schema** ([eval.schema.json](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](https://github.com/evaleval/every_eval_ever/blob/main/instance_level_eval.schema.json)
 - 🔧 **Validation** that checks data against the schema before it enters the repository
-- 🔌 **Converters** for [Inspect AI](https://github.com/gbemike/every_eval_ever/tree/add-read-the-docs/every_eval_ever/converters/inspect), [HELM](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/every_eval_ever/converters/helm), and [lm-eval-harness](https://github.com/gbemike/every_eval_ever/blob/add-read-the-docs/every_eval_ever/converters/lm_eval), so you can transform your existing evaluation logs into the standard format
+- 🔌 **Converters** for [Inspect AI](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters/inspect), [HELM](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters/helm), and [lm-eval-harness](https://github.com/evaleval/every_eval_ever/tree/main/every_eval_ever/converters/lm_eval), so you can transform your existing evaluation logs into the standard format
 
 ## Project Components
 

From 8cf811a7fbfb41c96e084e3b9c1ebccc37f00e8d Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Tue, 26 May 2026 18:55:58 +0100
Subject: [PATCH 13/16] fix github actions build block

---
 .github/workflows/pages.yml | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 5b3f109ae..1e1de4f8a 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -8,18 +8,15 @@ on:
 
   workflow_dispatch:
 
-permissions:
-  contents: read
-  pages: write
-  id-token: write
-
 concurrency:
-  group: "pages"
-  cancel-in-progress: false
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 
 jobs:
   build:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -30,9 +27,6 @@ jobs:
           ruby-version: '3.2'
           bundler-cache: true
 
-      - name: Setup Pages
-        uses: actions/configure-pages@v5
-
       - name: Build site
         run: bundle exec jekyll build
 
@@ -42,6 +36,10 @@ jobs:
           path: ./_site
 
   deploy:
+    permissions:
+      contents: read
+      pages: write
+      id-token: write
     environment:
       name: github-pages
       url: ${{ steps.deployment.outputs.page_url }}
@@ -51,4 +49,4 @@ jobs:
     steps:
       - name: Deploy
         id: deployment
-        uses: actions/deploy-pages@v5
+        uses: actions/deploy-pages@v5
\ No newline at end of file

From 01fdf757689771e7a02258803059c8c0eeeaab92 Mon Sep 17 00:00:00 2001
From: gbemike <gbemike2002@gmail.com>
Date: Tue, 26 May 2026 19:59:04 +0100
Subject: [PATCH 14/16] refactor: revert eee_stats script to normal

---
 every_eval_ever/helpers/eee_stats.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/every_eval_ever/helpers/eee_stats.py b/every_eval_ever/helpers/eee_stats.py
index 1879cdd9a..c3b296ac7 100644
--- a/every_eval_ever/helpers/eee_stats.py
+++ b/every_eval_ever/helpers/eee_stats.py
@@ -192,10 +192,9 @@ def analyze_data(con, schema_table, instance_table, csv_path) -> None:
                 WHEN eval_library.name IN ('unknown', 'custom') THEN 'unknown/custom'
                 ELSE 'named harness'
             END AS harness_status,
-            COUNT(*) AS n_evaluation_runs,
-            ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) AS pct
-        FROM {schema_table},
-        LATERAL UNNEST(evaluation_results) AS t(er)
+            COUNT(DISTINCT evaluation_id) AS n_evaluation_runs,
+            ROUND(100.0 * COUNT(DISTINCT evaluation_id) / SUM(COUNT(DISTINCT evaluation_id)) OVER (), 1) AS pct
+        FROM {schema_table}
         GROUP BY 1
         ORDER BY n_evaluation_runs DESC;
         """,
@@ -209,17 +208,6 @@ def analyze_data(con, schema_table, instance_table, csv_path) -> None:
     )
     section(f'eval harness percentage saved to {csv_path}')
 
-    unique_inference = execute_query(
-        con,
-        f"""
-        SELECT DISTINCT
-            -- COALESCE(model_info.inference_platform, 'unreported') AS platform_inference --
-            COUNT(DISTINCT model_info.inference_platform) AS platform_inference
-        FROM {schema_table}
-        """
-    )
-    section(f'unique inference platforms {unique_inference}')
-
     count_inference_platform = execute_query(
         con,
         f"""
@@ -832,8 +820,8 @@ def main():
             sys.exit(1)
 
         analyze_data(con, schema_table, instance_table, csv_path)
-        # create_visualisations(con, schema_table, instance_table, csv_path)
+        create_visualisations(con, schema_table, instance_table, csv_path)
 
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file

From e76dc6ad76ddfbcada8375576682674e6de44c60 Mon Sep 17 00:00:00 2001
From: Tommaso Cerruti <79256764+tommasocerruti@users.noreply.github.com>
Date: Tue, 26 May 2026 21:06:10 +0200
Subject: [PATCH 15/16] Add newline at end of eee_stats.py

Add a newline at the end of the file for better formatting.
---
 every_eval_ever/helpers/eee_stats.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/every_eval_ever/helpers/eee_stats.py b/every_eval_ever/helpers/eee_stats.py
index c3b296ac7..4871a2e11 100644
--- a/every_eval_ever/helpers/eee_stats.py
+++ b/every_eval_ever/helpers/eee_stats.py
@@ -824,4 +824,5 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
+    

From 6b8371216096eb90158fb3864cb900bb1ac6b560 Mon Sep 17 00:00:00 2001
From: Tommaso Cerruti <79256764+tommasocerruti@users.noreply.github.com>
Date: Tue, 26 May 2026 21:09:40 +0200
Subject: [PATCH 16/16] nit: restoring previous state prior to PR

---
 every_eval_ever/helpers/eee_stats.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/every_eval_ever/helpers/eee_stats.py b/every_eval_ever/helpers/eee_stats.py
index 4871a2e11..f4556c1b8 100644
--- a/every_eval_ever/helpers/eee_stats.py
+++ b/every_eval_ever/helpers/eee_stats.py
@@ -825,4 +825,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-