From 97b5e762b61f282b0ff4c02e0ebc3c1663781556 Mon Sep 17 00:00:00 2001
From: Colinho22 <48288595+Colinho22@users.noreply.github.com>
Date: Sun, 21 Jun 2026 01:27:28 +0200
Subject: [PATCH] chore: bump version to 1.0.0 and consolidate release notes
 into CHANGELOG

Replace per-release release-notes-*.md files with a single CHANGELOG.md
(Keep a Changelog format), so future releases append one section instead of
adding a new top-level file each time.
---
 CHANGELOG.md                 | 71 ++++++++++++++++++++++++++++++++++++
 pyproject.toml               |  2 +-
 release-notes-v1.0.0-rc.1.md | 67 ----------------------------------
 src/maestro/__init__.py      |  2 +-
 4 files changed, 73 insertions(+), 69 deletions(-)
 create mode 100644 CHANGELOG.md
 delete mode 100644 release-notes-v1.0.0-rc.1.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..3fd7342
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,71 @@
+# Changelog
+
+All notable changes to MAESTRO are recorded here, newest first. The format
+follows [Keep a Changelog](https://keepachangelog.com/), and the project uses
+[semantic versioning](https://semver.org/): a major bump marks a milestone (see
+the release table in `.github/CONTRIBUTING.md`). Each tagged release is also a
+GitHub Release; every row in a published `maestro.db` carries the `git_commit`
+of the tag that produced it, so data and code stay cross-verifiable.
+
+## [1.0.0] - 2026-06-21
+
+Thesis experimental run. The frozen code state that produced the experimental
+data reported in the MAESTRO thesis (FHGR FS26).
+
+### Under test
+
+- Four orchestration strategies (SingleAgent, SOP, CrewAI, LangGraph), with
+  prompts and the output contract held identical so only orchestration differs.
+- Three control conditions (no LLM, deterministic): NullControl and
+  CopyInputControl (score floor), GroundTruthEchoControl (score ceiling).
+- Ten models across five providers: Anthropic (claude-opus-4-8,
+  claude-haiku-4-5-20251001), OpenAI (gpt-5.5-2026-04-23,
+  gpt-5.4-mini-2026-03-17), Mistral (mistral-medium-3-5, mistral-small-2603),
+  Google (gemini-3.5-flash, gemini-3.1-flash-lite), DeepSeek (deepseek-v4-pro,
+  deepseek-v4-flash).
+
+### Added
+
+- Diagram-type-aware label rendering: C4 and network-topology diagrams use the
+  `name\n[Type]\ntech` label, BPMN keeps bare names. The diagram type is read
+  from input metadata and given to every strategy as task context.
+- Concurrent matrix execution, capped per provider (`--provider-concurrency`,
+  default 4), with the main thread as the sole DB writer.
+- `raw_response` captured on every cell (including failures) for post-run
+  diagnosis, alongside per-call retry counts and per-invocation environment
+  capture (OS, arch, Python, git commit, library versions, image digest).
+
+### Changed
+
+- The entity-name metric scores the input-derivable label core (name and type);
+  inconsistently authored descriptor lines and labels for input-unnamed nodes are
+  out of the scored contract by design.
+- Step-3 output is structurally validated (empty-label brackets, concatenated
+  nodes, unbalanced subgraphs) so a fixable malformation consumes the retry
+  budget instead of scoring as a parse failure.
+- CrewAI's delivered prompt is stripped to match SOP byte-for-byte, removing a
+  prompt-content confound.
+
+### Reproduce
+
+```bash
+git clone https://github.com/Colinho22/maestro.git
+cd maestro && git checkout v1.0.0
+cp .env.template .env            # add API keys
+docker compose build
+docker compose run --rm maestro python -m maestro.run --repeats 5
+```
+
+## [1.0.0-rc.1] - 2026-06-14
+
+Release candidate cut to validate the toolchain (Docker build, matrix shape,
+scoring pipeline, a small smoke run) before committing to the full paid run. Not
+the thesis dataset; that is produced under `v1.0.0`.
+
+### Added
+
+- Pre-freeze code cleanup: ASCII-only sweep, modern typing throughout, a
+  read-only analysis DB connection, `tenacity` in the provenance whitelist,
+  structured empty-response handling across all providers, and a single shared
+  step-output contract for the multi-step strategies. None of it changes
+  successful scored output.
diff --git a/pyproject.toml b/pyproject.toml
index b0d67d1..1f528c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "maestro"
-version = "1.0.0rc1"
+version = "1.0.0"
 description = "MAESTRO - Multi-Agent Evaluation for Structured Relational Output. Thesis artifact comparing orchestration frameworks for relational visualization."
 readme = "README.md"
 license = { text = "MIT" }
diff --git a/release-notes-v1.0.0-rc.1.md b/release-notes-v1.0.0-rc.1.md
deleted file mode 100644
index 1270ba1..0000000
--- a/release-notes-v1.0.0-rc.1.md
+++ /dev/null
@@ -1,67 +0,0 @@
-## MAESTRO v1.0.0-rc.1 - Release candidate (pipeline validation)
-
-A release candidate cut before the frozen `v1.0.0` thesis run. Its purpose is to
-confirm the toolchain produces sane output end to end, Docker build, matrix
-shape, scoring pipeline, and a small smoke run, before committing to the full
-multi-hour experiment. It is NOT the code state that produced the thesis data;
-that will be the `v1.0.0` tag.
-
-### Why this candidate exists
-
-The full matrix is a long, paid run. Tagging a candidate first lets the Docker
-image, the experiment matrix, and the metric pipeline be validated on a small
-subset (for example `--repeats 1` on tier 1) so a configuration or environment
-problem is caught cheaply rather than halfway through the real run.
-
-### What this release contains
-
-- Four orchestration strategies under test: `single_agent`, `sop_based`,
-  `crew_ai`, `lang_graph`, holding prompts and the output contract identical so
-  only the orchestration differs.
-- Three control conditions (no LLM, deterministic): `null_control` and
-  `copy_control` (score floor), `ground_truth_control` (score ceiling).
-- Five providers: Anthropic, OpenAI, Mistral, Gemini, DeepSeek, across a matrix
-  of `inputs x strategies x models x repeats`, stratified by complexity tier.
-- Evaluation pipeline: structural validity (via `mmdc`), entity F1 (id / name /
-  lemma), relationship F1 (relaxed / strict), container and attachment metrics,
-  and an error taxonomy.
-- Reproducibility instrumentation: per-invocation environment capture (OS, arch,
-  Python, git commit, library versions, Docker image digest), per-call retry
-  counts, and control-condition sanity floors and ceiling.
-
-### What changed going into the candidate
-
-This candidate folds in the pre-freeze code cleanup: an ASCII-only sweep of code
-and tests, modern typing throughout (`from __future__ import annotations`, no
-`typing.Optional`), a read-only analysis DB connection, `tenacity` added to the
-provenance whitelist, structured empty-response handling across all providers,
-and a single shared step-output contract for the multi-step strategies. None of
-it changes successful scored output; the empty-response work only affects how an
-already-failing cell is labeled.
-
-### How to validate the pipeline with this candidate
-
-```bash
-git clone https://github.com/Colinho22/maestro.git
-cd maestro
-git checkout v1.0.0-rc.1
-cp .env.template .env            # add API keys for the providers you will run
-
-# Matrix shape, no API calls:
-python -m maestro.run --dry-run
-
-# Small smoke run end to end:
-python -m maestro.run --strategy single_agent --tier 1 --repeats 1
-# Docker: docker compose run --rm maestro \
-#   python -m maestro.run --strategy single_agent --tier 1 --repeats 1
-```
-
-If the smoke run scores and persists a row and `--dry-run` shows the expected
-matrix, the toolchain is ready for the `v1.0.0` freeze and full run.
-
-### Scope
-
-In scope: validating Docker, matrix shape, and the scoring pipeline on a small
-subset. Out of scope: the thesis dataset (produced under `v1.0.0`) and any
-post-candidate cleanup (which lands on `main` toward `v1.0.0`). This candidate
-tag stays frozen.
diff --git a/src/maestro/__init__.py b/src/maestro/__init__.py
index 6c93125..5becc17 100644
--- a/src/maestro/__init__.py
+++ b/src/maestro/__init__.py
@@ -1 +1 @@
-__version__ = "1.0.0rc1"
+__version__ = "1.0.0"