diff --git a/Cargo.toml b/Cargo.toml
index ab5a0ce..37dbe0b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,6 +6,7 @@ authors = ["genomehubs <genomehubs@genomehubs.org>"]
 description = "Generic CLI generator for genomehubs instances"
 license = "MIT"
 readme = "README.md"
+default-run = "cli-generator"
 
 [workspace]
 members = [".", "crates/genomehubs-query", "crates/genomehubs-api"]
diff --git a/agent-logs/2026-05-28_001_histogram-scatter-modular-refactor.md b/agent-logs/2026-05-28_001_histogram-scatter-modular-refactor.md
new file mode 100644
index 0000000..6106b7e
--- /dev/null
+++ b/agent-logs/2026-05-28_001_histogram-scatter-modular-refactor.md
@@ -0,0 +1,140 @@
+---
+date: 2026-05-28
+agent: GitHub Copilot
+model: claude-sonnet-4-6
+task: "Refactor histogram and scatter report functions and parsing to be modular, DRY, and cover all edge cases"
+files_changed:
+  - crates/genomehubs-api/src/report/field.rs
+  - crates/genomehubs-api/src/report/mod.rs
+  - crates/genomehubs-api/src/report/agg.rs
+  - crates/genomehubs-api/src/report/bounds.rs
+  - crates/genomehubs-api/src/report/report_types.rs
+  - crates/genomehubs-api/src/report/spec_builder.rs
+  - crates/genomehubs-query/src/report/mod.rs
+---
+
+## Task summary
+
+The user requested a full architectural refactor of the server-side report infrastructure in
+`crates/genomehubs-api/src/report/`. The prior code had duplicated field-type helpers spread
+across `agg.rs` and `bounds.rs`, a 4-case branch in `build_nested_attribute_histogram_with_categories`
+(~200 lines), non-deterministic category histogram extraction paths (4 candidate paths
+searched at runtime), and ~200 lines of duplicated tick label/value extraction in
+`spec_builder.rs`. Two bugs had already been fixed before this session: presence filters were
+not ANDed into histogram bounds queries, and per-category histograms used `{x_field}` as
+the inner container name, making extraction non-deterministic.
+
+This session replaced all of the above with a clean, type-agnostic architecture.
+
+## Key decisions
+
+- **New `field.rs` module as single source of truth**: All field-type resolution
+  (`is_rank`, `is_attribute`, `get_attribute_value_field`) and all ES nested-path logic
+  now live in a `FieldStorage` enum (`Attribute{key, es_value_field}`, `Lineage{rank}`,
+  `Root{es_field}`). Methods encode every path decision in one place, eliminating drift
+  between builder and extractor code.
+
+- **Canonical container naming enforced in `build_inner_x_agg_block`**: The per-category
+  inner x aggregation container is now always `"by_key"` (attribute) or `"at_rank"`
+  (lineage), never `{x_field}`. This makes extraction paths `O(1)` pointer dereferences
+  instead of a 4-candidate runtime search. The previous `{x_field}` naming was the root
+  cause of the "most cats fall in first bin" bug fixed in the prior session.
+
+- **`GenericBucketAgg` replaces 5 typed builder structs**: A single
+  `GenericBucketAgg { storage, bucket_type, bucket_params }` implements `AggBuilder` for
+  all field types. `build()` delegates wrapping to `wrap_in_nested(storage, …)`.
+  `extract()` delegates path resolution to `storage.main_bucket_path(agg_name, bucket_type)`.
+
+- **`build_nested_attribute_histogram_with_categories` reduced from ~200 to ~80 lines**:
+  Replaced 4 hand-written `(x_type, cat_type)` cases with type-agnostic composition:
+  `build_inner_x_agg_block` + `wrap_cat_in_nested` + `inject_category_histograms`.
+  Adding a new field storage type in future requires no changes here.
+
+- **`fill_tick_data_from_buckets` helper in `spec_builder.rs`**: Extracts the identical
+  ~60-line pattern (keyword label list OR numeric boundary computation) shared between the
+  histogram x-axis and scatter x-axis branches. Y-axis handling reuses the same helper
+  with an `explicit_labels` parameter for `yBucketLabels`. Raw scalar `yBuckets` values
+  are wrapped into pseudo-bucket objects before dispatch so the helper has a uniform
+  interface.
+
+- **`grouped_bar_size_px` helper in `genomehubs-query/src/report/mod.rs`**: Extracted
+  the 5-line pixel-width computation duplicated in the keyword and numeric branches of the
+  grouped histogram path into a pure helper: `grouped_bar_size_px(n_bins, n_cats, plot_width_px)`.
+
+- **No speculative abstraction**: Only patterns that appeared at least twice in the
+  current code were extracted. The VL `vl_scatter` function was not restructured because
+  its internal branching differs significantly from `vl_histogram`.
+
+## Interaction log
+
+| Turn | Role  | Summary                                                                           |
+| ---- | ----- | --------------------------------------------------------------------------------- |
+| 1    | User  | Requested full modular refactor of histogram/scatter report infrastructure        |
+| 2    | Agent | Created `field.rs` with `FieldStorage`, `resolve_field_storage`, wrapping helpers |
+| 3    | Agent | Registered `pub mod field;` in `mod.rs`; refactored `agg.rs` (GenericBucketAgg)   |
+| 4    | Agent | Refactored `bounds.rs` — removed duplicated helpers, imported from `field.rs`     |
+| 5    | Agent | Refactored `report_types.rs` — new `extract_cat_histograms`, `align_to_keys`      |
+| 6    | Agent | Confirmed clean build (0 errors, 0 warnings)                                      |
+| 7    | Agent | Added `bucket_keys_to_boundaries` + `fill_tick_data_from_buckets` to spec_builder |
+| 8    | Agent | (New session) Replaced duplicate tick extraction blocks in `build_plot_spec`      |
+| 9    | Agent | Extracted `grouped_bar_size_px` from `vl_histogram` numeric/keyword branches      |
+| 10   | Agent | Confirmed clean `cargo clippy --all-targets -- -D warnings` across workspace      |
+
+## Changes made
+
+### `crates/genomehubs-api/src/report/field.rs` (new file, ~350 lines)
+
+- `FieldStorage` enum with `nested_path()`, `key_filter()`, `x_container_name()`,
+  `cat_wrapper_names()`, `presence_filter()`, `bucket_field()`, `main_bucket_path()`,
+  `cat_histograms_base()`, `inner_x_path()`
+- `resolve_field_storage(field, value_type, cache)` — prefers `TaxonRank` over `Attribute`
+- `is_rank()`, `is_attribute()`, `get_attribute_value_field()` — canonical, previously duplicated
+- `wrap_in_nested()`, `wrap_cat_in_nested()`, `build_inner_x_agg_block()` — composition helpers
+
+### `crates/genomehubs-api/src/report/agg.rs`
+
+- Removed: `HistogramAggBuilder`, `DateHistogramAggBuilder`, `TermsAggBuilder`,
+  `StatsAggBuilder`, `NestedAttributeAggBuilder`, `NestedRankAggBuilder`,
+  `CompositeAggBuilder`, `ReverseNestedAggBuilder`, `GeoHashAggBuilder`
+- Added: `GenericBucketAgg` — single `AggBuilder` impl for all field types
+- `build_nested_attribute_histogram_with_categories`: 200 lines → 80 lines, fully type-agnostic
+- `inject_category_histograms`: uses `x_storage.x_container_name()` for deterministic insertion
+
+### `crates/genomehubs-api/src/report/bounds.rs`
+
+- Removed duplicated `is_rank`, `is_attribute`, `get_attribute_value_field` functions
+- Imported canonical versions from `field.rs`
+
+### `crates/genomehubs-api/src/report/report_types.rs`
+
+- Removed `presence_filter_for_axis` — replaced by `FieldStorage::presence_filter()`
+- Replaced old 4-candidate-path `extract_cat_histograms` with `FieldStorage`-based deterministic version
+- Added `align_to_keys` shared helper for per-category count alignment
+
+### `crates/genomehubs-api/src/report/spec_builder.rs`
+
+- Added `bucket_keys_to_boundaries(sorted_keys, axis_obj)` — `N` keys → `N+1` VL bin boundaries
+- Added `fill_tick_data_from_buckets(meta, axis_obj, buckets, label_source)` — unified tick extraction
+- Replaced two ~60-line duplicated blocks (histogram x-axis and scatter x-axis) with calls to helper
+- Replaced ~80-line y-axis block (scatter) with wrapped call to same helper using `explicit_labels`
+
+### `crates/genomehubs-query/src/report/mod.rs`
+
+- Added `grouped_bar_size_px(n_bins, n_cats, plot_width_px)` — extracted from two identical 5-line computations in `vl_histogram`
+
+## Notes / warnings
+
+- The new `"by_key"` / `"at_rank"` canonical container names are a **breaking change** for
+  any client-side code that expected `{x_field}` as the container name. ES aggregation
+  responses are computed fresh on every request, so nothing stored server-side is affected,
+  but any client that caches or manually inspects the raw ES response shape should be updated.
+
+- `geohash_precision_for_size` in `agg.rs` has `#[allow(dead_code)]` — it is used by the
+  geo report path which is not currently exercised by the test suite.
+
+- The scatter `vl_scatter` function in `genomehubs-query/src/report/mod.rs` still has some
+  duplication with `vl_histogram` in the category handling paths. Full extraction was deferred
+  because the two functions diverge significantly in their data transformation logic.
+
+- Pending feature (deferred): 3-level nested binning for x/y/cat scatter (x-binned + y-binned +
+  category breakdown). The `FieldStorage` composition pattern makes this straightforward to add.
diff --git a/config/swagger-examples-goat.yaml b/config/swagger-examples-goat.yaml
index 6a0d94b..72382e4 100644
--- a/config/swagger-examples-goat.yaml
+++ b/config/swagger-examples-goat.yaml
@@ -256,3 +256,31 @@ examples:
         - record_id: "7227"
           result: taxon
           fields: ["genome_size"]
+
+  # ── POST /api/v3/report/batch ─────────────────────────────────────────────
+
+  - path: "/api/v3/report/batch"
+    method: post
+    name: chordata_and_nematoda_report_batch
+    summary: "Run arc reports for Chordata and Nematoda in parallel"
+    value:
+      concurrency: 2
+      reports:
+        - query:
+            taxa: ["Chordata"]
+            taxon_filter_type: tree
+            rank: species
+          report:
+            report: arc
+            feature: "bioproject=prjna533106"
+            reference: assembly_level
+            context: ""
+        - query:
+            taxa: ["Nematoda"]
+            taxon_filter_type: tree
+            rank: species
+          report:
+            report: arc
+            feature: "bioproject=prjna533106"
+            reference: assembly_level
+            context: ""
diff --git a/crates/genomehubs-api/Cargo.toml b/crates/genomehubs-api/Cargo.toml
index ceb03eb..08fcc02 100644
--- a/crates/genomehubs-api/Cargo.toml
+++ b/crates/genomehubs-api/Cargo.toml
@@ -2,6 +2,7 @@
 name = "genomehubs-api"
 version = "0.1.0"
 edition = "2021"
+default-run = "genomehubs-api"
 
 [dependencies]
 axum = { version = "0.7", features = ["tokio", "http1", "macros"] }
diff --git a/crates/genomehubs-api/src/main.rs b/crates/genomehubs-api/src/main.rs
index 3e3ac13..456eb5d 100644
--- a/crates/genomehubs-api/src/main.rs
+++ b/crates/genomehubs-api/src/main.rs
@@ -63,6 +63,7 @@ pub struct AppState {
         routes::record::get_record,
         routes::record_batch::post_record_batch,
         routes::report::post_report,
+        routes::report_batch::post_report_batch,
         routes::positional::post_positional,
         routes::result_fields::get_result_fields,
         routes::search::post_search,
@@ -100,6 +101,9 @@ pub struct AppState {
         routes::record_batch::RecordBatchResponse,
         routes::report::ReportRequest,
         routes::report::ReportResponse,
+        routes::report_batch::ReportBatchRequest,
+        routes::report_batch::ReportBatchResponse,
+        routes::report_batch::ReportBatchResultItem,
         routes::positional::PositionalRequest,
         routes::positional::PositionalResponse,
         routes::metadata::MetadataResponse,
@@ -461,6 +465,10 @@ async fn main() {
             "/api/v3/report",
             axum::routing::post(routes::report::post_report),
         )
+        .route(
+            "/api/v3/report/batch",
+            axum::routing::post(routes::report_batch::post_report_batch),
+        )
         .route(
             "/api/v3/positional",
             axum::routing::post(routes::positional::post_positional),
diff --git a/crates/genomehubs-api/src/report/agg.rs b/crates/genomehubs-api/src/report/agg.rs
index 3298557..59e607e 100644
--- a/crates/genomehubs-api/src/report/agg.rs
+++ b/crates/genomehubs-api/src/report/agg.rs
@@ -1,13 +1,23 @@
 //! Elasticsearch aggregation builders for report axes.
 //!
-//! Each `AggBuilder` produces the JSON fragment for one ES aggregation, and extracts
-//! the bucket list from the response. Builders are composable: use `CompositeAggBuilder`
-//! to nest them (e.g., histogram containing stats).
+//! All field-type detection and path logic is centralised in [`super::field`];
+//! this module is responsible only for composing valid ES aggregation JSON and
+//! extracting buckets from responses.
+//!
+//! ## Key types
+//! - [`AggBuilder`] — trait for all bucket aggregations
+//! - [`GenericBucketAgg`] — single implementation that handles attribute/lineage/root fields
+//! - [`build_nested_attribute_histogram_with_categories`] — type-agnostic 2-level agg
+//! - [`build_nested_attribute_scatter_agg`] — scatter 2-level agg with optional categories
 
 use serde_json::{json, Value};
 
 use crate::es_metadata::MetadataCache;
-use genomehubs_query::report::axis::{Scale, ValueType};
+use crate::report::field::{
+    build_inner_x_agg_block, resolve_field_storage, wrap_cat_in_nested, wrap_in_nested,
+    FieldStorage,
+};
+use genomehubs_query::report::axis::{DateInterval, Scale, ValueType};
 use genomehubs_query::report::{AxisSpec, BoundsResult};
 use std::sync::Arc;
 
@@ -27,302 +37,43 @@ pub trait AggBuilder: Send + Sync {
     fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets;
 }
 
-/// Build a numeric `histogram` aggregation.
-pub struct HistogramAggBuilder {
-    pub field: String,
-    pub interval: f64,
-    pub min: f64,
-    pub max: f64,
-    pub script: Option<String>,
-}
-
-impl AggBuilder for HistogramAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        let mut hist = json!({
-            "field": &self.field,
-            "interval": self.interval,
-            "extended_bounds": { "min": self.min, "max": self.max },
-            "min_doc_count": 0
-        });
-
-        if let Some(script) = &self.script {
-            hist["script"] = Value::String(script.clone());
-        }
-
-        json!({
-            agg_name: {
-                "histogram": hist
-            }
-        })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!("/aggregations/{agg_name}/buckets"))
-            .and_then(|b| b.as_array())
-            .cloned()
-            .unwrap_or_default()
-    }
-}
-
-/// Build a `date_histogram` aggregation with `calendar_interval`.
-pub struct DateHistogramAggBuilder {
-    pub field: String,
-    pub calendar_interval: String, // "1d", "1w", "1M", "3M", "1y", "10y"
-    pub time_zone: Option<String>,
-}
-
-impl AggBuilder for DateHistogramAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        let mut agg = json!({
-            "date_histogram": {
-                "field": &self.field,
-                "calendar_interval": &self.calendar_interval,
-                "min_doc_count": 0
-            }
-        });
-        if let Some(tz) = &self.time_zone {
-            agg["date_histogram"]["time_zone"] = Value::String(tz.clone());
-        }
-        json!({ agg_name: agg })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!("/aggregations/{agg_name}/buckets"))
-            .and_then(|b| b.as_array())
-            .cloned()
-            .unwrap_or_default()
-    }
-}
-
-/// Build a `terms` aggregation for categorical axes.
-pub struct TermsAggBuilder {
-    pub field: String,
-    pub size: usize,
-    pub include: Option<Vec<String>>, // fixed term list
-}
-
-impl AggBuilder for TermsAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        let mut terms = json!({
-            "field": format!("{}.keyword", &self.field),
-            "size": self.size,
-            "min_doc_count": 0
-        });
-        if let Some(include) = &self.include {
-            terms["include"] = json!(include);
-        }
-        json!({ agg_name: { "terms": terms } })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!("/aggregations/{agg_name}/buckets"))
-            .and_then(|b| b.as_array())
-            .cloned()
-            .unwrap_or_default()
-    }
-}
-
-/// Build a `stats` sub-aggregation (used for Y-axis values within X buckets).
-#[allow(dead_code)]
-pub struct StatsAggBuilder {
-    pub field: String,
-}
+// ── GenericBucketAgg ─────────────────────────────────────────────────────────
 
-impl AggBuilder for StatsAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        json!({ agg_name: { "stats": { "field": &self.field } } })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        // Stats returns a single object, not a bucket list; return wrapped
-        resp.pointer(&format!("/aggregations/{agg_name}"))
-            .cloned()
-            .into_iter()
-            .collect()
-    }
-}
-
-/// Build a `geohash_grid` aggregation for map reports.
-pub struct GeoHashAggBuilder {
-    pub field: String,
-    pub precision: u8,
-    pub size: usize,
+/// A single `AggBuilder` implementation that covers attribute, lineage and
+/// root-level fields by deriving the nested path from [`FieldStorage`].
+///
+/// Replaces the previous `NestedAttributeAggBuilder`, `NestedRankAggBuilder`,
+/// `HistogramAggBuilder`, `DateHistogramAggBuilder` and `TermsAggBuilder`
+/// specialisations.  Call [`agg_builder_for`] to obtain a boxed instance.
+pub struct GenericBucketAgg {
+    /// Where the field lives in the ES document.
+    pub storage: FieldStorage,
+    /// ES aggregation type: `"terms"`, `"histogram"`, `"date_histogram"`, etc.
+    pub bucket_type: String,
+    /// Parameters object placed inside `{ bucket_type: params }`.
+    pub bucket_params: Value,
 }
 
-impl AggBuilder for GeoHashAggBuilder {
+impl AggBuilder for GenericBucketAgg {
     fn build(&self, agg_name: &str) -> Value {
-        json!({
-            agg_name: {
-                "geohash_grid": {
-                    "field": &self.field,
-                    "precision": self.precision,
-                    "size": self.size
-                }
-            }
-        })
+        // ES requires {agg_name: {agg_type: params}}; the inner agg is named after its own type.
+        // The named agg is then wrapped in the nested envelope for this storage kind.
+        let named_agg =
+            json!({ &self.bucket_type: { &self.bucket_type: self.bucket_params.clone() } });
+        let container = self.storage.x_container_name();
+        let wrapped = wrap_in_nested(&self.storage, container, named_agg);
+        json!({ agg_name: wrapped })
     }
 
     fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!("/aggregations/{agg_name}/buckets"))
+        let path = self.storage.main_bucket_path(agg_name, &self.bucket_type);
+        resp.pointer(&path)
             .and_then(|b| b.as_array())
             .cloned()
             .unwrap_or_default()
     }
 }
 
-/// Build a `reverse_nested` aggregation (used for tree node counts).
-#[allow(dead_code)]
-pub struct ReverseNestedAggBuilder;
-
-impl AggBuilder for ReverseNestedAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        json!({ agg_name: { "reverse_nested": {} } })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!("/aggregations/{agg_name}"))
-            .cloned()
-            .into_iter()
-            .collect()
-    }
-}
-
-/// Compose two `AggBuilder`s: parent builds outer agg; inner is nested within each bucket.
-///
-/// Used for patterns like: x-axis histogram → y-axis stats within each x bucket.
-#[allow(dead_code)]
-pub struct CompositeAggBuilder<'a> {
-    pub outer: &'a dyn AggBuilder,
-    pub inner: &'a dyn AggBuilder,
-    pub inner_name: String,
-}
-
-impl<'a> AggBuilder for CompositeAggBuilder<'a> {
-    fn build(&self, agg_name: &str) -> Value {
-        let mut outer = self.outer.build(agg_name);
-        let inner_agg = self.inner.build(&self.inner_name);
-
-        // Recursively inject inner agg into outer's nested structure
-        self.inject_inner_agg(&mut outer, agg_name, &inner_agg);
-        outer
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        self.outer.extract(resp, agg_name)
-    }
-}
-
-impl<'a> CompositeAggBuilder<'a> {
-    /// Recursively inject inner aggregation into nested structures.
-    /// Handles both direct aggregations and nested attribute aggregations.
-    #[allow(dead_code)]
-    fn inject_inner_agg(&self, outer: &mut Value, agg_name: &str, inner_agg: &Value) {
-        if let Some(outer_obj) = outer.get_mut(agg_name) {
-            // First try direct injection (for simple histogram, terms, etc.)
-            for key in &["histogram", "date_histogram", "terms", "geohash_grid"] {
-                if outer_obj.get(key).is_some() {
-                    outer_obj["aggs"] = inner_agg.clone();
-                    return;
-                }
-            }
-
-            // If not found directly, look inside nested aggregations
-            if outer_obj.get("nested").is_some() {
-                if let Some(nested_aggs) = outer_obj.get_mut("aggs") {
-                    let filter_agg_opt = if nested_aggs.get("by_key").is_some() {
-                        nested_aggs.get_mut("by_key")
-                    } else {
-                        nested_aggs.get_mut("by_value")
-                    };
-
-                    if let Some(filter_agg) = filter_agg_opt {
-                        if let Some(inner_aggs) = filter_agg.get_mut("aggs") {
-                            for key in &["histogram", "date_histogram", "terms", "geohash_grid"] {
-                                if inner_aggs.get(key).is_some() {
-                                    if self.inner_name == "cat_agg" {
-                                        if let Some(agg_def) = inner_aggs.get_mut(key) {
-                                            if agg_def.get("aggs").is_none() {
-                                                agg_def["aggs"] = json!({});
-                                            }
-                                            if let Some(cat_agg_inner) = inner_agg.get("cat_agg") {
-                                                agg_def["aggs"]["cat_agg"] = cat_agg_inner.clone();
-                                            }
-                                        }
-                                    } else if let Some(inner_value) =
-                                        inner_agg.get(&self.inner_name)
-                                    {
-                                        inner_aggs[&self.inner_name.clone()] = inner_value.clone();
-                                    } else {
-                                        inner_aggs[&self.inner_name.clone()] = inner_agg.clone();
-                                    }
-                                    return;
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-/// Determine if a field is a taxonomic rank.
-fn is_rank(field: &str, cache: &Option<Arc<tokio::sync::RwLock<MetadataCache>>>) -> bool {
-    if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            return c.taxonomic_ranks.contains(&field.to_string());
-        }
-    }
-    false
-}
-
-/// Determine if a field is an attribute.
-fn is_attribute(field: &str, cache: &Option<Arc<tokio::sync::RwLock<MetadataCache>>>) -> bool {
-    if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            if let Value::Object(groups) = &c.attr_types {
-                for (_, group) in groups {
-                    if let Value::Object(fields) = group {
-                        if fields.contains_key(field) {
-                            return true;
-                        }
-                    }
-                }
-            }
-        }
-    }
-    false
-}
-
-/// Get the exact value field for an attribute from metadata.
-/// Returns the processed_summary field (e.g., "attributes.long_value" for type=long).
-/// This MUST come from metadata, not guessed.
-fn get_attribute_value_field(
-    field: &str,
-    cache: &Option<Arc<tokio::sync::RwLock<MetadataCache>>>,
-) -> Result<String, String> {
-    if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            if let Value::Object(groups) = &c.attr_types {
-                // Search all groups for this field
-                for (_, group) in groups {
-                    if let Value::Object(fields) = group {
-                        if let Some(Value::Object(meta_obj)) = fields.get(field) {
-                            // Get processed_summary which is the exact ES field name
-                            if let Some(ps) =
-                                meta_obj.get("processed_summary").and_then(|v| v.as_str())
-                            {
-                                return Ok(format!("attributes.{}", ps));
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    Err(format!("field '{}' not found in metadata", field))
-}
-
 /// Return the ES aggregation type name (and matching agg name) for an x-axis value type.
 ///
 /// The agg is always named the same as its type so extraction paths are predictable:
@@ -357,6 +108,22 @@ fn build_x_agg_params(
             }
             t
         }
+        ValueType::Date => {
+            // For date histograms use calendar_interval.
+            let calendar_interval = x_bounds
+                .interval
+                .map(|i| i.to_es_interval().to_string())
+                .unwrap_or_else(|| "1y".to_string());
+            let mut params = json!({
+                "field": x_value_field,
+                "calendar_interval": calendar_interval,
+                "min_doc_count": 0
+            });
+            if let Some(domain_arr) = x_bounds.domain {
+                params["extended_bounds"] = json!({ "min": domain_arr[0], "max": domain_arr[1] });
+            }
+            params
+        }
         _ => {
             let [domain_min, domain_max] = x_bounds.domain.unwrap_or([0.0, 1.0]);
             let (hist_min, hist_max, script_opt) = match x_spec.opts.scale {
@@ -394,79 +161,172 @@ fn build_x_agg_params(
     (agg_type, params)
 }
 
-/// Build the `yHistograms` sub-aggregation used inside each x-histogram bucket.
-///
-/// Escapes nested context via `reverse_nested`, re-enters attributes, and runs a histogram
-/// on the y-field value. Supports log/sqrt scale transforms via ES script.
-///
-/// ```text
-/// yHistograms: reverse_nested
-///   by_attribute: nested(attributes)
-///     {y_field}: filter(y_field)
-///       histogram: histogram(y_value_field)
-/// ```
-fn build_y_histogram_sub_agg(
+#[allow(clippy::too_many_arguments)]
+/// Build a Y-axis sub-aggregation that adapts to the Y value type.
+/// For numeric values this produces a histogram, for date a date_histogram,
+/// and for keyword/taxon-rank a `terms` (named `top_terms`) aggregation.
+fn build_y_sub_agg(
     y_field: &str,
     y_value_field: &str,
+    y_value_type: ValueType,
     y_scale: Scale,
-    y_domain_min: f64,
-    y_domain_max: f64,
+    y_bounds_min: f64,
+    y_bounds_max: f64,
     y_ticks: usize,
+    y_interval: Option<DateInterval>,
 ) -> Value {
-    let (hist_min, hist_max, script_opt) = match y_scale {
-        Scale::Log | Scale::Log10 => {
-            let mn = y_domain_min.max(1.0).log10();
-            let mx = y_domain_max.max(1.0).log10();
-            (mn, mx, Some("Math.log10(_value)".to_string()))
-        }
-        Scale::Log2 => {
-            let mn = y_domain_min.max(1.0).log2();
-            let mx = y_domain_max.max(1.0).log2();
-            (
-                mn,
-                mx,
-                Some("Math.max(Math.log(_value)/Math.log(2), 0)".to_string()),
-            )
+    match y_value_type {
+        ValueType::TaxonRank => {
+            // For taxon ranks, aggregate within the `lineage` nested path and
+            // filter ancestors by the requested rank (e.g., "genus"), then
+            // terms-aggregate on `lineage.taxon_id` (or configured y_value_field).
+            let mut y_field_agg = serde_json::Map::new();
+            y_field_agg.insert(
+                y_field.to_string(),
+                json!({
+                    "filter": { "term": { "lineage.taxon_rank": y_field } },
+                    "aggs": {
+                        "top_terms": {
+                            "terms": {
+                                "field": y_value_field,
+                                "size": y_ticks,
+                                "min_doc_count": 0
+                            }
+                        }
+                    }
+                }),
+            );
+            json!({
+                "reverse_nested": {},
+                "aggs": {
+                    "by_attribute": {
+                        "nested": { "path": "lineage" },
+                        "aggs": Value::Object(y_field_agg)
+                    }
+                }
+            })
         }
-        Scale::Sqrt => {
-            let mn = y_domain_min.max(0.0).sqrt();
-            let mx = y_domain_max.sqrt();
-            (mn, mx, None)
+        ValueType::Keyword => {
+            // terms agg named `top_terms` inside the `attributes` nested path
+            let mut y_field_agg = serde_json::Map::new();
+            y_field_agg.insert(
+                y_field.to_string(),
+                json!({
+                    "filter": { "term": { "attributes.key": y_field } },
+                    "aggs": {
+                        "top_terms": {
+                            "terms": {
+                                "field": y_value_field,
+                                "size": y_ticks,
+                                "min_doc_count": 0
+                            }
+                        }
+                    }
+                }),
+            );
+            json!({
+                "reverse_nested": {},
+                "aggs": {
+                    "by_attribute": {
+                        "nested": { "path": "attributes" },
+                        "aggs": Value::Object(y_field_agg)
+                    }
+                }
+            })
         }
-        _ => (y_domain_min, y_domain_max, None),
-    };
+        ValueType::Date => {
+            // date_histogram; calendar_interval comes from `y_interval`, defaulting to "1y"
+            let calendar_interval = y_interval
+                .map(|i| i.to_es_interval().to_string())
+                .unwrap_or_else(|| "1y".to_string());
 
-    let interval = (hist_max - hist_min) / y_ticks.max(1) as f64;
+            let mut date_hist_params = json!({
+                "field": y_value_field,
+                "calendar_interval": calendar_interval,
+                "min_doc_count": 0
+            });
+            // Ensure buckets for empty intervals cover the full domain
+            date_hist_params["extended_bounds"] =
+                json!({ "min": y_bounds_min, "max": y_bounds_max });
 
-    let mut hist_params = json!({
-        "field": y_value_field,
-        "interval": interval,
-        "extended_bounds": { "min": hist_min, "max": hist_max },
-        "offset": hist_min,
-        "min_doc_count": 0
-    });
-    if let Some(script) = script_opt {
-        hist_params["script"] = Value::String(script);
-    }
+            let mut y_field_agg = serde_json::Map::new();
+            y_field_agg.insert(
+                y_field.to_string(),
+                json!({
+                    "filter": { "term": { "attributes.key": y_field } },
+                    "aggs": {
+                        "date_histogram": { "date_histogram": date_hist_params }
+                    }
+                }),
+            );
+            json!({
+                "reverse_nested": {},
+                "aggs": {
+                    "by_attribute": {
+                        "nested": { "path": "attributes" },
+                        "aggs": Value::Object(y_field_agg)
+                    }
+                }
+            })
+        }
+        _ => {
+            // Numeric histogram path (existing behaviour)
+            let (hist_min, hist_max, script_opt) = match y_scale {
+                Scale::Log | Scale::Log10 => {
+                    let mn = y_bounds_min.max(1.0).log10();
+                    let mx = y_bounds_max.max(1.0).log10();
+                    (mn, mx, Some("Math.log10(_value)".to_string()))
+                }
+                Scale::Log2 => {
+                    let mn = y_bounds_min.max(1.0).log2();
+                    let mx = y_bounds_max.max(1.0).log2();
+                    (
+                        mn,
+                        mx,
+                        Some("Math.max(Math.log(_value)/Math.log(2), 0)".to_string()),
+                    )
+                }
+                Scale::Sqrt => {
+                    let mn = y_bounds_min.max(0.0).sqrt();
+                    let mx = y_bounds_max.sqrt();
+                    (mn, mx, None)
+                }
+                _ => (y_bounds_min, y_bounds_max, None),
+            };
 
-    let mut y_field_agg = serde_json::Map::new();
-    y_field_agg.insert(
-        y_field.to_string(),
-        json!({
-            "filter": { "term": { "attributes.key": y_field } },
-            "aggs": { "histogram": { "histogram": hist_params } }
-        }),
-    );
+            let interval = (hist_max - hist_min) / y_ticks.max(1) as f64;
 
-    json!({
-        "reverse_nested": {},
-        "aggs": {
-            "by_attribute": {
-                "nested": { "path": "attributes" },
-                "aggs": Value::Object(y_field_agg)
+            let mut hist_params = json!({
+                "field": y_value_field,
+                "interval": interval,
+                "extended_bounds": { "min": hist_min, "max": hist_max },
+                "offset": hist_min,
+                "min_doc_count": 0
+            });
+            if let Some(script) = script_opt {
+                hist_params["script"] = Value::String(script);
             }
+
+            let mut y_field_agg = serde_json::Map::new();
+            y_field_agg.insert(
+                y_field.to_string(),
+                json!({
+                    "filter": { "term": { "attributes.key": y_field } },
+                    "aggs": { "histogram": { "histogram": hist_params } }
+                }),
+            );
+
+            json!({
+                "reverse_nested": {},
+                "aggs": {
+                    "by_attribute": {
+                        "nested": { "path": "attributes" },
+                        "aggs": Value::Object(y_field_agg)
+                    }
+                }
+            })
         }
-    })
+    }
 }
 
 #[allow(clippy::too_many_arguments)]
@@ -500,20 +360,21 @@ pub fn build_nested_attribute_scatter_agg(
     show_other: bool,
     cache: &Option<Arc<tokio::sync::RwLock<MetadataCache>>>,
 ) -> Result<Value, String> {
-    let x_field = x_spec.field.as_str();
-    let x_value_field = get_attribute_value_field(x_field, cache)?;
-    let y_value_field = get_attribute_value_field(y_field, cache)?;
-
+    let x_storage = resolve_field_storage(&x_spec.field, x_spec.value_type, cache)?;
+    let y_storage = resolve_field_storage(y_field, y_bounds.value_type, cache)?;
+    let x_value_field = x_storage.bucket_field().to_string();
+    let y_value_field = y_storage.bucket_field().to_string();
     let (x_agg_type, x_agg_params) = build_x_agg_params(x_spec, &x_value_field, x_bounds);
-
     let [y_domain_min, y_domain_max] = y_bounds.domain.unwrap_or([0.0, 1.0]);
-    let y_histogram_sub_agg = build_y_histogram_sub_agg(
+    let y_histogram_sub_agg = build_y_sub_agg(
         y_field,
         &y_value_field,
+        y_bounds.value_type,
         y_scale,
         y_domain_min,
         y_domain_max,
         y_bounds.tick_count,
+        y_bounds.interval,
     );
 
     // Main x agg with nested y-histograms.
@@ -525,8 +386,8 @@ pub fn build_nested_attribute_scatter_agg(
 
     // Category histograms (optional).
     let category_histograms_opt = if let Some(cat) = cat_field {
-        let cat_value_field = get_attribute_value_field(cat, cache)?;
         let cat_vt = cat_value_type.unwrap_or(ValueType::Keyword);
+        let cat_storage = resolve_field_storage(cat, cat_vt, cache)?;
         let is_numeric_cat = !matches!(cat_vt, ValueType::Keyword | ValueType::TaxonRank);
 
         // Skip only when keyword cat has no known labels.
@@ -536,72 +397,81 @@ pub fn build_nested_attribute_scatter_agg(
             let default_bounds = cat_bounds.unwrap_or(x_bounds);
             let (by_value_agg_type, by_value_def) = build_by_value_agg(
                 cat_vt,
-                &cat_value_field,
+                cat_storage.bucket_field(),
                 default_bounds,
                 cat_labels,
                 show_other,
             );
 
             // Per-cat x agg: same type as main x, with y-histograms nested inside.
-            let mut cat_x_field_agg = serde_json::Map::new();
-            cat_x_field_agg.insert(
-                x_field.to_string(),
-                json!({
-                    "filter": { "term": { "attributes.key": x_field } },
-                    "aggs": { x_agg_type: x_with_y.clone() }
-                }),
+            // Uses build_inner_x_agg_block so extraction paths remain deterministic.
+            // Pass raw x_agg_params; build_inner_x_agg_block wraps them in the
+            // required {name: {type: params}} nesting.  yHistograms is provided
+            // as sub_aggs so it sits inside the x bucket agg, not alongside it.
+            let per_cat_x_with_y = build_inner_x_agg_block(
+                &x_storage,
+                x_agg_type,
+                x_agg_params.clone(),
+                Some(json!({ "yHistograms": y_histogram_sub_agg.clone() })),
             );
 
+            let by_value_with_inner = json!({
+                "by_value": {
+                    by_value_agg_type: by_value_def,
+                    "aggs": per_cat_x_with_y
+                }
+            });
+            let cat_aggs = wrap_cat_in_nested(&cat_storage, by_value_with_inner);
+
             Some(json!({
                 "reverse_nested": {},
-                "aggs": {
-                    "by_attribute": {
-                        "nested": { "path": "attributes" },
-                        "aggs": {
-                            "by_cat": {
-                                "filter": { "term": { "attributes.key": cat } },
-                                "aggs": {
-                                    "by_value": {
-                                        by_value_agg_type: by_value_def,
-                                        "aggs": {
-                                            "histogram": {
-                                                "reverse_nested": {},
-                                                "aggs": {
-                                                    "by_attribute": {
-                                                        "nested": { "path": "attributes" },
-                                                        "aggs": Value::Object(cat_x_field_agg)
-                                                    }
-                                                }
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
+                "aggs": cat_aggs
             }))
         }
     } else {
         None
     };
 
-    let mut by_key_aggs = json!({ x_agg_type: x_with_y });
+    // Build main x agg via generic factory and inject category histograms.
+    let x_agg_builder = agg_builder_for(x_spec, x_bounds, cache)?;
+    let mut final_agg = x_agg_builder.build(agg_name);
+
+    // Set the inner `x_agg_type` entry to `x_with_y` (the x bucket agg with yHistograms nested inside).
+    inject_y_histograms_into_agg(&mut final_agg, agg_name, &x_storage, x_agg_type, x_with_y);
+
     if let Some(cat_hist) = category_histograms_opt {
-        by_key_aggs["categoryHistograms"] = cat_hist;
+        inject_category_histograms(&mut final_agg, agg_name, &x_storage, cat_hist);
     }
 
-    Ok(json!({
-        agg_name: {
-            "nested": { "path": "attributes" },
-            "aggs": {
-                "by_key": {
-                    "filter": { "term": { "attributes.key": x_field } },
-                    "aggs": by_key_aggs
-                }
-            }
+    Ok(final_agg)
+}
+
+/// Inject `x_with_y` (x bucket agg with its yHistograms sub-agg) into the built x agg; no-op if the target `aggs` map is missing.
+fn inject_y_histograms_into_agg(
+    final_agg: &mut Value,
+    agg_name: &str,
+    x_storage: &FieldStorage,
+    x_agg_type: &str,
+    x_with_y: Value,
+) {
+    let container = x_storage.x_container_name();
+    let root = match final_agg.get_mut(agg_name) {
+        Some(v) => v,
+        None => return,
+    };
+    let aggs_obj = match root.get_mut("aggs") {
+        Some(v) => v,
+        None => return,
+    };
+    if container.is_empty() {
+        aggs_obj[x_agg_type] = x_with_y;
+        return;
+    }
+    if let Some(container_obj) = aggs_obj.get_mut(container) {
+        if let Some(inner_aggs) = container_obj.get_mut("aggs") {
+            inner_aggs[x_agg_type] = x_with_y;
         }
-    }))
+    }
 }
 
 /// Build the `by_value` aggregation used for per-category sub-histograms.
@@ -628,6 +498,20 @@ fn build_by_value_agg(
             }
             ("filters", def)
         }
+        ValueType::Date => {
+            let calendar_interval = cat_bounds
+                .interval
+                .map(|i| i.to_es_interval().to_string())
+                .unwrap_or_else(|| "1y".to_string());
+            (
+                "date_histogram",
+                json!({
+                    "field": cat_value_field,
+                    "calendar_interval": calendar_interval,
+                    "min_doc_count": 0
+                }),
+            )
+        }
         _ => {
             let [domain_min, domain_max] = cat_bounds.domain.unwrap_or([0.0, 1.0]);
             let ticks = cat_bounds.tick_count.max(1) as f64;
@@ -647,28 +531,25 @@ fn build_by_value_agg(
 }
 
 #[allow(clippy::too_many_arguments)]
-/// Build a complete nested-attribute histogram aggregation with per-category sub-histograms.
-///
-/// Supports any x-axis value type: numeric fields use `histogram`, keyword/rank fields use
-/// `terms`. The cat axis is always filtered by term (keyword/rank); pass a keyword or rank
-/// field for `cat_field`.
+/// Build a complete histogram aggregation with per-category sub-histograms.
 ///
-/// Supports any cat-axis value type: keyword/rank fields use named `filters` (one per label),
-/// numeric fields use a `histogram` agg bucketed by the cat domain.
+/// Fully type-agnostic: any combination of `(x_storage, cat_storage)` —
+/// attribute × attribute, attribute × lineage, lineage × attribute, lineage × lineage —
+/// is handled by composing [`FieldStorage`] values from [`field`][crate::report::field]
+/// rather than by hand-writing separate cases.
 ///
-/// # Aggregation structure
+/// # Aggregation structure (generalised)
 /// ```text
-/// {agg_name}: nested(attributes)
-///   by_key: filter(x_field)
-///     {x_agg_type}: histogram or terms (main x-axis counts)
-///     categoryHistograms: reverse_nested
-///       by_attribute: nested(attributes)
-///         by_cat: filter(cat_field)
-///           by_value: filters (keyword) or histogram (numeric)
-///             histogram: reverse_nested
-///               by_attribute: nested(attributes)
-///                 {x_field}: filter(x_field)
-///                   {x_agg_type}: histogram or terms (per-category counts)
+/// {agg_name}:
+///   [x nested envelope]
+///     x_container: filter(x)
+///       {x_bucket_type}: …           ← main x counts
+///       categoryHistograms:
+///         reverse_nested: {}
+///         [cat nested envelope]
+///           cat_container: filter(cat)
+///             by_value: filters/histogram/date_histogram  ← per-cat buckets
+///               [per-cat inner x agg — same x nested envelope]
 /// ```
 pub fn build_nested_attribute_histogram_with_categories(
     agg_name: &str,
@@ -681,310 +562,101 @@ pub fn build_nested_attribute_histogram_with_categories(
     show_other: bool,
     cache: &Option<Arc<tokio::sync::RwLock<MetadataCache>>>,
 ) -> Result<Value, String> {
-    let x_field = x_spec.field.as_str();
-    let x_value_field = get_attribute_value_field(x_field, cache)?;
-    let cat_value_field = get_attribute_value_field(cat_field, cache)?;
+    let x_storage = resolve_field_storage(&x_spec.field, x_spec.value_type, cache)?;
+    let cat_storage = resolve_field_storage(cat_field, cat_value_type, cache)?;
 
-    let (x_agg_type, x_agg_params) = build_x_agg_params(x_spec, &x_value_field, x_bounds);
+    let (x_bucket_type, x_bucket_params) =
+        build_x_agg_params(x_spec, x_storage.bucket_field(), x_bounds);
     let (by_value_agg_type, by_value_def) = build_by_value_agg(
         cat_value_type,
-        &cat_value_field,
+        cat_storage.bucket_field(),
         cat_bounds,
         cat_labels,
         show_other,
     );
 
-    // Per-category inner x agg (same type as main).
-    let mut x_field_agg = serde_json::Map::new();
-    x_field_agg.insert(
-        x_field.to_string(),
-        json!({
-            "filter": { "term": { "attributes.key": x_field } },
-            "aggs": { x_agg_type: { x_agg_type: x_agg_params.clone() } }
-        }),
-    );
+    // Build the inner x agg block for each category bucket.
+    // Uses canonical container names ("by_key"/"at_rank") so extraction paths
+    // are deterministic via FieldStorage::inner_x_path().
+    let per_cat_x_block =
+        build_inner_x_agg_block(&x_storage, x_bucket_type, x_bucket_params.clone(), None);
+
+    // Assemble: a named "by_value" agg → per_cat_x_block sub-aggs.
+    // ES requires a name for every agg; "by_value" matches the extraction
+    // path in FieldStorage::cat_histograms_base().
+    let by_value_with_inner_x = json!({
+        "by_value": {
+            by_value_agg_type: by_value_def,
+            "aggs": per_cat_x_block
+        }
+    });
+
+    // Wrap in the cat nested envelope (by_attribute/at_cat_rank/etc.)
+    let cat_aggs = wrap_cat_in_nested(&cat_storage, by_value_with_inner_x);
 
     let category_histograms = json!({
         "reverse_nested": {},
-        "aggs": {
-            "by_attribute": {
-                "nested": { "path": "attributes" },
-                "aggs": {
-                    "by_cat": {
-                        "filter": { "term": { "attributes.key": cat_field } },
-                        "aggs": {
-                            "by_value": {
-                                by_value_agg_type: by_value_def,
-                                "aggs": {
-                                    "histogram": {
-                                        "reverse_nested": {},
-                                        "aggs": {
-                                            "by_attribute": {
-                                                "nested": { "path": "attributes" },
-                                                "aggs": Value::Object(x_field_agg)
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
+        "aggs": cat_aggs
     });
 
-    Ok(json!({
-        agg_name: {
-            "nested": { "path": "attributes" },
-            "aggs": {
-                "by_key": {
-                    "filter": { "term": { "attributes.key": x_field } },
-                    "aggs": {
-                        x_agg_type: { x_agg_type: x_agg_params },
-                        "categoryHistograms": category_histograms
-                    }
-                }
-            }
+    // Build the main x agg via the generic factory and inject categoryHistograms.
+    let x_agg_builder = agg_builder_for(x_spec, x_bounds, cache)?;
+    let mut final_agg = x_agg_builder.build(agg_name);
+
+    inject_category_histograms(&mut final_agg, agg_name, &x_storage, category_histograms);
+
+    Ok(final_agg)
+}
+
+/// Inject `category_histograms` into the inner `aggs` map of a pre-built x agg; no-op if that map is missing.
+fn inject_category_histograms(
+    final_agg: &mut Value,
+    agg_name: &str,
+    x_storage: &FieldStorage,
+    category_histograms: Value,
+) {
+    let container = x_storage.x_container_name();
+    let root = match final_agg.get_mut(agg_name) {
+        Some(v) => v,
+        None => return,
+    };
+    let aggs_obj = match root.get_mut("aggs") {
+        Some(v) => v,
+        None => return,
+    };
+    if container.is_empty() {
+        // Root-level x: inject directly into the top-level aggs.
+        aggs_obj["categoryHistograms"] = category_histograms;
+        return;
+    }
+    if let Some(container_obj) = aggs_obj.get_mut(container) {
+        if let Some(inner_aggs) = container_obj.get_mut("aggs") {
+            inner_aggs["categoryHistograms"] = category_histograms;
         }
-    }))
+    }
 }
 
 /// Select the appropriate `AggBuilder` for an axis spec.
 ///
-/// This is the main factory function; report handlers call it rather than
-/// constructing builders directly.
+/// Delegates all field-type detection to [`resolve_field_storage`] and returns a
+/// [`GenericBucketAgg`] — a single type that handles attributes, lineage ranks and
+/// root-level fields uniformly.
 pub fn agg_builder_for(
     spec: &AxisSpec,
     bounds: &BoundsResult,
     cache: &Option<Arc<tokio::sync::RwLock<MetadataCache>>>,
 ) -> Result<Box<dyn AggBuilder>, String> {
-    let is_attr = is_attribute(&spec.field, cache);
-    let is_rk = is_rank(&spec.field, cache);
-
-    match spec.value_type {
-        ValueType::Numeric => {
-            let [domain_min, domain_max] = bounds.domain.unwrap_or([0.0, 1.0]);
-
-            // For log scales, transform bounds to log space for histogram interval calculation
-            let (hist_min, hist_max) = match spec.opts.scale {
-                Scale::Log | Scale::Log10 => {
-                    let min_val = domain_min.max(1.0).log10();
-                    let max_val = domain_max.max(1.0).log10();
-                    (min_val, max_val)
-                }
-                Scale::Log2 => {
-                    let min_val = domain_min.max(1.0).log2();
-                    let max_val = domain_max.max(1.0).log2();
-                    (min_val, max_val)
-                }
-                Scale::Sqrt => {
-                    let min_val = domain_min.max(0.0).sqrt();
-                    let max_val = domain_max.sqrt();
-                    (min_val, max_val)
-                }
-                _ => (domain_min, domain_max),
-            };
-
-            // Compute interval in transformed space
-            let ticks = bounds.tick_count.max(1) as f64;
-            let interval = (hist_max - hist_min) / ticks;
-
-            if is_attr {
-                let value_field = get_attribute_value_field(&spec.field, cache)?;
-
-                // Build script transform for log scales
-                let script_opt = match spec.opts.scale {
-                    Scale::Log10 => Some("Math.log10(_value)".to_string()),
-                    Scale::Log => Some("Math.log(_value)".to_string()),
-                    Scale::Log2 => Some("Math.max(Math.log(_value)/Math.log(2), 0)".to_string()),
-                    Scale::Sqrt => Some("Math.sqrt(_value)".to_string()),
-                    _ => None,
-                };
-
-                let mut inner_agg = json!({
-                    "histogram": {
-                        "field": &value_field,
-                        "interval": interval,
-                        "extended_bounds": { "min": hist_min, "max": hist_max },
-                        "min_doc_count": 0
-                    }
-                });
-
-                if let Some(script) = script_opt {
-                    inner_agg["histogram"]["script"] = Value::String(script);
-                }
-
-                Ok(Box::new(NestedAttributeAggBuilder {
-                    field: spec.field.clone(),
-                    inner_agg_body: inner_agg,
-                    inner_agg_name: "histogram".to_string(),
-                }))
-            } else {
-                let script_opt = match spec.opts.scale {
-                    Scale::Log10 => Some("Math.log10(_value)".to_string()),
-                    Scale::Log => Some("Math.log(_value)".to_string()),
-                    Scale::Log2 => Some("Math.max(Math.log(_value)/Math.log(2), 0)".to_string()),
-                    Scale::Sqrt => Some("Math.sqrt(_value)".to_string()),
-                    _ => None,
-                };
-
-                Ok(Box::new(HistogramAggBuilder {
-                    field: spec.field.clone(),
-                    interval,
-                    min: hist_min,
-                    max: hist_max,
-                    script: script_opt,
-                }))
-            }
-        }
-        ValueType::Date => {
-            let calendar_interval = bounds
-                .interval
-                .map(|i| i.to_es_interval().to_string())
-                .unwrap_or_else(|| "1y".to_string());
-
-            if is_attr {
-                let value_field = get_attribute_value_field(&spec.field, cache)?;
-                let inner_agg = json!({
-                    "date_histogram": {
-                        "field": &value_field,
-                        "calendar_interval": &calendar_interval,
-                        "min_doc_count": 0
-                    }
-                });
-                Ok(Box::new(NestedAttributeAggBuilder {
-                    field: spec.field.clone(),
-                    inner_agg_body: inner_agg,
-                    inner_agg_name: "date_histogram".to_string(),
-                }))
-            } else {
-                Ok(Box::new(DateHistogramAggBuilder {
-                    field: spec.field.clone(),
-                    calendar_interval,
-                    time_zone: None,
-                }))
-            }
-        }
-        ValueType::Keyword | ValueType::TaxonRank => {
-            if is_attr {
-                let value_field = get_attribute_value_field(&spec.field, cache)?;
-                let inner_agg = json!({
-                    "terms": {
-                        "field": &value_field,
-                        "size": spec.opts.size,
-                        "min_doc_count": 0
-                    }
-                });
-                Ok(Box::new(NestedAttributeAggBuilder {
-                    field: spec.field.clone(),
-                    inner_agg_body: inner_agg,
-                    inner_agg_name: "terms".to_string(),
-                }))
-            } else if is_rk {
-                let inner_agg = json!({
-                    "terms": {
-                        "field": "lineage.taxon_id",
-                        "size": spec.opts.size,
-                        "min_doc_count": 0
-                    }
-                });
-                Ok(Box::new(NestedRankAggBuilder {
-                    field: spec.field.clone(),
-                    inner_agg_body: inner_agg,
-                    inner_agg_name: "terms".to_string(),
-                }))
-            } else {
-                Ok(Box::new(TermsAggBuilder {
-                    field: spec.field.clone(),
-                    size: spec.opts.size,
-                    include: if bounds.fixed_terms.is_empty() {
-                        None
-                    } else {
-                        Some(bounds.fixed_terms.clone())
-                    },
-                }))
-            }
-        }
-        ValueType::GeoPoint => Ok(Box::new(GeoHashAggBuilder {
-            field: spec.field.clone(),
-            precision: geohash_precision_for_size(spec.opts.size),
-            size: spec.opts.size,
-        })),
-    }
-}
-
-/// Wrapper that adds nested query logic around a base aggregation for nested attributes.
-pub struct NestedAttributeAggBuilder {
-    pub field: String,
-    pub inner_agg_body: Value,
-    pub inner_agg_name: String,
-}
-
-impl AggBuilder for NestedAttributeAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        json!({
-            agg_name: {
-                "nested": { "path": "attributes" },
-                "aggs": {
-                    "by_key": {
-                        "filter": { "term": { "attributes.key": &self.field } },
-                        "aggs": {
-                            &self.inner_agg_name: self.inner_agg_body.clone()
-                        }
-                    }
-                }
-            }
-        })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!(
-            "/aggregations/{}/by_key/{}/buckets",
-            agg_name, self.inner_agg_name
-        ))
-        .and_then(|b| b.as_array())
-        .cloned()
-        .unwrap_or_default()
-    }
-}
-
-/// Wrapper that adds nested query logic around a base aggregation for nested rank (lineage) fields.
-pub struct NestedRankAggBuilder {
-    pub field: String,
-    pub inner_agg_body: Value,
-    pub inner_agg_name: String,
-}
-
-impl AggBuilder for NestedRankAggBuilder {
-    fn build(&self, agg_name: &str) -> Value {
-        json!({
-            agg_name: {
-                "nested": { "path": "lineage" },
-                "aggs": {
-                    "at_rank": {
-                        "filter": { "term": { "lineage.taxon_rank": &self.field } },
-                        "aggs": {
-                            &self.inner_agg_name: self.inner_agg_body.clone()
-                        }
-                    }
-                }
-            }
-        })
-    }
-
-    fn extract(&self, resp: &Value, agg_name: &str) -> RawBuckets {
-        resp.pointer(&format!(
-            "/aggregations/{}/at_rank/{}/buckets",
-            agg_name, self.inner_agg_name
-        ))
-        .and_then(|b| b.as_array())
-        .cloned()
-        .unwrap_or_default()
-    }
+    let storage = resolve_field_storage(&spec.field, spec.value_type, cache)?;
+    let (bucket_type, bucket_params) = build_x_agg_params(spec, storage.bucket_field(), bounds);
+    Ok(Box::new(GenericBucketAgg {
+        storage,
+        bucket_type: bucket_type.to_string(),
+        bucket_params,
+    }))
 }
 
 /// Map a requested geohash count to an ES geohash precision level (1–12).
+#[allow(dead_code)]
 fn geohash_precision_for_size(size: usize) -> u8 {
     match size {
         0..=50 => 3,
diff --git a/crates/genomehubs-api/src/report/arc.rs b/crates/genomehubs-api/src/report/arc.rs
index eb045a4..c814ccd 100644
--- a/crates/genomehubs-api/src/report/arc.rs
+++ b/crates/genomehubs-api/src/report/arc.rs
@@ -204,24 +204,31 @@ pub async fn run_arc_report(
     if config.ranks.is_some() {
         return run_per_rank_report(client, es_base, index, base_query, config).await;
     }
-    if config.rings.is_some() {
-        return run_rings_report(client, es_base, index, base_query, config).await;
-    }
-
     let feature_ref_filter = filter_expr_to_es_query(
         &combine_terms(&config.feature_term, &config.reference_term),
         base_query,
     )?;
     let reference_filter = filter_expr_to_es_query(&config.reference_term, base_query)?;
 
-    if let Some(ref context_term) = config.context_term {
+    let (feature_count, reference_count) = tokio::try_join!(
+        count_matching(client, es_base, index, &feature_ref_filter),
+        count_matching(client, es_base, index, &reference_filter),
+    )?;
+    let context_count = if let Some(ref context_term) = config.context_term {
         let context_filter = filter_expr_to_es_query(context_term, base_query)?;
 
-        let (feature_count, reference_count, context_count) = tokio::try_join!(
-            count_matching(client, es_base, index, &feature_ref_filter),
-            count_matching(client, es_base, index, &reference_filter),
-            count_matching(client, es_base, index, &context_filter),
-        )?;
+        let (context_count,) =
+            tokio::try_join!(count_matching(client, es_base, index, &context_filter))?;
+        Some(context_count)
+    } else {
+        None
+    };
+    if config.rings.is_some() {
+        return run_rings_report(client, es_base, index, base_query, config, context_count).await;
+    }
+
+    if let Some(ref context_term) = config.context_term {
+        let context_count = context_count.unwrap_or(0);
 
         let arc = safe_fraction(feature_count, reference_count);
         let arc2 = safe_fraction(reference_count, context_count);
@@ -240,11 +247,6 @@ pub async fn run_arc_report(
         });
         Ok((feature_count, 0, report_data))
     } else {
-        let (feature_count, reference_count) = tokio::try_join!(
-            count_matching(client, es_base, index, &feature_ref_filter),
-            count_matching(client, es_base, index, &reference_filter),
-        )?;
-
         let arc = safe_fraction(feature_count, reference_count);
 
         let report_data = json!({
@@ -274,6 +276,7 @@ async fn run_rings_report(
     index: &str,
     base_query: &Value,
     config: &ArcConfig,
+    context_count: Option<u64>,
 ) -> Result<(u64, u64, Value), String> {
     let rings = config.rings.as_deref().unwrap_or(&[]);
 
@@ -317,6 +320,16 @@ async fn run_rings_report(
             entry.insert("reference_count".to_string(), json!(reference_count));
             entry.insert("featureTerm".to_string(), json!(ring.feature_term));
             entry.insert("referenceTerm".to_string(), json!(ring_ref));
+
+            if let Some(context_count) = context_count {
+                let arc2 = safe_fraction(reference_count, context_count);
+                entry.insert("arc2".to_string(), json!(arc2));
+                entry.insert("context_count".to_string(), json!(context_count));
+                entry.insert(
+                    "contextTerm".to_string(),
+                    json!(config.context_term.as_deref().unwrap_or("")),
+                );
+            }
             Value::Object(entry)
         })
         .collect();
@@ -421,7 +434,10 @@ async fn msearch_counts(
     for query in queries {
         body.push_str(&serde_json::to_string(&header).unwrap());
         body.push('\n');
-        body.push_str(&serde_json::to_string(&json!({ "query": query, "size": 0 })).unwrap());
+        body.push_str(
+            &serde_json::to_string(&json!({ "query": query, "size": 0, "track_total_hits": true }))
+                .unwrap(),
+        );
         body.push('\n');
     }
 
diff --git a/crates/genomehubs-api/src/report/bounds.rs b/crates/genomehubs-api/src/report/bounds.rs
index 115f9b1..288229b 100644
--- a/crates/genomehubs-api/src/report/bounds.rs
+++ b/crates/genomehubs-api/src/report/bounds.rs
@@ -2,6 +2,8 @@
 //!
 //! Each `compute_*_bounds()` function issues one ES aggregation query to determine
 //! the actual data range for a field, then wraps it in a `BoundsResult`.
+//!
+//! All field-type detection delegates to [`crate::report::field`].
 
 use genomehubs_query::report::axis::{DateInterval, Scale, ValueType};
 use genomehubs_query::report::{AxisSpec, BoundsResult};
@@ -10,70 +12,7 @@ use serde_json::{json, Value};
 
 use crate::es_client;
 use crate::es_metadata::MetadataCache;
-
-/// Determine if a field is a taxonomic rank (from lineage).
-/// Ranks are stored in the lineage.taxon_rank nested field.
-fn is_rank(
-    field: &str,
-    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
-) -> bool {
-    if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            return c.taxonomic_ranks.contains(&field.to_string());
-        }
-    }
-    false
-}
-
-/// Determine if a field is an attribute (from attributes nested array).
-fn is_attribute(
-    field: &str,
-    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
-) -> bool {
-    if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            if let Value::Object(groups) = &c.attr_types {
-                for (_, group) in groups {
-                    if let Value::Object(fields) = group {
-                        if fields.contains_key(field) {
-                            return true;
-                        }
-                    }
-                }
-            }
-        }
-    }
-    false
-}
-
-/// Get the exact value field for an attribute from metadata.
-/// Returns the processed_summary field (e.g., "attributes.long_value" for type=long).
-/// This MUST come from metadata, not guessed.
-fn get_attribute_value_field(
-    field: &str,
-    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
-) -> Result<String, String> {
-    if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            if let Value::Object(groups) = &c.attr_types {
-                // Search all groups for this field
-                for (_, group) in groups {
-                    if let Value::Object(fields) = group {
-                        if let Some(Value::Object(meta_obj)) = fields.get(field) {
-                            // Get processed_summary which is the exact ES field name
-                            if let Some(ps) =
-                                meta_obj.get("processed_summary").and_then(|v| v.as_str())
-                            {
-                                return Ok(format!("attributes.{}", ps));
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    Err(format!("field '{}' not found in metadata", field))
-}
+use crate::report::field::{get_attribute_value_field, is_attribute, is_rank};
 
 /// Probe Elasticsearch for the domain of a single axis field.
 ///
@@ -122,6 +61,22 @@ async fn compute_numeric_bounds(
     let is_attr = is_attribute(&spec.field, cache);
     let is_rk = is_rank(&spec.field, cache);
 
+    // If this field is a taxonomic rank, prefer that interpretation and
+    // return a `BoundsResult` for rank-type axes without probing attribute
+    // subdocuments. This avoids treating rank-like names that may also appear
+    // in attribute metadata as attributes.
+    if is_rk {
+        return Ok(BoundsResult {
+            domain: None,
+            tick_count: spec.opts.size,
+            interval: None,
+            scale: Scale::Ordinal,
+            value_type: ValueType::TaxonRank,
+            fixed_terms: vec![],
+            cat_labels: vec![],
+        });
+    }
+
     let agg_body = if is_attr {
         let value_field = get_attribute_value_field(&spec.field, cache)?;
         json!({
@@ -143,16 +98,6 @@ async fn compute_numeric_bounds(
                 }
             }
         })
-    } else if is_rk {
-        return Ok(BoundsResult {
-            domain: None,
-            tick_count: spec.opts.size,
-            interval: None,
-            scale: Scale::Ordinal,
-            value_type: ValueType::TaxonRank,
-            fixed_terms: vec![],
-            cat_labels: vec![],
-        });
     } else {
         json!({
             "size": 0,
@@ -322,20 +267,22 @@ async fn compute_keyword_bounds(
     let is_attr = is_attribute(&spec.field, cache);
     let is_rk = is_rank(&spec.field, cache);
 
-    let agg_body = if is_attr {
+    // Prefer taxon ranks over attributes: if the field looks like a rank,
+    // query lineage buckets rather than attribute nested terms.
+    let agg_body = if is_rk {
         json!({
             "size": 0,
             "query": base_query,
             "aggs": {
-                "by_attribute": {
-                    "nested": { "path": "attributes" },
+                "by_lineage": {
+                    "nested": { "path": "lineage" },
                     "aggs": {
-                        "by_key": {
-                            "filter": { "term": { "attributes.key": &spec.field } },
+                        "at_rank": {
+                            "filter": { "term": { "lineage.taxon_rank": &spec.field } },
                             "aggs": {
                                 "top_terms": {
                                     "terms": {
-                                        "field": "attributes.keyword_value.raw",
+                                        "field": "lineage.taxon_id",
                                         "size": spec.opts.size,
                                         "min_doc_count": 0
                                     }
@@ -346,20 +293,20 @@ async fn compute_keyword_bounds(
                 }
             }
         })
-    } else if is_rk {
+    } else if is_attr {
         json!({
             "size": 0,
             "query": base_query,
             "aggs": {
-                "by_lineage": {
-                    "nested": { "path": "lineage" },
+                "by_attribute": {
+                    "nested": { "path": "attributes" },
                     "aggs": {
-                        "at_rank": {
-                            "filter": { "term": { "lineage.taxon_rank": &spec.field } },
+                        "by_key": {
+                            "filter": { "term": { "attributes.key": &spec.field } },
                             "aggs": {
                                 "top_terms": {
                                     "terms": {
-                                        "field": "lineage.taxon_id",
+                                        "field": "attributes.keyword_value.raw",
                                         "size": spec.opts.size,
                                         "min_doc_count": 0
                                     }
@@ -482,9 +429,9 @@ async fn compute_geo_bounds(
 ///
 /// Selects the most appropriate calendar interval for rendering:
 /// - < 30 days → Day
-/// - < 6 years → Month
-/// - < 50 years → Year
-/// - >= 50 years → Decade
+/// - < 2 years → Month
+/// - < 4 years → Quarter
+/// - >= 4 years → Year
 pub fn auto_date_interval(range_ms: f64) -> Option<DateInterval> {
     const DAY_MS: f64 = 86_400_000.0;
     const YEAR_MS: f64 = DAY_MS * 365.25;
@@ -495,11 +442,11 @@ pub fn auto_date_interval(range_ms: f64) -> Option<DateInterval> {
 
     Some(if range_ms < 30.0 * DAY_MS {
         DateInterval::Day
-    } else if range_ms < 6.0 * YEAR_MS {
+    } else if range_ms < 2.0 * YEAR_MS {
         DateInterval::Month
-    } else if range_ms < 50.0 * YEAR_MS {
-        DateInterval::Year
+    } else if range_ms < 4.0 * YEAR_MS {
+        DateInterval::Quarter
     } else {
-        DateInterval::Decade
+        DateInterval::Year
     })
 }
diff --git a/crates/genomehubs-api/src/report/field.rs b/crates/genomehubs-api/src/report/field.rs
new file mode 100644
index 0000000..14c6fb7
--- /dev/null
+++ b/crates/genomehubs-api/src/report/field.rs
@@ -0,0 +1,442 @@
+//! Field storage resolution: single source of truth for where a field lives in ES.
+//!
+//! Every ES nested/attribute/lineage path decision in agg builders, bounds
+//! computation and extraction is derived from [`FieldStorage`].  Prefer
+//! [`resolve_field_storage`] in new code; `is_rank`, `is_attribute` and
+//! `get_attribute_value_field` stay public for `bounds.rs` / `agg.rs`.
+
+use serde_json::{json, Value};
+
+use crate::es_metadata::MetadataCache;
+use genomehubs_query::report::axis::ValueType;
+
+// ── FieldStorage ─────────────────────────────────────────────────────────────
+
+/// Where a field's values are physically stored in the ES document.
+///
+/// Agg builders and extractors derive their nested path structure from this
+/// enum, composing an x and a cat `FieldStorage` rather than four hand-written cases.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum FieldStorage {
+    /// Nested `attributes[]` sub-document selected by `attributes.key == key`.
+    Attribute {
+        /// Attribute name matched against `attributes.key`.
+        key: String,
+        /// Full dotted ES field path, e.g. `"attributes.keyword_value.raw"`.
+        es_value_field: String,
+    },
+    /// `lineage[].taxon_rank == rank`; canonical bucket key is `lineage.taxon_id`.
+    Lineage { rank: String },
+    /// Top-level document field; `es_field` includes `.keyword` suffix when needed.
+    Root { es_field: String },
+}
+
+impl FieldStorage {
+    /// ES `nested` path required before filtering this field; `None` for root fields.
+    #[allow(dead_code)]
+    pub fn nested_path(&self) -> Option<&str> {
+        match self {
+            FieldStorage::Attribute { .. } => Some("attributes"),
+            FieldStorage::Lineage { .. } => Some("lineage"),
+            FieldStorage::Root { .. } => None,
+        }
+    }
+
+    /// Term filter selecting this field's sub-docs (`match_all` for root fields).
+    #[allow(dead_code)]
+    pub fn key_filter(&self) -> Value {
+        match self {
+            FieldStorage::Attribute { key, .. } => json!({ "term": { "attributes.key": key } }),
+            FieldStorage::Lineage { rank } => json!({ "term": { "lineage.taxon_rank": rank } }),
+            FieldStorage::Root { .. } => json!({ "match_all": {} }),
+        }
+    }
+
+    /// The name of the inner filter-container used inside the nested agg.
+    ///
+    /// - Attribute in *x* position: `"by_key"`
+    /// - Lineage  in *x* position: `"at_rank"`
+    /// - Root: `""` (no container needed)
+    ///
+    /// The same names are used in the per-cat inner x agg so extraction
+    /// paths are deterministic.
+    pub fn x_container_name(&self) -> &str {
+        match self {
+            FieldStorage::Attribute { .. } => "by_key",
+            FieldStorage::Lineage { .. } => "at_rank",
+            FieldStorage::Root { .. } => "",
+        }
+    }
+
+    /// Names used when this field is in the *cat* position inside
+    /// `categoryHistograms`.
+    ///
+    /// Returns `(outer_wrapper, inner_container)` for the cat-level nesting:
+    ///
+    /// ```text
+    /// outer_wrapper: {
+    ///   nested: {path: ...},
+    ///   aggs: {
+    ///     inner_container: { filter: ..., aggs: { by_value: ... } }
+    ///   }
+    /// }
+    /// ```
+    pub fn cat_wrapper_names(&self) -> (&str, &str) {
+        match self {
+            FieldStorage::Attribute { .. } => ("by_attribute", "by_cat"),
+            FieldStorage::Lineage { .. } => ("by_lineage", "at_cat_rank"),
+            FieldStorage::Root { .. } => ("", ""),
+        }
+    }
+
+    /// Build a presence-existence filter at the *document* level.
+    ///
+    /// Used when ANDing presence filters into the base query so bounds
+    /// reflect only documents that will actually appear in the final plot.
+    pub fn presence_filter(&self) -> Value {
+        match self {
+            FieldStorage::Attribute { key, .. } => json!({
+                "nested": {
+                    "path": "attributes",
+                    "query": { "term": { "attributes.key": key } }
+                }
+            }),
+            FieldStorage::Lineage { rank } => json!({
+                "nested": {
+                    "path": "lineage",
+                    "query": { "term": { "lineage.taxon_rank": rank } }
+                }
+            }),
+            FieldStorage::Root { es_field } => json!({ "exists": { "field": es_field } }),
+        }
+    }
+
+    /// The canonical ES field to bucket on (passed to `terms`, `histogram`, etc.).
+    pub fn bucket_field(&self) -> &str {
+        match self {
+            FieldStorage::Attribute { es_value_field, .. } => es_value_field.as_str(),
+            FieldStorage::Lineage { .. } => "lineage.taxon_id",
+            FieldStorage::Root { es_field } => es_field.as_str(),
+        }
+    }
+
+    // ── Path helpers ─────────────────────────────────────────────────────────
+
+    /// JSON pointer to the main bucket list for a top-level agg (`agg_name`).
+    ///
+    /// ```text
+    /// attribute x: /aggregations/{agg_name}/by_key/{bucket_type}/buckets
+    /// lineage   x: /aggregations/{agg_name}/at_rank/{bucket_type}/buckets
+    /// root      x: /aggregations/{agg_name}/{bucket_type}/buckets
+    /// ```
+    pub fn main_bucket_path(&self, agg_name: &str, bucket_type: &str) -> String {
+        match self {
+            FieldStorage::Attribute { .. } => {
+                format!("/aggregations/{}/by_key/{}/buckets", agg_name, bucket_type)
+            }
+            FieldStorage::Lineage { .. } => {
+                format!("/aggregations/{}/at_rank/{}/buckets", agg_name, bucket_type)
+            }
+            FieldStorage::Root { .. } => {
+                format!("/aggregations/{}/{}/buckets", agg_name, bucket_type)
+            }
+        }
+    }
+
+    /// JSON pointer to the `by_value` buckets inside `categoryHistograms` for
+    /// x-storage `self` and `cat_storage`; `None` when the cat field is `Root`.
+    ///
+    /// ```text
+    /// /aggregations/{agg_name}/{x_container}/categoryHistograms/{cat_outer}/{cat_inner}/by_value/buckets
+    /// ```
+    pub fn cat_histograms_base(
+        &self,
+        agg_name: &str,
+        cat_storage: &FieldStorage,
+    ) -> Option<String> {
+        let x_container = self.x_container_name();
+        let (cat_outer, cat_inner) = cat_storage.cat_wrapper_names();
+        if cat_outer.is_empty() {
+            // root cat not yet supported in category histogram path
+            return None;
+        }
+        let path = if x_container.is_empty() {
+            // root x
+            format!(
+                "/aggregations/{}/categoryHistograms/{}/{}/by_value/buckets",
+                agg_name, cat_outer, cat_inner
+            )
+        } else {
+            format!(
+                "/aggregations/{}/{}/categoryHistograms/{}/{}/by_value/buckets",
+                agg_name, x_container, cat_outer, cat_inner
+            )
+        };
+        Some(path)
+    }
+
+    /// JSON pointer from a per-category bucket root to the inner x histogram
+    /// buckets array.
+    ///
+    /// ```text
+    /// attribute x: /histogram/by_attribute/by_key/{bucket_type}/buckets
+    /// lineage   x: /histogram/by_lineage/at_rank/{bucket_type}/buckets
+    /// root      x: /histogram/{bucket_type}/buckets
+    /// ```
+    pub fn inner_x_path(&self, bucket_type: &str) -> String {
+        match self {
+            FieldStorage::Attribute { .. } => {
+                format!("/histogram/by_attribute/by_key/{}/buckets", bucket_type)
+            }
+            FieldStorage::Lineage { .. } => {
+                format!("/histogram/by_lineage/at_rank/{}/buckets", bucket_type)
+            }
+            FieldStorage::Root { .. } => {
+                format!("/histogram/{}/buckets", bucket_type)
+            }
+        }
+    }
+}
+
+// ── Resolution ───────────────────────────────────────────────────────────────
+
+/// Classify `field` as lineage-, attribute- or root-stored.
+///
+/// Resolution order:
+/// 1. `value_type` is `TaxonRank`, or `is_rank` matches → `Lineage` (ranks win).
+/// 2. `is_attribute` matches → `Attribute`, with the value path taken
+///    from `get_attribute_value_field`.
+/// 3. Anything else → `Root`; keyword-typed fields get a `.keyword` suffix.
+///
+/// # Errors
+/// Propagates the error from `get_attribute_value_field` (attribute branch only).
+pub fn resolve_field_storage(
+    field: &str,
+    value_type: ValueType,
+    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
+) -> Result<FieldStorage, String> {
+    let storage = if matches!(value_type, ValueType::TaxonRank) || is_rank(field, cache) {
+        FieldStorage::Lineage {
+            rank: field.to_owned(),
+        }
+    } else if is_attribute(field, cache) {
+        FieldStorage::Attribute {
+            key: field.to_owned(),
+            es_value_field: get_attribute_value_field(field, cache)?,
+        }
+    } else {
+        let es_field = match value_type {
+            ValueType::Keyword => format!("{}.keyword", field),
+            _ => field.to_owned(),
+        };
+        FieldStorage::Root { es_field }
+    };
+    Ok(storage)
+}
+
+// ── Low-level helpers (used internally and by bounds.rs / agg.rs) ─────────
+
+/// Report whether `field` is listed in the cache's `taxonomic_ranks`.
+///
+/// Yields `false` when `cache` is `None` or `try_read` cannot take the lock.
+pub fn is_rank(
+    field: &str,
+    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
+) -> bool {
+    match cache.as_ref().map(|lock| lock.try_read()) {
+        Some(Ok(c)) => c.taxonomic_ranks.contains(&field.to_string()),
+        _ => false,
+    }
+}
+
+/// Report whether `field` appears in any `attr_types` group of the cache.
+///
+/// Names listed in `taxonomic_ranks` are never attributes.  Yields `false`
+/// when `cache` is `None` or `try_read` cannot take the lock.
+pub fn is_attribute(
+    field: &str,
+    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
+) -> bool {
+    let c = match cache.as_ref().map(|lock| lock.try_read()) {
+        Some(Ok(guard)) => guard,
+        _ => return false,
+    };
+    if c.taxonomic_ranks.contains(&field.to_string()) {
+        return false;
+    }
+    match &c.attr_types {
+        Value::Object(groups) => groups.values().any(|group| match group {
+            Value::Object(fields) => fields.contains_key(field),
+            _ => false,
+        }),
+        _ => false,
+    }
+}
+
+/// Look up the ES value path for attribute `field` in the metadata cache.
+///
+/// Scans every `attr_types` group for an entry named `field` carrying a string
+/// `processed_summary` and returns it prefixed with `attributes.`, e.g.
+/// `"keyword_value.raw"` → `"attributes.keyword_value.raw"`.
+///
+/// # Errors
+/// Returns `"field '…' not found in metadata"` when no such entry exists, and
+/// also when `cache` is `None` or `try_read` cannot take the lock.
+pub fn get_attribute_value_field(
+    field: &str,
+    cache: &Option<std::sync::Arc<tokio::sync::RwLock<MetadataCache>>>,
+) -> Result<String, String> {
+    let not_found = || format!("field '{}' not found in metadata", field);
+    let c = match cache.as_ref().map(|lock| lock.try_read()) {
+        Some(Ok(guard)) => guard,
+        _ => return Err(not_found()),
+    };
+    let summary = match &c.attr_types {
+        Value::Object(groups) => groups
+            .values()
+            .find_map(|group| group.get(field)?.get("processed_summary")?.as_str()),
+        _ => None,
+    };
+    summary.map(|ps| format!("attributes.{}", ps)).ok_or_else(not_found)
+}
+
+// ── Agg block builders ───────────────────────────────────────────────────────
+
+/// Wrap `inner_aggs` in the nested + filter envelope appropriate for this storage.
+///
+/// For `Attribute`/`Lineage` this emits:
+/// ```json
+/// {
+///   "nested": { "path": "..." },
+///   "aggs": { "{container}": { "filter": {...}, "aggs": inner_aggs } }
+/// }
+/// ```
+/// For `Root` it returns `inner_aggs` unchanged (no nesting needed).
+///
+/// `container_name` is the name of the inner filter-agg key
+/// (use [`FieldStorage::x_container_name`] or the cat container names).
+///
+/// The path and the term filter are taken from
+/// [`FieldStorage::nested_path`] and [`FieldStorage::key_filter`], so this
+/// envelope cannot drift from those definitions.
+pub fn wrap_in_nested(storage: &FieldStorage, container_name: &str, inner_aggs: Value) -> Value {
+    // `nested_path` is `None` only for root fields, which need no envelope.
+    let path = match storage.nested_path() {
+        Some(path) => path,
+        None => return inner_aggs,
+    };
+    json!({
+        "nested": { "path": path },
+        "aggs": {
+            container_name: {
+                "filter": storage.key_filter(),
+                "aggs": inner_aggs
+            }
+        }
+    })
+}
+
+/// Wrap `inner_aggs` in the cat-level nested envelope (used inside
+/// `categoryHistograms` reverse-nested context).
+///
+/// Uses `cat_wrapper_names()` to determine the outer wrapper and inner
+/// container keys, keeping extraction paths deterministic:
+///
+/// - `Attribute`: `by_attribute` / `by_cat`
+/// - `Lineage`: `by_lineage` / `at_cat_rank`
+/// - `Root`: no wrapper; `inner_aggs` is returned unchanged
+///
+/// Shape for the nested variants:
+///
+/// ```text
+/// {
+///   outer: {
+///     nested: { path: ... },
+///     aggs: {
+///       container: { filter: ..., aggs: inner_aggs }
+///     }
+///   }
+/// }
+/// ```
+///
+/// The object under `outer` is produced by [`wrap_in_nested`], so both
+/// wrappers share a single definition of the nested + filter envelope.
+pub fn wrap_cat_in_nested(storage: &FieldStorage, inner_aggs: Value) -> Value {
+    let (outer, container) = storage.cat_wrapper_names();
+    if outer.is_empty() {
+        // Only `Root` has empty wrapper names; it needs no nesting.
+        return inner_aggs;
+    }
+    // Same envelope as the x position, keyed by the cat container name.
+    json!({
+        outer: wrap_in_nested(storage, container, inner_aggs)
+    })
+}
+
+/// Build the inner x bucket agg block used inside each category bucket
+/// (within `categoryHistograms`).
+///
+/// `bucket_params` is the **raw** aggregation params object (e.g. the
+/// `{"field": …, "interval": …}` body for a histogram agg).  This function
+/// wraps it in the required ES `{name: {type: params}}` nesting (both name
+/// and type are `bucket_type`) and then adds the `reverse_nested` envelope
+/// with the correct nested path.
+///
+/// `sub_aggs` is an optional inner `"aggs"` object to attach to the x bucket
+/// agg (used by scatter to add `yHistograms` inside each x-bucket).
+///
+/// Uses `"by_key"` / `"at_rank"` container names consistently so
+/// [`FieldStorage::inner_x_path`] can compute the extraction path
+/// deterministically.
+///
+/// Shape for an attribute x field (a lineage field swaps in `by_lineage`,
+/// `lineage`, `at_rank` and `lineage.taxon_rank`; a root field puts the
+/// bucket agg directly under `reverse_nested`'s `aggs`):
+///
+/// ```text
+/// histogram: {
+///   reverse_nested: {},
+///   aggs: {
+///     by_attribute: {
+///       nested: { path: "attributes" },
+///       aggs: {
+///         by_key: {
+///           filter: { term: { "attributes.key": key } },
+///           aggs: { bucket_type: { bucket_type: bucket_params, aggs: sub_aggs } }
+///         }
+///       }
+///     }
+///   }
+/// }
+/// ```
+pub fn build_inner_x_agg_block(
+    x_storage: &FieldStorage,
+    bucket_type: &str,
+    bucket_params: Value,
+    sub_aggs: Option<Value>,
+) -> Value {
+    // Build the named x bucket agg: {name: {type: raw_params[, "aggs": sub_aggs]}}
+    let mut agg_body = json!({ bucket_type: bucket_params });
+    if let Some(sa) = sub_aggs {
+        agg_body["aggs"] = sa;
+    }
+    let inner_content = json!({ bucket_type: agg_body });
+    // Nested fields re-enter their nested path through the shared
+    // `wrap_in_nested` envelope; root fields are bucketed as-is.
+    let container = x_storage.x_container_name();
+    let aggs = match x_storage {
+        FieldStorage::Root { .. } => inner_content,
+        FieldStorage::Attribute { .. } => json!({
+            "by_attribute": wrap_in_nested(x_storage, container, inner_content)
+        }),
+        FieldStorage::Lineage { .. } => json!({
+            "by_lineage": wrap_in_nested(x_storage, container, inner_content)
+        }),
+    };
+    json!({
+        "histogram": {
+            "reverse_nested": {},
+            "aggs": aggs
+        }
+    })
+}
diff --git a/crates/genomehubs-api/src/report/mod.rs b/crates/genomehubs-api/src/report/mod.rs
index ee72ffb..67c4458 100644
--- a/crates/genomehubs-api/src/report/mod.rs
+++ b/crates/genomehubs-api/src/report/mod.rs
@@ -11,7 +11,9 @@
 pub mod agg;
 pub mod arc;
 pub mod bounds;
+pub mod field;
 pub mod filter_expr;
 pub mod pipeline;
 pub mod positional;
 pub mod report_types;
+pub mod spec_builder;
diff --git a/crates/genomehubs-api/src/report/report_types.rs b/crates/genomehubs-api/src/report/report_types.rs
index 31dc9ab..80e9e3c 100644
--- a/crates/genomehubs-api/src/report/report_types.rs
+++ b/crates/genomehubs-api/src/report/report_types.rs
@@ -3,47 +3,74 @@
 //! Each handler issues ES queries, applies bounds/aggregation/pipeline logic,
 //! and returns structured report data.
 
+use chrono::Datelike;
 use genomehubs_query::query::{QueryParams, SearchQuery};
 use genomehubs_query::report::axis::{AxisInput, AxisRole, AxisSpec, AxisSummary, ValueType};
 use serde_json::{json, Value};
 use std::sync::Arc;
 
 use crate::es_client;
+use crate::index_name;
 use crate::report::agg::{
     agg_builder_for, build_nested_attribute_histogram_with_categories,
     build_nested_attribute_scatter_agg, x_bucket_agg_name,
 };
 use crate::report::bounds::compute_bounds;
+use crate::report::field::{resolve_field_storage, FieldStorage};
 use crate::report::pipeline::{Pipeline, ReportContext, ScaleStep};
 use crate::AppState;
 
-/// Extract per-category per-bucket counts from a v2-pattern `categoryHistograms` response.
+/// Map each `ValueType` variant to a static label; ranks share "keyword".
+fn value_type_to_string(v: ValueType) -> &'static str {
+    match v {
+        ValueType::Numeric => "float",
+        ValueType::Date => "date",
+        ValueType::GeoPoint => "coordinate",
+        ValueType::Keyword | ValueType::TaxonRank => "keyword",
+    }
+}
+
+/// Extract per-category per-bucket counts from a `categoryHistograms` ES response.
 ///
-/// For each category label the function follows:
-/// `.../categoryHistograms/by_attribute/by_cat/by_value/buckets/{label}/histogram/by_attribute/{x_field}/histogram/buckets`
+/// Uses [`FieldStorage`] to compute deterministic JSON pointer paths rather
+/// than searching a candidate list.  The x-axis inner histogram container
+/// is always `"by_key"` (attribute) or `"at_rank"` (lineage) — see
+/// [`build_inner_x_agg_block`][crate::report::field::build_inner_x_agg_block].
 ///
-/// Returns a JSON object mapping each category key to an array of `doc_count` values, one per
-/// main-histogram bucket. Includes an `"other"` key when `show_other` is true.
+/// Returns a JSON object mapping each category key to an array of `doc_count`
+/// values, one per main-histogram bucket, aligned by key to the main buckets.
 #[allow(clippy::too_many_arguments)]
 fn extract_cat_histograms(
     resp: &Value,
     agg_name: &str,
-    x_field: &str,
+    x_storage: &FieldStorage,
+    cat_storage: &FieldStorage,
     x_bucket_agg: &str,
     main_bucket_count: usize,
     cat_labels: &[String],
     show_other: bool,
     cat_is_numeric: bool,
     main_counts: &[u64],
+    main_buckets: &[Value],
 ) -> Value {
-    let base = format!(
-        "/aggregations/{}/by_key/categoryHistograms/by_attribute/by_cat/by_value/buckets",
-        agg_name
-    );
+    let base = match x_storage.cat_histograms_base(agg_name, cat_storage) {
+        Some(p) if resp.pointer(&p).is_some() => p,
+        _ => return Value::Null,
+    };
 
-    if resp.pointer(&base).is_none() {
-        return Value::Null;
-    }
+    let inner_x = x_storage.inner_x_path(x_bucket_agg);
+
+    // Build main bucket keys list for alignment.
+    let main_keys: Vec<String> = main_buckets
+        .iter()
+        .map(|b| {
+            b.get("key")
+                .and_then(|k| k.as_str().map(|s| s.to_string()))
+                .or_else(|| b.get("key").map(|k| k.to_string()))
+                .or_else(|| b.get("id").and_then(|i| i.as_str().map(|s| s.to_string())))
+                .unwrap_or_default()
+        })
+        .collect();
 
     let mut by_cat = serde_json::Map::new();
 
@@ -55,60 +82,45 @@ fn extract_cat_histograms(
             .cloned()
             .unwrap_or_default();
         for bucket in &cat_buckets {
-            let key = bucket.get("key").and_then(|k| k.as_f64()).unwrap_or(0.0);
-            let label = key.to_string();
-            let hist_path = format!(
-                "/histogram/by_attribute/{}/{}/buckets",
-                x_field, x_bucket_agg
-            );
-            let mut counts: Vec<u64> = bucket
-                .pointer(&hist_path)
+            let key_val = bucket.get("key").cloned().unwrap_or(json!(0));
+            let label = if let Some(kf) = key_val.as_f64() {
+                kf.to_string()
+            } else if let Some(ks) = key_val.as_str() {
+                ks.to_string()
+            } else {
+                key_val.to_string()
+            };
+            let hist_buckets = bucket
+                .pointer(&inner_x)
                 .and_then(|b| b.as_array())
-                .map(|buckets| {
-                    buckets
-                        .iter()
-                        .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
-                        .collect()
-                })
+                .cloned()
                 .unwrap_or_default();
-            counts.resize(main_bucket_count, 0);
-            by_cat.insert(label, json!(counts));
+            by_cat.insert(
+                label,
+                json!(align_to_keys(&hist_buckets, &main_keys, main_bucket_count)),
+            );
         }
     } else {
         // by_value uses a filters agg — buckets is an object keyed by label.
         let mut named_sums: Vec<Vec<u64>> = Vec::with_capacity(cat_labels.len());
 
         for label in cat_labels {
-            let hist_path = format!(
-                "{}/{}/histogram/by_attribute/{}/{}/buckets",
-                base, label, x_field, x_bucket_agg
-            );
-            let mut counts: Vec<u64> = resp
+            let hist_path = format!("{}/{}{}", base, label, inner_x);
+            let hist_buckets = resp
                 .pointer(&hist_path)
                 .and_then(|b| b.as_array())
-                .map(|buckets| {
-                    buckets
-                        .iter()
-                        .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
-                        .collect()
-                })
+                .cloned()
                 .unwrap_or_default();
-            counts.resize(main_bucket_count, 0);
+            let counts = align_to_keys(&hist_buckets, &main_keys, main_bucket_count);
             named_sums.push(counts.clone());
             by_cat.insert(label.clone(), json!(counts));
         }
 
         if show_other {
-            let other_path = format!(
-                "{}/other/histogram/by_attribute/{}/{}/buckets",
-                base, x_field, x_bucket_agg
-            );
+            let other_path = format!("{}/other{}", base, inner_x);
             let other_counts: Vec<u64> =
                 if let Some(buckets) = resp.pointer(&other_path).and_then(|b| b.as_array()) {
-                    let mut v: Vec<u64> = buckets
-                        .iter()
-                        .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
-                        .collect();
+                    let mut v = align_to_keys(buckets, &main_keys, main_bucket_count);
                     v.resize(main_bucket_count, 0);
                     v
                 } else {
@@ -137,6 +149,38 @@ fn extract_cat_histograms(
     }
 }
 
+/// Align a per-category inner histogram bucket list to the main-axis key ordering.
+///
+/// Returns a `Vec<u64>` of length `main_bucket_count`, each entry being the
+/// `doc_count` for the corresponding main bucket key.  Missing inner keys
+/// produce a zero count.
+fn align_to_keys(
+    inner_buckets: &[Value],
+    main_keys: &[String],
+    main_bucket_count: usize,
+) -> Vec<u64> {
+    let mut map: std::collections::HashMap<String, u64> =
+        std::collections::HashMap::with_capacity(inner_buckets.len());
+    for b in inner_buckets {
+        // Render non-string keys as JSON text, matching how the caller builds
+        // `main_keys`.  `f64::to_string` would print a histogram key `1.0` as
+        // "1", which never equals the main key "1.0" and zeroes the counts.
+        let kstr = match b.get("key") {
+            Some(Value::String(s)) => s.clone(),
+            Some(other) => other.to_string(),
+            None => String::new(),
+        };
+        let cnt = b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0);
+        map.insert(kstr, cnt);
+    }
+    let mut counts: Vec<u64> = main_keys
+        .iter()
+        .map(|k| *map.get(k).unwrap_or(&0))
+        .collect();
+    counts.resize(main_bucket_count, 0);
+    counts
+}
+
 /// Run a histogram (or categorised histogram) report.
 ///
 /// Returns `(doc_count, took_ms, report_json)` or error.
@@ -149,33 +193,60 @@ pub async fn run_histogram_report(
     base_query: &Value,
 ) -> Result<(u64, u64, Value), String> {
     let x_spec = resolve_axis_spec(AxisRole::X, report_config, state)
+        .await
         .ok_or("report config missing 'x' axis (set 'x' field or use 'axes')")?;
-    let x_field = x_spec.field.clone();
-    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state);
+    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state).await;
+
+    // Resolve storage types up-front so presence filters and extraction paths
+    // are computed from the same source of truth.
+    let x_storage = resolve_field_storage(&x_spec.field, x_spec.value_type, &state.cache)?;
+    let cat_storage_opt: Option<FieldStorage> = if let Some(ref cat_spec) = cat_spec_opt {
+        Some(resolve_field_storage(
+            &cat_spec.field,
+            cat_spec.value_type,
+            &state.cache,
+        )?)
+    } else {
+        None
+    };
+
+    // Augment the base query for bounds computation with a presence-filter
+    // for the opposite axis so bounds reflect only records that will be
+    // plotted.
+    let cat_presence = cat_storage_opt.as_ref().map(|s| s.presence_filter());
+    let x_base_query = if let Some(f) = cat_presence {
+        json!({ "bool": { "must": [ base_query.clone(), f ] } })
+    } else {
+        base_query.clone()
+    };
 
     let x_bounds = compute_bounds(
         &state.client,
         &state.es_base,
         index,
         &x_spec,
-        base_query,
+        &x_base_query,
         &state.cache,
     )
     .await?;
 
     let agg_name = "x_agg";
-
     let x_inner_agg = x_bucket_agg_name(x_spec.value_type);
 
     // Build aggregation — categorized path supports both keyword (filters) and numeric (histogram) cat.
     let (final_agg, cat_labels, show_other_cat, cat_is_numeric) =
         if let Some(ref cat_spec) = cat_spec_opt {
+            // Require the x-axis presence when computing cat bounds so returned
+            // categories are only those that will be plotted.
+            let x_presence = x_storage.presence_filter();
+            let cat_base_query = json!({ "bool": { "must": [ base_query.clone(), x_presence ] } });
+
             let cat_bounds = compute_bounds(
                 &state.client,
                 &state.es_base,
                 index,
                 cat_spec,
-                base_query,
+                &cat_base_query,
                 &state.cache,
             )
             .await?;
@@ -220,18 +291,24 @@ pub async fn run_histogram_report(
         .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
         .collect();
 
-    let by_cat = if !cat_labels.is_empty() || cat_is_numeric {
-        extract_cat_histograms(
-            &resp,
-            agg_name,
-            x_field.as_str(),
-            x_inner_agg,
-            raw_buckets.len(),
-            &cat_labels,
-            show_other_cat,
-            cat_is_numeric,
-            &main_counts,
-        )
+    let by_cat = if let Some(ref cat_storage) = cat_storage_opt {
+        if !cat_labels.is_empty() || cat_is_numeric {
+            extract_cat_histograms(
+                &resp,
+                agg_name,
+                &x_storage,
+                cat_storage,
+                x_inner_agg,
+                raw_buckets.len(),
+                &cat_labels,
+                show_other_cat,
+                cat_is_numeric,
+                &main_counts,
+                &raw_buckets,
+            )
+        } else {
+            Value::Null
+        }
     } else {
         Value::Null
     };
@@ -242,7 +319,34 @@ pub async fn run_histogram_report(
         cat_labels: x_bounds.cat_labels.clone(),
         show_other: x_spec.opts.show_other,
     };
-    let processed_buckets = pipeline.run(raw_buckets.clone(), &ctx);
+    let processed_raw = pipeline.run(raw_buckets.clone(), &ctx);
+
+    // Align and label processed buckets. When the bounds provide an
+    // authoritative `fixed_terms` list, use that ordering and drop any
+    // unexpected buckets. Otherwise, for keyword axes drop zero-count
+    // placeholder buckets and ensure each bucket has a label.
+    let processed_buckets = if !x_bounds.fixed_terms.is_empty() {
+        align_and_label_processed_buckets(
+            processed_raw,
+            &x_bounds.fixed_terms,
+            &x_bounds.cat_labels,
+        )
+    } else {
+        let mut pb = processed_raw;
+        if matches!(x_spec.value_type, ValueType::Keyword) {
+            pb.retain(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0) > 0);
+        }
+        for b in pb.iter_mut() {
+            if b.get("label").is_none() {
+                let id_str = b
+                    .get("key")
+                    .and_then(|k| k.as_str().map(|s| s.to_string()))
+                    .unwrap_or_else(|| b.get("key").map(|k| k.to_string()).unwrap_or_default());
+                b["label"] = json!(id_str);
+            }
+        }
+        pb
+    };
 
     // allValues: flat array of doc_counts parallel to buckets.
     let all_values: Vec<u64> = raw_buckets
@@ -253,19 +357,210 @@ pub async fn run_histogram_report(
     let mut report_data = json!({
         "type": "histogram",
         "x": {
-            "field": &x_field,
+            "field": x_spec.field.as_str(),
             "scale": format!("{:?}", x_spec.opts.scale).to_lowercase(),
             "domain": x_bounds.domain,
-            "tickCount": x_bounds.tick_count
+            "tickCount": x_bounds.tick_count,
+            "value_type": value_type_to_string(x_spec.value_type)
         },
         "buckets": processed_buckets,
         "allValues": all_values
     });
 
     if !by_cat.is_null() {
-        report_data["by_cat"] = by_cat;
-        report_data["cat"] = json!(cat_spec_opt.as_ref().map(|s| s.field.as_str()));
-        report_data["cats"] = json!(cat_labels);
+        report_data["by_cat"] = by_cat.clone();
+        if let Some(ref cat_spec) = cat_spec_opt {
+            report_data["cat"] = json!({
+                "field": cat_spec.field,
+                "value_type": value_type_to_string(cat_spec.value_type),
+                "scale": format!("{:?}", cat_spec.opts.scale).to_lowercase()
+            });
+        }
+
+        // Determine the final `cats` labels. Prefer the pre-computed
+        // `cat_labels` (from bounds) when present; otherwise derive
+        // readable labels from the `by_cat` histogram keys. This covers
+        // numeric/date category histograms where `cat_labels` is empty.
+        // Also compute numeric `tick_values` (boundaries) when applicable
+        // and attach them to `report_data["cat"]["tick_values"]` so the
+        // plot-spec builder can use them for binned encodings.
+        let mut final_cat_labels = cat_labels.clone();
+        let mut cat_tick_values: Option<Vec<f64>> = None;
+        // Keep canonical raw category keys (object keys) for `report.cats`
+        // so downstream converters can look up `by_cat[cat_key]`. We'll add
+        // human-readable labels into `report.cat.tick_labels`.
+        let mut cat_keys: Vec<String> = Vec::new();
+        if final_cat_labels.is_empty() {
+            if let Some(obj) = by_cat.as_object() {
+                if !obj.is_empty() {
+                    // Preserve insertion order of the buckets as returned by ES.
+                    let keys: Vec<String> = obj.keys().cloned().collect();
+                    cat_keys = keys.clone();
+                    if let Some(ref cat_spec) = cat_spec_opt {
+                        match cat_spec.value_type {
+                            ValueType::Date => {
+                                // Parse numeric keys and compute adjacent boundaries
+                                // to present human-friendly date ranges.
+                                let nums: Vec<f64> = keys
+                                    .iter()
+                                    .map(|k| k.parse::<f64>().unwrap_or_default())
+                                    .collect();
+                                if nums.is_empty() {
+                                    final_cat_labels = keys;
+                                } else {
+                                    // estimate interval from first two keys, fallback to 1
+                                    let width = if nums.len() >= 2 {
+                                        nums[1] - nums[0]
+                                    } else {
+                                        1.0
+                                    };
+                                    let mut boundaries = nums.clone();
+                                    boundaries.push(nums.last().copied().unwrap_or(0.0) + width);
+                                    // Attach numeric boundaries for the axis
+                                    cat_tick_values = Some(boundaries.clone());
+                                    let mut labels: Vec<String> = Vec::with_capacity(nums.len());
+                                    for i in 0..nums.len() {
+                                        let left = nums[i];
+                                        let right = boundaries[i + 1];
+                                        // Heuristic: treat large values as milliseconds,
+                                        // otherwise seconds since epoch.
+                                        let left_i = left as i64;
+                                        let right_i = right as i64;
+                                        let left_dt = if left_i.abs() > 1_000_000_000_000 {
+                                            // milliseconds -> seconds + nanos
+                                            let s = left_i / 1000;
+                                            #[allow(clippy::cast_abs_to_unsigned)]
+                                            let ns = ((left_i % 1000).abs() as u32) * 1_000_000;
+                                            #[allow(deprecated)]
+                                            chrono::NaiveDateTime::from_timestamp_opt(s, ns)
+                                        } else {
+                                            #[allow(deprecated)]
+                                            chrono::NaiveDateTime::from_timestamp_opt(left_i, 0)
+                                        };
+                                        let right_dt = if right_i.abs() > 1_000_000_000_000 {
+                                            let s = right_i / 1000;
+                                            #[allow(clippy::cast_abs_to_unsigned)]
+                                            let ns = ((right_i % 1000).abs() as u32) * 1_000_000;
+                                            #[allow(deprecated)]
+                                            chrono::NaiveDateTime::from_timestamp_opt(s, ns)
+                                        } else {
+                                            #[allow(deprecated)]
+                                            chrono::NaiveDateTime::from_timestamp_opt(right_i, 0)
+                                        };
+                                        if let (Some(ldt), Some(rdt)) = (left_dt, right_dt) {
+                                            #[allow(deprecated)]
+                                            let ldt = chrono::DateTime::<chrono::Utc>::from_utc(
+                                                ldt,
+                                                chrono::Utc,
+                                            );
+                                            #[allow(deprecated)]
+                                            let rdt = chrono::DateTime::<chrono::Utc>::from_utc(
+                                                rdt,
+                                                chrono::Utc,
+                                            );
+                                            // Format as %Y-%m if day is 1, else %Y-%m-%d; collapse to %Y if month and day are both 1
+                                            let fmt_date = |dt: &chrono::DateTime<chrono::Utc>| {
+                                                let y = dt.year();
+                                                let m = dt.month();
+                                                let d = dt.day();
+                                                if m == 1 && d == 1 {
+                                                    format!("{:04}", y)
+                                                } else if d == 1 {
+                                                    format!("{:04}-{:02}", y, m)
+                                                } else {
+                                                    format!("{:04}-{:02}-{:02}", y, m, d)
+                                                }
+                                            };
+                                            labels.push(format!(
+                                                "{} to {}",
+                                                fmt_date(&ldt),
+                                                fmt_date(&rdt)
+                                            ));
+                                        } else if let Some(ldt) = left_dt {
+                                            #[allow(deprecated)]
+                                            let ldt = chrono::DateTime::<chrono::Utc>::from_utc(
+                                                ldt,
+                                                chrono::Utc,
+                                            );
+                                            labels.push(format!("{}", ldt.format("%Y-%m-%d")));
+                                        } else {
+                                            labels.push(keys[i].clone());
+                                        }
+                                    }
+                                    final_cat_labels = labels;
+                                }
+                            }
+                            _ => {
+                                // Numeric buckets: produce readable range labels
+                                let nums: Vec<f64> = keys
+                                    .iter()
+                                    .map(|k| k.parse::<f64>().unwrap_or_default())
+                                    .collect();
+                                if nums.is_empty() {
+                                    final_cat_labels = keys;
+                                } else {
+                                    let width = if nums.len() >= 2 {
+                                        nums[1] - nums[0]
+                                    } else {
+                                        1.0
+                                    };
+                                    let mut boundaries = nums.clone();
+                                    boundaries.push(nums.last().copied().unwrap_or(0.0) + width);
+                                    // Attach numeric boundaries for the axis
+                                    cat_tick_values = Some(boundaries.clone());
+                                    let mut labels: Vec<String> = Vec::with_capacity(nums.len());
+                                    for i in 0..nums.len() {
+                                        let left = nums[i];
+                                        let right = boundaries[i + 1];
+                                        let fmt = |v: f64| {
+                                            // Scale by G/M/k and print with three decimal places (e.g. 2.130G)
+                                            let abs_v = v.abs();
+                                            let (scaled, suffix) = if abs_v >= 1e9 {
+                                                (v / 1e9, "G")
+                                            } else if abs_v >= 1e6 {
+                                                (v / 1e6, "M")
+                                            } else if abs_v >= 1e3 {
+                                                (v / 1e3, "k")
+                                            } else {
+                                                (v, "")
+                                            };
+                                            if suffix.is_empty() {
+                                                format!("{:.3}", scaled)
+                                            } else {
+                                                format!("{:.3}{}", scaled, suffix)
+                                            }
+                                        };
+                                        labels.push(format!("{} to {}", fmt(left), fmt(right)));
+                                    }
+                                    final_cat_labels = labels;
+                                }
+                            }
+                        }
+                    } else {
+                        final_cat_labels = keys.clone();
+                        cat_keys = keys;
+                    }
+                }
+            }
+        }
+        if let Some(tvals) = cat_tick_values {
+            report_data["cat"]["tick_values"] = json!(tvals);
+        }
+
+        // Use raw category keys for `report.cats` (these are the keys used
+        // to index `by_cat`). Provide human-readable labels under
+        // `report.cat.tick_labels` so the plot-spec builder can use them for
+        // legend/axis labeling while converters still match counts by key.
+        if cat_keys.is_empty() {
+            // Fallback: when we didn't capture keys earlier, attempt to
+            // populate from final_cat_labels (they may already be raw keys).
+            report_data["cats"] = json!(final_cat_labels.clone());
+        } else {
+            report_data["cats"] = json!(cat_keys);
+        }
+        if !final_cat_labels.is_empty() {
+            report_data["cat"]["tick_labels"] = json!(final_cat_labels);
+        }
     }
 
     Ok((total_hits, took, report_data))
@@ -506,7 +801,7 @@ pub async fn run_tree_report(
     // Prefer structured `axes` array; fall back to flat `y:` / `y_opts:` or legacy
     // `fields:` sequence (AxisSummary::Value for all).
     let tree_field_specs: Vec<(String, AxisSummary)> = {
-        let from_axes = resolve_y_specs(report_config, state);
+        let from_axes = resolve_y_specs(report_config, state).await;
         if !from_axes.is_empty() {
             from_axes
                 .into_iter()
@@ -531,7 +826,7 @@ pub async fn run_tree_report(
     };
 
     // --- Cat axis: resolve full AxisSpec + bounds (same pipeline as histogram) ---
-    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state);
+    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state).await;
     let cat_bounds_opt = if let Some(ref cat_spec) = cat_spec_opt {
         Some(
             compute_bounds(
@@ -717,8 +1012,27 @@ pub async fn run_tree_report(
     // One extra ES query using nested lineage → reverse_nested to count how many
     // taxa at `count_rank` descend from each tree node.
     if let Some(ref rank) = count_rank {
-        let descendant_counts =
-            fetch_descendant_counts(&state.client, &state.es_base, index, &lca_id, rank).await?;
+        // Speed-up: restrict descendant count aggregation to the set of tree
+        // node IDs we've already collected so ES only computes counts for
+        // those ancestors. This is much faster than enumerating all
+        // ancestors under the LCA when the tree is small or moderate-sized.
+        let candidate_ids: Vec<String> = tree_nodes.keys().cloned().collect();
+        // Use fast-path only when candidate set is reasonably small to avoid
+        // building a huge `terms` filter; fallback to composite when > 10k.
+        let candidate_slice: Option<&[String]> = if candidate_ids.len() <= 10_000 {
+            Some(candidate_ids.as_slice())
+        } else {
+            None
+        };
+        let descendant_counts = fetch_descendant_counts(
+            &state.client,
+            &state.es_base,
+            index,
+            &lca_id,
+            rank,
+            candidate_slice,
+        )
+        .await?;
         took_total += descendant_counts.took;
         for (taxon_id, count) in descendant_counts.counts {
             if let Some(node) = tree_nodes.get_mut(&taxon_id) {
@@ -823,7 +1137,7 @@ pub async fn run_map_report(
     let hexbin_field = format!("hexbin{hex_resolution}");
 
     // --- Cat axis (optional) ---
-    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state);
+    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state).await;
     let cat_bounds_opt = if let Some(ref spec) = cat_spec_opt {
         compute_bounds(
             &state.client,
@@ -1039,40 +1353,36 @@ pub async fn run_map_report(
 // Helper functions
 // ============================================================================
 
-/// Infer the ValueType of a field from metadata cache.
-/// Defaults to Numeric if not found or cache unavailable.
-fn infer_value_type(
+/// Infer a field's `ValueType` from the metadata cache, awaiting the read
+/// lock (`read().await`) rather than the non-blocking `try_read`. Ranks map
+/// to `TaxonRank`, attributes by their `type`; defaults to `Numeric`.
+async fn infer_value_type_async(
     field: &str,
     cache: &Option<Arc<tokio::sync::RwLock<crate::es_metadata::MetadataCache>>>,
 ) -> ValueType {
-    // Check if it's a rank in the metadata
     if let Some(cache_lock) = cache {
-        if let Ok(c) = cache_lock.try_read() {
-            if c.taxonomic_ranks.contains(&field.to_string()) {
-                return ValueType::TaxonRank;
-            }
-
-            // Check if it's an attribute in the metadata
-            if let serde_json::Value::Object(groups) = &c.attr_types {
-                for (_, group) in groups {
-                    if let serde_json::Value::Object(fields) = group {
-                        if let Some(serde_json::Value::Object(meta_obj)) = fields.get(field) {
-                            if let Some(type_str) = meta_obj.get("type").and_then(|v| v.as_str()) {
-                                return match type_str {
-                                    "date" => ValueType::Date,
-                                    "keyword" => ValueType::Keyword,
-                                    "long" | "integer" | "float" | "double" => ValueType::Numeric,
-                                    "geo_point" => ValueType::GeoPoint,
-                                    _ => ValueType::Keyword,
-                                };
-                            }
+        let guard = cache_lock.read().await;
+        if guard.taxonomic_ranks.contains(&field.to_string()) {
+            return ValueType::TaxonRank;
+        }
+        if let serde_json::Value::Object(groups) = &guard.attr_types {
+            for (_, group) in groups {
+                if let serde_json::Value::Object(fields) = group {
+                    if let Some(serde_json::Value::Object(meta_obj)) = fields.get(field) {
+                        if let Some(type_str) = meta_obj.get("type").and_then(|v| v.as_str()) {
+                            return match type_str {
+                                "date" => ValueType::Date,
+                                "keyword" => ValueType::Keyword,
+                                "long" | "integer" | "float" | "double" => ValueType::Numeric,
+                                "geo_point" => ValueType::GeoPoint,
+                                _ => ValueType::Keyword,
+                            };
                         }
                     }
                 }
             }
         }
     }
-    // Default to Numeric if not found in metadata
     ValueType::Numeric
 }
 
@@ -1081,7 +1391,7 @@ fn infer_value_type(
 /// Checks the structured `axes` array first. Falls back to legacy flat keys
 /// (`x`/`x_opts`, `y`/`y_opts`, `cat`/`cat_opts`) so existing request bodies
 /// continue to work unchanged.
-fn resolve_axis_spec(
+async fn resolve_axis_spec(
     role: AxisRole,
     report_config: &serde_yaml::Value,
     state: &Arc<AppState>,
@@ -1100,7 +1410,7 @@ fn resolve_axis_spec(
                 continue;
             }
             if let Ok(input) = serde_yaml::from_value::<AxisInput>(entry.clone()) {
-                let inferred = infer_value_type(&input.field, &state.cache);
+                let inferred = infer_value_type_async(&input.field, &state.cache).await;
                 return Some(input.into_spec(inferred));
             }
         }
@@ -1117,7 +1427,7 @@ fn resolve_axis_spec(
         .get(&opts_key)
         .and_then(|v| v.as_str())
         .unwrap_or("");
-    let value_type = infer_value_type(field, &state.cache);
+    let value_type = infer_value_type_async(field, &state.cache).await;
     Some(AxisSpec {
         field: field.to_string(),
         role,
@@ -1136,20 +1446,24 @@ fn resolve_axis_spec(
 /// Prefers the structured `axes` array (multiple entries, per-field `summary` and
 /// opts).  Falls back to the flat `y:` + `y_opts:` shorthand for a single field
 /// with `AxisSummary::Value`.  Returns an empty vec when neither is present.
-fn resolve_y_specs(report_config: &serde_yaml::Value, state: &Arc<AppState>) -> Vec<AxisSpec> {
+async fn resolve_y_specs(
+    report_config: &serde_yaml::Value,
+    state: &Arc<AppState>,
+) -> Vec<AxisSpec> {
     // Structured form: collect every entry with position == "y"
     if let Some(axes) = report_config.get("axes").and_then(|a| a.as_sequence()) {
-        let specs: Vec<AxisSpec> = axes
+        let inputs: Vec<AxisInput> = axes
             .iter()
             .filter(|e| e.get("position").and_then(|p| p.as_str()) == Some("y"))
             .filter_map(|e| serde_yaml::from_value::<AxisInput>(e.clone()).ok())
-            .map(|input| {
-                let inferred = infer_value_type(&input.field, &state.cache);
-                input.into_spec(inferred)
-            })
             .collect();
-        if !specs.is_empty() {
-            return specs;
+        if !inputs.is_empty() {
+            let mut out = Vec::with_capacity(inputs.len());
+            for input in inputs {
+                let inferred = infer_value_type_async(&input.field, &state.cache).await;
+                out.push(input.into_spec(inferred));
+            }
+            return out;
         }
     }
 
@@ -1162,7 +1476,7 @@ fn resolve_y_specs(report_config: &serde_yaml::Value, state: &Arc<AppState>) ->
         .get("y_opts")
         .and_then(|v| v.as_str())
         .unwrap_or("");
-    let value_type = infer_value_type(field, &state.cache);
+    let value_type = infer_value_type_async(field, &state.cache).await;
     vec![AxisSpec {
         field: field.to_string(),
         role: AxisRole::Y,
@@ -1495,63 +1809,172 @@ async fn fetch_descendant_counts(
     index: &str,
     lca_id: &str,
     count_rank: &str,
+    candidate_ids: Option<&[String]>,
 ) -> Result<DescendantCounts, String> {
-    let body = json!({
-        "size": 0,
-        "query": {
-            "bool": {
-                "must": [
-                    { "term": { "taxon_rank": count_rank } },
-                    {
-                        "nested": {
-                            "path": "lineage",
-                            "query": { "term": { "lineage.taxon_id": lca_id } }
+    // If caller provides a candidate ID set, restrict to those IDs using a
+    // nested `filter` + `terms` agg. This avoids paging and is much faster
+    // when the ID set is small relative to the full space.
+    if let Some(ids) = candidate_ids {
+        if ids.is_empty() {
+            return Ok(DescendantCounts {
+                counts: std::collections::HashMap::new(),
+                took: 0,
+            });
+        }
+        // Build a nested -> filter(terms(ids)) -> terms agg over lineage.taxon_id
+        let body = json!({
+            "size": 0,
+            "query": {
+                "bool": {
+                    "must": [
+                        { "term": { "taxon_rank": count_rank } },
+                        {
+                            "nested": {
+                                "path": "lineage",
+                                "query": { "term": { "lineage.taxon_id": lca_id } }
+                            }
+                        }
+                    ]
+                }
+            },
+            "aggs": {
+                "by_ancestor": {
+                    "nested": { "path": "lineage" },
+                    "aggs": {
+                        "filtered": {
+                            "filter": { "terms": { "lineage.taxon_id": ids } },
+                            "aggs": {
+                                "ancestors": {
+                                    "terms": { "field": "lineage.taxon_id", "size": 10000 },
+                                    "aggs": { "node_count": { "reverse_nested": {} } }
+                                }
+                            }
                         }
                     }
-                ]
+                }
             }
-        },
-        "aggs": {
-            "by_ancestor": {
-                "nested": { "path": "lineage" },
-                "aggs": {
-                    "ancestors": {
-                        "terms": {
-                            "field": "lineage.taxon_id",
-                            "size": 100000
-                        },
-                        "aggs": {
-                            "node_count": { "reverse_nested": {} }
+        });
+
+        let resp = crate::es_client::execute_search(client, es_base, index, &body).await?;
+        let took = resp.get("took").and_then(|t| t.as_u64()).unwrap_or(0);
+        let buckets = resp
+            .pointer("/aggregations/by_ancestor/filtered/ancestors/buckets")
+            .and_then(|b| b.as_array())
+            .cloned()
+            .unwrap_or_default();
+        let mut counts = std::collections::HashMap::with_capacity(buckets.len());
+        for bucket in &buckets {
+            let taxon_id = match bucket.get("key").and_then(|k| k.as_str()) {
+                Some(id) => id.to_string(),
+                None => continue,
+            };
+            let count = bucket
+                .pointer("/node_count/doc_count")
+                .and_then(|c| c.as_u64())
+                .unwrap_or(0);
+            counts.insert(taxon_id, count);
+        }
+        return Ok(DescendantCounts { counts, took });
+    }
+
+    // Use a composite aggregation inside the nested `lineage` agg so we can
+    // page through ancestor buckets without materialising them all at once.
+    // Loop until `after_key` is absent.
+    let mut counts: std::collections::HashMap<String, u64> = std::collections::HashMap::new();
+    let mut took_total: u64 = 0;
+    let mut after_key: Option<serde_json::Value> = None;
+
+    loop {
+        // Build composite aggregation block, including `after` when present.
+        let mut composite_obj = json!({
+            "size": 1000,
+            "sources": [{ "ancestor_id": { "terms": { "field": "lineage.taxon_id" } } }]
+        });
+        if let Some(ref ak) = after_key {
+            composite_obj["after"] = ak.clone();
+        }
+
+        let body = json!({
+            "size": 0,
+            "query": {
+                "bool": {
+                    "must": [
+                        { "term": { "taxon_rank": count_rank } },
+                        {
+                            "nested": {
+                                "path": "lineage",
+                                "query": { "term": { "lineage.taxon_id": lca_id } }
+                            }
+                        }
+                    ]
+                }
+            },
+            "aggs": {
+                "by_ancestor": {
+                    "nested": { "path": "lineage" },
+                    "aggs": {
+                        "ancestors": {
+                            "composite": composite_obj,
+                            "aggs": {
+                                "node_count": { "reverse_nested": {} }
+                            }
                         }
                     }
                 }
             }
-        }
-    });
+        });
 
-    let resp = crate::es_client::execute_search(client, es_base, index, &body).await?;
-    let took = resp.get("took").and_then(|t| t.as_u64()).unwrap_or(0);
+        let resp = crate::es_client::execute_search(client, es_base, index, &body).await?;
+        took_total += resp.get("took").and_then(|t| t.as_u64()).unwrap_or(0);
 
-    let buckets = resp
-        .pointer("/aggregations/by_ancestor/ancestors/buckets")
-        .and_then(|b| b.as_array())
-        .cloned()
-        .unwrap_or_default();
+        let buckets = resp
+            .pointer("/aggregations/by_ancestor/ancestors/buckets")
+            .and_then(|b| b.as_array())
+            .cloned()
+            .unwrap_or_default();
 
-    let mut counts = std::collections::HashMap::with_capacity(buckets.len());
-    for bucket in &buckets {
-        let taxon_id = match bucket.get("key").and_then(|k| k.as_str()) {
-            Some(id) => id.to_string(),
-            None => continue,
-        };
-        let count = bucket
-            .pointer("/node_count/doc_count")
-            .and_then(|c| c.as_u64())
-            .unwrap_or(0);
-        counts.insert(taxon_id, count);
+        for bucket in &buckets {
+            // Composite bucket keys are objects like { "ancestor_id": "123" }
+            let taxon_id = if let Some(obj) = bucket.get("key").and_then(|k| k.as_object()) {
+                if let Some(v) = obj.get("ancestor_id") {
+                    if let Some(s) = v.as_str() {
+                        s.to_string()
+                    } else if let Some(n) = v.as_f64() {
+                        n.to_string()
+                    } else {
+                        continue;
+                    }
+                } else {
+                    continue;
+                }
+            } else if let Some(s) = bucket.get("key").and_then(|k| k.as_str()) {
+                s.to_string()
+            } else if let Some(n) = bucket.get("key").and_then(|k| k.as_f64()) {
+                n.to_string()
+            } else {
+                continue;
+            };
+
+            let count = bucket
+                .pointer("/node_count/doc_count")
+                .and_then(|c| c.as_u64())
+                .unwrap_or(0);
+            counts.insert(taxon_id, count);
+        }
+
+        // Check for pagination `after_key`
+        after_key = resp
+            .pointer("/aggregations/by_ancestor/ancestors/after_key")
+            .cloned();
+        if after_key.is_none() {
+            break;
+        }
     }
 
-    Ok(DescendantCounts { counts, took })
+    Ok(DescendantCounts {
+        counts,
+        took: took_total,
+    })
 }
 
 /// Compute subtree counts via iterative post-order DFS.
@@ -1881,6 +2304,51 @@ fn find_attr_numeric(attrs: &[Value], field: &str) -> Option<f64> {
     None
 }
 
+/// Look up `field` in an `attributes` array and return its first usable date.
+///
+/// A numeric `date_value` is returned unchanged (it is not rescaled here). A string
+/// `date_value` is parsed as RFC 3339, `YYYY-MM-DD`, `YYYY-MM-DDTHH:MM:SS` or
+/// `YYYY-MM-DD HH:MM:SS` (tried in that order) and converted to milliseconds since
+/// the Unix epoch; forms without an offset are read as UTC. Entries for `field`
+/// whose value cannot be interpreted are skipped, and `None` is returned when no
+/// entry yields a value.
+fn find_attr_date(attrs: &[Value], field: &str) -> Option<f64> {
+    /// Epoch milliseconds for a zone-less timestamp interpreted as UTC.
+    fn utc_millis(naive: chrono::NaiveDateTime) -> f64 {
+        chrono::DateTime::<chrono::Utc>::from_naive_utc_and_offset(naive, chrono::Utc)
+            .timestamp_millis() as f64
+    }
+
+    /// Parse one textual date, trying each accepted layout in turn.
+    fn parse_text(text: &str) -> Option<f64> {
+        // Full timestamp carrying its own offset.
+        if let Ok(stamp) = chrono::DateTime::parse_from_rfc3339(text) {
+            return Some(stamp.timestamp_millis() as f64);
+        }
+        // Calendar date only: anchor it at midnight.
+        let midnight = chrono::NaiveDate::parse_from_str(text, "%Y-%m-%d")
+            .ok()
+            .and_then(|day| day.and_hms_opt(0, 0, 0));
+        if let Some(naive) = midnight {
+            return Some(utc_millis(naive));
+        }
+        // Zone-less date-times, `T`-separated first, then space-separated.
+        ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"]
+            .iter()
+            .find_map(|layout| chrono::NaiveDateTime::parse_from_str(text, layout).ok())
+            .map(utc_millis)
+    }
+
+    attrs
+        .iter()
+        .filter(|attr| attr.get("key").and_then(|k| k.as_str()) == Some(field))
+        .find_map(|attr| {
+            let raw = attr.get("date_value")?;
+            // A numeric value is taken as-is; otherwise fall back to parsing text.
+            raw.as_f64().or_else(|| raw.as_str().and_then(parse_text))
+        })
+}
+
 /// Find the first keyword attribute value for `field` in an `attributes` array.
 fn find_attr_keyword(attrs: &[Value], field: &str) -> Option<String> {
     attrs
@@ -1902,24 +2370,26 @@ fn find_attr_keyword(attrs: &[Value], field: &str) -> Option<String> {
 fn extract_scatter_by_cat(
     resp: &Value,
     agg_name: &str,
-    x_field: &str,
+    x_storage: &FieldStorage,
     x_bucket_agg: &str,
+    cat_storage: &FieldStorage,
     y_field: &str,
+    y_inner_agg: &str,
     x_bucket_count: usize,
     y_bucket_count: usize,
     cat_labels: &[String],
     show_other: bool,
     cat_is_numeric: bool,
     main_counts: &[u64],
+    y_fixed_terms: Option<&[String]>,
 ) -> (Value, Value) {
-    let base = format!(
-        "/aggregations/{}/by_key/categoryHistograms/by_attribute/by_cat/by_value/buckets",
-        agg_name
-    );
+    let base = match x_storage.cat_histograms_base(agg_name, cat_storage) {
+        Some(p) if resp.pointer(&p).is_some() => p,
+        _ => return (Value::Null, Value::Null),
+    };
 
-    if resp.pointer(&base).is_none() {
-        return (Value::Null, Value::Null);
-    }
+    // Relative path from a per-category bucket to the inner x histogram buckets.
+    let inner_x = x_storage.inner_x_path(x_bucket_agg);
 
     let mut by_cat = serde_json::Map::new();
     let mut y_values_by_cat = serde_json::Map::new();
@@ -1934,12 +2404,8 @@ fn extract_scatter_by_cat(
         for bucket in &cat_buckets {
             let key = bucket.get("key").and_then(|k| k.as_f64()).unwrap_or(0.0);
             let label = key.to_string();
-            let x_path = format!(
-                "/histogram/by_attribute/{}/{}/buckets",
-                x_field, x_bucket_agg
-            );
             let x_buckets_inner = bucket
-                .pointer(&x_path)
+                .pointer(&inner_x)
                 .and_then(|b| b.as_array())
                 .cloned()
                 .unwrap_or_default();
@@ -1952,16 +2418,35 @@ fn extract_scatter_by_cat(
                         .and_then(|c| c.as_u64())
                         .unwrap_or(0),
                 );
-                let y_path = format!("/yHistograms/by_attribute/{}/histogram/buckets", y_field);
-                let y_counts = x_bucket
-                    .pointer(&y_path)
-                    .and_then(|b| b.as_array())
-                    .map(|yb| {
+                let y_path = format!(
+                    "/yHistograms/by_attribute/{}/{}/buckets",
+                    y_field, y_inner_agg
+                );
+                let y_counts = if let Some(yb) =
+                    x_bucket.pointer(&y_path).and_then(|b| b.as_array())
+                {
+                    if let Some(fixed) = y_fixed_terms {
+                        use std::collections::HashMap;
+                        let mut map: HashMap<String, u64> = HashMap::new();
+                        for b in yb {
+                            if let Some(k) = b.get("key").and_then(|k| k.as_str()) {
+                                let c = b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0);
+                                map.insert(k.to_string(), c);
+                            }
+                        }
+                        let mut aligned = Vec::with_capacity(fixed.len());
+                        for key in fixed {
+                            aligned.push(map.get(key.as_str()).copied().unwrap_or(0));
+                        }
+                        aligned
+                    } else {
                         yb.iter()
                             .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
                             .collect()
-                    })
-                    .unwrap_or_else(|| vec![0; y_bucket_count]);
+                    }
+                } else {
+                    vec![0; y_bucket_count]
+                };
                 y_counts_per_x.push(y_counts);
             }
             x_counts.resize(x_bucket_count, 0);
@@ -1980,10 +2465,7 @@ fn extract_scatter_by_cat(
         };
 
         for label in &all_labels {
-            let x_hist_path = format!(
-                "{}/{}/histogram/by_attribute/{}/{}/buckets",
-                base, label, x_field, x_bucket_agg
-            );
+            let x_hist_path = format!("{}/{}{}", base, label, inner_x);
             let x_buckets = resp
                 .pointer(&x_hist_path)
                 .and_then(|b| b.as_array())
@@ -1999,17 +2481,35 @@ fn extract_scatter_by_cat(
                         .and_then(|c| c.as_u64())
                         .unwrap_or(0),
                 );
-                let y_hist_path =
-                    format!("/yHistograms/by_attribute/{}/histogram/buckets", y_field);
-                let y_counts = x_bucket
-                    .pointer(&y_hist_path)
-                    .and_then(|b| b.as_array())
-                    .map(|yb| {
+                let y_hist_path = format!(
+                    "/yHistograms/by_attribute/{}/{}/buckets",
+                    y_field, y_inner_agg
+                );
+                let y_counts = if let Some(yb) =
+                    x_bucket.pointer(&y_hist_path).and_then(|b| b.as_array())
+                {
+                    if let Some(fixed) = y_fixed_terms {
+                        use std::collections::HashMap;
+                        let mut map: HashMap<String, u64> = HashMap::new();
+                        for b in yb {
+                            if let Some(k) = b.get("key").and_then(|k| k.as_str()) {
+                                let c = b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0);
+                                map.insert(k.to_string(), c);
+                            }
+                        }
+                        let mut aligned = Vec::with_capacity(fixed.len());
+                        for key in fixed {
+                            aligned.push(map.get(key.as_str()).copied().unwrap_or(0));
+                        }
+                        aligned
+                    } else {
                         yb.iter()
                             .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
                             .collect()
-                    })
-                    .unwrap_or_else(|| vec![0; y_bucket_count]);
+                    }
+                } else {
+                    vec![0; y_bucket_count]
+                };
                 y_counts_per_x.push(y_counts);
             }
             x_counts.resize(x_bucket_count, 0);
@@ -2059,6 +2559,189 @@ fn compute_z_domain(all_y_values: &[Vec<u64>]) -> [u64; 2] {
     }
 }
 
+/// Fetch scientific name labels for a list of taxon ids in the configured taxon index.
+///
+/// Returns exactly one label per entry of `ids`, in input order; an id with no hit
+/// (or no string `scientific_name`) falls back to the id string itself.
+///
+/// # Errors
+/// Fails if the msearch request fails or its reply has no `responses` array, so
+/// callers never receive a label list that is silently shorter than `ids`.
+async fn fetch_taxon_labels(
+    state: &Arc<AppState>,
+    ids: &[String],
+    rank: &str,
+) -> Result<Vec<String>, String> {
+    if ids.is_empty() {
+        return Ok(vec![]);
+    }
+
+    let taxon_index = index_name::resolve_index_str(&state.default_result, state);
+
+    // One sub-query per id, in input order, so response `i` answers `ids[i]`.
+    let mut searches: Vec<(String, Value)> = Vec::with_capacity(ids.len());
+    for id in ids {
+        let q = json!({
+            "query": {
+                "bool": {
+                    "filter": [
+                        { "term": { "taxon_id": id } },
+                        { "term": { "taxon_rank": rank } }
+                    ]
+                }
+            },
+            "_source": ["taxon_id", "scientific_name"]
+        });
+        searches.push((taxon_index.clone(), q));
+    }
+
+    let nd = es_client::build_msearch_body(&searches);
+    let resp = es_client::execute_msearch(&state.client, &state.es_base, &nd).await?;
+    let responses = resp
+        .get("responses")
+        .and_then(|r| r.as_array())
+        .ok_or_else(|| "msearch reply is missing the `responses` array".to_string())?;
+    // Iterate over `ids`, not the responses, so the output length cannot drift.
+    let label_for = |(i, id): (usize, &String)| -> String {
+        responses
+            .get(i)
+            // `None >= Some(1)` is false, so a missing hit count also falls back to the id.
+            .filter(|r| r.pointer("/hits/total/value").and_then(|v| v.as_u64()) >= Some(1))
+            .and_then(|r| r.pointer("/hits/hits/0/_source/scientific_name"))
+            .and_then(|name| name.as_str())
+            .map_or_else(|| id.clone(), |name| name.to_string())
+    };
+    Ok(ids.iter().enumerate().map(label_for).collect())
+}
+
+/// Convert raw ES buckets into `{id, label, count}` objects.
+///
+/// With a non-empty `fixed_terms` the result has one entry per fixed term, in that
+/// order: buckets whose key is not listed are dropped and terms without a bucket
+/// get a count of 0. Labels come from `bucket_labels` when it is non-empty and as
+/// long as `fixed_terms`, else from the bucket's own string `label`, else from the
+/// id. With an empty `fixed_terms` every raw bucket is emitted in its given order.
+fn build_structured_buckets(
+    raw_buckets: &[Value],
+    fixed_terms: &[String],
+    bucket_labels: &[String],
+) -> Vec<Value> {
+    use std::collections::HashMap;
+
+    // String keys are used verbatim; any other JSON value becomes its JSON text.
+    fn key_text(key: &Value) -> String {
+        match key.as_str() {
+            Some(text) => text.to_string(),
+            None => key.to_string(),
+        }
+    }
+
+    // The bucket's own `label`, if it carries one as a string.
+    fn own_label(bucket: &Value) -> Option<String> {
+        bucket.get("label").and_then(|v| v.as_str()).map(String::from)
+    }
+
+    if fixed_terms.is_empty() {
+        // Nothing to align against: keep every bucket in the order it was given.
+        return raw_buckets
+            .iter()
+            .map(|bucket| {
+                // A bucket without a key gets the id "null" (JSON text of `Null`).
+                let id = key_text(bucket.get("key").unwrap_or(&Value::Null));
+                let label = own_label(bucket).unwrap_or_else(|| id.clone());
+                let count = bucket.get("doc_count").and_then(|v| v.as_u64()).unwrap_or(0);
+                json!({"id": id, "label": label, "count": count})
+            })
+            .collect();
+    }
+
+    // Index buckets by key text; when a key repeats, the last bucket seen wins.
+    let by_key: HashMap<String, &Value> = raw_buckets
+        .iter()
+        .filter_map(|bucket| bucket.get("key").map(|key| (key_text(key), bucket)))
+        .collect();
+    // Caller-supplied labels are only trusted when they pair up one-to-one.
+    let labels_align = !bucket_labels.is_empty() && bucket_labels.len() == fixed_terms.len();
+
+    fixed_terms
+        .iter()
+        .enumerate()
+        .map(|(i, term)| {
+            let found = by_key.get(term).copied();
+            let label = if labels_align {
+                bucket_labels[i].clone()
+            } else {
+                found.and_then(own_label).unwrap_or_else(|| term.clone())
+            };
+            let count = found
+                .and_then(|bucket| bucket.get("doc_count"))
+                .and_then(|c| c.as_u64())
+                .unwrap_or(0);
+            json!({"id": term, "label": label, "count": count})
+        })
+        .collect()
+}
+
+/// Reorder already-processed buckets to follow `fixed_terms` and give each a `label`.
+///
+/// With an empty `fixed_terms` the buckets are returned in place; a bucket lacking
+/// a string `label` takes `fixed_labels[i]` when that index exists. Otherwise one
+/// bucket is produced per fixed term, in order: the bucket whose `key` (or, failing
+/// that, `id`) matches the term, or a `{"key": term, "doc_count": 0}` placeholder.
+/// Its label is `fixed_labels[i]` when the two lists have equal non-zero length,
+/// else the bucket's existing string `label`, else the term itself.
+fn align_and_label_processed_buckets(
+    mut processed: Vec<Value>,
+    fixed_terms: &[String],
+    fixed_labels: &[String],
+) -> Vec<Value> {
+    use std::collections::HashMap;
+    // String values are used verbatim; any other JSON value becomes its JSON text.
+    fn as_text(value: &Value) -> String {
+        value.as_str().map_or_else(|| value.to_string(), String::from)
+    }
+
+    if fixed_terms.is_empty() {
+        // Pair buckets with labels positionally; surplus buckets are left untouched.
+        for (bucket, label) in processed.iter_mut().zip(fixed_labels) {
+            let has_label = bucket.get("label").and_then(|v| v.as_str()).is_some();
+            if !has_label {
+                bucket["label"] = json!(label);
+            }
+        }
+        return processed;
+    }
+
+    // Index buckets by `key`, else `id` (neither: dropped); a repeated id keeps the last.
+    let mut by_id: HashMap<String, Value> = processed
+        .into_iter()
+        .filter_map(|bucket| {
+            let id = bucket.get("key").or_else(|| bucket.get("id")).map(as_text)?;
+            Some((id, bucket))
+        })
+        .collect();
+    let labels_align = !fixed_labels.is_empty() && fixed_labels.len() == fixed_terms.len();
+
+    fixed_terms
+        .iter()
+        .enumerate()
+        .map(|(i, term)| {
+            // `remove` hands each bucket out once; a repeated term gets a placeholder.
+            let mut bucket = by_id
+                .remove(term)
+                .unwrap_or_else(|| json!({"key": term.clone(), "doc_count": 0}));
+            let label = if labels_align {
+                fixed_labels[i].clone()
+            } else {
+                let current = bucket.get("label").and_then(|v| v.as_str());
+                current.map_or_else(|| term.clone(), String::from)
+            };
+            bucket["label"] = json!(label);
+            bucket
+        })
+        .collect()
+}
+
 /// Fetch raw point data for scatter when total hits are within the scatter threshold.
 ///
 /// Returns an object mapping category name to an array of `{scientific_name, taxonId, x, y, cat}`
@@ -2069,16 +2752,68 @@ async fn fetch_raw_point_data(
     index: &str,
     base_query: &Value,
     x_field: &str,
+    x_is_taxon_rank: bool,
     y_field: &str,
+    y_is_taxon_rank: bool,
     cat_field: Option<&str>,
     cat_labels: &[String],
     show_other: bool,
+    x_fixed_terms: Option<&[String]>,
     threshold: usize,
 ) -> Value {
+    // Build combined query: base_query AND optional x_bucket filter when
+    // `x_fixed_terms` provided. This ensures raw points align with the
+    // canonical buckets used to compute axis ticks and avoid stray points
+    // that fall outside those buckets.
+    let mut final_query: Value = base_query.clone();
+    if let Some(fixed) = x_fixed_terms {
+        // Convert slice into JSON array
+        let fixed_json = json!(fixed);
+        if x_is_taxon_rank {
+            // Nested lineage filter: require ancestor at the requested rank
+            // whose taxon_id is one of the fixed terms.
+            let extra_filter = json!({
+                "nested": {
+                    "path": "lineage",
+                    "query": {
+                        "bool": {
+                            "must": [
+                                { "term": { "lineage.taxon_rank": x_field } },
+                                { "terms": { "lineage.taxon_id": fixed_json } }
+                            ]
+                        }
+                    }
+                }
+            });
+            final_query = json!({ "bool": { "must": [ base_query.clone(), extra_filter ] } });
+        } else {
+            // Non-rank: try to match either nested attributes (attributes.key)
+            // or a top-level `.keyword` field. Use a SHOULD so either form
+            // matching will include the document.
+            let attr_filter = json!({
+                "nested": {
+                    "path": "attributes",
+                    "query": {
+                        "bool": {
+                            "must": [
+                                { "term": { "attributes.key": x_field } },
+                                { "terms": { "attributes.keyword_value.raw": fixed_json } }
+                            ]
+                        }
+                    }
+                }
+            });
+            let top_filter = json!({ "terms": { format!("{}.keyword", x_field): fixed_json } });
+            let should_filter = json!({ "bool": { "should": [ attr_filter, top_filter ], "minimum_should_match": 1 } });
+            final_query = json!({ "bool": { "must": [ base_query.clone(), should_filter ] } });
+        }
+    }
+
+    // Request `lineage` so we can resolve ancestor IDs when the axis is a taxon rank.
     let es_body = json!({
         "size": threshold,
-        "query": base_query,
-        "_source": ["scientific_name", "taxon_id", "attributes"]
+        "query": final_query,
+        "_source": ["scientific_name", "taxon_id", "attributes", "lineage"]
     });
 
     let resp = match es_client::execute_search(&state.client, &state.es_base, index, &es_body).await
@@ -2120,13 +2855,105 @@ async fn fetch_raw_point_data(
             .cloned()
             .unwrap_or_default();
 
-        let x_val = match find_attr_numeric(&attrs, x_field) {
-            Some(v) => v,
-            None => continue,
+        // Extract x and y values. When the axis is a taxon rank, prefer the
+        // ancestor id found in `lineage`. Otherwise prefer numeric/date/keyword
+        // attributes as before.
+        let mut x_label_for_point: Option<String> = None;
+        let x_json_val = if x_is_taxon_rank {
+            // Try to find ancestor at the requested rank in the `lineage` array.
+            let mut found: Option<Value> = None;
+            if let Some(lineage_arr) = src.get("lineage").and_then(|l| l.as_array()) {
+                for anc in lineage_arr {
+                    if anc.get("taxon_rank").and_then(|r| r.as_str()) == Some(x_field) {
+                        if let Some(idv) = anc.get("taxon_id") {
+                            if let Some(s) = idv.as_str() {
+                                found = Some(json!(s.to_string()));
+                            } else if let Some(n) = idv.as_u64() {
+                                found = Some(json!(n.to_string()));
+                            }
+                        }
+                        // Try to capture scientific_name from the ancestor for labeling
+                        if x_label_for_point.is_none() {
+                            if let Some(sn) = anc.get("scientific_name").and_then(|v| v.as_str()) {
+                                x_label_for_point = Some(sn.to_string());
+                            } else if let Some(nm) = anc.get("name").and_then(|v| v.as_str()) {
+                                x_label_for_point = Some(nm.to_string());
+                            }
+                        }
+                        if found.is_some() {
+                            break;
+                        }
+                    }
+                }
+            }
+            if let Some(v) = found {
+                v
+            } else if let Some(v) = find_attr_numeric(&attrs, x_field) {
+                json!(v)
+            } else if let Some(d) = find_attr_date(&attrs, x_field) {
+                json!(d)
+            } else if let Some(s) = find_attr_keyword(&attrs, x_field) {
+                json!(s)
+            } else {
+                continue;
+            }
+        } else if let Some(v) = find_attr_numeric(&attrs, x_field) {
+            json!(v)
+        } else if let Some(d) = find_attr_date(&attrs, x_field) {
+            json!(d)
+        } else if let Some(s) = find_attr_keyword(&attrs, x_field) {
+            json!(s)
+        } else {
+            continue;
         };
-        let y_val = match find_attr_numeric(&attrs, y_field) {
-            Some(v) => v,
-            None => continue,
+
+        let mut y_label_for_point: Option<String> = None;
+        let y_json_val = if y_is_taxon_rank {
+            // y-axis as taxon rank — resolve ancestor id from lineage if present.
+            let mut found: Option<Value> = None;
+            if let Some(lineage_arr) = src.get("lineage").and_then(|l| l.as_array()) {
+                for anc in lineage_arr {
+                    if anc.get("taxon_rank").and_then(|r| r.as_str()) == Some(y_field) {
+                        if let Some(idv) = anc.get("taxon_id") {
+                            if let Some(s) = idv.as_str() {
+                                found = Some(json!(s.to_string()));
+                            } else if let Some(n) = idv.as_u64() {
+                                found = Some(json!(n.to_string()));
+                            }
+                        }
+                        // capture ancestor scientific name for label
+                        if y_label_for_point.is_none() {
+                            if let Some(sn) = anc.get("scientific_name").and_then(|v| v.as_str()) {
+                                y_label_for_point = Some(sn.to_string());
+                            } else if let Some(nm) = anc.get("name").and_then(|v| v.as_str()) {
+                                y_label_for_point = Some(nm.to_string());
+                            }
+                        }
+                        if found.is_some() {
+                            break;
+                        }
+                    }
+                }
+            }
+            if let Some(v) = found {
+                v
+            } else if let Some(v) = find_attr_numeric(&attrs, y_field) {
+                json!(v)
+            } else if let Some(d) = find_attr_date(&attrs, y_field) {
+                json!(d)
+            } else if let Some(s) = find_attr_keyword(&attrs, y_field) {
+                json!(s)
+            } else {
+                continue;
+            }
+        } else if let Some(v) = find_attr_numeric(&attrs, y_field) {
+            json!(v)
+        } else if let Some(d) = find_attr_date(&attrs, y_field) {
+            json!(d)
+        } else if let Some(s) = find_attr_keyword(&attrs, y_field) {
+            json!(s)
+        } else {
+            continue;
         };
 
         let cat_key = if let Some(cf) = cat_field {
@@ -2142,13 +2969,20 @@ async fn fetch_raw_point_data(
             "all".to_string()
         };
 
-        raw_data.entry(cat_key.clone()).or_default().push(json!({
+        let mut point_obj = json!({
             "scientific_name": scientific_name,
             "taxonId": taxon_id,
-            "x": x_val,
-            "y": y_val,
+            "x": x_json_val,
+            "y": y_json_val,
             "cat": cat_key
-        }));
+        });
+        if let Some(lbl) = x_label_for_point {
+            point_obj["x_label"] = json!(lbl);
+        }
+        if let Some(lbl) = y_label_for_point {
+            point_obj["y_label"] = json!(lbl);
+        }
+        raw_data.entry(cat_key.clone()).or_default().push(point_obj);
     }
 
     let mut result = serde_json::Map::new();
@@ -2176,32 +3010,59 @@ pub async fn run_scatter_report(
     base_query: &Value,
 ) -> Result<(u64, u64, Value), String> {
     let x_spec = resolve_axis_spec(AxisRole::X, report_config, state)
+        .await
         .ok_or("report config missing 'x' axis (set 'x' field or use 'axes')")?;
     let y_spec = resolve_axis_spec(AxisRole::Y, report_config, state)
+        .await
         .ok_or("scatter report requires 'y' axis (set 'y' field or use 'axes')")?;
     let x_field = x_spec.field.clone();
     let y_field = y_spec.field.clone();
-    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state);
+    let cat_spec_opt = resolve_axis_spec(AxisRole::Cat, report_config, state).await;
     let scatter_threshold = report_config
         .get("scatter_threshold")
         .and_then(|v| v.as_u64())
         .unwrap_or(1000) as usize;
 
+    // Augment the base query for bounds computation with a presence-filter
+    // for the opposite axis so bounds reflect only records that will appear
+    // in the final plot. This avoids empty buckets caused by one axis being
+    // filtered out by the other.
+    let y_storage = resolve_field_storage(&y_spec.field, y_spec.value_type, &state.cache)?;
+    let x_base_query = json!({
+        "bool": { "must": [ base_query.clone(), y_storage.presence_filter() ] }
+    });
+
     let x_bounds = compute_bounds(
         &state.client,
         &state.es_base,
         index,
         &x_spec,
-        base_query,
+        &x_base_query,
         &state.cache,
     )
     .await?;
+    // If this is a taxon-rank axis and bounds provided a fixed term list (ids),
+    // attempt to fetch human-readable labels (scientific names) for each id so
+    // the final report can include a labelled mapping. Fall back to the
+    // original bounds.cat_labels when lookup fails.
+    let mut x_bucket_labels: Vec<String> = x_bounds.cat_labels.clone();
+    if matches!(x_spec.value_type, ValueType::TaxonRank) && !x_bounds.fixed_terms.is_empty() {
+        if let Ok(labels) = fetch_taxon_labels(state, &x_bounds.fixed_terms, &x_spec.field).await {
+            if labels.len() == x_bounds.fixed_terms.len() {
+                x_bucket_labels = labels;
+            }
+        }
+    }
+    let x_storage = resolve_field_storage(&x_spec.field, x_spec.value_type, &state.cache)?;
+    let y_presence = x_storage.presence_filter();
+    let y_base_query = json!({ "bool": { "must": [ base_query.clone(), y_presence ] } });
+
     let y_bounds = compute_bounds(
         &state.client,
         &state.es_base,
         index,
         &y_spec,
-        base_query,
+        &y_base_query,
         &state.cache,
     )
     .await?;
@@ -2257,19 +3118,36 @@ pub async fn run_scatter_report(
         .unwrap_or(0);
 
     // ---- Extract main x buckets (histogram or terms depending on x type) ----
-    let x_hist_path = format!("/aggregations/{}/by_key/{}/buckets", agg_name, x_inner_agg);
-    let x_raw_buckets = resp
+    let x_hist_path = x_storage.main_bucket_path(agg_name, x_inner_agg);
+    let mut x_raw_buckets = resp
         .pointer(&x_hist_path)
         .and_then(|b| b.as_array())
         .cloned()
         .unwrap_or_default();
-    let x_bucket_count = x_raw_buckets.len();
 
-    // Keys may be numeric (histogram) or string (terms) — collect as raw JSON Values.
-    let x_bucket_keys: Vec<Value> = x_raw_buckets
-        .iter()
-        .filter_map(|b| b.get("key").cloned())
-        .collect();
+    // Respect the definitive fixed term order calculated during bounds.
+    // If `x_bounds.fixed_terms` is non-empty, reorder the returned buckets to
+    // match that list. Append any unexpected buckets at the end.
+    if !x_bounds.fixed_terms.is_empty() {
+        let mut ordered: Vec<Value> = Vec::with_capacity(x_raw_buckets.len());
+        for id in &x_bounds.fixed_terms {
+            if let Some(pos) = x_raw_buckets
+                .iter()
+                .position(|b| b.get("key").and_then(|k| k.as_str()) == Some(id.as_str()))
+            {
+                ordered.push(x_raw_buckets[pos].clone());
+            }
+        }
+        // Append any remaining buckets not present in fixed_terms
+        for b in &x_raw_buckets {
+            let key = b.get("key").and_then(|k| k.as_str()).unwrap_or("");
+            if !x_bounds.fixed_terms.iter().any(|t| t == key) {
+                ordered.push(b.clone());
+            }
+        }
+        x_raw_buckets = ordered;
+    }
+    let x_bucket_count = x_raw_buckets.len();
 
     let all_values: Vec<u64> = x_raw_buckets
         .iter()
@@ -2279,27 +3157,164 @@ pub async fn run_scatter_report(
     // ---- Extract allYValues (per x-bucket y-histogram) and yBuckets ----
     let y_bucket_count = y_bounds.tick_count;
     let mut all_y_values: Vec<Vec<u64>> = Vec::with_capacity(x_bucket_count);
-    let mut y_bucket_keys: Vec<f64> = Vec::new();
+    let mut y_bucket_keys: Vec<Value> = Vec::new();
+    let mut y_bucket_labels: Vec<String> = Vec::new();
+
+    // If bounds provided canonical fixed terms for a keyword/rank Y axis,
+    // prefer that ordering for yBuckets so keys are consistent across x buckets.
+    if matches!(
+        y_bounds.value_type,
+        ValueType::Keyword | ValueType::TaxonRank
+    ) && !y_bounds.fixed_terms.is_empty()
+    {
+        y_bucket_keys = y_bounds
+            .fixed_terms
+            .iter()
+            .map(|s| Value::String(s.clone()))
+            .collect();
+    }
+
+    // If this is a taxon-rank Y axis and bounds provided fixed term ids,
+    // attempt to fetch human-readable labels (scientific names). Keep
+    // `y_bucket_keys` as the canonical ids used for bin alignment, and
+    // separately store `y_bucket_labels` for display.
+    if matches!(y_spec.value_type, ValueType::TaxonRank) && !y_bounds.fixed_terms.is_empty() {
+        if let Ok(labels) = fetch_taxon_labels(state, &y_bounds.fixed_terms, &y_spec.field).await {
+            if labels.len() == y_bounds.fixed_terms.len() {
+                y_bucket_labels = labels;
+                // Ensure the canonical keys remain the ids from fixed_terms
+                // (they were set earlier from `y_bounds.fixed_terms`).
+            }
+        }
+    }
+
+    // Determine inner agg name for y histograms so we can locate buckets
+    // inside each x-bucket's `yHistograms` result.
+    let y_inner_agg = if matches!(
+        y_bounds.value_type,
+        ValueType::Keyword | ValueType::TaxonRank
+    ) {
+        "top_terms"
+    } else if matches!(y_bounds.value_type, ValueType::Date) {
+        "date_histogram"
+    } else {
+        "histogram"
+    };
+
+    // If we still have no canonical y keys, scan *all* x-buckets and collect
+    // the union of y bucket keys found. This avoids using a single
+    // first-non-empty-bucket ordering which can produce too-small yBuckets
+    // when some x-buckets yield sparse date/rank histograms.
+    if y_bucket_keys.is_empty() {
+        use std::collections::HashSet;
+        let mut seen: HashSet<String> = HashSet::new();
+        let mut ordered_keys: Vec<Value> = Vec::new();
+        for x_bucket in &x_raw_buckets {
+            let y_hist_path = format!(
+                "/yHistograms/by_attribute/{}/{}/buckets",
+                y_field, y_inner_agg
+            );
+            if let Some(ybuckets) = x_bucket.pointer(&y_hist_path).and_then(|b| b.as_array()) {
+                for b in ybuckets {
+                    if let Some(kv) = b.get("key").cloned() {
+                        let ks = if let Some(s) = kv.as_str() {
+                            s.to_string()
+                        } else {
+                            kv.to_string()
+                        };
+                        if seen.insert(ks) {
+                            ordered_keys.push(kv);
+                        }
+                    }
+                }
+            }
+        }
+
+        if !ordered_keys.is_empty() {
+            // If all keys are numeric, sort ascending numerically; otherwise
+            // keep discovery order which tends to reflect term ordering.
+            let all_numeric = ordered_keys.iter().all(|v| v.as_f64().is_some());
+            if all_numeric {
+                ordered_keys.sort_by(|a, b| {
+                    a.as_f64()
+                        .partial_cmp(&b.as_f64())
+                        .unwrap_or(std::cmp::Ordering::Equal)
+                });
+            }
+            y_bucket_keys = ordered_keys;
+        }
+    }
 
     for x_bucket in &x_raw_buckets {
-        let y_hist_path = format!("/yHistograms/by_attribute/{}/histogram/buckets", y_field);
+        let y_hist_path = format!(
+            "/yHistograms/by_attribute/{}/{}/buckets",
+            y_field, y_inner_agg
+        );
         let y_buckets_opt = x_bucket.pointer(&y_hist_path).and_then(|b| b.as_array());
 
         if let Some(ybuckets) = y_buckets_opt {
+            // If we don't already have canonical keys, initialise from this first non-empty bucket
             if y_bucket_keys.is_empty() {
                 y_bucket_keys = ybuckets
                     .iter()
-                    .filter_map(|b| b.get("key").and_then(|k| k.as_f64()))
+                    .filter_map(|b| b.get("key").cloned())
                     .collect();
             }
-            all_y_values.push(
-                ybuckets
-                    .iter()
-                    .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
-                    .collect(),
-            );
-        } else {
+
+            // Build counts aligned to `y_bucket_keys`. For keyword/rank keys this
+            // ensures the same ordering even if some x buckets lack particular terms.
+            if matches!(
+                y_bounds.value_type,
+                ValueType::Keyword | ValueType::TaxonRank
+            ) {
+                use std::collections::HashMap;
+                let mut map: HashMap<String, u64> = HashMap::new();
+                for b in ybuckets {
+                    if let Some(kv) = b.get("key") {
+                        // Normalize the bucket key to a string regardless of JSON type
+                        let key_s = if let Some(s) = kv.as_str() {
+                            s.to_string()
+                        } else if let Some(n) = kv.as_u64() {
+                            n.to_string()
+                        } else if let Some(n) = kv.as_i64() {
+                            n.to_string()
+                        } else if let Some(f) = kv.as_f64() {
+                            f.to_string()
+                        } else {
+                            kv.to_string()
+                        };
+                        let c = b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0);
+                        map.insert(key_s, c);
+                    }
+                }
+                let mut aligned: Vec<u64> = Vec::with_capacity(y_bucket_keys.len());
+                for k in &y_bucket_keys {
+                    let key_s = if let Some(s) = k.as_str() {
+                        s.to_string()
+                    } else if let Some(n) = k.as_u64() {
+                        n.to_string()
+                    } else if let Some(n) = k.as_i64() {
+                        n.to_string()
+                    } else if let Some(f) = k.as_f64() {
+                        f.to_string()
+                    } else {
+                        k.to_string()
+                    };
+                    aligned.push(map.get(&key_s).copied().unwrap_or(0));
+                }
+                all_y_values.push(aligned);
+            } else {
+                all_y_values.push(
+                    ybuckets
+                        .iter()
+                        .map(|b| b.get("doc_count").and_then(|c| c.as_u64()).unwrap_or(0))
+                        .collect(),
+                );
+            }
+        } else if y_bucket_keys.is_empty() {
             all_y_values.push(vec![0; y_bucket_count]);
+        } else {
+            all_y_values.push(vec![0; y_bucket_keys.len()]);
         }
     }
 
@@ -2307,34 +3322,64 @@ pub async fn run_scatter_report(
 
     // ---- Extract per-category data ----
     let (by_cat, y_values_by_cat) = if !cat_labels.is_empty() || cat_is_numeric {
+        // Resolve cat_storage so extract_scatter_by_cat can build deterministic paths.
+        let cat_storage_for_extract = cat_spec_opt
+            .as_ref()
+            .and_then(|spec| resolve_field_storage(&spec.field, spec.value_type, &state.cache).ok())
+            .unwrap_or(FieldStorage::Root {
+                es_field: String::new(),
+            });
         extract_scatter_by_cat(
             &resp,
             agg_name,
-            x_field.as_str(),
+            &x_storage,
             x_inner_agg,
+            &cat_storage_for_extract,
             y_field.as_str(),
+            y_inner_agg,
             x_bucket_count,
             y_bucket_count,
             &cat_labels,
             show_other_cat,
             cat_is_numeric,
             &all_values,
+            if !y_bounds.fixed_terms.is_empty() {
+                Some(&y_bounds.fixed_terms[..])
+            } else {
+                None
+            },
         )
     } else {
         (Value::Null, Value::Null)
     };
 
-    // ---- Fetch raw point data if below threshold ----
-    let raw_data = if total_hits as usize <= scatter_threshold {
+    // ---- Fetch raw point data when needed ----
+    // Only fetch rawData when the total matched hits are within the configured
+    // `scatter_threshold`.
+    //
+    // An earlier revision also fetched raw points for categorical axes
+    // (keyword/taxon) so the client/converter could jitter points within
+    // categories for visibility. That behaviour is opt-in and not the
+    // default — respect the threshold by default.
+    let should_fetch_raw = total_hits as usize <= scatter_threshold;
+
+    let raw_data = if should_fetch_raw {
         fetch_raw_point_data(
             state,
             index,
             base_query,
             x_field.as_str(),
+            matches!(x_spec.value_type, ValueType::TaxonRank),
             y_field.as_str(),
+            matches!(y_spec.value_type, ValueType::TaxonRank),
             cat_field_str,
             &cat_labels,
             show_other_cat,
+            if !x_bounds.fixed_terms.is_empty() {
+                Some(&x_bounds.fixed_terms[..])
+            } else {
+                None
+            },
             scatter_threshold,
         )
         .await
@@ -2342,29 +3387,49 @@ pub async fn run_scatter_report(
         Value::Null
     };
 
+    // Build a single structured `buckets` array where each element is an
+    // object `{ id, label, count }`. Use `x_bounds.fixed_terms` (when
+    // present) as the authoritative ordering to avoid appending spurious
+    // buckets returned by ES.
+    let buckets_struct: Vec<Value> =
+        build_structured_buckets(&x_raw_buckets, &x_bounds.fixed_terms, &x_bucket_labels);
+
     let mut report_data = json!({
         "type": "scatter",
         "x": {
             "field": x_field,
             "scale": format!("{:?}", x_spec.opts.scale).to_lowercase(),
-            "domain": x_bounds.domain
+            "domain": x_bounds.domain,
+            "value_type": value_type_to_string(x_spec.value_type)
         },
         "y": {
             "field": y_field,
             "scale": format!("{:?}", y_spec.opts.scale).to_lowercase(),
-            "domain": y_bounds.domain
+            "domain": y_bounds.domain,
+            "value_type": value_type_to_string(y_spec.value_type)
         },
-        "buckets": x_bucket_keys,
+        "buckets": buckets_struct,
         "allValues": all_values,
         "yBuckets": y_bucket_keys,
+        "yBucketLabels": y_bucket_labels,
         "allYValues": all_y_values,
         "zDomain": z_domain
     });
 
+    // Historically we returned `bucketLabels` separately; clients should now
+    // consume the structured `buckets` array. Keep `bucketLabels` absent to
+    // avoid duplication.
+
     if !by_cat.is_null() {
         report_data["by_cat"] = by_cat;
         report_data["yValuesByCat"] = y_values_by_cat;
-        report_data["cat"] = json!(cat_spec_opt.as_ref().map(|s| s.field.as_str()));
+        if let Some(ref cat_spec) = cat_spec_opt {
+            report_data["cat"] = json!({
+                "field": cat_spec.field,
+                "value_type": value_type_to_string(cat_spec.value_type),
+                "scale": format!("{:?}", cat_spec.opts.scale).to_lowercase()
+            });
+        }
         report_data["cats"] = json!(cat_labels);
     }
 
diff --git a/crates/genomehubs-api/src/report/spec_builder.rs b/crates/genomehubs-api/src/report/spec_builder.rs
new file mode 100644
index 0000000..e28c3c3
--- /dev/null
+++ b/crates/genomehubs-api/src/report/spec_builder.rs
@@ -0,0 +1,737 @@
+//! Server-side PlotSpec construction helpers.
+//!
+//! Build a fully-resolved `PlotSpec` from a report payload and optional
+//! `display` hints. This lives in the API crate because it may consult
+//! server-side knowledge (report JSON shapes) and is not intended for the
+//! WASM-local build path.
+
+use serde_json::{json, Value};
+
+use genomehubs_query::report::display::TickLabelPlacement;
+use genomehubs_query::report::plot_spec::{AxisMeta, PlotReportType, SeriesMeta};
+use genomehubs_query::report::DisplaySpec;
+use genomehubs_query::report::PlotSpec;
+
+/// Parse optional `display` hints into a `DisplaySpec`.
+///
+/// A string value is parsed as YAML; any other value is deserialised as JSON.
+/// A missing value or a parse failure yields `DisplaySpec::default()`.
+fn parse_display(display: Option<&Value>) -> DisplaySpec {
+    match display {
+        None => DisplaySpec::default(),
+        Some(Value::String(yaml)) => serde_yaml::from_str(yaml).unwrap_or_default(),
+        Some(other) => serde_json::from_value(other.clone()).unwrap_or_default(),
+    }
+}
+
+/// Read `[lo, hi]` from a JSON array; non-numeric `lo` is 0.0, `hi` is `lo + 1.0`.
+fn domain_from_value(v: Option<&Value>) -> [f64; 2] {
+    match v.and_then(|d| d.as_array()).map(|items| items.as_slice()) {
+        Some([lo, hi, ..]) => {
+            let lo = lo.as_f64().unwrap_or(0.0);
+            [lo, hi.as_f64().unwrap_or(lo + 1.0)]
+        }
+        _ => [0.0, 1.0], // not an array, or fewer than two items
+    }
+}
+
+/// Build an `AxisMeta` for `field` with no label and empty tick data.
+///
+/// `scale` defaults to `"linear"`. Without a `value_type_hint` the value type
+/// is `"float"` when the parsed domain differs from `[0.0, 1.0]`, otherwise
+/// `"keyword"`. Keyword axes place labels between ticks; all others on ticks.
+fn make_axis_meta(
+    field: &str,
+    scale: Option<&str>,
+    domain_val: Option<&Value>,
+    value_type_hint: Option<&str>,
+) -> AxisMeta {
+    let domain = domain_from_value(domain_val);
+    // An explicit hint wins; otherwise guess from whether the domain equals
+    // `[0.0, 1.0]`.
+    let value_type = match value_type_hint {
+        Some(hint) => hint.to_string(),
+        None if domain != [0.0, 1.0] => "float".to_string(),
+        None => "keyword".to_string(),
+    };
+    let tick_label_placement = match value_type.as_str() {
+        "keyword" => TickLabelPlacement::BetweenTicks,
+        _ => TickLabelPlacement::OnTick,
+    };
+
+    AxisMeta {
+        field: field.to_string(),
+        label: None,
+        scale: scale.unwrap_or("linear").to_string(),
+        domain,
+        tick_values: vec![],
+        tick_labels: vec![],
+        value_type,
+        tick_label_placement,
+        tick_label_stride: 1,
+        tick_label_max_length: None,
+    }
+}
+
+/// Turn the `cats` array into one `SeriesMeta` per string entry, using the
+/// string as both `key` and `label`. Non-string entries are skipped and a
+/// missing or non-array value yields an empty list.
+fn build_series_from_cats(cats: Option<&Value>) -> Vec<SeriesMeta> {
+    let entries = cats.and_then(|c| c.as_array()).into_iter().flatten();
+    entries
+        .filter_map(|entry| entry.as_str())
+        .map(|name| SeriesMeta {
+            key: name.to_string(),
+            label: name.to_string(),
+            color: None,
+        })
+        .collect()
+}
+
+/// Compute bin boundary values from an ordered list of numeric bucket keys.
+///
+/// Returns `keys.len() + 1` values: the original `keys` plus one extra right
+/// boundary estimated as `last_key + (last_key − second_to_last_key)`.
+///
+/// With a single key the width is the `domain` span divided by `tickCount`,
+/// both read from `axis_obj` (`tickCount` defaults to 10; the width is 1.0
+/// when `domain` is not an array of at least two items). No keys → no values.
+///
+/// The boundary list is used by Vega-Lite `binned` encodings to draw each bar
+/// from its left edge to its right edge without overlap or gap.
+fn bucket_keys_to_boundaries(sorted_keys: &[f64], axis_obj: &Value) -> Vec<f64> {
+    let (last, width) = match sorted_keys {
+        [] => return vec![],
+        // Use the trailing pair (not the leading one) so the extra boundary
+        // follows the spacing next to it when keys are not evenly spaced.
+        [.., prev, last] => (*last, last - prev),
+        [only] => {
+            // Single bucket: estimate the width from domain / tickCount.
+            let domain = axis_obj.get("domain").and_then(|d| d.as_array());
+            let width = match domain.map(|d| d.as_slice()) {
+                Some([lo, hi, ..]) => {
+                    let lo = lo.as_f64().unwrap_or(0.0);
+                    let hi = hi.as_f64().unwrap_or(lo + 1.0);
+                    let ticks = axis_obj.get("tickCount").and_then(|v| v.as_u64());
+                    (hi - lo) / (ticks.unwrap_or(10) as f64).max(1.0)
+                }
+                _ => 1.0,
+            };
+            (*only, width)
+        }
+    };
+    // Keys are the left edges; append the right edge of the final bin.
+    let mut boundaries = sorted_keys.to_vec();
+    boundaries.push(last + width);
+    boundaries
+}
+
+/// Populate tick data on `meta` from a report's bucket array.
+///
+/// * Keyword axes (`meta.value_type == "keyword"`): fill `meta.tick_labels`
+///   from `label_source` when it is present and non-empty (non-string entries
+///   become `""`); otherwise from each bucket's string `label`, falling back
+///   to its string `id` (or `key` when `id` is absent), then to `""`.
+/// * All other axes: collect each bucket's numeric `key` (or, failing that, an
+///   `id` string that parses as a float), sort ascending, and store the bin
+///   boundaries derived from them in `meta.tick_values`.
+///
+/// `meta` is left untouched when no labels / keys are found.
+///
+/// `axis_obj` is the axis spec JSON object; it is forwarded to
+/// `bucket_keys_to_boundaries` for single-bucket width estimation.
+fn fill_tick_data_from_buckets(
+    meta: &mut AxisMeta,
+    axis_obj: &Value,
+    buckets: &[Value],
+    label_source: Option<&[Value]>,
+) {
+    if meta.value_type != "keyword" {
+        // Numeric axis: gather keys, order them, then convert to boundaries.
+        let mut numeric_keys = Vec::with_capacity(buckets.len());
+        for bucket in buckets {
+            let from_key = bucket.get("key").and_then(|k| k.as_f64());
+            let from_id = || {
+                let id = bucket.get("id")?.as_str()?;
+                id.parse::<f64>().ok()
+            };
+            if let Some(n) = from_key.or_else(from_id) {
+                numeric_keys.push(n);
+            }
+        }
+        if !numeric_keys.is_empty() {
+            numeric_keys.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+            meta.tick_values = bucket_keys_to_boundaries(&numeric_keys, axis_obj);
+        }
+        return;
+    }
+
+    // Explicit labels (e.g. yBucketLabels) win over anything in the buckets.
+    let explicit = label_source.unwrap_or(&[]);
+    if !explicit.is_empty() {
+        meta.tick_labels = explicit
+            .iter()
+            .map(|v| v.as_str().unwrap_or("").to_string())
+            .collect();
+        return;
+    }
+
+    // Otherwise derive one label per bucket: `label`, else `id`, else `key`.
+    let derived: Vec<String> = buckets
+        .iter()
+        .map(|bucket| {
+            let label = bucket.get("label").and_then(|l| l.as_str());
+            let ident = || {
+                let raw = bucket.get("id").or_else(|| bucket.get("key"))?;
+                raw.as_str()
+            };
+            label.or_else(ident).unwrap_or_default().to_string()
+        })
+        .collect();
+    if !derived.is_empty() {
+        meta.tick_labels = derived;
+    }
+}
+
+/// Build a merged PlotSpec for multiple arc reports.
+///
+/// Each entry in `reports` is expected to be the `report` object returned by
+/// the arc report handlers, whose `arc` is either a scalar or an array of ring
+/// objects. Every ring becomes one entry in `data.entries`, tagged with:
+///
+/// * `report_index` / `report_label` — the position of its report and the
+///   first string among `featureTerm`, `referenceTerm` and `queryString`
+///   (else `report<N>`);
+/// * `scaled` — `arc` when numeric, otherwise `arc2` divided by the largest
+///   `arc2` in the batch, otherwise `0.0`.
+///
+/// The returned `PlotSpec` uses `scaled` as its `x` axis and lists one series
+/// per input report. This function currently never returns `Err`.
+pub fn build_arc_plot_spec_from_reports(
+    reports: &[Value],
+    batch_display: Option<&Value>,
+) -> Result<PlotSpec, String> {
+    // Keys tried, in order, when choosing a human-readable report label.
+    const LABEL_KEYS: [&str; 3] = ["featureTerm", "referenceTerm", "queryString"];
+    // Optional keys copied, in order, from a scalar report onto its single ring.
+    const SCALAR_KEYS: [&str; 5] = [
+        "arc2",
+        "feature_count",
+        "reference_count",
+        "featureTerm",
+        "referenceTerm",
+    ];
+
+    // Parse batch display into a DisplaySpec so defaults are available.
+    let display = parse_display(batch_display);
+
+    // Pass 1: flatten every report into ring maps tagged with their origin.
+    let mut rings: Vec<serde_json::Map<String, Value>> = Vec::new();
+    let mut report_labels: Vec<String> = Vec::with_capacity(reports.len());
+    for (index, report) in reports.iter().enumerate() {
+        let label = LABEL_KEYS
+            .iter()
+            .find_map(|key| report.get(*key).and_then(|v| v.as_str()))
+            .map(|s| s.to_string())
+            .unwrap_or_else(|| format!("report{}", index));
+
+        let untagged: Vec<serde_json::Map<String, Value>> = match report.get("arc") {
+            // Ring array: keep objects as they are, wrap bare values as `arc`.
+            Some(Value::Array(items)) => items
+                .iter()
+                .map(|item| match item {
+                    Value::Object(fields) => fields.clone(),
+                    bare => {
+                        let mut ring = serde_json::Map::new();
+                        ring.insert("arc".to_string(), bare.clone());
+                        ring
+                    }
+                })
+                .collect(),
+            // Scalar report: synthesise a single ring from the known keys.
+            Some(arc) => {
+                let mut ring = serde_json::Map::new();
+                ring.insert("arc".to_string(), arc.clone());
+                for key in SCALAR_KEYS {
+                    if let Some(value) = report.get(key) {
+                        ring.insert(key.to_string(), value.clone());
+                    }
+                }
+                vec![ring]
+            }
+            // No `arc` at all: the report contributes a label but no rings.
+            None => Vec::new(),
+        };
+        // Record which report each ring came from.
+        for mut ring in untagged {
+            ring.insert("report_index".to_string(), json!(index));
+            ring.insert("report_label".to_string(), json!(label.clone()));
+            rings.push(ring);
+        }
+        report_labels.push(label);
+    }
+
+    // Compute scaling factor for arc2 values (if present). The fold starts at
+    // 0.0, so a batch without any numeric `arc2` leaves the factor at zero.
+    let max_arc2 = rings
+        .iter()
+        .filter_map(|ring| ring.get("arc2").and_then(|v| v.as_f64()))
+        .fold(0.0_f64, f64::max);
+
+    // Pass 2: attach the `scaled` value to every ring.
+    let final_entries: Vec<Value> = rings
+        .into_iter()
+        .map(|mut ring| {
+            let arc = ring.get("arc").and_then(|v| v.as_f64());
+            let arc2 = ring.get("arc2").and_then(|v| v.as_f64());
+            let scaled = match (arc, arc2) {
+                // A numeric `arc` is taken as-is.
+                (Some(a), _) => a,
+                // Otherwise scale `arc2` against the batch maximum.
+                (None, Some(a2)) if max_arc2 > 0.0 => a2 / max_arc2,
+                _ => 0.0,
+            };
+            ring.insert("scaled".to_string(), json!(scaled));
+            Value::Object(ring)
+        })
+        .collect();
+
+    // Series metadata: one series per input report (labeled by report label).
+    let series: Vec<SeriesMeta> = report_labels
+        .iter()
+        .enumerate()
+        .map(|(i, label)| SeriesMeta {
+            key: format!("report_{i}"),
+            label: label.clone(),
+            color: None,
+        })
+        .collect();
+
+    // X axis: scaled arc values in [0,1].
+    let x_axis = AxisMeta {
+        field: "scaled".to_string(),
+        label: Some("Arc (scaled)".to_string()),
+        scale: "linear".to_string(),
+        domain: [0.0, 1.0],
+        tick_values: vec![],
+        tick_labels: vec![],
+        value_type: "float".to_string(),
+        tick_label_placement: TickLabelPlacement::OnTick,
+        tick_label_stride: 1,
+        tick_label_max_length: None,
+    };
+
+    let data = json!({
+        "type": "arc_batch",
+        "entries": final_entries,
+        "reports": report_labels,
+    });
+
+    let spec = PlotSpec {
+        report_type: PlotReportType::Arc,
+        x: Some(x_axis),
+        y: None,
+        cat: None,
+        z: None,
+        series,
+        display,
+        data,
+    };
+
+    Ok(spec)
+}
+
+/// Build a `PlotSpec` from a v3 report payload and optional `display` hints.
+///
+/// `report_type` is the canonical report string (e.g. "histogram", "scatter").
+/// `report_data` is the JSON returned by the report handlers. `display` may be
+/// a YAML string or JSON object and will be merged into the resulting spec.
+pub fn build_plot_spec(
+    report_type: &str,
+    report_data: &Value,
+    display: Option<&Value>,
+) -> Result<PlotSpec, String> {
+    let pr = PlotReportType::parse(report_type).unwrap_or(PlotReportType::Histogram);
+    let display_spec = parse_display(display);
+
+    // Normalise histogram display options: prefer explicit `mode` when present;
+    // otherwise derive it from legacy boolean flags for compatibility.
+    if let Some(hist) = display_spec.histogram.as_ref() {
+        // nothing to do when mode already set
+        if hist.mode.is_none() {
+            // We'll fill in a sensible default later when serialising the
+            // PlotSpec; clone and adjust the DisplaySpec to ensure the
+            // resulting `plot_spec.display.histogram.mode` is always present
+            // for clients and converters.
+        }
+    }
+
+    // Ensure the returned DisplaySpec contains a canonical `histogram.mode`
+    // when histogram options are present. This keeps downstream converters
+    // simple: `mode` is authoritative and overrides `stacked`/`cumulative`.
+    let mut display_spec = display_spec;
+    if let Some(hist_opts) = display_spec.histogram.as_mut() {
+        if hist_opts.mode.is_none() {
+            if hist_opts.stacked.unwrap_or(false) {
+                hist_opts.mode = Some("stacked".to_string());
+            } else if hist_opts.cumulative.unwrap_or(false) {
+                hist_opts.mode = Some("cumulative".to_string());
+            } else {
+                // default behaviour remains stacked for backward-compatibility
+                hist_opts.mode = Some("stacked".to_string());
+            }
+        }
+        // Keep boolean `stacked` consistent with `mode` for consumers
+        match hist_opts.mode.as_deref() {
+            Some("stacked") => hist_opts.stacked = Some(true),
+            Some("grouped") | Some("facet") | Some("cumulative") => hist_opts.stacked = Some(false),
+            _ => {}
+        }
+    }
+    if let Some(arc_opts) = display_spec.arc.as_mut() {
+        if arc_opts.mode.is_none() {
+            arc_opts.mode = Some("grouped".to_string());
+        }
+        if arc_opts.shape.is_none() {
+            arc_opts.shape = Some("auto".to_string());
+        }
+    }
+
+    // Default empty values
+    let mut x: Option<AxisMeta> = None;
+    let mut y: Option<AxisMeta> = None;
+    let z: Option<AxisMeta> = None;
+    let mut series: Vec<SeriesMeta> = Vec::new();
+
+    match pr {
+        PlotReportType::Histogram => {
+            if let Some(x_obj) = report_data.get("x") {
+                if let Some(field) = x_obj.get("field").and_then(|v| v.as_str()) {
+                    let scale = x_obj.get("scale").and_then(|v| v.as_str());
+                    let domain = x_obj.get("domain");
+                    let value_type_hint = x_obj.get("value_type").and_then(|v| v.as_str());
+                    x = Some(make_axis_meta(field, scale, domain, value_type_hint));
+                    if let Some(meta) = x.as_mut() {
+                        let axis_opts = display_spec
+                            .histogram
+                            .as_ref()
+                            .and_then(|h| h.x_axis.as_ref());
+                        genomehubs_query::report::spec_builder::resolve_axis_display(
+                            meta, axis_opts,
+                        );
+                        let buckets = report_data
+                            .get("buckets")
+                            .and_then(|v| v.as_array())
+                            .map(|a| a.as_slice())
+                            .unwrap_or(&[]);
+                        fill_tick_data_from_buckets(meta, x_obj, buckets, None);
+                    }
+                }
+            }
+            // Series from cats
+            series = build_series_from_cats(report_data.get("cats"));
+            // Y axis: histogram counts (doc_count) — ensure converter receives
+            // authoritative axis metadata so it does not need to guess.
+            if let Some(buckets) = report_data.get("buckets").and_then(|v| v.as_array()) {
+                let counts: Vec<f64> = buckets
+                    .iter()
+                    .map(|b| b.get("doc_count").and_then(|c| c.as_f64()).unwrap_or(0.0))
+                    .collect();
+                let max = counts.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
+                let domain = if max.is_finite() {
+                    [0.0, if max > 0.0 { max } else { 1.0 }]
+                } else {
+                    [0.0, 1.0]
+                };
+                y = Some(AxisMeta {
+                    field: "doc_count".to_string(),
+                    label: Some("count".to_string()),
+                    scale: "linear".to_string(),
+                    domain,
+                    tick_values: vec![],
+                    tick_labels: vec![],
+                    value_type: "integer".to_string(),
+                    tick_label_placement: TickLabelPlacement::OnTick,
+                    tick_label_stride: 1,
+                    tick_label_max_length: None,
+                });
+            } else {
+                y = Some(make_axis_meta(
+                    "doc_count",
+                    Some("linear"),
+                    None,
+                    Some("integer"),
+                ));
+            }
+        }
+        PlotReportType::Scatter => {
+            if let Some(x_obj) = report_data.get("x") {
+                if let Some(field) = x_obj.get("field").and_then(|v| v.as_str()) {
+                    let scale = x_obj.get("scale").and_then(|v| v.as_str());
+                    let domain = x_obj.get("domain");
+                    let value_type_hint = x_obj.get("value_type").and_then(|v| v.as_str());
+                    x = Some(make_axis_meta(field, scale, domain, value_type_hint));
+                    if let Some(meta) = x.as_mut() {
+                        let axis_opts = display_spec
+                            .scatter
+                            .as_ref()
+                            .and_then(|s| s.x_axis.as_ref())
+                            .or_else(|| {
+                                display_spec
+                                    .histogram
+                                    .as_ref()
+                                    .and_then(|h| h.x_axis.as_ref())
+                            });
+                        genomehubs_query::report::spec_builder::resolve_axis_display(
+                            meta, axis_opts,
+                        );
+                        let buckets = report_data
+                            .get("buckets")
+                            .and_then(|v| v.as_array())
+                            .map(|a| a.as_slice())
+                            .unwrap_or(&[]);
+                        fill_tick_data_from_buckets(meta, x_obj, buckets, None);
+                    }
+                }
+            }
+            if let Some(y_obj) = report_data.get("y") {
+                if let Some(field) = y_obj.get("field").and_then(|v| v.as_str()) {
+                    let scale = y_obj.get("scale").and_then(|v| v.as_str());
+                    let domain = y_obj.get("domain");
+                    let value_type_hint = y_obj.get("value_type").and_then(|v| v.as_str());
+                    y = Some(make_axis_meta(field, scale, domain, value_type_hint));
+                    if let Some(meta) = y.as_mut() {
+                        let axis_opts = display_spec
+                            .scatter
+                            .as_ref()
+                            .and_then(|s| s.y_axis.as_ref())
+                            .or_else(|| {
+                                display_spec
+                                    .histogram
+                                    .as_ref()
+                                    .and_then(|h| h.y_axis.as_ref())
+                            });
+                        genomehubs_query::report::spec_builder::resolve_axis_display(
+                            meta, axis_opts,
+                        );
+                        // Prefer explicit yBucketLabels (human-readable taxon rank names)
+                        // then fall back to yBuckets for both keyword and numeric axes.
+                        let explicit_labels: Option<&[Value]> = report_data
+                            .get("yBucketLabels")
+                            .and_then(|v| v.as_array())
+                            .map(|a| a.as_slice());
+                        let y_buckets: Vec<Value> = report_data
+                            .get("yBuckets")
+                            .and_then(|v| v.as_array())
+                            .map(|arr| {
+                                arr.iter()
+                                    .map(|v| match v {
+                                        // Convert raw scalar bucket keys into fake bucket objects
+                                        // so fill_tick_data_from_buckets can process them.
+                                        Value::Number(_) | Value::String(_) => {
+                                            serde_json::json!({ "key": v })
+                                        }
+                                        other => other.clone(),
+                                    })
+                                    .collect()
+                            })
+                            .unwrap_or_default();
+                        fill_tick_data_from_buckets(meta, y_obj, &y_buckets, explicit_labels);
+                    }
+                }
+            }
+            series = build_series_from_cats(report_data.get("cats"));
+        }
+        PlotReportType::CountPerRank => {
+            // Count per rank: x is rank labels (keyword), y is count
+            if let Some(buckets) = report_data.get("buckets").and_then(|v| v.as_array()) {
+                // pick first bucket's rank field name via keys
+                // we'll construct a dummy x axis named "rank"
+                x = Some(make_axis_meta(
+                    "rank",
+                    Some("ordinal"),
+                    None,
+                    Some("keyword"),
+                ));
+                // y domain from counts
+                let counts: Vec<f64> = buckets
+                    .iter()
+                    .map(|b| b.get("count").and_then(|c| c.as_f64()).unwrap_or(0.0))
+                    .collect();
+                let min = counts.iter().cloned().fold(f64::INFINITY, f64::min);
+                let max = counts.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
+                let domain = if min.is_finite() && max.is_finite() {
+                    [min, if max > min { max } else { min + 1.0 }]
+                } else {
+                    [0.0, 1.0]
+                };
+                y = Some(AxisMeta {
+                    field: "count".to_string(),
+                    label: Some("count".to_string()),
+                    scale: "linear".to_string(),
+                    domain,
+                    tick_values: vec![],
+                    tick_labels: vec![],
+                    value_type: "integer".to_string(),
+                    tick_label_placement: TickLabelPlacement::OnTick,
+                    tick_label_stride: 1,
+                    tick_label_max_length: None,
+                });
+            }
+        }
+        PlotReportType::Sources => {
+            // Sources returns buckets; treat as categorical x + numeric y
+            if let Some(buckets) = report_data.get("buckets").and_then(|v| v.as_array()) {
+                x = Some(make_axis_meta(
+                    "source",
+                    Some("ordinal"),
+                    None,
+                    Some("keyword"),
+                ));
+                let counts: Vec<f64> = buckets
+                    .iter()
+                    .map(|b| b.get("count").and_then(|c| c.as_f64()).unwrap_or(0.0))
+                    .collect();
+                let min = counts.iter().cloned().fold(f64::INFINITY, f64::min);
+                let max = counts.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
+                let domain = if min.is_finite() && max.is_finite() {
+                    [min, if max > min { max } else { min + 1.0 }]
+                } else {
+                    [0.0, 1.0]
+                };
+                y = Some(AxisMeta {
+                    field: "count".to_string(),
+                    label: Some("count".to_string()),
+                    scale: "linear".to_string(),
+                    domain,
+                    tick_values: vec![],
+                    tick_labels: vec![],
+                    value_type: "integer".to_string(),
+                    tick_label_placement: TickLabelPlacement::OnTick,
+                    tick_label_stride: 1,
+                    tick_label_max_length: None,
+                });
+            }
+        }
+        PlotReportType::Arc => {
+            dbg!(&report_data);
+        }
+        PlotReportType::Tree
+        | PlotReportType::Map
+        | PlotReportType::Oxford
+        | PlotReportType::Ribbon
+        | PlotReportType::Painting => {
+            // Positional / complex reports: rely on display/data only. Axis
+            // metadata for these are highly report-specific and are handled by
+            // the positional endpoint's own PlotSpec builder. Here we provide
+            // a conservative default: embed the full report JSON as data and
+            // leave axes empty.
+        }
+    }
+
+    // Build `cat` AxisMeta from report_data["cat"] when present. This keeps
+    // categorical metadata (field, value_type, scale, tick labels) in the
+    // canonical PlotSpec so converters can deterministically render legends
+    // and category axes.
+    let mut cat_meta: Option<AxisMeta> = None;
+    if let Some(cat_obj) = report_data.get("cat") {
+        if let Some(field) = cat_obj.get("field").and_then(|v| v.as_str()) {
+            let scale = cat_obj.get("scale").and_then(|v| v.as_str());
+            let domain = cat_obj.get("domain");
+            let value_type_hint = cat_obj.get("value_type").and_then(|v| v.as_str());
+            let mut cm = make_axis_meta(field, scale, domain, value_type_hint);
+            // Apply any top-level display label for categories if provided
+            if let Some(label) = display_spec.cat_label.as_ref() {
+                cm.label = Some(label.clone());
+            }
+            // Prefer explicit tick labels supplied under `report_data["cat"]["tick_labels"]`.
+            if let Some(lbls) = cat_obj.get("tick_labels").and_then(|v| v.as_array()) {
+                let labels: Vec<String> = lbls
+                    .iter()
+                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
+                    .collect();
+                if !labels.is_empty() {
+                    cm.tick_labels = labels;
+                }
+            }
+
+            // Populate tick labels for categorical cat axes from report_data["cats"]
+            if cm.value_type == "keyword" {
+                if let Some(cats_arr) = report_data.get("cats").and_then(|v| v.as_array()) {
+                    let labels: Vec<String> = cats_arr
+                        .iter()
+                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
+                        .collect();
+                    if !labels.is_empty() {
+                        cm.tick_labels = labels;
+                    }
+                }
+            } else {
+                // Numeric cat axes: prefer explicit numeric `tick_values` supplied
+                // in `report_data["cat"]["tick_values"]`. Fall back to parsing
+                // `report_data["cats"]` when not present.
+                if let Some(vals) = report_data
+                    .get("cat")
+                    .and_then(|c| c.get("tick_values"))
+                    .and_then(|v| v.as_array())
+                {
+                    let nums: Vec<f64> = vals.iter().filter_map(|v| v.as_f64()).collect();
+                    if !nums.is_empty() {
+                        cm.tick_values = nums;
+                    }
+                } else if let Some(cats_arr) = report_data.get("cats").and_then(|v| v.as_array()) {
+                    let mut nums: Vec<f64> = Vec::new();
+                    for v in cats_arr.iter() {
+                        if let Some(n) = v.as_f64() {
+                            nums.push(n);
+                        } else if let Some(s) = v.as_str() {
+                            if let Ok(n) = s.parse::<f64>() {
+                                nums.push(n);
+                            }
+                        }
+                    }
+                    if !nums.is_empty() {
+                        nums.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+                        let width = if nums.len() >= 2 {
+                            nums[1] - nums[0]
+                        } else {
+                            1.0
+                        };
+                        let mut boundaries = nums.clone();
+                        let last = nums[nums.len() - 1] + width;
+                        boundaries.push(last);
+                        cm.tick_values = boundaries;
+                    }
+                }
+            }
+            cat_meta = Some(cm);
+        }
+    }
+
+    // If the server supplied human-readable category tick labels, apply
+    // them to the series labels so the legend displays friendly names while
+    // the underlying series keys remain the raw category keys used in
+    // `data.by_cat`.
+    if let Some(ref cm) = cat_meta {
+        if !cm.tick_labels.is_empty() {
+            for (i, lbl) in cm.tick_labels.iter().enumerate() {
+                if let Some(s) = series.get_mut(i) {
+                    s.label = lbl.clone();
+                }
+            }
+        }
+    }
+
+    let plot_spec = PlotSpec {
+        report_type: pr,
+        x,
+        y,
+        cat: cat_meta,
+        z,
+        series,
+        display: display_spec,
+        data: report_data.clone(),
+    };
+
+    Ok(plot_spec)
+}
diff --git a/crates/genomehubs-api/src/routes/count_batch.rs b/crates/genomehubs-api/src/routes/count_batch.rs
index 7e4a0c0..e56d9b8 100644
--- a/crates/genomehubs-api/src/routes/count_batch.rs
+++ b/crates/genomehubs-api/src/routes/count_batch.rs
@@ -1,6 +1,6 @@
 use axum::{extract::Json, Extension};
 use serde::{Deserialize, Deserializer, Serialize};
-use serde_json::Value;
+use serde_json::{json, Value};
 use std::sync::Arc;
 
 use super::deserialize_helpers;
@@ -12,10 +12,14 @@ fn combine_es_bodies(
     combine_with: &genomehubs_query::query::CombineStrategy,
 ) -> serde_json::Value {
     if bodies.is_empty() {
-        return serde_json::json!({ "query": { "match_all": {} }, "size": 0 });
+        return serde_json::json!({ "query": { "match_all": {} }, "size": 0, "track_total_hits": true });
     }
     if bodies.len() == 1 {
-        return bodies.into_iter().next().unwrap();
+        let mut count_query = bodies.into_iter().next().unwrap();
+        if let Some(obj) = count_query.as_object_mut() {
+            obj.insert("track_total_hits".to_string(), serde_json::json!(true));
+        }
+        return count_query;
     }
 
     // Extract the "query" clause from each body; combine with bool.should/must
@@ -46,6 +50,7 @@ fn combine_es_bodies(
     let mut result = bodies.into_iter().next().unwrap();
     if let Some(obj) = result.as_object_mut() {
         obj.insert("query".to_string(), combined_query);
+        obj.insert("track_total_hits".to_string(), serde_json::json!(true));
     }
     result
 }
@@ -118,9 +123,13 @@ fn build_msearch_body(searches: &[(String, serde_json::Value)]) -> String {
         .iter()
         .flat_map(|(index, body)| {
             let header = serde_json::json!({ "index": index });
+            let count_body = json!({
+                "query": body.get("query").cloned().unwrap_or_else(|| serde_json::json!({"match_all": {}})),
+                "track_total_hits": true
+            });
             vec![
                 serde_json::to_string(&header).unwrap(),
-                serde_json::to_string(body).unwrap(),
+                serde_json::to_string(&count_body).unwrap(),
             ]
         })
         .collect::<Vec<_>>()
diff --git a/crates/genomehubs-api/src/routes/mod.rs b/crates/genomehubs-api/src/routes/mod.rs
index 98d888a..ac529eb 100644
--- a/crates/genomehubs-api/src/routes/mod.rs
+++ b/crates/genomehubs-api/src/routes/mod.rs
@@ -61,6 +61,7 @@ pub mod positional;
 pub mod record;
 pub mod record_batch;
 pub mod report;
+pub mod report_batch;
 pub mod result_fields;
 pub mod search;
 pub mod search_batch;
diff --git a/crates/genomehubs-api/src/routes/positional.rs b/crates/genomehubs-api/src/routes/positional.rs
index 77c8099..97122f3 100644
--- a/crates/genomehubs-api/src/routes/positional.rs
+++ b/crates/genomehubs-api/src/routes/positional.rs
@@ -580,6 +580,7 @@ pub async fn post_positional(
             report_type: pr,
             x: None,
             y: None,
+            cat: None,
             z: None,
             series: Vec::new(),
             display: genomehubs_query::report::DisplaySpec::default(),
diff --git a/crates/genomehubs-api/src/routes/report.rs b/crates/genomehubs-api/src/routes/report.rs
index 27454ec..08fe472 100644
--- a/crates/genomehubs-api/src/routes/report.rs
+++ b/crates/genomehubs-api/src/routes/report.rs
@@ -8,11 +8,15 @@ use genomehubs_query::query::{QueryParams, SearchQuery};
 
 use crate::{index_name, report::report_types, routes::ApiStatus, AppState};
 
-#[derive(utoipa::ToSchema)]
+#[derive(Serialize, utoipa::ToSchema)]
 pub struct ReportRequest {
     pub query_yaml: String,
     pub params_yaml: String,
     pub report_yaml: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub include_plot_spec: Option<bool>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub display: Option<serde_json::Value>,
 }
 
 impl<'de> Deserialize<'de> for ReportRequest {
@@ -55,10 +59,15 @@ impl<'de> Deserialize<'de> for ReportRequest {
                 return Err(de::Error::missing_field("report or report_yaml"));
             };
 
+        let include_plot_spec = map.get("include_plot_spec").and_then(|v| v.as_bool());
+        let display = map.get("display").cloned();
+
         Ok(ReportRequest {
             query_yaml,
             params_yaml,
             report_yaml,
+            include_plot_spec,
+            display,
         })
     }
 }
@@ -67,6 +76,8 @@ impl<'de> Deserialize<'de> for ReportRequest {
 pub struct ReportResponse {
     pub status: ApiStatus,
     pub report: Value,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub plot_spec: Option<Value>,
 }
 
 #[utoipa::path(
@@ -88,6 +99,7 @@ pub async fn post_report(
             return Json(ReportResponse {
                 status: ApiStatus::error($msg),
                 report: Value::Null,
+                plot_spec: None,
             })
         };
     }
@@ -209,15 +221,33 @@ pub async fn post_report(
         unknown => Err(format!("unknown report type: {unknown}")),
     };
 
-    // Return response
+    // Return response (optionally include a minimal PlotSpec when requested)
     match result {
-        Ok((hits, took, report_data)) => Json(ReportResponse {
-            status: ApiStatus::query_ok(hits, took),
-            report: report_data,
-        }),
+        Ok((hits, took, report_data)) => {
+            let plot_spec_value: Option<Value> =
+                if req.include_plot_spec.unwrap_or(false) || req.display.is_some() {
+                    match crate::report::spec_builder::build_plot_spec(
+                        report_type,
+                        &report_data,
+                        req.display.as_ref(),
+                    ) {
+                        Ok(spec) => serde_json::to_value(&spec).ok(),
+                        Err(_) => None,
+                    }
+                } else {
+                    None
+                };
+
+            Json(ReportResponse {
+                status: ApiStatus::query_ok(hits, took),
+                report: report_data,
+                plot_spec: plot_spec_value,
+            })
+        }
         Err(e) => Json(ReportResponse {
             status: ApiStatus::error(e),
             report: Value::Null,
+            plot_spec: None,
         }),
     }
 }
diff --git a/crates/genomehubs-api/src/routes/report_batch.rs b/crates/genomehubs-api/src/routes/report_batch.rs
new file mode 100644
index 0000000..7abf6d3
--- /dev/null
+++ b/crates/genomehubs-api/src/routes/report_batch.rs
@@ -0,0 +1,151 @@
+use axum::{extract::Json, Extension};
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::sync::Arc;
+
+use crate::{routes::ApiStatus, AppState};
+
+/// Batch request for running multiple reports in one HTTP call.
+#[derive(Deserialize, utoipa::ToSchema)]
+pub struct ReportBatchRequest {
+    /// Array of report requests to execute in batch (max 50).
+    pub reports: Vec<crate::routes::report::ReportRequest>,
+    /// Optional concurrency limit (1..=32).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub concurrency: Option<usize>,
+    /// Optionally request a combined PlotSpec for the batch and provide
+    /// display hints that apply to the combined spec.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub include_plot_spec: Option<bool>,
+    // Batch-level display hints, handed unchanged to the combined arc spec builder.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub display: Option<serde_json::Value>,
+}
+
+// One slot of `ReportBatchResponse::results`; mirrors the `status` / `report` /
+// `plot_spec` fields of the single-report response.
+#[derive(Serialize, utoipa::ToSchema)]
+pub struct ReportBatchResultItem {
+    pub status: ApiStatus,
+    pub report: Value,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub plot_spec: Option<Value>,
+}
+
+// Envelope returned by `post_report_batch`.
+#[derive(Serialize, utoipa::ToSchema)]
+pub struct ReportBatchResponse {
+    pub status: ApiStatus,
+    /// Per-request results in the same order as the input `reports`.
+    pub results: Vec<ReportBatchResultItem>,
+    // Combined arc PlotSpec for the whole batch; left out of the JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub plot_spec: Option<Value>,
+}
+
+/// Run every report in the batch through the single-report handler and
+/// collect the outcomes in request order.
+///
+/// Batches of more than 50 reports are rejected with an error status and no
+/// results. Otherwise each report runs in its own Tokio task, gated by a
+/// semaphore sized from `concurrency` (default 8, clamped to 1..=32). A task
+/// that fails to join is reported as an error item in its own slot. The
+/// batch-level status is `ApiStatus::ok()` regardless of per-item outcomes.
+#[utoipa::path(
+    post,
+    path = "/api/v3/report/batch",
+    tag = "Data",
+    summary = "Generate multiple reports in a single request",
+    description = "Execute multiple report requests concurrently; returns per-item report responses.",
+    request_body(content = ReportBatchRequest),
+    responses((status = 200, description = "Batch report results", body = ReportBatchResponse))
+)]
+#[axum::debug_handler]
+pub async fn post_report_batch(
+    Extension(state): Extension<Arc<AppState>>,
+    Json(req): Json<ReportBatchRequest>,
+) -> Json<ReportBatchResponse> {
+    let ReportBatchRequest {
+        reports,
+        concurrency,
+        include_plot_spec,
+        display,
+    } = req;
+
+    if reports.len() > 50 {
+        return Json(ReportBatchResponse {
+            status: ApiStatus::error("maximum 50 reports per request".to_string()),
+            results: vec![],
+            plot_spec: None,
+        });
+    }
+
+    let permits = Arc::new(tokio::sync::Semaphore::new(
+        concurrency.unwrap_or(8).clamp(1, 32),
+    ));
+
+    // All tasks are spawned up front; the semaphore permit taken inside each
+    // task is what limits how many single-report handlers run at once.
+    let tasks: Vec<_> = reports
+        .into_iter()
+        .map(|single| {
+            let permits = Arc::clone(&permits);
+            let app_state = Arc::clone(&state);
+            tokio::spawn(async move {
+                let _permit = permits.acquire_owned().await.expect("semaphore closed");
+                // Delegate to the single-report handler so parsing and
+                // dispatch stay in one place.
+                let Json(reply) =
+                    crate::routes::report::post_report(Extension(app_state), Json(single)).await;
+                ReportBatchResultItem {
+                    status: reply.status,
+                    report: reply.report,
+                    plot_spec: reply.plot_spec,
+                }
+            })
+        })
+        .collect();
+
+    // Join in spawn order so `results[i]` answers `reports[i]`.
+    let mut results: Vec<ReportBatchResultItem> = Vec::with_capacity(tasks.len());
+    for task in tasks {
+        let item = task.await.unwrap_or_else(|e| ReportBatchResultItem {
+            status: ApiStatus::error(format!("task join failed: {e}")),
+            report: Value::Null,
+            plot_spec: None,
+        });
+        results.push(item);
+    }
+
+    // A combined PlotSpec is only attempted when the caller asked for one
+    // (flag or batch-level `display`) and at least one result report has
+    // `"type": "arc"`.
+    let wants_combined = include_plot_spec.unwrap_or(false) || display.is_some();
+    let arc_reports: Vec<Value> = if wants_combined {
+        results
+            .iter()
+            .map(|item| &item.report)
+            .filter(|report| report.get("type").and_then(Value::as_str) == Some("arc"))
+            .cloned()
+            .collect()
+    } else {
+        Vec::new()
+    };
+    let top_plot_spec: Option<Value> = if arc_reports.is_empty() {
+        None
+    } else {
+        // Builder or serialisation failures yield no combined spec.
+        crate::report::spec_builder::build_arc_plot_spec_from_reports(
+            &arc_reports,
+            display.as_ref(),
+        )
+        .ok()
+        .and_then(|spec| serde_json::to_value(&spec).ok())
+    };
+
+    Json(ReportBatchResponse {
+        status: ApiStatus::ok(),
+        results,
+        plot_spec: top_plot_spec,
+    })
+}
diff --git a/crates/genomehubs-api/target/openapi.json b/crates/genomehubs-api/target/openapi.json
index 02cb627..125309a 100644
--- a/crates/genomehubs-api/target/openapi.json
+++ b/crates/genomehubs-api/target/openapi.json
@@ -588,6 +588,38 @@
         }
       }
     },
+    "/api/v3/report/batch": {
+      "post": {
+        "tags": [
+          "Data"
+        ],
+        "summary": "Generate multiple reports in a single request",
+        "description": "Execute multiple report requests concurrently; returns per-item report responses.",
+        "operationId": "post_report_batch",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/ReportBatchRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Batch report results",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ReportBatchResponse"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
     "/api/v3/search": {
       "get": {
         "tags": [
@@ -1390,6 +1422,16 @@
           "positional_yaml"
         ],
         "properties": {
+          "display": {
+            "description": "Optional display spec (JSON) applied to the returned PlotSpec."
+          },
+          "include_plot_spec": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "When true, produce a serialisable `PlotSpec` alongside the report."
+          },
           "positional_yaml": {
             "type": "string"
           },
@@ -1409,6 +1451,7 @@
           "report"
         ],
         "properties": {
+          "plot_spec": {},
           "report": {},
           "status": {
             "$ref": "#/components/schemas/ApiStatus"
@@ -1531,6 +1574,72 @@
           }
         }
       },
+      "ReportBatchRequest": {
+        "type": "object",
+        "description": "Batch request for running multiple reports in one HTTP call.",
+        "required": [
+          "reports"
+        ],
+        "properties": {
+          "concurrency": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "description": "Optional concurrency limit (1..=32).",
+            "minimum": 0
+          },
+          "display": {},
+          "include_plot_spec": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Optionally request a combined PlotSpec for the batch and provide\ndisplay hints that apply to the combined spec."
+          },
+          "reports": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ReportRequest"
+            },
+            "description": "Array of report requests to execute in batch (max 50)."
+          }
+        }
+      },
+      "ReportBatchResponse": {
+        "type": "object",
+        "required": [
+          "status",
+          "results"
+        ],
+        "properties": {
+          "plot_spec": {},
+          "results": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ReportBatchResultItem"
+            },
+            "description": "Per-request results in the same order as the input `reports`."
+          },
+          "status": {
+            "$ref": "#/components/schemas/ApiStatus"
+          }
+        }
+      },
+      "ReportBatchResultItem": {
+        "type": "object",
+        "required": [
+          "status",
+          "report"
+        ],
+        "properties": {
+          "plot_spec": {},
+          "report": {},
+          "status": {
+            "$ref": "#/components/schemas/ApiStatus"
+          }
+        }
+      },
       "ReportRequest": {
         "type": "object",
         "required": [
@@ -1539,6 +1648,13 @@
           "report_yaml"
         ],
         "properties": {
+          "display": {},
+          "include_plot_spec": {
+            "type": [
+              "boolean",
+              "null"
+            ]
+          },
           "params_yaml": {
             "type": "string"
           },
@@ -1557,6 +1673,7 @@
           "report"
         ],
         "properties": {
+          "plot_spec": {},
           "report": {},
           "status": {
             "$ref": "#/components/schemas/ApiStatus"
diff --git a/crates/genomehubs-api/tests/e2e_scatter_axis_types.rs b/crates/genomehubs-api/tests/e2e_scatter_axis_types.rs
new file mode 100644
index 0000000..ebbdeec
--- /dev/null
+++ b/crates/genomehubs-api/tests/e2e_scatter_axis_types.rs
@@ -0,0 +1,140 @@
+use serde_json::{json, Value};
+
+use reqwest::Client;
+
+use genomehubs_query::report::plot_spec_to_vega_lite_json;
+
+// This is an end-to-end test that posts to a running API; the base URL comes
+// from `GH_API_URL` and defaults to http://localhost:3000.
+// It iterates a small set of axis-type combinations in both raw and binned
+// modes and asserts the server-provided `plot_spec` includes axis `value_type`
+// and that the converter produces a valid Vega-Lite spec (no error payload).
+
+#[tokio::test]
+async fn e2e_scatter_axis_type_permutations() -> Result<(), Box<dyn std::error::Error>> {
+    // The test expects raw points at the high threshold and a binned grid at
+    // the low one (see the mark assertions at the end of the loop body).
+    const RAW_THRESHOLD: i64 = 1000;
+    const BINNED_THRESHOLD: i64 = 10;
+
+    let client = Client::new();
+    let base_url =
+        std::env::var("GH_API_URL").unwrap_or_else(|_| "http://localhost:3000".to_string());
+    let url = format!("{}/api/v3/report", base_url);
+
+    // Axis candidates: rank-like (genus), numeric, keyword, date
+    let axes = vec!["genus", "assembly_span", "assembly_level", "assembly_date"];
+
+    for x in &axes {
+        for y in &axes {
+            for &threshold in &[RAW_THRESHOLD, BINNED_THRESHOLD] {
+                let req_body = json!({
+                    "query": {"index":"taxon", "taxa": ["canidae"], "taxon_filter_type": "tree"},
+                    "params": {},
+                    "report": {"report":"scatter", "x": x, "y": y, "scatter_threshold": threshold},
+                    "include_plot_spec": true,
+                    "display": {"title": format!("scatter {} vs {} thresh {}", x, y, threshold)}
+                });
+
+                let resp = client
+                    .post(&url)
+                    .header("accept", "application/json")
+                    .json(&req_body)
+                    .send()
+                    .await?;
+
+                let status = resp.status();
+                if !status.is_success() {
+                    let body = resp.text().await.unwrap_or_default();
+                    panic!(
+                        "API returned non-success for x={} y={} threshold={}: status={} body={}",
+                        x, y, threshold, status, body
+                    );
+                }
+
+                let resp_json: Value = resp.json().await?;
+
+                let plot_spec = resp_json
+                    .get("plot_spec")
+                    .cloned()
+                    .ok_or_else(|| {
+                        format!(
+                            "no plot_spec in response for x={} y={} threshold={}",
+                            x, y, threshold
+                        )
+                    })?;
+
+                // Server must provide authoritative axis value types
+                let x_vt = plot_spec
+                    .get("x")
+                    .and_then(|v| v.get("value_type"))
+                    .and_then(|v| v.as_str())
+                    .ok_or_else(|| {
+                        format!(
+                            "plot_spec.x.value_type missing for x={} y={} threshold={}",
+                            x, y, threshold
+                        )
+                    })?;
+                let y_vt = plot_spec
+                    .get("y")
+                    .and_then(|v| v.get("value_type"))
+                    .and_then(|v| v.as_str())
+                    .ok_or_else(|| {
+                        format!(
+                            "plot_spec.y.value_type missing for x={} y={} threshold={}",
+                            x, y, threshold
+                        )
+                    })?;
+
+                eprintln!(
+                    "Testing x={} ({}) y={} ({}) threshold={}",
+                    x, x_vt, y, y_vt, threshold
+                );
+
+                // Convert plot_spec to Vega-Lite using the workspace converter
+                let ps_str = serde_json::to_string(&plot_spec)?;
+                let vl_json_str = plot_spec_to_vega_lite_json(&ps_str);
+                let vl_val: Value = serde_json::from_str(&vl_json_str).map_err(|e| {
+                    format!(
+                        "converter returned invalid JSON: {} -- payload: {}",
+                        e, vl_json_str
+                    )
+                })?;
+
+                if vl_val.get("error").is_some() {
+                    panic!(
+                        "converter returned error for x={} y={} threshold={}: {}",
+                        x, y, threshold, vl_json_str
+                    );
+                }
+
+                // Determine mark type (supports either string or object `mark` forms)
+                let mark_type = match vl_val.get("mark") {
+                    Some(Value::String(s)) => s.clone(),
+                    Some(Value::Object(obj)) => obj
+                        .get("type")
+                        .and_then(|t| t.as_str())
+                        .unwrap_or("")
+                        .to_string(),
+                    _ => "".to_string(),
+                };
+
+                if threshold >= RAW_THRESHOLD {
+                    assert!(
+                        matches!(mark_type.as_str(), "point" | "circle" | "symbol"),
+                        "expected point-like mark for raw mode but got {:?} for x={} y={} threshold={}",
+                        mark_type, x, y, threshold
+                    );
+                } else {
+                    assert!(
+                        matches!(mark_type.as_str(), "rect" | "bar"),
+                        "expected rect/bar mark for binned mode but got {:?} for x={} y={} threshold={}",
+                        mark_type, x, y, threshold
+                    );
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/crates/genomehubs-query/src/local_report/builder.rs b/crates/genomehubs-query/src/local_report/builder.rs
index 9cc40e1..31da2e8 100644
--- a/crates/genomehubs-query/src/local_report/builder.rs
+++ b/crates/genomehubs-query/src/local_report/builder.rs
@@ -121,6 +121,7 @@ pub fn local_plot_spec(
         x: Some(x_meta),
         y: y_meta,
         z: None,
+        cat: None,
         series: vec![],
         display,
         data,
diff --git a/crates/genomehubs-query/src/report/axis.rs b/crates/genomehubs-query/src/report/axis.rs
index 456ac6b..295a675 100644
--- a/crates/genomehubs-query/src/report/axis.rs
+++ b/crates/genomehubs-query/src/report/axis.rs
@@ -64,7 +64,6 @@ pub enum DateInterval {
     Month,
     Quarter,
     Year,
-    Decade,
 }
 
 impl DateInterval {
@@ -74,9 +73,8 @@ impl DateInterval {
             DateInterval::Day => "1d",
             DateInterval::Week => "1w",
             DateInterval::Month => "1M",
-            DateInterval::Quarter => "3M",
+            DateInterval::Quarter => "1q",
             DateInterval::Year => "1y",
-            DateInterval::Decade => "10y",
         }
     }
 }
@@ -356,9 +354,8 @@ fn parse_date_interval(s: &str) -> Option<DateInterval> {
         "day" | "1d" => Some(DateInterval::Day),
         "week" | "1w" => Some(DateInterval::Week),
         "month" | "1M" | "1m" => Some(DateInterval::Month),
-        "quarter" | "3M" | "3m" => Some(DateInterval::Quarter),
+        "quarter" | "3M" | "3m" | "1q" => Some(DateInterval::Quarter),
         "year" | "1y" => Some(DateInterval::Year),
-        "decade" | "10y" => Some(DateInterval::Decade),
         _ => None,
     }
 }
@@ -700,10 +697,9 @@ mod tests {
             ("1M", DateInterval::Month),
             ("quarter", DateInterval::Quarter),
             ("3M", DateInterval::Quarter),
+            ("1q", DateInterval::Quarter),
             ("year", DateInterval::Year),
             ("1y", DateInterval::Year),
-            ("decade", DateInterval::Decade),
-            ("10y", DateInterval::Decade),
         ];
         for (interval_str, expected_interval) in intervals {
             let opts = AxisOpts::parse(&format!(";;;;;{}", interval_str));
@@ -721,9 +717,8 @@ mod tests {
         assert_eq!(DateInterval::Day.to_es_interval(), "1d");
         assert_eq!(DateInterval::Week.to_es_interval(), "1w");
         assert_eq!(DateInterval::Month.to_es_interval(), "1M");
-        assert_eq!(DateInterval::Quarter.to_es_interval(), "3M");
+        assert_eq!(DateInterval::Quarter.to_es_interval(), "1q");
         assert_eq!(DateInterval::Year.to_es_interval(), "1y");
-        assert_eq!(DateInterval::Decade.to_es_interval(), "10y");
     }
 
     // ── AxisSpec tests ──
diff --git a/crates/genomehubs-query/src/report/display.rs b/crates/genomehubs-query/src/report/display.rs
index b76721f..a7288ff 100644
--- a/crates/genomehubs-query/src/report/display.rs
+++ b/crates/genomehubs-query/src/report/display.rs
@@ -94,6 +94,9 @@ pub struct AxisOptions {
 pub struct HistogramOptions {
     /// Stack category series instead of overlaying them.
     pub stacked: Option<bool>,
+    /// Display mode for categorized histograms: "stacked", "grouped", or "facet".
+    /// When present, overrides `stacked` where applicable.
+    pub mode: Option<String>,
     /// Cumulative sum mode: each bar shows the sum of all preceding bars.
     pub cumulative: Option<bool>,
     /// Y-axis scale: `"linear"` (default), `"log10"`, or `"proportion"`.
@@ -203,6 +206,11 @@ pub struct TreeOptions {
 pub struct ArcOptions {
     /// Show percentage labels inside arc segments (default: `true`).
     pub show_labels: Option<bool>,
+    /// Display mode for multiple arcs: "grouped" or "facet".
+    pub mode: Option<String>,
+    /// Shape of the arcs: "auto" (default), "ring", "pie", "rainbow", "horizontal" or "vertical".
+    /// "auto" resolves to "ring" for single arc, and "rainbow" for 2 or more.
+    pub shape: Option<String>,
 }
 
 /// Sources data-attribution bar chart display options.
diff --git a/crates/genomehubs-query/src/report/mod.rs b/crates/genomehubs-query/src/report/mod.rs
index a98d98c..634cd98 100644
--- a/crates/genomehubs-query/src/report/mod.rs
+++ b/crates/genomehubs-query/src/report/mod.rs
@@ -207,7 +207,7 @@ pub fn report_yaml_from_url_params(url: &str) -> Result<(String, String, String)
 
 // ── Vega-Lite conversion ──────────────────────────────────────────────────────
 
-/// Convert a `PlotSpec` JSON string into a Vega-Lite v5 specification JSON string.
+/// Convert a `PlotSpec` JSON string into a Vega-Lite v6 specification JSON string.
 ///
 /// Accepts the full `/report` response envelope (extracts `plot_spec` automatically)
 /// or a bare `PlotSpec` object.  Returns an error JSON on failure.
@@ -244,7 +244,7 @@ pub fn plot_spec_to_vega_lite_json(input: &str) -> String {
         .unwrap_or(12.0);
 
     let mut base = serde_json::json!({
-        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
+        "$schema": "https://vega.github.io/schema/vega-lite/v6.json",
         "width": width,
         "height": height,
         "config": {
@@ -277,8 +277,20 @@ pub fn plot_spec_to_vega_lite_json(input: &str) -> String {
             base
         }
         "arc" => {
-            base["mark"] = serde_json::Value::String("arc".to_string());
-            base
+            // Special-case batch arc payloads produced by the API: these use
+            // data.type == "arc_batch" and contain `entries` with per-ring
+            // `scaled` values (0..1) and `report_index`/`report_label`.
+            if spec_val
+                .get("data")
+                .and_then(|d| d.get("type"))
+                .and_then(|t| t.as_str())
+                == Some("arc_batch")
+            {
+                vl_arc_batch(spec_val, base)
+            } else {
+                base["mark"] = serde_json::Value::String("arc".to_string());
+                base
+            }
         }
         _ => base,
     };
@@ -289,26 +301,273 @@ pub fn plot_spec_to_vega_lite_json(input: &str) -> String {
     }
 }
 
-fn vl_histogram(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_json::Value {
-    let x_meta = spec.get("x").unwrap_or(&serde_json::Value::Null);
-    let x_field = x_meta
-        .get("field")
+/// Build a Vega-Lite encoding object (`field`, `type`, `scale`, `axis`) for one axis from
+/// server-side axis metadata (`None` is an error) and optional numeric bin-edge boundaries.
+fn make_vl_axis_encoding(
+    axis_meta_opt: Option<&serde_json::Value>,
+    data_field: &str,
+    label_hint: Option<&str>,
+    boundaries_opt: Option<&[f64]>,
+    prefer_nominal: bool,
+    z_index: Option<i64>,
+) -> Result<serde_json::Value, String> {
+    // Require server-provided axis metadata including `value_type`.
+    let meta = axis_meta_opt.ok_or_else(|| {
+        format!(
+            "missing axis metadata for field '{}' — server must provide axis.value_type",
+            data_field
+        )
+    })?;
+
+    let value_type = meta
+        .get("value_type")
         .and_then(|v| v.as_str())
-        .unwrap_or("key");
-    let x_label = x_meta
+        .ok_or_else(|| format!("axis metadata for '{}' lacks 'value_type'", data_field))?;
+
+    let label = meta
         .get("label")
         .and_then(|v| v.as_str())
-        .unwrap_or(x_field);
-    let x_scale_str = x_meta
+        .or(label_hint)
+        .unwrap_or(data_field);
+
+    let scale_hint = meta
         .get("scale")
         .and_then(|v| v.as_str())
         .unwrap_or("linear");
-    let x_vl_scale = if x_scale_str == "log10" {
+
+    // Map canonical server `value_type` strings to Vega-Lite types deterministically.
+    let vl_type = if prefer_nominal || value_type == "keyword" {
+        "nominal"
+    } else if value_type == "date" {
+        "temporal"
+    } else if value_type == "float"
+        || value_type == "integer"
+        || value_type == "number"
+        || value_type == "coordinate"
+    {
+        "quantitative"
+    } else {
+        return Err(format!(
+            "unknown axis value_type '{}' for field '{}'",
+            value_type, data_field
+        ));
+    };
+
+    let scale_type = if vl_type == "nominal" {
+        "band"
+    } else if vl_type == "temporal" {
+        "time"
+    } else if scale_hint == "log10" {
         "log"
     } else {
         "linear"
     };
 
+    // Helper to convert JSON values (number or numeric string) to f64
+    fn json_to_f64(v: &serde_json::Value) -> Option<f64> {
+        if let Some(n) = v.as_f64() {
+            Some(n)
+        } else if let Some(s) = v.as_str() {
+            s.parse::<f64>().ok()
+        } else {
+            None
+        }
+    }
+
+    // Tick values: prefer explicit computed boundaries (if provided) because these
+    // are derived from bin edges and are usually the best ticks for histograms;
+    // otherwise use server-provided tick values if present and non-empty.
+    let tick_values_json = if let Some(b) = boundaries_opt {
+        if vl_type == "temporal" {
+            Some(serde_json::Value::Array(
+                b.iter()
+                    .map(|v| serde_json::Value::Number((*v as i64).into()))
+                    .collect(),
+            ))
+        } else {
+            Some(serde_json::Value::Array(
+                b.iter().map(|v| serde_json::Value::from(*v)).collect(),
+            ))
+        }
+    } else {
+        meta.get("tick_values")
+            .and_then(|tv| tv.as_array())
+            .and_then(|arr| {
+                if arr.is_empty() {
+                    None
+                } else if vl_type == "temporal" {
+                    // Convert numeric tick values to datetime signals; leave strings alone
+                    Some(serde_json::Value::Array(
+                        arr.iter()
+                            .map(|v| {
+                                if let Some(n) = v.as_f64() {
+                                    serde_json::json!({"signal": format!("datetime({})", n as i64)})
+                                } else if let Some(s) = v.as_str() {
+                                    serde_json::Value::String(s.to_string())
+                                } else {
+                                    v.clone()
+                                }
+                            })
+                            .collect(),
+                    ))
+                } else {
+                    Some(serde_json::Value::Array(arr.clone()))
+                }
+            })
+    };
+
+    // Domain: prefer explicit computed boundaries (first and last bin edge) when
+    // provided and non-empty; otherwise fall back to a server-provided `domain` whose
+    // entries may be numbers or numeric strings (unparseable lo -> 0.0, hi -> lo + 1.0).
+    let domain_opt = boundaries_opt
+        .and_then(|b| {
+            if !b.is_empty() {
+                Some((b[0], *b.last().unwrap()))
+            } else {
+                None
+            }
+        })
+        .or_else(|| {
+            meta.get("domain")
+                .and_then(|d| d.as_array())
+                .and_then(|arr| {
+                    if arr.len() >= 2 {
+                        let lo = json_to_f64(&arr[0]).unwrap_or(0.0);
+                        let hi = json_to_f64(&arr[1]).unwrap_or(lo + 1.0);
+                        Some((lo, hi))
+                    } else {
+                        None
+                    }
+                })
+        });
+
+    // Build scale object
+    let mut scale_obj = serde_json::Map::new();
+    scale_obj.insert(
+        "type".to_string(),
+        serde_json::Value::String(scale_type.to_string()),
+    );
+    if let Some((lo, hi)) = domain_opt {
+        if vl_type == "temporal" {
+            scale_obj.insert(
+                "domain".to_string(),
+                serde_json::Value::Array(vec![
+                    serde_json::Value::Number((lo as i64).into()),
+                    serde_json::Value::Number((hi as i64).into()),
+                ]),
+            );
+        } else {
+            scale_obj.insert("domain".to_string(), serde_json::json!([lo, hi]));
+        }
+    }
+    if vl_type == "nominal" {
+        scale_obj.insert(
+            "paddingOuter".to_string(),
+            serde_json::Value::Number((0).into()),
+        );
+        // Remove inner padding so adjacent categorical bars fill the full
+        // width between ticks (useful for histogram-style categorical axes).
+        scale_obj.insert(
+            "paddingInner".to_string(),
+            serde_json::Value::Number((0).into()),
+        );
+        // If the server provided explicit tick values for a nominal axis,
+        // use them as the scale domain to preserve ordering (e.g. taxon id
+        // list or human-readable bucket labels).
+        #[allow(clippy::collapsible_match)]
+        if let Some(tv) = &tick_values_json {
+            if let serde_json::Value::Array(arr) = tv {
+                if !arr.is_empty() {
+                    scale_obj.insert("domain".to_string(), serde_json::Value::Array(arr.clone()));
+                }
+            }
+        }
+    }
+
+    // Build axis object
+    let mut axis_obj = serde_json::Map::new();
+    axis_obj.insert(
+        "title".to_string(),
+        serde_json::Value::String(label.to_string()),
+    );
+    if let Some(tv) = tick_values_json {
+        axis_obj.insert("values".to_string(), tv);
+    }
+    if vl_type == "temporal" {
+        // Choose a sensible date format. Prefer server-declared interval when
+        // present (e.g. "year" -> show year only). Otherwise heuristically
+        // infer from the width of the first computed bin if available.
+        let mut date_fmt = "%Y-%m-%d".to_string();
+        if let Some(interval_str) = meta.get("interval").and_then(|v| v.as_str()) {
+            match interval_str {
+                "year" | "decade" => date_fmt = "%Y".to_string(),
+                "month" | "quarter" => date_fmt = "%Y-%m".to_string(),
+                _ => date_fmt = "%Y-%m-%d".to_string(),
+            }
+        } else if let Some(b) = boundaries_opt {
+            if b.len() >= 2 {
+                let width = (b[1] - b[0]).abs();
+                let day_ms = 86400.0 * 1000.0;
+                let year_ms = 365.0 * day_ms;
+                if width >= year_ms {
+                    date_fmt = "%Y".to_string();
+                } else if width >= 28.0 * day_ms {
+                    date_fmt = "%Y-%m".to_string();
+                } else {
+                    date_fmt = "%Y-%m-%d".to_string();
+                }
+            }
+        }
+        axis_obj.insert("format".to_string(), serde_json::Value::String(date_fmt));
+    } else if vl_type == "quantitative" {
+        axis_obj.insert(
+            "format".to_string(),
+            serde_json::Value::String(".3s".to_string()),
+        );
+    } else if vl_type == "nominal" {
+        axis_obj.insert("grid".to_string(), serde_json::Value::Bool(true));
+        axis_obj.insert(
+            "tickBand".to_string(),
+            serde_json::Value::String("extent".to_string()),
+        );
+    }
+    if let Some(z) = z_index {
+        axis_obj.insert("zindex".to_string(), serde_json::Value::Number(z.into()));
+    }
+
+    Ok(serde_json::json!({
+        "field": data_field,
+        "type": vl_type,
+        "scale": serde_json::Value::Object(scale_obj),
+        "axis": serde_json::Value::Object(axis_obj)
+    }))
+}
+
+/// Pixel width of a single bar when several category bars share one bin, as used for
+/// grouped histograms and scatter bar charts.
+///
+/// `plot_width_px` is split evenly over `n_bins` (a 10 px bin is assumed when `n_bins`
+/// is not positive), 90 % of a bin is shared by `n_cats` (at least 1), minimum 2 px.
+fn grouped_bar_size_px(n_bins: f64, n_cats: f64, plot_width_px: f64) -> f64 {
+    // Fallback bin width, kept whenever the bin count is not positive.
+    let mut per_bin_px = 10.0;
+    if n_bins > 0.0 {
+        per_bin_px = plot_width_px / n_bins;
+    }
+    (per_bin_px * 0.9 / n_cats.max(1.0)).max(2.0)
+}
+
+fn vl_histogram(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_json::Value {
+    let x_meta = spec.get("x").unwrap_or(&serde_json::Value::Null);
+    let x_field = x_meta
+        .get("field")
+        .and_then(|v| v.as_str())
+        .unwrap_or("key");
+    let x_label = x_meta
+        .get("label")
+        .and_then(|v| v.as_str())
+        .unwrap_or(x_field);
+
     let display = spec.get("display").unwrap_or(&serde_json::Value::Null);
     let hist = display.get("histogram").unwrap_or(&serde_json::Value::Null);
     let y_scale_str = hist
@@ -320,37 +579,875 @@ fn vl_histogram(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_
     } else {
         "linear"
     };
+    let y_min = match y_vl_scale {
+        "log" => 1.0, // avoid log(0) issues
+        _ => 0.0,
+    };
     let y_label = display
         .get("y_label")
         .and_then(|v| v.as_str())
         .unwrap_or("Count");
 
-    let buckets = spec
+    // Transform ES-style buckets (key, doc_count) into left/right bar values
+    // with explicit `x` (left) and `x2` (right) so Vega-Lite draws bars with
+    // bin boundaries. Also compute axis `values` ticks at each boundary.
+    let raw_buckets = spec
         .get("data")
         .and_then(|d| d.get("buckets"))
         .cloned()
         .unwrap_or_else(|| serde_json::json!([]));
 
-    base["data"] = serde_json::json!({"values": buckets});
+    // If the server provided per-category breakdowns (`by_cat`) produce
+    // a long-form dataset so Vega-Lite can render stacked/grouped/faceted
+    // histograms. Preserve axis metadata for numeric/date axes by keeping
+    // `x` as numeric/temporal values when possible so tick formatting is
+    // delegated to `make_vl_axis_encoding` (server-side metadata remains
+    // authoritative).
+    if let Some(by_cat_val) = spec.get("data").and_then(|d| d.get("by_cat")).cloned() {
+        if by_cat_val.is_object() {
+            let hist_mode = hist
+                .get("mode")
+                .and_then(|v| v.as_str())
+                .unwrap_or("grouped");
+            let hist_cumulative = hist
+                .get("cumulative")
+                .and_then(|v| v.as_bool())
+                .unwrap_or(false);
+
+            // Category order preference: explicit `data.cats` else object keys order
+            let cats: Vec<String> = spec
+                .get("data")
+                .and_then(|d| d.get("cats"))
+                .and_then(|v| v.as_array())
+                .map(|arr| {
+                    arr.iter()
+                        .map(|s| s.as_str().unwrap_or("").to_string())
+                        .collect()
+                })
+                .unwrap_or_else(|| by_cat_val.as_object().unwrap().keys().cloned().collect());
+
+            // Optional mapping from raw cat key -> human-readable label. If
+            // the spec carries `data.cat.tick_labels` with the same length as
+            // the category ordering (`data.cats`), use it to map keys to
+            // friendly labels for display (legend, color). This preserves
+            // raw keys for lookups while presenting readable names.
+            let mut cat_label_map: Option<std::collections::HashMap<String, String>> = None;
+            if let Some(lbls) = spec
+                .get("data")
+                .and_then(|d| d.get("cat"))
+                .and_then(|c| c.get("tick_labels"))
+                .and_then(|v| v.as_array())
+            {
+                if lbls.len() == cats.len() {
+                    let mut m = std::collections::HashMap::new();
+                    for (i, k) in cats.iter().enumerate() {
+                        if let Some(lbl) = lbls.get(i).and_then(|v| v.as_str()) {
+                            m.insert(k.clone(), lbl.to_string());
+                        }
+                    }
+                    if !m.is_empty() {
+                        cat_label_map = Some(m);
+                    }
+                }
+            }
+
+            // Decide how to treat the x axis: preserve numeric/temporal type
+            // when the server indicates it (so axis ticks/formatting are kept).
+            let x_value_type = x_meta
+                .get("value_type")
+                .and_then(|v| v.as_str())
+                .unwrap_or("keyword");
+
+            // Helper: extract bucket count for (cat, idx)
+            let get_count = |cat: &str, idx: usize| -> f64 {
+                by_cat_val
+                    .get(cat)
+                    .and_then(|arr| arr.as_array())
+                    .and_then(|a| a.get(idx))
+                    .and_then(|v| v.as_f64())
+                    .unwrap_or(0.0)
+            };
+
+            if x_value_type == "keyword" {
+                // Categorical: previous behaviour — string `x` values with server
+                // ordering preserved; support stacked/grouped/facet via color/xOffset/facet.
+                let bucket_labels: Vec<String> = raw_buckets
+                    .as_array()
+                    .map(|arr| {
+                        arr.iter()
+                            .map(|b| {
+                                b.get("label")
+                                    .and_then(|v| v.as_str())
+                                    .map(|s| s.to_string())
+                                    .or_else(|| {
+                                        b.get("key").and_then(|k| k.as_str().map(|s| s.to_string()))
+                                    })
+                                    .or_else(|| {
+                                        b.get("id").and_then(|k| k.as_str().map(|s| s.to_string()))
+                                    })
+                                    .unwrap_or_else(|| b.to_string())
+                            })
+                            .collect()
+                    })
+                    .unwrap_or_default();
+
+                // Long-form values and compute per-bucket sums. Support
+                // cumulative mode by maintaining running totals per category.
+                let mut values: Vec<serde_json::Value> = Vec::new();
+                let mut max_sum: f64 = 0.0;
+                let mut running: Vec<f64> = vec![0.0; cats.len()];
+                let mut cat_max: Vec<f64> = vec![0.0; cats.len()];
+                for (i, bl) in bucket_labels.iter().enumerate() {
+                    let mut bucket_sum = 0.0_f64;
+                    for (ci, cat) in cats.iter().enumerate() {
+                        let count = get_count(cat, i);
+                        let display_count = if hist_cumulative {
+                            running[ci] += count;
+                            running[ci]
+                        } else {
+                            count
+                        };
+                        // track per-category maximum for grouped/facet domains
+                        if display_count > cat_max[ci] {
+                            cat_max[ci] = display_count;
+                        }
+                        bucket_sum += display_count;
+                        let mut obj = serde_json::Map::new();
+                        obj.insert("x".to_string(), serde_json::Value::String(bl.clone()));
+                        let display_cat = cat_label_map
+                            .as_ref()
+                            .and_then(|m| m.get(cat))
+                            .cloned()
+                            .unwrap_or_else(|| cat.clone());
+                        obj.insert("cat".to_string(), serde_json::Value::String(display_cat));
+                        obj.insert(
+                            "doc_count".to_string(),
+                            serde_json::Value::from(display_count),
+                        );
+                        values.push(serde_json::Value::Object(obj));
+                    }
+                    if bucket_sum > max_sum {
+                        max_sum = bucket_sum;
+                    }
+                }
+                let max_cat_max = cat_max.iter().cloned().fold(0.0_f64, f64::max);
+
+                // X encoding uses nominal domain derived from bucket labels
+                let x_meta_override =
+                    serde_json::json!({"tick_values": bucket_labels, "value_type": "keyword"});
+                let mut x_encoding = match make_vl_axis_encoding(
+                    Some(&x_meta_override),
+                    "x",
+                    Some(x_label),
+                    None,
+                    true,
+                    Some(1),
+                ) {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+
+                // Grouped mode on a nominal x axis: `xOffset` places the category
+                // bars side-by-side inside each band. NOTE(review): the block below
+                // re-applies `paddingInner=0`/`paddingOuter=0`, so no inner padding is
+                // actually added; confirm whether a non-zero padding was intended.
+                if hist_mode == "grouped" {
+                    if let Some(x_enc_obj) = x_encoding.as_object() {
+                        if let Some(scale_val) = x_enc_obj.get("scale").cloned() {
+                            if scale_val.is_object() {
+                                let mut scale_map =
+                                    scale_val.as_object().cloned().unwrap_or_default();
+                                scale_map.insert("paddingInner".to_string(), serde_json::json!(0));
+                                scale_map.insert("paddingOuter".to_string(), serde_json::json!(0));
+                                // replace the scale in x_encoding
+                                if let Some(x_enc_obj_mut) = x_encoding.as_object_mut() {
+                                    x_enc_obj_mut.insert(
+                                        "scale".to_string(),
+                                        serde_json::Value::Object(scale_map),
+                                    );
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // Y axis encoding: doc_count; prefer quantitative with sensible domain
+                let y_axis_meta = spec.get("y");
+                let mut y_encoding = match make_vl_axis_encoding(
+                    y_axis_meta,
+                    "doc_count",
+                    Some(y_label),
+                    None,
+                    false,
+                    None,
+                ) {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+                // Determine y domain depending on histogram mode:
+                // - stacked: domain = max total per bin (max_sum)
+                // - grouped/facet: domain = max per-category bar height (max_cat_max)
+                let desired_y_max = match hist_mode {
+                    "grouped" | "facet" => {
+                        if max_cat_max > 0.0 {
+                            max_cat_max
+                        } else {
+                            1.0
+                        }
+                    }
+                    _ => {
+                        if max_sum > 0.0 {
+                            max_sum
+                        } else {
+                            1.0
+                        }
+                    }
+                };
+
+                if let Some(obj) = y_encoding.as_object_mut() {
+                    obj.insert(
+                        "aggregate".to_string(),
+                        serde_json::Value::String("sum".to_string()),
+                    );
+                    if let Some(scale_val) = obj.get_mut("scale") {
+                        if scale_val.is_object() {
+                            scale_val.as_object_mut().unwrap().insert(
+                                "domain".to_string(),
+                                serde_json::json!([0.0, desired_y_max]),
+                            );
+                        }
+                    }
+                }
+
+                base["data"] = serde_json::json!({"values": values});
+
+                // Compute pixel-based grouped bar size for categorical bins
+                let plot_width_px =
+                    base.get("width").and_then(|v| v.as_u64()).unwrap_or(600) as f64;
+                let grouped_bar_px = grouped_bar_size_px(
+                    bucket_labels.len() as f64,
+                    cats.len() as f64,
+                    plot_width_px,
+                );
+
+                match hist_mode {
+                    "grouped" => {
+                        // Use xOffset to separate categories within each bucket.
+                        // Also explicitly disable stacking so viewers that auto-stack
+                        // aggregated colour channels will render grouped bars.
+                        if let Some(y_obj) = y_encoding.as_object_mut() {
+                            y_obj.insert("stack".to_string(), serde_json::Value::Null);
+                        }
+                        base["mark"] = serde_json::json!({"type": "bar", "size": grouped_bar_px});
+                        base["encoding"] = serde_json::json!({
+                            "x": x_encoding,
+                            "y": y_encoding,
+                            "color": {"field": "cat", "type": "nominal"},
+                            "xOffset": {"field": "cat", "type": "nominal"}
+                        });
+                        return base;
+                    }
+                    "facet" => {
+                        // Use facet: row by category (small multiples). Place the
+                        // `mark` and `encoding` inside `spec` only; do not set a
+                        // top-level `mark` (invalid with `facet`).
+                        let spec_obj = serde_json::json!({
+                            "mark": {"type": "bar"},
+                            "encoding": {"x": x_encoding, "y": y_encoding, "y2": {"datum": y_min}}
+                        });
+                        base["facet"] =
+                            serde_json::json!({"row": {"field": "cat", "type": "nominal"}});
+                        base["spec"] = spec_obj;
+                        // Keep colour/legend off for facet default; caller can style separately
+                        return base;
+                    }
+                    _ => {
+                        // default: stacked — let Vega-Lite perform stacking via
+                        // the `color` encoding and aggregated `y`.
+                        base["mark"] = serde_json::json!({"type": "bar", "size": grouped_bar_px});
+                        base["encoding"] = serde_json::json!({
+                            "x": x_encoding,
+                            "y": y_encoding,
+                            "color": {"field": "cat", "type": "nominal"}
+                        });
+                        return base;
+                    }
+                }
+            } else {
+                // Numeric / temporal buckets: preserve numeric x values so axis
+                // formatting and tick values computed by the server are retained.
+                // Compute numeric keys and boundaries similar to the non-cat path.
+                let mut keys_num: Vec<f64> = Vec::new();
+                if let Some(arr) = raw_buckets.as_array() {
+                    for b in arr {
+                        if let Some(k) = b.get("key").and_then(|v| v.as_f64()) {
+                            keys_num.push(k);
+                        }
+                    }
+                }
+
+                // Determine bin width
+                let width = if keys_num.len() >= 2 {
+                    keys_num[1] - keys_num[0]
+                } else if let Some(domain_arr) = x_meta.get("domain").and_then(|d| d.as_array()) {
+                    if domain_arr.len() >= 2 {
+                        let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                        let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                        let tick_count = x_meta
+                            .get("tickCount")
+                            .and_then(|v| v.as_u64())
+                            .unwrap_or(10) as f64;
+                        (hi - lo) / tick_count.max(1.0)
+                    } else {
+                        1.0
+                    }
+                } else {
+                    1.0
+                };
+
+                // Build numeric boundaries
+                let mut boundaries_f64: Vec<f64> = Vec::new();
+                if !keys_num.is_empty() {
+                    for k in &keys_num {
+                        boundaries_f64.push(*k);
+                    }
+                    let last_right = if keys_num.len() >= 2 {
+                        keys_num[keys_num.len() - 1] + (keys_num[1] - keys_num[0])
+                    } else {
+                        keys_num[0] + width
+                    };
+                    boundaries_f64.push(last_right);
+                }
+
+                // Long-form numeric values with either (x,x2) for stacked mode
+                // or center-based `x` for grouped mode so bars can be narrower
+                // and offset with `xOffset`.
+                let mut values: Vec<serde_json::Value> = Vec::new();
+                let mut max_sum: f64 = 0.0;
+
+                // Precompute bin centers
+                let mut centers: Vec<f64> = Vec::new();
+                for (i, left) in keys_num.iter().enumerate() {
+                    let right = if i + 1 < keys_num.len() {
+                        keys_num[i + 1]
+                    } else {
+                        left + width
+                    };
+                    centers.push(left + (right - left) / 2.0);
+                }
+
+                // Compute pixel-based bar size for grouped mode so bars fit side-by-side
+                let plot_width_px =
+                    base.get("width").and_then(|v| v.as_u64()).unwrap_or(600) as f64;
+                let n_cats = cats.len() as f64;
+                let grouped_bar_px =
+                    grouped_bar_size_px(keys_num.len() as f64, n_cats, plot_width_px);
+
+                // Precompute domain span for converting pixel offsets into data units
+                let domain_min = boundaries_f64.first().cloned().unwrap_or(0.0);
+                let domain_max = boundaries_f64.last().cloned().unwrap_or(domain_min + 1.0);
+                let domain_span = if domain_max > domain_min {
+                    domain_max - domain_min
+                } else {
+                    1.0
+                };
+
+                let mut running: Vec<f64> = vec![0.0; cats.len()];
+                let mut cat_max: Vec<f64> = vec![0.0; cats.len()];
+                for (i, left) in keys_num.iter().enumerate() {
+                    let right = if i + 1 < keys_num.len() {
+                        keys_num[i + 1]
+                    } else {
+                        left + width
+                    };
+                    let mut bucket_sum = 0.0_f64;
+                    for (ci, cat) in cats.iter().enumerate() {
+                        let count = get_count(cat, i);
+                        let display_count = if hist_cumulative {
+                            running[ci] += count;
+                            running[ci]
+                        } else {
+                            count
+                        };
+                        // track per-category maximum for grouped/facet scaling
+                        if display_count > cat_max[ci] {
+                            cat_max[ci] = display_count;
+                        }
+                        bucket_sum += display_count;
+                        let mut obj = serde_json::Map::new();
+                        if hist_mode == "grouped" {
+                            // Compute a small data-space shift for this category so
+                            // bars are placed side-by-side without relying on
+                            // viewer support for `xOffset`.
+                            let ci_f = ci as f64;
+                            let center_index = (n_cats - 1.0) / 2.0;
+                            let data_per_pixel = domain_span / plot_width_px.max(1.0);
+                            let bar_data_width = grouped_bar_px * data_per_pixel;
+                            let shift = (ci_f - center_index) * bar_data_width;
+                            let x_val = centers[i] + shift;
+                            obj.insert("x".to_string(), serde_json::Value::from(x_val));
+                        } else {
+                            // Stacked / default: use range [x,x2]
+                            obj.insert("x".to_string(), serde_json::Value::from(*left));
+                            obj.insert("x2".to_string(), serde_json::Value::from(right));
+                        }
+                        let display_cat = cat_label_map
+                            .as_ref()
+                            .and_then(|m| m.get(cat))
+                            .cloned()
+                            .unwrap_or_else(|| cat.clone());
+                        obj.insert("cat".to_string(), serde_json::Value::String(display_cat));
+                        obj.insert(
+                            "doc_count".to_string(),
+                            serde_json::Value::from(display_count),
+                        );
+                        values.push(serde_json::Value::Object(obj));
+                    }
+                    if bucket_sum > max_sum {
+                        max_sum = bucket_sum;
+                    }
+                }
+
+                // Build x encoding using numeric boundaries so axis formatting is correct
+                let x_encoding = match make_vl_axis_encoding(
+                    spec.get("x"),
+                    "x",
+                    Some(x_label),
+                    Some(&boundaries_f64),
+                    false,
+                    Some(1),
+                ) {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+
+                // Y axis encoding: doc_count; aggregate per x and ensure domain starts at 0
+                let y_axis_meta = spec.get("y");
+                let mut y_encoding = match make_vl_axis_encoding(
+                    y_axis_meta,
+                    "doc_count",
+                    Some(y_label),
+                    None,
+                    false,
+                    None,
+                ) {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+                let max_cat_max = cat_max.iter().cloned().fold(0.0_f64, f64::max);
+                let desired_y_max = match hist_mode {
+                    "grouped" | "facet" => {
+                        if max_cat_max > 0.0 {
+                            max_cat_max
+                        } else {
+                            1.0
+                        }
+                    }
+                    _ => {
+                        if max_sum > 0.0 {
+                            max_sum
+                        } else {
+                            1.0
+                        }
+                    }
+                };
+                if let Some(obj) = y_encoding.as_object_mut() {
+                    obj.insert(
+                        "aggregate".to_string(),
+                        serde_json::Value::String("sum".to_string()),
+                    );
+                    if let Some(scale_val) = obj.get_mut("scale") {
+                        if scale_val.is_object() {
+                            scale_val.as_object_mut().unwrap().insert(
+                                "domain".to_string(),
+                                serde_json::json!([0.0, desired_y_max]),
+                            );
+                        }
+                    }
+                }
+
+                base["data"] = serde_json::json!({"values": values});
+
+                match hist_mode {
+                    "facet" => {
+                        // Facet: small multiples by category; keep shared x scale
+                        let spec_obj = serde_json::json!({
+                            "mark": {"type": "bar"},
+                            "encoding": {"x": x_encoding, "x2": {"field": "x2"}, "y": y_encoding, "y2": {"datum": y_min}}
+                        });
+                        base["facet"] =
+                            serde_json::json!({"row": {"field": "cat", "type": "nominal"}});
+                        base["spec"] = spec_obj;
+                        return base;
+                    }
+                    "grouped" => {
+                        // Grouped: use xOffset (Vega-Lite v5+) to offset categories within numeric bins
+                        // and explicitly disable stacking on the y encoding so viewers
+                        // do not aggregate into stacked bars.
+                        if let Some(y_obj) = y_encoding.as_object_mut() {
+                            y_obj.insert("stack".to_string(), serde_json::Value::Null);
+                        }
+                        base["mark"] = serde_json::json!({"type": "bar", "size": grouped_bar_px});
+                        base["encoding"] = serde_json::json!({
+                            "x": x_encoding,
+                            "y": y_encoding,
+                            "color": {"field": "cat", "type": "nominal"},
+                            "xOffset": {"field": "cat", "type": "nominal"}
+                        });
+                        return base;
+                    }
+                    _ => {
+                        // Default: attempt stacked. Vega-Lite stacking across numeric
+                        // continuous axes is not universally supported; for numeric
+                        // axes we fallback to grouped behaviour to preserve axis
+                        // formatting. If the server prefers true stacked nominal
+                        // bins it can provide `x.tick_labels` and the client can
+                        // request `value_type: keyword` instead.
+                        base["mark"] = serde_json::json!({"type": "bar"});
+                        base["encoding"] = serde_json::json!({
+                            "x": x_encoding,
+                            "x2": {"field": "x2"},
+                            "y": y_encoding,
+                            "color": {"field": "cat", "type": "nominal"},
+                            "xOffset": {"field": "cat", "type": "nominal"}
+                        });
+                        return base;
+                    }
+                }
+            }
+        }
+    }
+
+    let mut values: Vec<serde_json::Value> = Vec::new();
+    let mut keys: Vec<f64> = Vec::new();
+    let x_value_type = x_meta
+        .get("value_type")
+        .and_then(|v| v.as_str())
+        .unwrap_or("keyword");
+
+    if let Some(arr) = raw_buckets.as_array() {
+        if x_value_type == "keyword" {
+            // Categorical histogram: emit one value per category with
+            // a string `x` field and numeric `doc_count` so Vega-Lite can
+            // render nominal bars with the server-provided tick order.
+            for b in arr {
+                let label = b
+                    .get("label")
+                    .and_then(|v| v.as_str())
+                    .map(|s| s.to_string())
+                    .or_else(|| b.get("key").and_then(|k| k.as_str().map(|s| s.to_string())))
+                    .or_else(|| b.get("id").and_then(|k| k.as_str().map(|s| s.to_string())))
+                    .unwrap_or_else(|| b.to_string());
+                let count = b.get("doc_count").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                let mut obj = serde_json::Map::new();
+                obj.insert("x".to_string(), serde_json::Value::String(label.clone()));
+                obj.insert("doc_count".to_string(), serde_json::Value::from(count));
+                if let Some(kv) = b.get("key") {
+                    obj.insert("key".to_string(), kv.clone());
+                } else if let Some(idv) = b.get("id") {
+                    obj.insert("id".to_string(), idv.clone());
+                }
+                values.push(serde_json::Value::Object(obj));
+            }
+        } else {
+            for b in arr {
+                if let Some(k) = b.get("key").and_then(|v| v.as_f64()) {
+                    keys.push(k);
+                }
+            }
+            // Determine bin width
+            let width = if keys.len() >= 2 {
+                keys[1] - keys[0]
+            } else if let Some(domain_arr) = x_meta.get("domain").and_then(|d| d.as_array()) {
+                if domain_arr.len() >= 2 {
+                    let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                    let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                    let tick_count = x_meta
+                        .get("tickCount")
+                        .and_then(|v| v.as_u64())
+                        .unwrap_or(10) as f64;
+                    (hi - lo) / tick_count.max(1.0)
+                } else {
+                    1.0
+                }
+            } else {
+                1.0
+            };
+
+            for (i, b) in arr.iter().enumerate() {
+                let key = b.get("key").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                let right = if i + 1 < keys.len() {
+                    keys[i + 1]
+                } else {
+                    key + width
+                };
+                let count = b.get("doc_count").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                let mut obj = serde_json::Map::new();
+                obj.insert("x".to_string(), serde_json::Value::from(key));
+                obj.insert("x2".to_string(), serde_json::Value::from(right));
+                obj.insert("doc_count".to_string(), serde_json::Value::from(count));
+                // Preserve original key for backwards compatibility
+                obj.insert("key".to_string(), serde_json::Value::from(key));
+                values.push(serde_json::Value::Object(obj));
+            }
+        }
+    }
+
+    if x_value_type == "keyword" {
+        // Extract category order from the buckets (labels) to use as tick values
+        let mut cats: Vec<String> = Vec::new();
+        if let Some(arr) = raw_buckets.as_array() {
+            for b in arr {
+                let label = b
+                    .get("label")
+                    .and_then(|v| v.as_str())
+                    .map(|s| s.to_string())
+                    .or_else(|| b.get("key").and_then(|k| k.as_str().map(|s| s.to_string())))
+                    .or_else(|| b.get("id").and_then(|k| k.as_str().map(|s| s.to_string())))
+                    .unwrap_or_default();
+                cats.push(label);
+            }
+        }
+
+        let x_meta_override = serde_json::json!({"tick_values": cats, "value_type": "keyword"});
+        let x_encoding = match make_vl_axis_encoding(
+            Some(&x_meta_override),
+            "x",
+            Some(x_label),
+            None,
+            true,
+            Some(1),
+        ) {
+            Ok(v) => v,
+            Err(e) => return serde_json::json!({"error": e}),
+        };
+
+        // Y axis encoding: doc_count; prefer quantitative with sensible domain
+        let y_axis_meta = spec.get("y");
+        let mut y_encoding =
+            match make_vl_axis_encoding(y_axis_meta, "doc_count", Some(y_label), None, false, None)
+            {
+                Ok(v) => v,
+                Err(e) => return serde_json::json!({"error": e}),
+            };
+        // Ensure y domain starts at zero for histograms
+        if let Some(scale_obj) = y_encoding.get_mut("scale") {
+            if scale_obj.is_object() {
+                let max_val = values
+                    .iter()
+                    .filter_map(|o| o.get("doc_count").and_then(|v| v.as_f64()))
+                    .fold(0.0_f64, |a, b| a.max(b));
+                scale_obj.as_object_mut().unwrap().insert(
+                    "domain".to_string(),
+                    serde_json::json!([0.0, if max_val > 0.0 { max_val } else { 1.0 }]),
+                );
+            }
+        }
+
+        base["data"] = serde_json::json!({"values": values});
+        base["mark"] = serde_json::json!({"type": "bar"});
+        base["encoding"] = serde_json::json!({
+            "x": x_encoding,
+            "y": y_encoding,
+            "y2": {"datum": y_min}
+        });
+        let _ = x_field;
+        return base;
+    }
+
+    // Compute numeric boundaries (left edges + final right edge)
+    let mut boundaries_f64: Vec<f64> = Vec::new();
+    if !keys.is_empty() {
+        for k in &keys {
+            boundaries_f64.push(*k);
+        }
+        // final right
+        let last_right = if keys.len() >= 2 {
+            keys[keys.len() - 1] + (keys[1] - keys[0])
+        } else {
+            keys[0] + 1.0
+        };
+        boundaries_f64.push(last_right);
+    }
+
+    // X axis encoding: use server axis meta + computed boundaries
+    let x_encoding = match make_vl_axis_encoding(
+        spec.get("x"),
+        "x",
+        Some(x_label),
+        Some(&boundaries_f64),
+        false,
+        Some(1),
+    ) {
+        Ok(v) => v,
+        Err(e) => return serde_json::json!({"error": e}),
+    };
+
+    // Y axis encoding: doc_count; prefer quantitative with sensible domain
+    let y_axis_meta = spec.get("y");
+    let y_encoding =
+        match make_vl_axis_encoding(y_axis_meta, "doc_count", Some(y_label), None, false, None) {
+            Ok(v) => v,
+            Err(e) => return serde_json::json!({"error": e}),
+        };
+
+    base["data"] = serde_json::json!({"values": values});
     base["mark"] = serde_json::json!({"type": "bar"});
     base["encoding"] = serde_json::json!({
-        "x": {
-            "field": "key",
-            "type": "quantitative",
-            "scale": {"type": x_vl_scale},
-            "axis": {"title": x_label}
-        },
-        "y": {
-            "field": "doc_count",
-            "type": "quantitative",
-            "scale": {"type": y_vl_scale},
-            "axis": {"title": y_label}
-        }
+        "x": x_encoding,
+        "x2": {"field": "x2"},
+        "y": y_encoding,
+        "y2": {"datum": y_min}
     });
     let _ = x_field;
     base
 }
 
+/// Vega-Lite renderer for `arc_batch` PlotSpec data.
+/// Produces a layered concentric ring chart coloured by series (intended to
+/// render as semicircular rings).
+///
+/// Reads `spec.data.entries` (a missing or non-array value is treated as
+/// empty) and sets `base["layer"]` to two layers per series:
+/// - a grey background arc whose `endAngle` is `PI`, and
+/// - a coloured wedge whose `endAngle` is `scaled * PI`, where `scaled` is
+///   clamped to `[0, 1]` and a missing/non-numeric `scaled` counts as 0.
+///
+/// Series are the distinct `report_label` values in first-seen order. Ring `i`
+/// takes the entries whose `report_index == i`; a ring without entries is
+/// skipped. Ring radii derive from `base.width` / `base.height` (defaults
+/// 600 / 400 when absent or not unsigned integers).
+///
+/// Returns `base` with `layer` replaced; no other key of `base` is modified.
+fn vl_arc_batch(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_json::Value {
+    use std::f64::consts::PI;
+
+    let data_entries = spec
+        .get("data")
+        .and_then(|d| d.get("entries"))
+        .and_then(|v| v.as_array())
+        .cloned()
+        .unwrap_or_default();
+
+    // Plot size drives the ring radii; fall back to 600x400 when `base`
+    // carries no usable `width` / `height`.
+    let width = base.get("width").and_then(|v| v.as_u64()).unwrap_or(600) as f64;
+    let height = base.get("height").and_then(|v| v.as_u64()).unwrap_or(400) as f64;
+
+    // Unique series (report labels) in encountered order
+    let mut labels: Vec<&str> = Vec::new();
+    for e in &data_entries {
+        if let Some(lbl) = e.get("report_label").and_then(|v| v.as_str()) {
+            if !labels.contains(&lbl) {
+                labels.push(lbl);
+            }
+        }
+    }
+    let n = labels.len().max(1) as f64;
+
+    // Radius allocation: 90% of the half-extent, minus a small inner padding,
+    // split evenly between series (each slot at least 8px).
+    let max_radius = (height.min(width) / 2.0) * 0.9;
+    let inner_padding = 8.0;
+    let slot = ((max_radius - inner_padding) / n).max(8.0);
+
+    // Shared color encoding using a rainbow scheme
+    let color_encoding = serde_json::json!({
+        "field": "report_label",
+        "type": "nominal",
+        "scale": {"scheme": "rainbow"}
+    });
+    // Both layers use the same theta encoding so their angles are comparable.
+    let theta_encoding = serde_json::json!({
+        "field": "endAngle",
+        "type": "quantitative",
+        "scale": {"domain": [0.0, PI]}
+    });
+
+    // Build layers: one data-backed background + wedge per series
+    let mut layers: Vec<serde_json::Value> = Vec::new();
+    for i in 0..labels.len() {
+        // NOTE(review): assumes `report_index` follows the first-seen order of
+        // `report_label` — confirm against the producer of `entries`.
+        let entries_for_i: Vec<&serde_json::Value> = data_entries
+            .iter()
+            .filter(|e| {
+                e.get("report_index")
+                    .and_then(|v| v.as_u64())
+                    .map_or(false, |idx| idx == i as u64)
+            })
+            .collect();
+        if entries_for_i.is_empty() {
+            continue;
+        }
+
+        let inner = (inner_padding + (i as f64) * slot).round();
+        let outer = (inner + slot * 0.8).round();
+
+        // Copy of an entry with `endAngle` set; non-object entries are copied
+        // unchanged.
+        let with_end_angle = |entry: &serde_json::Value, angle: f64| {
+            let mut copy = entry.clone();
+            if let Some(m) = copy.as_object_mut() {
+                m.insert("endAngle".to_string(), serde_json::json!(angle));
+            }
+            copy
+        };
+        let bg_vals: Vec<serde_json::Value> = entries_for_i
+            .iter()
+            .map(|&ev| with_end_angle(ev, PI))
+            .collect();
+        let wedge_vals: Vec<serde_json::Value> = entries_for_i
+            .iter()
+            .map(|&ev| {
+                let scaled = ev.get("scaled").and_then(|v| v.as_f64()).unwrap_or(0.0);
+                // Clamp so the wedge stays inside the theta domain [0, PI].
+                with_end_angle(ev, scaled.clamp(0.0, 1.0) * PI)
+            })
+            .collect();
+
+        // Background layer. Mark radii are plain numbers: Vega-Lite mark
+        // properties take `number | ExprRef`, not `{"value": ..}` objects.
+        let background = serde_json::json!({
+            "data": {"values": bg_vals},
+            "mark": {
+                "type": "arc",
+                "innerRadius": inner,
+                "outerRadius": outer,
+                "cornerRadius": 6,
+                "opacity": 0.25
+            },
+            "encoding": {
+                "theta": theta_encoding.clone(),
+                "theta2": {"value": 0},
+                "color": {"value": "#d9d9d9"}
+            }
+        });
+
+        // Wedge layer (coloured), inset 1px inside the background ring
+        let wedge = serde_json::json!({
+            "data": {"values": wedge_vals},
+            "mark": {
+                "type": "arc",
+                "innerRadius": inner + 1.0,
+                "outerRadius": outer - 1.0,
+                "cornerRadius": 6
+            },
+            "encoding": {
+                "theta": theta_encoding.clone(),
+                "theta2": {"value": 0},
+                "color": color_encoding.clone()
+            }
+        });
+
+        layers.push(background);
+        layers.push(wedge);
+    }
+
+    base["layer"] = serde_json::Value::Array(layers);
+    base
+}
+
 fn vl_scatter(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_json::Value {
     let x_meta = spec.get("x").unwrap_or(&serde_json::Value::Null);
     let x_field = x_meta.get("field").and_then(|v| v.as_str()).unwrap_or("x");
@@ -358,15 +1455,6 @@ fn vl_scatter(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_js
         .get("label")
         .and_then(|v| v.as_str())
         .unwrap_or(x_field);
-    let x_scale_str = x_meta
-        .get("scale")
-        .and_then(|v| v.as_str())
-        .unwrap_or("linear");
-    let x_vl_scale = if x_scale_str == "log10" {
-        "log"
-    } else {
-        "linear"
-    };
 
     let y_meta = spec.get("y").unwrap_or(&serde_json::Value::Null);
     let y_field = y_meta.get("field").and_then(|v| v.as_str()).unwrap_or("y");
@@ -374,38 +1462,900 @@ fn vl_scatter(spec: &serde_json::Value, mut base: serde_json::Value) -> serde_js
         .get("label")
         .and_then(|v| v.as_str())
         .unwrap_or(y_field);
-    let y_scale_str = y_meta
-        .get("scale")
-        .and_then(|v| v.as_str())
-        .unwrap_or("linear");
-    let y_vl_scale = if y_scale_str == "log10" {
-        "log"
+
+    // Extract optional server-provided buckets. Support two shapes:
+    // 1) legacy: `buckets` is an array of primitive ids (strings/numbers) and
+    //    `bucketLabels` may be present as a parallel array of labels.
+    // 2) structured: `buckets` is an array of objects `{id,label,count}`.
+    let bucket_labels_opt: Option<Vec<String>>;
+    let mut bucket_ids_opt: Option<Vec<String>> = None;
+
+    if let Some(buckets_arr) = spec
+        .get("data")
+        .and_then(|d| d.get("buckets"))
+        .and_then(|v| v.as_array())
+        .cloned()
+    {
+        if !buckets_arr.is_empty() && buckets_arr[0].is_object() {
+            // structured array of objects
+            let mut ids: Vec<String> = Vec::new();
+            let mut labels: Vec<String> = Vec::new();
+            for obj in &buckets_arr {
+                if let Some(idv) = obj.get("id").or_else(|| obj.get("key")) {
+                    if let Some(s) = idv.as_str() {
+                        ids.push(s.to_string());
+                    } else {
+                        ids.push(idv.to_string());
+                    }
+                } else {
+                    ids.push(obj.to_string());
+                }
+                if let Some(lv) = obj.get("label").or_else(|| obj.get("name")) {
+                    if let Some(s) = lv.as_str() {
+                        labels.push(s.to_string());
+                    } else {
+                        labels.push(lv.to_string());
+                    }
+                } else {
+                    labels.push(String::new());
+                }
+            }
+            bucket_ids_opt = Some(ids);
+            bucket_labels_opt = Some(labels);
+        } else {
+            // legacy primitive array
+            bucket_ids_opt = Some(
+                buckets_arr
+                    .iter()
+                    .map(|k| {
+                        if let Some(s) = k.as_str() {
+                            s.to_string()
+                        } else {
+                            k.to_string()
+                        }
+                    })
+                    .collect(),
+            );
+            // try separate `bucketLabels` field as fallback but treat empty
+            // arrays or arrays of empty strings as absent.
+            bucket_labels_opt = spec
+                .get("data")
+                .and_then(|d| d.get("bucketLabels"))
+                .and_then(|v| v.as_array())
+                .and_then(|arr| {
+                    let vec: Vec<String> = arr
+                        .iter()
+                        .map(|s| s.as_str().unwrap_or("").to_string())
+                        .collect();
+                    if vec.iter().all(|s| s.is_empty()) {
+                        None
+                    } else {
+                        Some(vec)
+                    }
+                });
+        }
     } else {
-        "linear"
+        // no buckets array at all; attempt to read `bucketLabels` only
+        bucket_labels_opt = spec
+            .get("data")
+            .and_then(|d| d.get("bucketLabels"))
+            .and_then(|v| v.as_array())
+            .map(|arr| {
+                arr.iter()
+                    .map(|s| s.as_str().unwrap_or("").to_string())
+                    .collect()
+            });
+    }
+
+    // Build id->label map when both arrays are present and aligned.
+    let id_to_label: Option<std::collections::HashMap<String, String>> =
+        if let (Some(ids), Some(labels)) = (&bucket_ids_opt, &bucket_labels_opt) {
+            if ids.len() == labels.len() {
+                let mut m = std::collections::HashMap::new();
+                for (i, id) in ids.iter().enumerate() {
+                    m.insert(id.clone(), labels[i].clone());
+                }
+                Some(m)
+            } else {
+                None
+            }
+        } else {
+            None
+        };
+
+    // Build y id->label map from `yBuckets` + `yBucketLabels` when available.
+    let y_id_to_label: Option<std::collections::HashMap<String, String>> = {
+        let y_ids_opt: Option<Vec<String>> = spec
+            .get("data")
+            .and_then(|d| d.get("yBuckets"))
+            .and_then(|v| v.as_array())
+            .map(|arr| {
+                arr.iter()
+                    .map(|k| k.as_str().unwrap_or(&k.to_string()).to_string())
+                    .collect()
+            });
+        let y_labels_opt: Option<Vec<String>> = spec
+            .get("data")
+            .and_then(|d| d.get("yBucketLabels"))
+            .and_then(|v| v.as_array())
+            .and_then(|arr| {
+                let vec: Vec<String> = arr
+                    .iter()
+                    .map(|s| s.as_str().unwrap_or("").to_string())
+                    .collect();
+                if vec.iter().all(|s| s.is_empty()) {
+                    None
+                } else {
+                    Some(vec)
+                }
+            });
+
+        if let (Some(ids), Some(labels)) = (y_ids_opt, y_labels_opt) {
+            if ids.len() == labels.len() {
+                let mut m = std::collections::HashMap::new();
+                for (i, id) in ids.iter().enumerate() {
+                    m.insert(id.clone(), labels[i].clone());
+                }
+                Some(m)
+            } else {
+                None
+            }
+        } else {
+            None
+        }
     };
 
-    let cells = spec
+    let cells = if let Some(existing_cells) = spec.get("data").and_then(|d| d.get("cells")) {
+        existing_cells.clone()
+    } else {
+        // API scatter responses provide raw points grouped by category under
+        // data.rawData.{cat}[]; flatten them into a single values array.
+        let mut flattened: Vec<serde_json::Value> = Vec::new();
+        if let Some(raw_data_obj) = spec
+            .get("data")
+            .and_then(|d| d.get("rawData"))
+            .and_then(|v| v.as_object())
+        {
+            for (cat_key, points) in raw_data_obj {
+                if let Some(point_arr) = points.as_array() {
+                    for point in point_arr {
+                        let mut point_obj = point.as_object().cloned().unwrap_or_default();
+                        if !point_obj.contains_key("cat") {
+                            point_obj.insert(
+                                "cat".to_string(),
+                                serde_json::Value::String(cat_key.clone()),
+                            );
+                        }
+                        // If we have an id->label map, attach an `x_label` field
+                        // for this point so categorical axes can display
+                        // human-readable labels while preserving ids.
+                        if let Some(map) = id_to_label.as_ref() {
+                            // find a candidate id on the point
+                            let mut key_opt: Option<String> = None;
+                            if let Some(s) = point_obj.get("x").and_then(|v| v.as_str()) {
+                                key_opt = Some(s.to_string());
+                            } else if let Some(s) = point_obj.get("cat").and_then(|v| v.as_str()) {
+                                key_opt = Some(s.to_string());
+                            } else if let Some(n) =
+                                point_obj.get("taxonId").and_then(|v| v.as_i64())
+                            {
+                                key_opt = Some(n.to_string());
+                            } else if let Some(s) =
+                                point_obj.get("taxonId").and_then(|v| v.as_str())
+                            {
+                                key_opt = Some(s.to_string());
+                            }
+                            if let Some(k) = key_opt {
+                                if let Some(lbl) = map.get(&k) {
+                                    point_obj.insert(
+                                        "x_label".to_string(),
+                                        serde_json::Value::String(lbl.clone()),
+                                    );
+                                }
+                            }
+
+                            // Populate `y_label` when possible so categorical Y
+                            // encodings that expect `y_label` find a value.
+                            if !point_obj.contains_key("y_label") {
+                                // If we have a y id->label map prefer that.
+                                if let Some(y_map) = y_id_to_label.as_ref() {
+                                    let mut y_key_opt: Option<String> = None;
+                                    if let Some(s) = point_obj.get("y").and_then(|v| v.as_str()) {
+                                        y_key_opt = Some(s.to_string());
+                                    } else if let Some(n) =
+                                        point_obj.get("y").and_then(|v| v.as_i64())
+                                    {
+                                        y_key_opt = Some(n.to_string());
+                                    }
+                                    if let Some(yk) = y_key_opt {
+                                        if let Some(y_lbl) = y_map.get(&yk) {
+                                            point_obj.insert(
+                                                "y_label".to_string(),
+                                                serde_json::Value::String(y_lbl.clone()),
+                                            );
+                                        } else {
+                                            // Fall back to copying the existing `y` string
+                                            point_obj.insert(
+                                                "y_label".to_string(),
+                                                serde_json::Value::String(yk),
+                                            );
+                                        }
+                                    }
+                                } else {
+                                    // No mapping available: if `y` is already a string
+                                    // copy it to `y_label` so encoders using that
+                                    // field render correctly.
+                                    if let Some(s) = point_obj.get("y").and_then(|v| v.as_str()) {
+                                        point_obj.insert(
+                                            "y_label".to_string(),
+                                            serde_json::Value::String(s.to_string()),
+                                        );
+                                    } else if let Some(n) =
+                                        point_obj.get("y").and_then(|v| v.as_i64())
+                                    {
+                                        point_obj.insert(
+                                            "y_label".to_string(),
+                                            serde_json::Value::String(n.to_string()),
+                                        );
+                                    }
+                                }
+                            }
+                        }
+                        flattened.push(serde_json::Value::Object(point_obj));
+                    }
+                }
+            }
+        }
+        serde_json::Value::Array(flattened)
+    };
+
+    // Pre-compute boundaries or category labels from buckets so tick marks can
+    // be applied even when raw point `cells` are present. We handle numeric
+    // and string buckets differently: numeric buckets yield numeric
+    // boundaries; string buckets yield categorical tick values.
+    let mut x_boundaries_f64: Vec<f64> = Vec::new();
+    let mut y_boundaries_f64: Vec<f64> = Vec::new();
+    let mut x_categories: Option<Vec<String>> = None;
+    let mut y_categories: Option<Vec<String>> = None;
+
+    if let Some(x_keys_arr) = spec
         .get("data")
-        .and_then(|d| d.get("cells"))
+        .and_then(|d| d.get("buckets"))
+        .and_then(|v| v.as_array())
         .cloned()
-        .unwrap_or_else(|| serde_json::json!([]));
+    {
+        // If the buckets are structured objects, prefer the extracted ids
+        // from `bucket_ids_opt`. Otherwise fall back to primitive handling.
+        if !x_keys_arr.is_empty() && x_keys_arr[0].is_object() {
+            // Structured buckets (objects) are typed by server-provided
+            // axis metadata. Require `value_type` to decide how to treat ids.
+            if let Some(ids) = bucket_ids_opt.clone() {
+                match x_meta.get("value_type").and_then(|v| v.as_str()) {
+                    Some("float") | Some("integer") | Some("date") | Some("coordinate") => {
+                        let x_keys_num: Vec<f64> = ids
+                            .iter()
+                            .map(|s| s.parse::<f64>().unwrap_or(0.0))
+                            .collect();
+                        // Determine bin width
+                        let width = if x_keys_num.len() >= 2 {
+                            x_keys_num[1] - x_keys_num[0]
+                        } else if let Some(domain_arr) =
+                            x_meta.get("domain").and_then(|d| d.as_array())
+                        {
+                            if domain_arr.len() >= 2 {
+                                let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                                let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                                let tick_count = x_meta
+                                    .get("tickCount")
+                                    .and_then(|v| v.as_u64())
+                                    .unwrap_or(10)
+                                    as f64;
+                                (hi - lo) / tick_count.max(1.0)
+                            } else {
+                                1.0
+                            }
+                        } else {
+                            1.0
+                        };
 
-    base["data"] = serde_json::json!({"values": cells});
-    base["mark"] = serde_json::Value::String("point".to_string());
-    base["encoding"] = serde_json::json!({
-        "x": {
-            "field": "x",
-            "type": "quantitative",
-            "scale": {"type": x_vl_scale},
-            "axis": {"title": x_label}
-        },
-        "y": {
-            "field": "y",
-            "type": "quantitative",
-            "scale": {"type": y_vl_scale},
-            "axis": {"title": y_label}
+                        for k in &x_keys_num {
+                            x_boundaries_f64.push(*k);
+                        }
+                        let last_right = if x_keys_num.len() >= 2 {
+                            x_keys_num[x_keys_num.len() - 1] + (x_keys_num[1] - x_keys_num[0])
+                        } else {
+                            x_keys_num[0] + width
+                        };
+                        x_boundaries_f64.push(last_right);
+                    }
+                    Some("keyword") => {
+                        x_categories = Some(ids);
+                    }
+                    Some(other) => {
+                        return serde_json::json!({"error": format!("unsupported axis value_type '{}' for x buckets", other)});
+                    }
+                    None => {
+                        return serde_json::json!({"error": "missing axis value_type for x buckets; server must provide axis.value_type"});
+                    }
+                }
+            } else {
+                // No extracted ids available; stringify structured objects into labels
+                x_categories = Some(
+                    x_keys_arr
+                        .iter()
+                        .map(|o| match o.get("id").or_else(|| o.get("key")) {
+                            Some(idv) => {
+                                if let Some(s) = idv.as_str() {
+                                    s.to_string()
+                                } else {
+                                    idv.to_string()
+                                }
+                            }
+                            None => match o.get("label").or_else(|| o.get("name")) {
+                                Some(lv) => {
+                                    if let Some(s) = lv.as_str() {
+                                        s.to_string()
+                                    } else {
+                                        lv.to_string()
+                                    }
+                                }
+                                None => o.to_string(),
+                            },
+                        })
+                        .collect(),
+                );
+            }
+        } else {
+            // For primitive arrays, require server-provided type information.
+            match x_meta.get("value_type").and_then(|v| v.as_str()) {
+                Some("keyword") => {
+                    x_categories = Some(
+                        x_keys_arr
+                            .iter()
+                            .map(|k| k.as_str().unwrap_or("").to_string())
+                            .collect(),
+                    );
+                }
+                Some("float") | Some("integer") | Some("number") | Some("date")
+                | Some("coordinate") => {
+                    // parse values to f64 as needed
+                    let to_f64 = |v: &serde_json::Value| -> f64 {
+                        v.as_f64()
+                            .or_else(|| v.as_str().and_then(|s| s.parse::<f64>().ok()))
+                            .unwrap_or(0.0)
+                    };
+                    let x_keys: Vec<f64> = x_keys_arr.iter().map(to_f64).collect();
+                    if !x_keys.is_empty() {
+                        let width = if x_keys.len() >= 2 {
+                            x_keys[1] - x_keys[0]
+                        } else if let Some(domain_arr) = spec
+                            .get("x")
+                            .and_then(|x| x.get("domain"))
+                            .and_then(|d| d.as_array())
+                        {
+                            if domain_arr.len() >= 2 {
+                                let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                                let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                                let tick_count =
+                                    spec.get("x")
+                                        .and_then(|x| x.get("tickCount"))
+                                        .and_then(|v| v.as_u64())
+                                        .unwrap_or(10) as f64;
+                                (hi - lo) / tick_count.max(1.0)
+                            } else {
+                                1.0
+                            }
+                        } else {
+                            1.0
+                        };
+
+                        for k in &x_keys {
+                            x_boundaries_f64.push(*k);
+                        }
+                        let last_right = if x_keys.len() >= 2 {
+                            x_keys[x_keys.len() - 1] + (x_keys[1] - x_keys[0])
+                        } else {
+                            x_keys[0] + width
+                        };
+                        x_boundaries_f64.push(last_right);
+                    }
+                }
+                Some(other) => {
+                    return serde_json::json!({"error": format!("unsupported axis value_type '{}' for x primitive buckets", other)});
+                }
+                None => {
+                    return serde_json::json!({"error": "missing axis value_type for x primitive buckets; server must provide axis.value_type"});
+                }
+            }
         }
-    });
+    }
+
+    if let Some(y_keys_arr) = spec
+        .get("data")
+        .and_then(|d| d.get("yBuckets"))
+        .and_then(|v| v.as_array())
+        .cloned()
+    {
+        // Prefer explicit label array when provided by the server. This
+        // keeps `yBuckets` as the canonical ids used for bin alignment and
+        // uses `yBucketLabels` for human-readable axis categories.
+        let y_labels_opt: Option<Vec<String>> = spec
+            .get("data")
+            .and_then(|d| d.get("yBucketLabels"))
+            .and_then(|v| v.as_array())
+            .and_then(|arr| {
+                let vec: Vec<String> = arr
+                    .iter()
+                    .map(|s| s.as_str().unwrap_or("").to_string())
+                    .collect();
+                if vec.iter().all(|s| s.is_empty()) {
+                    None
+                } else {
+                    Some(vec)
+                }
+            });
+
+        // Require server-provided `value_type` for the Y axis as well.
+        match y_meta.get("value_type").and_then(|v| v.as_str()) {
+            Some("keyword") => {
+                if let Some(lbls) = y_labels_opt {
+                    y_categories = Some(lbls);
+                } else {
+                    y_categories = Some(
+                        y_keys_arr
+                            .iter()
+                            .map(|k| k.as_str().unwrap_or("").to_string())
+                            .collect(),
+                    );
+                }
+            }
+            Some("float") | Some("integer") | Some("number") | Some("date")
+            | Some("coordinate") => {
+                let to_f64 = |v: &serde_json::Value| -> f64 {
+                    v.as_f64()
+                        .or_else(|| v.as_str().and_then(|s| s.parse::<f64>().ok()))
+                        .unwrap_or(0.0)
+                };
+                let y_keys: Vec<f64> = y_keys_arr.iter().map(to_f64).collect();
+                if !y_keys.is_empty() {
+                    let height = if y_keys.len() >= 2 {
+                        y_keys[1] - y_keys[0]
+                    } else if let Some(domain_arr) = spec
+                        .get("y")
+                        .and_then(|y| y.get("domain"))
+                        .and_then(|d| d.as_array())
+                    {
+                        if domain_arr.len() >= 2 {
+                            let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                            let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                            let tick_count = spec
+                                .get("y")
+                                .and_then(|y| y.get("tickCount"))
+                                .and_then(|v| v.as_u64())
+                                .unwrap_or(10) as f64;
+                            (hi - lo) / tick_count.max(1.0)
+                        } else {
+                            1.0
+                        }
+                    } else {
+                        1.0
+                    };
+
+                    for k in &y_keys {
+                        y_boundaries_f64.push(*k);
+                    }
+                    let last_top = if y_keys.len() >= 2 {
+                        y_keys[y_keys.len() - 1] + (y_keys[1] - y_keys[0])
+                    } else {
+                        y_keys[0] + height
+                    };
+                    y_boundaries_f64.push(last_top);
+                }
+            }
+            Some(other) => {
+                return serde_json::json!({"error": format!("unsupported axis value_type '{}' for y buckets", other)});
+            }
+            None => {
+                return serde_json::json!({"error": "missing axis value_type for y buckets; server must provide axis.value_type"});
+            }
+        }
+    }
+
+    // If we have point data (cells/rawData) render points, otherwise check for
+    // binned 2D data (`allYValues` + `yBuckets`) and render as a heatmap rect grid.
+    let mut is_cells_empty = true;
+    if let serde_json::Value::Array(arr) = &cells {
+        is_cells_empty = arr.is_empty();
+    }
+
+    if !is_cells_empty {
+        // Use shared axis encoding that respects server-provided axis meta.
+        // Prefer computed bin boundaries (if we extracted them above) so tick
+        // marks align with histogram/scatter bin edges even when raw points
+        // (`cells`) are present.
+        // If we have categorical bucket labels, prefer them for axis ticks.
+        let mut x_meta_override_value: Option<serde_json::Value> = None;
+        if let Some(ref cats) = x_categories {
+            // Prefer server-provided human-readable bucket labels when present.
+            if let Some(ref labels) = bucket_labels_opt {
+                x_meta_override_value =
+                    Some(serde_json::json!({"tick_values": labels, "value_type": "keyword"}));
+            } else {
+                x_meta_override_value =
+                    Some(serde_json::json!({"tick_values": cats, "value_type": "keyword"}));
+            }
+        }
+        let x_enc = if let Some(ref meta) = x_meta_override_value {
+            // When using human-readable labels, the data objects will include
+            // an `x_label` field; use that field for axis encoding so labels
+            // render in the intended order.
+            let enc_res = if bucket_labels_opt.is_some() {
+                make_vl_axis_encoding(Some(meta), "x_label", Some(x_label), None, true, None)
+            } else {
+                make_vl_axis_encoding(Some(meta), "x", Some(x_label), None, true, None)
+            };
+            match enc_res {
+                Ok(v) => v,
+                Err(e) => return serde_json::json!({"error": e}),
+            }
+        } else {
+            let x_bound_opt: Option<&[f64]> = if x_boundaries_f64.is_empty() {
+                None
+            } else {
+                Some(x_boundaries_f64.as_slice())
+            };
+            match make_vl_axis_encoding(spec.get("x"), "x", Some(x_label), x_bound_opt, false, None)
+            {
+                Ok(v) => v,
+                Err(e) => return serde_json::json!({"error": e}),
+            }
+        };
+
+        let mut y_meta_override_value: Option<serde_json::Value> = None;
+        if let Some(ref cats) = y_categories {
+            // Use the y bucket categories for y-axis tick values. Do NOT reuse
+            // `bucket_labels_opt` which holds labels for the x-axis buckets.
+            y_meta_override_value =
+                Some(serde_json::json!({"tick_values": cats, "value_type": "keyword"}));
+        }
+        let y_enc = if let Some(ref meta) = y_meta_override_value {
+            match make_vl_axis_encoding(Some(meta), "y_label", Some(y_label), None, true, None) {
+                Ok(v) => v,
+                Err(e) => return serde_json::json!({"error": e}),
+            }
+        } else {
+            let y_bound_opt: Option<&[f64]> = if y_boundaries_f64.is_empty() {
+                None
+            } else {
+                Some(y_boundaries_f64.as_slice())
+            };
+            match make_vl_axis_encoding(spec.get("y"), "y", Some(y_label), y_bound_opt, false, None)
+            {
+                Ok(v) => v,
+                Err(e) => return serde_json::json!({"error": e}),
+            }
+        };
+
+        base["data"] = serde_json::json!({"values": cells});
+        base["mark"] = serde_json::Value::String("point".to_string());
+
+        // Build encoding map and add jitter offsets when axes are categorical.
+        let mut encoding_map = serde_json::Map::new();
+        encoding_map.insert("x".to_string(), x_enc);
+        encoding_map.insert("y".to_string(), y_enc);
+
+        let mut transforms: Vec<serde_json::Value> = Vec::new();
+        // Add a small pixel-offset jitter for categorical axes using Vega's
+        // `random()` expression. Offsets are in pixels and encoded via
+        // `xOffset`/`yOffset` which Vega-Lite supports for point marks.
+        if x_categories.is_some() {
+            transforms.push(serde_json::json!({"calculate": "(random()-0.5) * (random()-0.5)", "as": "_xOffset"}));
+            encoding_map.insert(
+                "xOffset".to_string(),
+                serde_json::json!({"field": "_xOffset", "scale":{"domain":[-1,1]}, "type": "quantitative"}),
+            );
+        }
+        if y_categories.is_some() {
+            transforms.push(serde_json::json!({"calculate": "(random()-0.5) * (random()-0.5)", "as": "_yOffset"}));
+            encoding_map.insert(
+                "yOffset".to_string(),
+                serde_json::json!({"field": "_yOffset", "scale":{"domain":[-1,1]}, "type": "quantitative"}),
+            );
+        }
+
+        if spec.get("data").and_then(|d| d.get("cats")).is_some() {
+            // If the spec's `data` object includes a `cats` array, add a color
+            // encoding that maps each point's `cat` field to a nominal color scale.
+            // This is a common pattern for scatter plots with categorical grouping.
+            encoding_map.insert(
+                "color".to_string(),
+                serde_json::json!({"field": "cat", "type": "nominal"}),
+            );
+        }
+
+        if !transforms.is_empty() {
+            base["transform"] = serde_json::Value::Array(transforms);
+        }
+
+        base["encoding"] = serde_json::Value::Object(encoding_map);
+    } else {
+        // Attempt binned heatmap: x buckets + yBuckets + allYValues
+        let maybe_x_keys = spec
+            .get("data")
+            .and_then(|d| d.get("buckets"))
+            .and_then(|v| v.as_array())
+            .cloned();
+        let maybe_y_keys = spec
+            .get("data")
+            .and_then(|d| d.get("yBuckets"))
+            .and_then(|v| v.as_array())
+            .cloned();
+        let maybe_all_y = spec
+            .get("data")
+            .and_then(|d| d.get("allYValues"))
+            .and_then(|v| v.as_array())
+            .cloned();
+
+        if let (Some(x_keys_arr), Some(y_keys_arr), Some(all_y_arr)) =
+            (maybe_x_keys, maybe_y_keys, maybe_all_y)
+        {
+            // Decide whether x/y buckets are categorical (strings) or numeric.
+            let x_is_categorical = x_categories.is_some();
+            let y_is_categorical = y_categories.is_some();
+
+            // Prepare numeric vectors if needed. Support primitive numeric
+            // arrays as well as structured object buckets where ids were
+            // extracted into `bucket_ids_opt`.
+            let x_keys: Vec<f64> = if !x_is_categorical {
+                if !x_keys_arr.is_empty() && x_keys_arr[0].is_object() {
+                    if let Some(ids) = bucket_ids_opt.clone() {
+                        ids.iter()
+                            .map(|s| s.parse::<f64>().unwrap_or(0.0))
+                            .collect()
+                    } else {
+                        Vec::new()
+                    }
+                } else {
+                    x_keys_arr
+                        .iter()
+                        .map(|k| k.as_f64().unwrap_or(0.0))
+                        .collect()
+                }
+            } else {
+                Vec::new()
+            };
+            let y_keys: Vec<f64> = if !y_is_categorical {
+                y_keys_arr
+                    .iter()
+                    .map(|k| k.as_f64().unwrap_or(0.0))
+                    .collect()
+            } else {
+                Vec::new()
+            };
+
+            let x_width = if !x_is_categorical && x_keys.len() >= 2 {
+                x_keys[1] - x_keys[0]
+            } else if !x_is_categorical {
+                if let Some(domain_arr) = spec
+                    .get("x")
+                    .and_then(|x| x.get("domain"))
+                    .and_then(|d| d.as_array())
+                {
+                    if domain_arr.len() >= 2 {
+                        let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                        let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                        (hi - lo) / (x_keys.len() as f64).max(1.0)
+                    } else {
+                        1.0
+                    }
+                } else {
+                    1.0
+                }
+            } else {
+                1.0
+            };
+
+            let y_height = if !y_is_categorical && y_keys.len() >= 2 {
+                y_keys[1] - y_keys[0]
+            } else if !y_is_categorical {
+                if let Some(domain_arr) = spec
+                    .get("y")
+                    .and_then(|y| y.get("domain"))
+                    .and_then(|d| d.as_array())
+                {
+                    if domain_arr.len() >= 2 {
+                        let lo = domain_arr[0].as_f64().unwrap_or(0.0);
+                        let hi = domain_arr[1].as_f64().unwrap_or(lo + 1.0);
+                        (hi - lo) / (y_keys.len() as f64).max(1.0)
+                    } else {
+                        1.0
+                    }
+                } else {
+                    1.0
+                }
+            } else {
+                1.0
+            };
+
+            // Build rects from allYValues: outer array per x-bucket, inner per y-bucket
+            let mut rects: Vec<serde_json::Value> = Vec::new();
+            for (xi, x_bucket) in all_y_arr.iter().enumerate() {
+                if let Some(y_counts) = x_bucket.as_array() {
+                    for (yi, count_val) in y_counts.iter().enumerate() {
+                        let count_opt = count_val
+                            .as_u64()
+                            .or_else(|| count_val.as_i64().map(|n| n as u64))
+                            .and_then(|n| if n == 0 { None } else { Some(n) });
+
+                        // Only emit rects for buckets with a non-zero count.
+                        if let Some(count) = count_opt {
+                            let mut obj = serde_json::Map::new();
+                            if x_is_categorical {
+                                let x_cat = x_categories
+                                    .as_ref()
+                                    .and_then(|v| v.get(xi))
+                                    .cloned()
+                                    .unwrap_or_default();
+                                obj.insert(
+                                    "x".to_string(),
+                                    serde_json::Value::String(x_cat.clone()),
+                                );
+                                if let Some(ref labels) = bucket_labels_opt {
+                                    if let Some(lbl) = labels.get(xi) {
+                                        obj.insert(
+                                            "x_label".to_string(),
+                                            serde_json::Value::String(lbl.clone()),
+                                        );
+                                    }
+                                }
+                            } else {
+                                let left = *x_keys.get(xi).unwrap_or(&0.0);
+                                let right = if xi + 1 < x_keys.len() {
+                                    x_keys[xi + 1]
+                                } else {
+                                    left + x_width
+                                };
+                                obj.insert("x".to_string(), serde_json::Value::from(left));
+                                obj.insert("x2".to_string(), serde_json::Value::from(right));
+                            }
+
+                            if y_is_categorical {
+                                let y_cat = y_categories
+                                    .as_ref()
+                                    .and_then(|v| v.get(yi))
+                                    .cloned()
+                                    .unwrap_or_default();
+                                obj.insert("y".to_string(), serde_json::Value::String(y_cat));
+                            } else {
+                                let bottom = *y_keys.get(yi).unwrap_or(&0.0);
+                                let top = if yi + 1 < y_keys.len() {
+                                    y_keys[yi + 1]
+                                } else {
+                                    bottom + y_height
+                                };
+                                obj.insert("y".to_string(), serde_json::Value::from(bottom));
+                                obj.insert("y2".to_string(), serde_json::Value::from(top));
+                            }
+
+                            obj.insert("count".to_string(), serde_json::Value::from(count));
+                            rects.push(serde_json::Value::Object(obj));
+                        }
+                    }
+                }
+            }
+
+            // Colour domain from zDomain if provided (as Value)
+            let color_domain_value = spec
+                .get("data")
+                .and_then(|d| d.get("zDomain"))
+                .and_then(|v| v.as_array())
+                .and_then(|arr| {
+                    if arr.len() >= 2 {
+                        let a = arr[0].as_f64().unwrap_or(0.0);
+                        let b = arr[1].as_f64().unwrap_or(a + 1.0);
+                        Some(serde_json::Value::Array(vec![
+                            serde_json::Value::from(a),
+                            serde_json::Value::from(b),
+                        ]))
+                    } else {
+                        None
+                    }
+                })
+                .unwrap_or_else(|| serde_json::Value::Array(vec![]));
+
+            // Build axis encodings: use categorical tick_values when available,
+            // otherwise use numeric boundaries computed above.
+            let mut encoding_map = serde_json::Map::new();
+
+            if let Some(ref cats) = x_categories {
+                let x_meta = serde_json::json!({"tick_values": cats, "value_type": "keyword"});
+                let x_enc_res =
+                    make_vl_axis_encoding(Some(&x_meta), "x", Some(x_label), None, true, Some(1));
+                let x_enc = match x_enc_res {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+                encoding_map.insert("x".to_string(), x_enc);
+            } else {
+                let mut x_boundaries_num: Vec<f64> = x_keys.clone();
+                if !x_boundaries_num.is_empty() {
+                    let last_right = if x_boundaries_num.len() >= 2 {
+                        x_boundaries_num[x_boundaries_num.len() - 1]
+                            + (x_boundaries_num[1] - x_boundaries_num[0])
+                    } else {
+                        x_boundaries_num[0] + x_width
+                    };
+                    x_boundaries_num.push(last_right);
+                }
+                let x_enc = match make_vl_axis_encoding(
+                    spec.get("x"),
+                    "x",
+                    Some(x_label),
+                    Some(&x_boundaries_num),
+                    false,
+                    Some(1),
+                ) {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+                encoding_map.insert("x".to_string(), x_enc);
+                encoding_map.insert("x2".to_string(), serde_json::json!({"field": "x2"}));
+            }
+
+            if let Some(ref cats) = y_categories {
+                let y_meta = serde_json::json!({"tick_values": cats, "value_type": "keyword"});
+                let y_enc_res =
+                    make_vl_axis_encoding(Some(&y_meta), "y", Some(y_label), None, true, Some(1));
+                let y_enc = match y_enc_res {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+                encoding_map.insert("y".to_string(), y_enc);
+            } else {
+                let mut y_boundaries_num: Vec<f64> = y_keys.clone();
+                if !y_boundaries_num.is_empty() {
+                    let last_top = if y_boundaries_num.len() >= 2 {
+                        y_boundaries_num[y_boundaries_num.len() - 1]
+                            + (y_boundaries_num[1] - y_boundaries_num[0])
+                    } else {
+                        y_boundaries_num[0] + y_height
+                    };
+                    y_boundaries_num.push(last_top);
+                }
+                let y_enc = match make_vl_axis_encoding(
+                    spec.get("y"),
+                    "y",
+                    Some(y_label),
+                    Some(&y_boundaries_num),
+                    false,
+                    Some(1),
+                ) {
+                    Ok(v) => v,
+                    Err(e) => return serde_json::json!({"error": e}),
+                };
+                encoding_map.insert("y".to_string(), y_enc);
+                encoding_map.insert("y2".to_string(), serde_json::json!({"field": "y2"}));
+            }
+
+            encoding_map.insert(
+                "color".to_string(),
+                serde_json::json!({
+                    "field": "count",
+                    "type": "quantitative",
+                    "scale": {"type": "linear", "domain": color_domain_value}
+                }),
+            );
+
+            base["data"] = serde_json::json!({"values": rects});
+            base["mark"] = serde_json::json!({"type": "rect"});
+            base["encoding"] = serde_json::Value::Object(encoding_map);
+        } else {
+            // Fallback to empty points if nothing useful present
+            base["data"] = serde_json::json!({"values": serde_json::json!([])});
+            base["mark"] = serde_json::Value::String("point".to_string());
+            base["encoding"] = serde_json::json!({});
+        }
+    }
     let _ = (x_field, y_field);
     base
 }
@@ -496,4 +2446,70 @@ mod tests {
         let url = "https://goat.genomehubs.org/api/v2/search?result=taxon";
         assert!(report_yaml_from_url_params(url).is_err());
     }
+
+    /// A scatter spec that carries only `rawData` (no binned cells) must yield
+    /// one Vega-Lite datum per raw point, labelled with its series key as `cat`.
+    #[test]
+    fn scatter_vega_lite_uses_raw_data_when_cells_missing() {
+        let raw_only_spec = serde_json::json!({
+            "report_type": "scatter",
+            "x": {"field": "genome_size", "scale": "linear", "value_type": "float"},
+            "y": {"field": "busco_total", "scale": "linear", "value_type": "float"},
+            "data": {
+                "rawData": {
+                    "all": [{"x": 10.0, "y": 20.0}]
+                }
+            }
+        });
+
+        let rendered = plot_spec_to_vega_lite_json(&raw_only_spec.to_string());
+        let vega: serde_json::Value = serde_json::from_str(&rendered).unwrap();
+        let points = vega["data"]["values"].as_array().unwrap();
+
+        // Exactly the one input point comes back, under the `all` series.
+        assert_eq!(points.len(), 1);
+
+        let point = &points[0];
+        assert_eq!(point["x"].as_f64(), Some(10.0));
+        assert_eq!(point["y"].as_f64(), Some(20.0));
+        assert_eq!(point["cat"].as_str(), Some("all"));
+    }
+
+    /// Binned scatter input (`buckets`, `yBuckets`, `allYValues`) must be drawn
+    /// as a `rect` heatmap holding one datum per x-bucket / y-bucket cell, and
+    /// each datum must expose the cell corners and its count.
+    #[test]
+    fn scatter_vega_lite_renders_heatmap_from_binned_values() {
+        const RECT_KEYS: [&str; 5] = ["x", "x2", "y", "y2", "count"];
+
+        let binned_spec = serde_json::json!({
+            "report_type": "scatter",
+            "x": {"field": "x", "scale": "linear", "value_type": "float"},
+            "y": {"field": "y", "scale": "linear", "value_type": "float"},
+            "data": {
+                "buckets": [0.0, 10.0],
+                "yBuckets": [0.0, 5.0],
+                "allYValues": [[1,2],[3,4]],
+                "zDomain": [1,4]
+            }
+        });
+
+        // Render via the JSON-string entry point and parse the result back.
+        let rendered = plot_spec_to_vega_lite_json(&binned_spec.to_string());
+        let vega: serde_json::Value = serde_json::from_str(&rendered).unwrap();
+
+        // Binned data selects the rect mark rather than points.
+        let mark_type = vega["mark"]["type"].as_str();
+        assert_eq!(mark_type, Some("rect"));
+
+        // 2 x-buckets * 2 y-buckets -> 4 rects
+        let rects = vega["data"]["values"].as_array().unwrap();
+        assert_eq!(rects.len(), 4);
+
+        // Spot-check the first rect: both corners of the cell plus its count.
+        let first_rect = &rects[0];
+        for key in RECT_KEYS {
+            assert!(first_rect.get(key).is_some());
+        }
+    }
 }
diff --git a/crates/genomehubs-query/src/report/plot_spec.rs b/crates/genomehubs-query/src/report/plot_spec.rs
index b6670f8..a593beb 100644
--- a/crates/genomehubs-query/src/report/plot_spec.rs
+++ b/crates/genomehubs-query/src/report/plot_spec.rs
@@ -151,6 +151,8 @@ pub struct PlotSpec {
     pub x: Option<AxisMeta>,
     /// Secondary (Y) axis metadata, if applicable.
     pub y: Option<AxisMeta>,
+    /// Category axis metadata (for series / categorical axes), if applicable.
+    pub cat: Option<AxisMeta>,
     /// Tertiary (Z / heatmap density) axis metadata, if applicable.
     pub z: Option<AxisMeta>,
     /// Series (category) metadata. Empty for non-categorised plots.
@@ -207,6 +209,7 @@ mod tests {
                 tick_label_max_length: None,
             }),
             y: None,
+            cat: None,
             z: None,
             series: vec![SeriesMeta {
                 key: "chromosome".to_string(),
diff --git a/crates/genomehubs-query/tests/fix_y_axis.rs b/crates/genomehubs-query/tests/fix_y_axis.rs
new file mode 100644
index 0000000..6f3cc31
--- /dev/null
+++ b/crates/genomehubs-query/tests/fix_y_axis.rs
@@ -0,0 +1,28 @@
+use genomehubs_query::plot_spec_to_vega_lite_json;
+use serde_json::json;
+
+/// With categorical `yBuckets` alongside raw points, the y-axis tick values
+/// must be the bucket labels, in bucket order.
+#[test]
+#[allow(non_snake_case)] // the name deliberately mirrors the `yBuckets` JSON key
+fn y_axis_uses_yBuckets_for_raw_points() {
+    let spec = json!({
+        "report_type": "scatter",
+        "x": {"field":"assembly_span", "label":"assembly_span", "scale":"linear"},
+        "y": {"field":"assembly_level","label":"assembly_level","scale":"linear"},
+        "data": {
+            "buckets": [{"id":"1","label":"B1"},{"id":"2","label":"B2"}],
+            "yBuckets": ["Scaffold","Chromosome"],
+            "rawData": {"all": [
+                {"x":1.0,"y":"Scaffold","cat":"all"},
+                {"x":2.0,"y":"Chromosome","cat":"all"}
+            ]}
+        }
+    });
+
+    let out = plot_spec_to_vega_lite_json(&spec.to_string());
+    let parsed: serde_json::Value = serde_json::from_str(&out).unwrap();
+    let y_values = parsed.pointer("/encoding/y/axis/values").unwrap();
+    assert_eq!(y_values[0].as_str().unwrap(), "Scaffold");
+    assert_eq!(y_values[1].as_str().unwrap(), "Chromosome");
+}
diff --git a/docs/planning/phases/phase-12-plot-spec.md b/docs/planning/phases/phase-12-plot-spec.md
index 9cf2bf2..aac69a7 100644
--- a/docs/planning/phases/phase-12-plot-spec.md
+++ b/docs/planning/phases/phase-12-plot-spec.md
@@ -318,7 +318,7 @@ Vega-Lite JSON. Called by the user when they want interactive rendering.
 export function plotSpecToVegaLite(plotSpec) {
   const display = plotSpec.display ?? {};
   const base = {
-    $schema: "https://vega.github.io/schema/vega-lite/v5.json",
+    $schema: "https://vega.github.io/schema/vega-lite/v6.json",
     title: display.title,
     width: display.width ?? 600,
     height: display.height ?? 400,
diff --git a/python/cli_generator/query.py b/python/cli_generator/query.py
index abd20f9..626aaf5 100644
--- a/python/cli_generator/query.py
+++ b/python/cli_generator/query.py
@@ -26,7 +26,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Sequence
 
 if TYPE_CHECKING:
     import pandas
@@ -1312,6 +1312,91 @@ def count_batch(
             counts.append(int(result.get("total") or 0))
         return counts
 
+    def report_batch(
+        self,
+        reports: Sequence[ReportBuilder | tuple[QueryBuilder, ReportBuilder]],
+        api_base: str = "https://goat.genomehubs.org/api",
+        api_version: str = "v3",
+    ) -> list[dict[str, Any]]:
+        """Execute multiple reports in a single batch request.
+
+        Args:
+            reports: List of ReportBuilder objects or (QueryBuilder, ReportBuilder)
+                pairs. When a bare ReportBuilder is provided the calling
+                QueryBuilder (``self``) is used as the query scope.
+            api_base: Base URL of the API.
+            api_version: API version string (default: ``"v3"``).
+
+        Returns:
+            List of per-report result dicts. Each dict contains at least
+            ``"report"`` and ``"status"`` keys and may include ``"plot_spec"``.
+
+        Raises:
+            ValueError: If more than 100 reports are provided, or an item is
+                neither a ReportBuilder nor a (QueryBuilder, ReportBuilder) pair.
+        """
+        import json
+        import urllib.request
+        from typing import cast
+
+        if len(reports) > 100:
+            raise ValueError("maximum 100 reports per batch request")
+
+        url = f"{api_base}/{api_version}/report/batch"
+        payload_reports = []
+        for item in reports:
+            # Accept either a bare ReportBuilder (use self as the query)
+            # or a (QueryBuilder, ReportBuilder) pair.
+            if isinstance(item, (tuple, list)):
+                if len(item) != 2:
+                    raise ValueError("report_batch() tuple items must be (QueryBuilder, ReportBuilder)")
+                qb, rb = item[0], item[1]
+            else:
+                qb, rb = self, item
+            # Reject malformed items with the documented ValueError rather than
+            # an AttributeError raised part-way through payload construction.
+            if not (hasattr(qb, "to_query_yaml") and hasattr(rb, "to_report_yaml")):
+                raise ValueError("report_batch() items must be ReportBuilder or (QueryBuilder, ReportBuilder)")
+            # Help static type checkers: narrow dynamic unions to the expected types
+            qb = cast("QueryBuilder", qb)
+            rb = cast("ReportBuilder", rb)
+            # Build per-item POST payload
+            payload_item: dict[str, Any] = {
+                "query_yaml": qb.to_query_yaml(),
+                "params_yaml": qb.to_params_yaml(),
+                "report_yaml": rb.to_report_yaml(),
+            }
+            if getattr(rb, "_display", None) is not None:
+                payload_item["display"] = rb._display
+            if getattr(rb, "_include_plot_spec", False):
+                payload_item["include_plot_spec"] = True
+            payload_reports.append(payload_item)
+
+        req = urllib.request.Request(
+            url,
+            data=json.dumps({"reports": payload_reports}).encode("utf-8"),
+            headers={"Content-Type": "application/json"},
+        )
+        with urllib.request.urlopen(req) as resp:
+            body_text = resp.read().decode("utf-8")
+
+        batch_data = json.loads(body_text)
+        results: list[dict[str, Any]] = []
+        for item in batch_data.get("results", []):
+            # Preserve plot_spec wrapper when present (client may request it).
+            if "plot_spec" in item:
+                results.append(item)
+            else:
+                entry: dict[str, Any] = {
+                    "report": item.get("report", {}),
+                    "status": item.get("status", {}),
+                }
+                if "error" in item:
+                    entry["error"] = item["error"]
+                results.append(entry)
+
+        return results
+
     def record(
         self,
         record_id: str,
@@ -2769,7 +2854,7 @@ def plot_spec_to_vega_lite(spec: dict[str, Any]) -> dict[str, Any]:
     """
     display: dict[str, Any] = spec.get("display") or {}
     base: dict[str, Any] = {
-        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
+        "$schema": "https://vega.github.io/schema/vega-lite/v6.json",
         "width": display.get("width", 600),
         "height": display.get("height", 400),
         "config": {
diff --git a/scripts/generate_reports.sh b/scripts/generate_reports.sh
new file mode 100755
index 0000000..72099ff
--- /dev/null
+++ b/scripts/generate_reports.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Render report plots from a local API (localhost:3000) into $OUTDIR for visual checks.
+OUTDIR=~/reports
+mkdir -p "$OUTDIR"
+FORMAT=png  # "svg" selects SVG output; any other value is rendered as PNG
+AXES="assembly_level assembly_span assembly_date genus"
+THRESHOLDS="10 1000"
+MODES="stacked grouped facet"  # only used by the commented-out histogram loops
+CUMULATIVE="true false"  # only used by the commented-out histogram loops
+
+# Render the Vega-Lite spec on stdin to file $2: SVG when $1 is "svg", PNG otherwise.
+function vl_convert() {
+  local format="$1" output_file="$2"
+  case "$format" in
+    svg)
+      python3 -c "import vl_convert as vlc, json, sys; spec=json.loads(sys.stdin.read()); print(vlc.vegalite_to_svg(spec), end='')" > "$output_file" ;;
+    *)
+      python3 -c "import vl_convert as vlc, json, sys; spec=json.loads(sys.stdin.read()); sys.stdout.buffer.write(vlc.vegalite_to_png(spec))" > "$output_file" ;;
+  esac
+}
+
+# # Test histogram report with different axis and mode combinations
+# category_axis="assembly_level"
+# for x_axis in $AXES; do
+#   for mode in $MODES; do
+#     for cumulative in $CUMULATIVE; do
+#       echo "Testing histogram with x=$x_axis, mode=$mode, cumulative=$cumulative"
+#       curl -s -X POST 'http://localhost:3000/api/v3/report' -H 'accept: application/json' -H 'Content-Type: application/json' -d "{\"query\":{\"index\":\"taxon\", \"taxa\": [\"canidae\"], \"taxon_filter_type\": \"tree\"},\"params\":{},\"report\":{\"report\":\"histogram\",\"x\":\"$x_axis\",\"cat\":\"$category_axis\",\"bucket_count\":20},\"include_plot_spec\":true,\"display\":{\"title\":\"histogram test\",\"histogram\":{\"mode\":\"$mode\",\"cumulative\":$cumulative}}}" \
+#       | cargo run --quiet --bin plot_to_vl \
+#       | vl_convert "$FORMAT" "$OUTDIR/histogram_${x_axis}_${mode}_cumulative_${cumulative}.$FORMAT"
+#     done
+#   done
+# done
+
+
+# # Test histogram report with different axis and category combinations
+# for x_axis in $AXES; do
+#   for cat_axis in $AXES; do
+#     echo "Testing histogram with x=$x_axis, category=$cat_axis"
+#     curl -s -X POST 'http://localhost:3000/api/v3/report' -H 'accept: application/json' -H 'Content-Type: application/json' -d "{\"query\":{\"index\":\"taxon\", \"taxa\": [\"canidae\"], \"taxon_filter_type\": \"tree\"},\"params\":{},\"report\":{\"report\":\"histogram\",\"x\":\"$x_axis\",\"cat\":\"$cat_axis\",\"bucket_count\":20},\"include_plot_spec\":true,\"display\":{\"title\":\"histogram test\"}}" \
+#     | cargo run --quiet --bin plot_to_vl \
+#     | vl_convert "$FORMAT" "$OUTDIR/histogram_${x_axis}_by_${cat_axis}.$FORMAT"
+#   done
+# done
+
+
+
+# # Test scatter report with different axis combinations and thresholds
+
+# for x_axis in $AXES; do
+#   for y_axis in $AXES; do
+#     for threshold in $THRESHOLDS; do
+#       echo "Testing scatter with x=$x_axis, y=$y_axis, threshold=$threshold"
+#       shape=$(if [[ "$threshold" -le 10 ]]; then echo "rect"; else echo "point"; fi)
+#       curl -s -X POST 'http://localhost:3000/api/v3/report' -H 'accept: application/json' -H 'Content-Type: application/json' -d "{\"query\":{\"index\":\"taxon\", \"taxa\": [\"canidae\"], \"taxon_filter_type\": \"tree\"},\"params\":{},\"report\":{\"report\":\"scatter\",\"x\":\"$x_axis\",\"y\":\"$y_axis\",\"scatter_threshold\":$threshold},\"include_plot_spec\":true,\"display\":{\"title\":\"scatter test\"}}" \
+#       | cargo run --quiet --bin plot_to_vl \
+#       | vl_convert "$FORMAT" "$OUTDIR/scatter_${shape}_${x_axis}_${y_axis}.$FORMAT"
+#     done
+#   done
+# done
+
+
+# Scatter with categories: every x axis and category axis pair (y fixed), at each threshold.
+for x_axis in $AXES; do
+  y_axis=assembly_span
+  for cat_axis in $AXES; do
+    for threshold in $THRESHOLDS; do
+      echo "Testing scatter with x=$x_axis, cat=$cat_axis, threshold=$threshold"
+      shape=$(if [[ "$threshold" -le 10 ]]; then echo "rect"; else echo "point"; fi)
+      curl -s -X POST 'http://localhost:3000/api/v3/report' -H 'accept: application/json' -H 'Content-Type: application/json' -d "{\"query\":{\"index\":\"taxon\", \"taxa\": [\"canidae\"], \"taxon_filter_type\": \"tree\"},\"params\":{},\"report\":{\"report\":\"scatter\",\"x\":\"$x_axis\",\"y\":\"$y_axis\",\"cat\":\"$cat_axis\",\"scatter_threshold\":$threshold},\"include_plot_spec\":true,\"display\":{\"title\":\"scatter test\"}}" \
+      | cargo run --quiet --bin plot_to_vl \
+      | vl_convert "$FORMAT" "$OUTDIR/scatter_${shape}_${x_axis}_${y_axis}_by_${cat_axis}.$FORMAT"
+    done
+  done
+done
diff --git a/src/bin/plot_to_vl.rs b/src/bin/plot_to_vl.rs
new file mode 100644
index 0000000..b5597f9
--- /dev/null
+++ b/src/bin/plot_to_vl.rs
@@ -0,0 +1,12 @@
+use std::io::{self, Read};
+/// Filter: stdin -> `plot_spec_to_vega_lite_json` -> stdout; exits with status 2 if stdin is unreadable.
+fn main() {
+    let mut spec_json = String::new();
+    io::stdin()
+        .read_to_string(&mut spec_json)
+        .unwrap_or_else(|err| {
+            eprintln!("failed to read stdin: {}", err);
+            std::process::exit(2);
+        });
+    println!("{}", genomehubs_query::plot_spec_to_vega_lite_json(&spec_json));
+}
diff --git a/src/core/query_builder.rs b/src/core/query_builder.rs
index cb55040..21dd6c7 100644
--- a/src/core/query_builder.rs
+++ b/src/core/query_builder.rs
@@ -85,6 +85,7 @@ pub fn build_search_body(
         "size": size,
         "from": offset,
         "query": { "bool": { "filter": [] } },
+        "track_total_hits": true,
         "_source": { "include": ["taxon_id","scientific_name","taxon_rank","parent","taxon_names.*","lineage.*"], "exclude": [] }
     });
 
diff --git a/templates/docs/reference/query-builder.qmd.tera b/templates/docs/reference/query-builder.qmd.tera
index 98f3afc..f721145 100644
--- a/templates/docs/reference/query-builder.qmd.tera
+++ b/templates/docs/reference/query-builder.qmd.tera
@@ -1011,6 +1011,82 @@ curl -s -X POST {{ api_base }}/v3/count/batch \
 
 ---
 
+### `report_batch(reports, api_base, api_version) -> list[dict]`
+
+Execute multiple reports in a single request to `/v3/report/batch`. Each
+input item may be either a bare `ReportBuilder` (in which case the calling
+`QueryBuilder` is used as the query scope) or, in Python and JavaScript, a
+`(QueryBuilder, ReportBuilder)` pair to run the report against a different query;
+the R method accepts bare `ReportBuilder` objects only. Returns a list of per-report
+results in the same order as the input, including `plot_spec` where it was requested.
+
+::: {.panel-tabset group="language"}
+
+## Python
+
+```python
+from {{ sdk_name }}.query import QueryBuilder, ReportBuilder
+
+qb = QueryBuilder("taxon").set_taxa(["Mammalia"], filter_type="tree")
+rb1 = ReportBuilder("arc").set_feature("has_assembly")
+rb2 = ReportBuilder("arc").set_feature("has_annotation").set_include_plot_spec()
+results = qb.report_batch([rb1, rb2], api_base="{{ api_base }}", api_version="{{ api_version }}")
+for res in results:
+    print(res.get("status"))
+    if "plot_spec" in res:
+        print("plot_spec included")
+```
+
+## R
+
+```r
+rb1 <- ReportBuilder$new("arc")$set_feature("has_assembly")
+rb2 <- ReportBuilder$new("arc")$set_feature("has_annotation")$set_include_plot_spec(TRUE)
+results <- qb$report_batch(list(rb1, rb2))
+for (r in results) {
+  print(r$status)
+  if (!is.null(r$plot_spec)) cat("plot_spec included\n")
+}
+```
+
+## JavaScript
+
+```javascript
+const rb1 = new ReportBuilder('arc').setFeature('has_assembly');
+const rb2 = new ReportBuilder('arc').setFeature('has_annotation').setIncludePlotSpec(true);
+const results = await qb.reportBatch([rb1, rb2]);
+results.forEach(r => {
+  console.log(r.status);
+  if (r.plot_spec) console.log('plot_spec included');
+});
+```
+
+## API
+
+```bash
+curl -s -X POST {{ api_base }}/v3/report/batch \
+  -H "Content-Type: application/json" \
+  -d '{
+    "reports": [
+      {
+        "query_yaml": "index: taxon\n...",
+        "params_yaml": "size: 10\npage: 1\n",
+        "report_yaml": "report: arc\nfeature: has_assembly\n"
+      },
+      {
+        "query_yaml": "index: taxon\n...",
+        "params_yaml": "size: 10\npage: 1\n",
+        "report_yaml": "report: arc\nfeature: has_annotation\n",
+        "include_plot_spec": true
+      }
+    ]
+  }'
+```
+
+:::
+
+---
+
 ### `chain_query(query_key, query_string) -> QueryBuilder`
 
 Chain a new query onto the results of the previous query.  Specifies that this
diff --git a/templates/js/query.browser.js.tera b/templates/js/query.browser.js.tera
index 8f12cc5..326d549 100644
--- a/templates/js/query.browser.js.tera
+++ b/templates/js/query.browser.js.tera
@@ -954,6 +954,53 @@ class QueryBuilder {
     return (data.results ?? []).map((r) => Number(r?.status?.hits ?? 0));
   }
 
+  /**
+   * Execute multiple reports in a single batch request.
+   * @param {(ReportBuilder|[QueryBuilder,ReportBuilder]|{query:QueryBuilder,report:ReportBuilder})[]} reports
+   * @param {string} [apiBase=API_BASE]
+   * @returns {Promise<object[]>}
+   * @throws {Error} On more than 100 reports, a malformed pair, or a non-OK response.
+   */
+  async reportBatch(reports, apiBase = API_BASE) {
+    if (reports.length > 100) throw new Error("maximum 100 reports per batch request");
+    const url = `${apiBase}/${API_VERSION}/report/batch`;
+    const payload = {
+      reports: reports.map((item) => {
+        // Default: a bare ReportBuilder, with this builder supplying the query.
+        let [qb, rb] = [this, item];
+        if (Array.isArray(item)) {
+          if (item.length !== 2) {
+            throw new Error("reportBatch() array items must be [QueryBuilder, ReportBuilder]");
+          }
+          [qb, rb] = item;
+        } else if (item && item.query && item.report) {
+          ({ query: qb, report: rb } = item);
+        }
+        const entry = {
+          query_yaml: qb.toQueryYaml(),
+          params_yaml: qb.toParamsYaml(),
+          report_yaml: rb.toReportYaml(),
+        };
+        if (rb._display != null) entry.display = rb._display;
+        if (rb._includePlotSpec) entry.include_plot_spec = true;
+        return entry;
+      }),
+    };
+    const resp = await fetch(url, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+    if (!resp.ok) throw new Error(`API request failed: ${resp.status} ${resp.statusText}`);
+    const data = JSON.parse(await resp.text());
+    return (data.results ?? []).map((res) => {
+      if (res.plot_spec != null) return res;
+      const out = { report: res.report ?? {}, status: res.status ?? {} };
+      if (res.error != null) out.error = res.error;
+      return out;
+    });
+  }
+
   /**
    * Fetch a single record by ID.
    * @param {string} recordId
diff --git a/templates/js/query.js b/templates/js/query.js
index b5f7ffc..47c64ce 100644
--- a/templates/js/query.js
+++ b/templates/js/query.js
@@ -1092,6 +1092,51 @@ class QueryBuilder {
     return counts;
   }
 
+  /**
+   * Execute multiple reports in a single batch request.
+   * @param {(ReportBuilder|[QueryBuilder,ReportBuilder]|{query:QueryBuilder,report:ReportBuilder})[]} reports
+   * @param {string} [apiBase=API_BASE]
+   * @returns {Promise<object[]>}
+   * @throws {Error} On more than 100 reports or a malformed pair.
+   */
+  async reportBatch(reports, apiBase = API_BASE) {
+    if (reports.length > 100)
+      throw new Error("maximum 100 reports per batch request");
+
+    const batchData = await this._postJson(`${apiBase}/v3/report/batch`, {
+      reports: reports.map((item) => {
+        // Default: a bare ReportBuilder, with this builder supplying the query.
+        let [qb, rb] = [this, item];
+        if (Array.isArray(item)) {
+          if (item.length !== 2) {
+            throw new Error("reportBatch() array items must be [QueryBuilder, ReportBuilder]");
+          }
+          [qb, rb] = item;
+        } else if (item && item.query && item.report) {
+          ({ query: qb, report: rb } = item);
+        }
+        const entry = {
+          query_yaml: qb.toQueryYaml(),
+          params_yaml: qb.toParamsYaml(),
+          report_yaml: rb.toReportYaml(),
+        };
+        if (rb._display != null) entry.display = rb._display;
+        if (rb._includePlotSpec) entry.include_plot_spec = true;
+        return entry;
+      }),
+    });
+
+    return (batchData.results ?? []).map((res) => {
+      if (res.plot_spec != null) return res;
+      const out = {
+        report: res.report ?? {},
+        status: res.status ?? {},
+      };
+      if (res.error != null) out.error = res.error;
+      return out;
+    });
+  }
+
   /**
    * Fetch a single record by ID or identifier.
    * @param {string} recordId - Record ID to fetch
@@ -2204,7 +2249,7 @@ function parseSearchWithLineageSummary(raw, configJson) {
 function plotSpecToVegaLite(plotSpec) {
   const display = plotSpec.display ?? {};
   const base = {
-    $schema: "https://vega.github.io/schema/vega-lite/v5.json",
+    $schema: "https://vega.github.io/schema/vega-lite/v6.json",
     title: display.title ?? undefined,
     width: display.width ?? 600,
     height: display.height ?? 400,
diff --git a/templates/python/query.py.tera b/templates/python/query.py.tera
index 85df85b..659e11b 100644
--- a/templates/python/query.py.tera
+++ b/templates/python/query.py.tera
@@ -5,7 +5,7 @@ Generated by cli-generator. Do not edit.
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Sequence
 
 from . import {{ sdk_name }} as _ext
 
@@ -949,6 +949,59 @@ class QueryBuilder:
             return data
         return data.get("report", data)
 
+    def report_batch(
+        self,
+        reports: Sequence[ReportBuilder | tuple[QueryBuilder, ReportBuilder]],
+        max_reports: int = 100,
+        api_base: str | None = None,
+        api_version: str = "v3",
+    ) -> list[dict[str, Any]]:
+        """Run a batch of reports and return a list of results.
+
+        Each entry in ``reports`` may be either a ``ReportBuilder`` (which
+        uses the calling builder's embedded query) or a ``(QueryBuilder,
+        ReportBuilder)`` tuple for per-item queries. The method posts to
+        ``{api_base}/{api_version}/report/batch`` (``api_base`` defaults to
+        ``API_BASE``) and returns the response's ``results`` list unchanged.
+
+        Raises:
+            ValueError: If ``reports`` has more than ``max_reports`` entries
+                or a list/tuple entry does not have exactly two elements.
+        """
+        from typing import cast
+
+        if len(reports) > max_reports:
+            raise ValueError(f"at most {max_reports} reports are allowed")
+
+        payload_reports: list[dict[str, Any]] = []
+        for item in reports:
+            if isinstance(item, (list, tuple)):
+                if len(item) != 2:
+                    raise ValueError("tuples in reports must be (QueryBuilder, ReportBuilder)")
+                qb, rb = item[0], item[1]
+            else:
+                qb, rb = self, item
+
+            # Narrow types for static checkers
+            rb = cast("ReportBuilder", rb)
+            qb = cast("QueryBuilder", qb)
+
+            report_doc: dict[str, Any] = {
+                "query_yaml": qb.to_query_yaml(),
+                "params_yaml": qb.to_params_yaml(),
+                "report_yaml": rb.to_report_yaml(),
+            }
+            if getattr(rb, "_display", None) is not None:
+                report_doc["display"] = rb._display
+            if getattr(rb, "_include_plot_spec", False):
+                report_doc["include_plot_spec"] = True
+            payload_reports.append(report_doc)
+
+        url = f"{api_base or API_BASE}/{api_version}/report/batch"
+        batch_resp = self._post_json(url, {"reports": payload_reports})
+        # Every entry is returned as-is, so plot_spec wrappers survive.
+        return list(batch_resp.get("results", []))
+
     def search_batch(
         self,
         queries: list["QueryBuilder"],
@@ -2051,7 +2104,7 @@ def plot_spec_to_vega_lite(spec: dict[str, Any]) -> dict[str, Any]:
     """
     display: dict[str, Any] = spec.get("display") or {}
     base: dict[str, Any] = {
-        "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
+        "$schema": "https://vega.github.io/schema/vega-lite/v6.json",
         "width": display.get("width", 600),
         "height": display.get("height", 400),
         "config": {
diff --git a/templates/r/query.R b/templates/r/query.R
index 3ff8895..d66f475 100644
--- a/templates/r/query.R
+++ b/templates/r/query.R
@@ -983,6 +983,52 @@ QueryBuilder <- R6::R6Class(
       if (length(counts) == 0) numeric(0) else counts
     },
 
+    #' @description Run several reports in one batch request, all scoped to this query.
+    #' @param reports List of ReportBuilder objects.
+    #' @param api_base Base URL of the API (default: from package).
+    #' @return List of per-report result lists, one per input report.
+    report_batch = function(reports, api_base = NULL) {
+      if (length(reports) > 100) {
+        stop("maximum 100 reports per batch request")
+      }
+      if (is.null(api_base)) {
+        api_base <- private$api_base_url
+      }
+      endpoint <- paste0(api_base, "/", private$api_version, "/report/batch")
+      # One payload entry per report; the query and params always come from `self`.
+      to_payload_item <- function(rb) {
+        entry <- list(
+          query_yaml = self$to_query_yaml(),
+          params_yaml = self$to_params_yaml(),
+          report_yaml = rb$to_report_yaml()
+        )
+        rb_private <- rb$.__enclos_env__$private
+        if (!is.null(rb_private$._display)) entry$display <- rb_private$._display
+        if (isTRUE(rb_private$._include_plot_spec)) entry$include_plot_spec <- TRUE
+        entry
+      }
+      request_body <- jsonlite::toJSON(list(reports = lapply(reports, to_payload_item)), auto_unbox = TRUE)
+      resp <- httr::POST(endpoint,
+        body = request_body,
+        httr::add_headers("Content-Type" = "application/json"),
+        httr::accept("application/json")
+      )
+      httr::stop_for_status(resp)
+      parsed <- jsonlite::fromJSON(
+        httr::content(resp, as = "text", encoding = "UTF-8"),
+        simplifyVector = FALSE
+      )
+      # Keep plot_spec results whole; otherwise reduce to report/status (+ error).
+      lapply(parsed$results %||% list(), function(res) {
+        if (!is.null(res$plot_spec)) {
+          return(res)
+        }
+        out <- list(report = res$report %||% list(), status = res$status %||% list())
+        if (!is.null(res$error)) out$error <- res$error
+        out
+      })
+    },
+
     #' @description Fetch a single record by ID or identifier.
     #' @param record_id Record ID to fetch (required).
     #' @param result Result type (taxon|assembly|sample); defaults to index type.
diff --git a/templates/rust/main.rs.tera b/templates/rust/main.rs.tera
index 3578880..301cd25 100644
--- a/templates/rust/main.rs.tera
+++ b/templates/rust/main.rs.tera
@@ -296,6 +296,12 @@ enum {{ index.name | capitalize }}Commands {
         #[arg(long, default_value = "")]
         query: String,
 
+        /// YAML file containing a single report mapping or a sequence of report
+        /// mappings. When supplied the CLI will run each report in the file
+        /// (batch mode) or the single report mapping and print JSON results.
+        #[arg(long, value_name = "FILE")]
+        file: Option<std::path::PathBuf>,
+
         /// Maximum number of results to return per page.
         #[arg(long, default_value = "50")]
         size: usize,
@@ -994,7 +1000,7 @@ fn run(cli: Cli) -> anyhow::Result<()> {
                 }
             }
             {{ index.name | capitalize }}Commands::Report {
-                report_type, taxon, taxon_filter, filter, rank, query,
+                report_type, taxon, taxon_filter, filter, rank, query, file,
                 x, x_opts, y, cat, cat_rank, count_rank, collapse_monotypic,
                 include_plot_spec, display, taxonomy,
             } => {
@@ -1038,21 +1044,141 @@ fn run(cli: Cli) -> anyhow::Result<()> {
                     taxonomy,
                     ..Default::default()
                 };
-                let report_opts = generated::client::ReportOptions {
-                    x, x_opts, y, cat, cat_rank, count_rank,
-                    collapse_monotypic,
-                    include_plot_spec,
-                    display,
-                    ..Default::default()
-                };
-                let raw = generated::client::report(
-                    generated::indexes::Index::{{ index.name | capitalize }},
-                    &full_query,
-                    &report_type,
-                    &opts,
-                    &report_opts,
-                )?;
-                println!("{raw}");
+
+                // CLI-level report defaults are applied per-item when running a
+                // batch file (we construct the per-item opts from these values).
+
+                if let Some(ref path) = file {
+                    let reports = load_report_file(
+                        path,
+                        &taxon_filter,
+                        taxon.as_deref(),
+                        rank.as_deref(),
+                        &filter,
+                        &full_query,
+                        suppress_divergence_warnings,
+                    )?;
+
+                    let mut results: Vec<serde_json::Value> = Vec::new();
+                    for (q, cfg) in reports {
+                        let item_report_type = cfg
+                            .get("report")
+                            .and_then(|v| v.as_str())
+                            .map(str::to_string)
+                            .unwrap_or_else(|| report_type.clone());
+
+                        let mut per_report_opts = generated::client::ReportOptions {
+                            x: x.clone(),
+                            x_opts: x_opts.clone(),
+                            y: y.clone(),
+                            cat: cat.clone(),
+                            cat_rank: cat_rank.clone(),
+                            count_rank: count_rank.clone(),
+                            collapse_monotypic,
+                            include_plot_spec,
+                            display: display.clone(),
+                            ..Default::default()
+                        };
+
+                        if let Some(s) = cfg.get("x").and_then(|v| v.as_str()) {
+                            per_report_opts.x = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("x_opts").and_then(|v| v.as_str()).or(cfg.get("xOpts").and_then(|v| v.as_str())) {
+                            per_report_opts.x_opts = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("y").and_then(|v| v.as_str()) {
+                            per_report_opts.y = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("y_opts").and_then(|v| v.as_str()).or(cfg.get("yOpts").and_then(|v| v.as_str())) {
+                            per_report_opts.y_opts = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("cat").and_then(|v| v.as_str()) {
+                            per_report_opts.cat = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("cat_opts").and_then(|v| v.as_str()).or(cfg.get("catOpts").and_then(|v| v.as_str())) {
+                            per_report_opts.cat_opts = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("rank").and_then(|v| v.as_str()) {
+                            per_report_opts.rank = Some(s.to_string());
+                        }
+                        if let Some(arr) = cfg.get("fields").and_then(|v| v.as_array()) {
+                            per_report_opts.fields = arr.iter().filter_map(|vv| vv.as_str().map(|s| s.to_string())).collect();
+                        }
+                        if let Some(s) = cfg.get("status_filter").and_then(|v| v.as_str()) {
+                            per_report_opts.status_filter = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("cat_rank").and_then(|v| v.as_str()).or(cfg.get("catRank").and_then(|v| v.as_str())) {
+                            per_report_opts.cat_rank = Some(s.to_string());
+                        }
+                        if let Some(b) = cfg.get("collapse_monotypic").and_then(|v| v.as_bool()).or(cfg.get("collapseMonotypic").and_then(|v| v.as_bool())) {
+                            per_report_opts.collapse_monotypic = b;
+                        }
+                        if let Some(s) = cfg.get("preserve_rank").and_then(|v| v.as_str()).or(cfg.get("preserveRank").and_then(|v| v.as_str())) {
+                            per_report_opts.preserve_rank = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("count_rank").and_then(|v| v.as_str()).or(cfg.get("countRank").and_then(|v| v.as_str())) {
+                            per_report_opts.count_rank = Some(s.to_string());
+                        }
+                        if let Some(s) = cfg.get("location_field").and_then(|v| v.as_str()).or(cfg.get("locationField").and_then(|v| v.as_str())) {
+                            per_report_opts.location_field = Some(s.to_string());
+                        }
+                        if let Some(n) = cfg.get("hex_resolution").and_then(|v| v.as_i64()) {
+                            per_report_opts.hex_resolution = Some(n as u32);
+                        }
+                        if let Some(n) = cfg.get("map_threshold").and_then(|v| v.as_i64()) {
+                            per_report_opts.map_threshold = Some(n as u32);
+                        }
+                        if let Some(n) = cfg.get("scatter_threshold").and_then(|v| v.as_i64()) {
+                            per_report_opts.scatter_threshold = Some(n as u32);
+                        }
+                        if let Some(b) = cfg.get("include_plot_spec").and_then(|v| v.as_bool()) {
+                            per_report_opts.include_plot_spec = b;
+                        }
+                        if let Some(display_val) = cfg.get("display") {
+                            if let Ok(s) = serde_json::to_string(display_val) {
+                                per_report_opts.display = Some(s);
+                                per_report_opts.include_plot_spec = true;
+                            }
+                        }
+
+                        let raw = generated::client::report(
+                            generated::indexes::Index::{{ index.name | capitalize }},
+                            &q,
+                            &item_report_type,
+                            &opts,
+                            &per_report_opts,
+                        )?;
+                        if let Ok(val) = serde_json::from_str::<serde_json::Value>(&raw) {
+                            results.push(val);
+                        } else {
+                            results.push(serde_json::Value::String(raw));
+                        }
+                    }
+
+                    let json = serde_json::to_string(&results).context("serialising batch reports")?;
+                    println!("{json}");
+                } else {
+                    let report_opts = generated::client::ReportOptions {
+                        x: x.clone(),
+                        x_opts: x_opts.clone(),
+                        y: y.clone(),
+                        cat: cat.clone(),
+                        cat_rank: cat_rank.clone(),
+                        count_rank: count_rank.clone(),
+                        collapse_monotypic,
+                        include_plot_spec,
+                        display: display.clone(),
+                        ..Default::default()
+                    };
+                    let raw = generated::client::report(
+                        generated::indexes::Index::{{ index.name | capitalize }},
+                        &full_query,
+                        &report_type,
+                        &opts,
+                        &report_opts,
+                    )?;
+                    println!("{raw}");
+                }
             }
             {{ index.name | capitalize }}Commands::Lookup { search_term, size, format } => {
                 let body = generated::client::lookup(
@@ -1762,6 +1888,86 @@ fn build_queries_from_patches(
         .collect()
 }
 
+    /// Load a report file and return one `(query_string, report_cfg)` pair per report.
+    ///
+    /// Accepted layouts: a mapping with a top-level `reports:` sequence (plus an
+    /// optional `shared:` mapping), a sequence of report mappings, or a single
+    /// report mapping. Each config keeps every string-keyed entry except `taxon`,
+    /// `rank`, `filter` and `shared`, converted to JSON.
+    ///
+    /// Errors if the file is unreadable or invalid YAML, the document or an entry
+    /// has the wrong shape, or a value cannot be converted to JSON.
+    fn load_report_file(
+        file_path: &std::path::Path,
+        taxon_filter: &TaxonFilter,
+        cli_taxon: Option<&str>,
+        cli_rank: Option<&str>,
+        cli_filters: &[String],
+        base_query: &str,
+        suppress_divergence_warnings: bool,
+    ) -> anyhow::Result<Vec<(String, serde_json::Value)>> {
+        let content = std::fs::read_to_string(file_path)
+            .with_context(|| format!("reading report file {}", file_path.display()))?;
+        // Surface YAML syntax errors instead of masking them as a shape error.
+        let parsed: serde_yaml::Value = serde_yaml::from_str(content.trim())
+            .with_context(|| format!("parsing report file {}", file_path.display()))?;
+
+        // Extract sequence of items and optional shared mapping.
+        let reports_key = serde_yaml::Value::String("reports".into());
+        let (items, shared) = match &parsed {
+            serde_yaml::Value::Mapping(map) if map.contains_key(&reports_key) => {
+                let shared = map
+                    .get(&serde_yaml::Value::String("shared".into()))
+                    .and_then(|v| v.as_mapping())
+                    .cloned();
+                // A non-sequence `reports` would otherwise silently run nothing.
+                let seq = map
+                    .get(&reports_key)
+                    .and_then(|v| v.as_sequence())
+                    .cloned()
+                    .ok_or_else(|| anyhow::anyhow!("`reports` must be a YAML sequence"))?;
+                (seq, shared)
+            }
+            serde_yaml::Value::Sequence(seq) => (seq.clone(), None),
+            serde_yaml::Value::Mapping(map) => (vec![serde_yaml::Value::Mapping(map.clone())], None),
+            _ => anyhow::bail!("report file must be a YAML mapping or sequence"),
+        };
+
+        // Build query strings for items using existing query patch logic.
+        let queries = build_queries_from_patches(
+            &items,
+            shared.as_ref(),
+            taxon_filter,
+            cli_taxon,
+            cli_rank,
+            cli_filters,
+            base_query,
+            suppress_divergence_warnings,
+        )?;
+
+        // Keep only report-specific keys; taxon/rank/filter were applied to the query string.
+        let mut cfgs: Vec<serde_json::Value> = Vec::with_capacity(items.len());
+        for it in &items {
+            let map = it.as_mapping().ok_or_else(|| anyhow::anyhow!("each report entry must be a YAML mapping"))?;
+            let mut obj = serde_json::Map::new();
+            // Entries whose key is not a string are skipped.
+            for (ks, v) in map.iter().filter_map(|(k, v)| k.as_str().map(|ks| (ks, v))) {
+                if matches!(ks, "taxon" | "rank" | "filter" | "shared") {
+                    continue;
+                }
+                let val = serde_json::to_value(v).with_context(|| format!("converting report key `{ks}` to JSON"))?;
+                obj.insert(ks.to_string(), val);
+            }
+            cfgs.push(serde_json::Value::Object(obj));
+        }
+
+        if queries.len() != cfgs.len() {
+            anyhow::bail!("internal error: queries/report configs length mismatch")
+        }
+
+        Ok(queries.into_iter().zip(cfgs).collect())
+    }
+
 /// Convert raw `--filter FIELD OP VALUE` triples into expression strings.
 fn cli_filter_fragments(filter: &[String]) -> Vec<String> {
     filter
diff --git a/templates/snippets/js_snippet.tera b/templates/snippets/js_snippet.tera
index 93c8bc9..dc1fb2a 100644
--- a/templates/snippets/js_snippet.tera
+++ b/templates/snippets/js_snippet.tera
@@ -61,6 +61,13 @@ qb.countBatch(queries).then((counts) =>
   counts.forEach((n, i) => console.log(`Query ${i}: ${n} records`))
 );
 
+{% elif call_type == "report_batch" -%}
+// Run a batch of reports
+const reports = [rb]; // extend with ReportBuilder instances or [QueryBuilder, ReportBuilder] pairs
+qb.reportBatch(reports).then((results) =>
+  results.forEach((res, i) => console.log(`Report ${i}:`, res))
+);
+
 {% elif call_type == "positional" -%}
 // Run a positional report
 const assemblies = [{% for asm in positional_assemblies %}"{{ asm }}", {% endfor %}];
diff --git a/templates/snippets/python_snippet.tera b/templates/snippets/python_snippet.tera
index e908aca..499a206 100644
--- a/templates/snippets/python_snippet.tera
+++ b/templates/snippets/python_snippet.tera
@@ -64,6 +64,13 @@ counts = qb.count_batch(queries)
 for i, n in enumerate(counts):
     print(f"Query {i}: {n} records")
 
+{% elif call_type == "report_batch" -%}
+# Run a batch of reports
+reports = [rb]  # extend this list with ReportBuilder instances or (QueryBuilder, ReportBuilder) pairs
+results = qb.report_batch(reports)
+for i, res in enumerate(results):
+    print(f"Report {i}: {res}")
+
 {% elif call_type == "positional" -%}
 # Run a positional report
 assemblies = [{% for asm in positional_assemblies %}"{{ asm }}", {% endfor %}]
diff --git a/templates/snippets/r_snippet.tera b/templates/snippets/r_snippet.tera
index e7fce3a..2a01769 100644
--- a/templates/snippets/r_snippet.tera
+++ b/templates/snippets/r_snippet.tera
@@ -61,6 +61,15 @@ queries <- list(qb)  # extend with additional QueryBuilder instances
 counts <- qb$count_batch(queries)
 for (i in seq_along(counts)) cat("Query", i, ":", counts[[i]], "records\n")
 
+{% elif call_type == "report_batch" -%}
+# Run a batch of reports
+reports <- list(rb)  # extend with ReportBuilder instances
+results <- qb$report_batch(reports)
+for (i in seq_along(results)) {
+  cat(sprintf("Report %d:\n", i))
+  print(results[[i]])
+}
+
 {% elif call_type == "positional" -%}
 # Run a positional report
 assemblies <- c({% for asm in positional_assemblies %}"{{ asm }}", {% endfor %})
diff --git a/tests/python/test_report_batch.py b/tests/python/test_report_batch.py
new file mode 100644
index 0000000..4a47303
--- /dev/null
+++ b/tests/python/test_report_batch.py
@@ -0,0 +1,11 @@
+import pytest
+
+from cli_generator import QueryBuilder, ReportBuilder
+
+
+def test_report_batch_exceeds_limit():
+    """Submitting 101 reports in one batch must raise ``ValueError``."""
+    query_builder, report_builder = QueryBuilder("taxon"), ReportBuilder("histogram")
+    oversized_batch = [report_builder for _ in range(101)]
+    with pytest.raises(ValueError):
+        query_builder.report_batch(oversized_batch)
diff --git a/tests/python/test_sdk_parity.py b/tests/python/test_sdk_parity.py
index 6b7a6d9..6cb95c0 100644
--- a/tests/python/test_sdk_parity.py
+++ b/tests/python/test_sdk_parity.py
@@ -362,6 +362,18 @@
         "js_name": "report",
         "r_name": "report",
     },
+    "report": {
+        "params": ["report"],
+        "python_name": "report",
+        "js_name": "report",
+        "r_name": "report",
+    },
+    "report_batch": {
+        "params": ["reports", "max_reports"],
+        "python_name": "report_batch",
+        "js_name": "reportBatch",
+        "r_name": "report_batch",
+    },
     "chain_query": {
         "params": ["query_key", "query_string"],
         "python_name": "chain_query",