29 changes: 26 additions & 3 deletions .flake8
@@ -5,6 +5,29 @@
# this file, may be copied, modified, propagated, or distributed except according to
# the terms contained in the file 'LICENCE.txt'.
[flake8]
-exclude =
-    examples
-    setup.py
+max-line-length = 100
+extend-ignore = E203, E501, W503, W293, W291, F541, F841
+exclude =
+    .git,
+    __pycache__,
+    docs/source/conf.py,
+    old,
+    build,
+    examples,
+    setup.py,
+    dist,
+    *.egg-info,
+    .venv,
+    venv,
+    env,
+    .pytest_cache,
+    htmlcov,
+    logs,
+    tmp,
+    screenshots,
+    workflow_*.json,
+    tracking*.json,
+    conftest.py,
+    playwright.config.py,
+    .csv,
+    paper_scripts
44 changes: 44 additions & 0 deletions .github/workflows/dead_code.yml
@@ -0,0 +1,44 @@
# Copyright (c) European Space Agency, 2025.
#
# This file is subject to the terms and conditions defined in file 'LICENCE.txt', which
# is part of this source code package. No part of the package, including
# this file, may be copied, modified, propagated, or distributed except according to
# the terms contained in the file 'LICENCE.txt'.
name: Dead Code Detection

on: [pull_request]

jobs:
  vulture-strict:
    name: Vulture (100% confidence - blocking)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install vulture
        run: pip install "vulture>=2.10"
      - name: Run vulture (100% confidence)
        run: |
          echo "Running vulture dead code detection (100% confidence - blocking)..."
          vulture anomaly_match/ .vulture_whitelist.py --min-confidence 100

  vulture-warnings:
    name: Vulture (60% confidence - not required)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install vulture
        run: pip install "vulture>=2.10"
      - name: Run vulture (60% confidence)
        run: |
          echo "Running vulture dead code detection (60% confidence)..."
          echo "This check fails if potential dead code is found, but is not required to pass."
          echo ""
          vulture anomaly_match/ .vulture_whitelist.py --min-confidence 60
1 change: 1 addition & 0 deletions .github/workflows/run_tests.yml
@@ -30,6 +30,7 @@ jobs:
        flake8 . --count --show-source --statistics --max-line-length=127 --ignore=E402,W503,E203
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      pull-requests: write
      contents: read
3 changes: 3 additions & 0 deletions .gitignore
@@ -189,8 +189,11 @@ paper_scripts/test_plots_output
paper_scripts/**/*.png
paper_scripts/**/*.jpg
paper_scripts/**/*.jpeg
paper_scripts/**/*.pdf
science_paper/**/*.png
science_paper/**/*.jpg
science_paper/**/*.jpeg
pytest-coverage.txt
pytest.xml
# IDE and editor settings
.vscode/
64 changes: 64 additions & 0 deletions .vulture_whitelist.py
@@ -0,0 +1,64 @@
# Copyright (c) European Space Agency, 2025.
#
# This file is subject to the terms and conditions defined in file 'LICENCE.txt', which
# is part of this source code package. No part of the package, including
# this file, may be copied, modified, propagated, or distributed except according to
# the terms contained in the file 'LICENCE.txt'.
"""
Vulture whitelist file.

Add entries here for code that vulture incorrectly identifies as unused.
Format: function_name # noqa - comment explaining why it's used
"""

# SessionIOHandler methods - public API used in tests
save_model_checkpoint # noqa - Used in test_session_io_handler.py, test_model_io_integration.py
load_model_checkpoint # noqa - Used in test_model_io_integration.py
list_sessions # noqa - Used in test_session_io_handler.py
save_run # noqa - Used in test_run_label_migration.py
save_labels_to_output_dir # noqa - Used in test_run_label_migration.py

# FixMatch class attributes
requires_grad # noqa - PyTorch tensor property set to disable gradient for EMA model

# AnomalyDetectionDataset methods used in tests (tests/dataset_test.py)
_read_and_resize_image # noqa - Used in test_read_and_resize_different_formats
unlabeled_filepaths # noqa - Used in test_anomaly_detection_dataset_properties
save_as_hdf5 # noqa - Used in test_anomaly_detection_dataset_hdf5
load_from_hdf5 # noqa - Used in test_anomaly_detection_dataset_hdf5

# Transform functions used in paper_scripts/
get_strong_transforms # noqa - Used in paper_scripts/get_example_images.py

# File I/O utility functions - public API
get_image_paths_from_folder # noqa - Companion to get_image_names_from_folder, tested

# Session class public API
start_UI # noqa - Public API - used in StarterNotebook.ipynb

# Widget methods - public API
update_image_display # noqa - Public API method for updating image display

# ipywidgets style/layout attributes - used by ipywidgets framework
_.style # noqa - Widget.py: progress_bar.style for visual feedback
_.button_color # noqa - ipywidgets button styling
_.font_size # noqa - ipywidgets widget styling
_.width # noqa - ipywidgets layout attribute
_.height # noqa - ipywidgets layout attribute

# Learning rate scheduler utility - tested in tests/utils_test.py
get_cosine_schedule_with_warmup # noqa - Used in tests and available for external use

# Configuration attributes - validated and documented
bn_momentum # noqa - Part of default config for batch normalization momentum
N_batch_prediction # noqa - Used in prediction scripts for batch size

# Seed utility function - used in paper_scripts/paper_benchmark.py and tests
set_seeds # noqa - Used for reproducibility in benchmarks and testing

# PyTorch CUDA attribute - set in set_seeds.py for deterministic/performance mode
_.benchmark # noqa - torch.backends.cudnn.benchmark attribute

# Image processing functions used in prediction scripts (root level, excluded from scan)
process_single_wrapper # noqa - Used in prediction_process_hdf5.py, prediction_process_zarr.py
_.n_expected_channels # noqa - fitsbolt config attribute set dynamically
29 changes: 29 additions & 0 deletions CHANGELOG.MD
@@ -5,6 +5,35 @@
[//]: # (this file, may be copied, modified, propagated, or distributed except according to)
[//]: # (the terms contained in the file 'LICENCE.txt'.)

## [v1.2.0] – 2025-01-13

### Added
- **Cutana streaming integration** for catalogue-based predictions with parquet and CSV support
- **FitsBolt integration** for consistent FITS normalization across training and prediction
- **Iteration score storage** for tracking unlabeled and test data scores per iteration
- **Automatic batch size estimation** using exponential and binary search for optimal GPU memory usage
- **Full resolution image preview** button in the UI for detailed inspection
- **Dead code detection** CI workflow using Vulture for codebase maintenance

### Changed
- **Refactored Widget architecture** by extracting PreviewWidget for better code organization
- **FitsBolt config persistence** in model checkpoints for reproducible normalization
- **Parquet format** for Cutana buffer instead of CSV for improved performance
- **Black line-length** updated to 100 characters for better readability

### Fixed
- **Gallery filename display** for long filenames with improved shortening (#237)
- **Duplicate result accumulation** in prediction process (#238)
- **Error handling** for iteration score CSV saves (#236)
- **FITS extension handling** in Cutana streaming
- **Tensor handling** improvements throughout the codebase

### Removed
- **Dead code cleanup** removing unused functions and imports identified by Vulture
- **IDE/editor files** from repository with updated .gitignore

---

## [v1.1.0] – 2025-07-04

### Added
17 changes: 15 additions & 2 deletions CITATION.cff
@@ -5,6 +5,12 @@ authors:
- family-names: "Gómez"
  given-names: "Pablo"
  orcid: "https://orcid.org/0000-0002-5631-8240"
- family-names: "Ruhberg"
  given-names: "Laslo E."
  orcid: "https://orcid.org/0009-0003-3810-1245"
- family-names: "Nardone"
  given-names: "Maria Teresa"
  orcid: "https://orcid.org/0009-0001-4102-9630"
- family-names: "O'Ryan"
  given-names: "David"
  orcid: "https://orcid.org/0000-0003-1217-4617"
@@ -19,11 +25,18 @@ preferred-citation:
- family-names: "Gómez"
  given-names: "Pablo"
  orcid: "https://orcid.org/0000-0002-5631-8240"
- family-names: "Ruhberg"
  given-names: "Laslo E."
  orcid: "https://orcid.org/0009-0003-3810-1245"
- family-names: "Nardone"
  given-names: "Maria Teresa"
  orcid: "https://orcid.org/0009-0001-4102-9630"
- family-names: "O'Ryan"
  given-names: "David"
  orcid: "https://orcid.org/0000-0003-1217-4617"
title: "AnomalyMatch: Discovering Rare Objects of Interest with Semi-supervised and Active Learning"
journal: "arXiv preprint"
journal: "arXiv e-prints"
year: 2025
month: 5
doi: 10.48550/arXiv.2505.03509
url: "https://arxiv.org/abs/2505.03509"
url: "https://arxiv.org/abs/2505.03509"
65 changes: 61 additions & 4 deletions README.md
@@ -75,9 +75,15 @@ session_name_timestamp/
├── session_metadata.json # Complete session tracking data
├── labeled_data.csv # All labelled samples
├── config.toml # Final configuration
-└── model.pth # Model checkpoint
+├── model.pth # Model checkpoint
+└── iteration_scores/ # Per-iteration prediction scores
+    ├── iteration_1_unlabelled_scores.csv
+    ├── iteration_1_test_scores.csv
+    └── ...
```

**Iteration Scores:** After each training iteration, AnomalyMatch stores prediction scores for both unlabelled and test data (if `test_ratio > 0`). These CSV files contain filenames and their corresponding anomaly scores, enabling analysis of how predictions evolve across training iterations.
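
For example, the per-iteration CSVs can be concatenated to trace how a file's score evolves. A minimal sketch — the `filename` and `score` column names and the example file name are assumptions based on the description above, not a documented schema:

```python
import glob
import re

import pandas as pd

# Collect all unlabelled-score CSVs from a saved session.
pattern = "session_name_timestamp/iteration_scores/iteration_*_unlabelled_scores.csv"
frames = []
for path in sorted(glob.glob(pattern)):
    df = pd.read_csv(path)
    # Recover the iteration index from the file name.
    df["iteration"] = int(re.search(r"iteration_(\d+)_", path).group(1))
    frames.append(df)

scores = pd.concat(frames, ignore_index=True)

# Trace one image's anomaly score across training iterations.
trace = scores[scores["filename"] == "image_000042.png"].sort_values("iteration")
print(trace[["iteration", "score"]])
```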

You can view any saved session using:
```python
import anomaly_match as am
```

@@ -178,11 +184,46 @@ cfg.prediction_search_dir = "/path/to/directory/containing/zarr/files"

AnomalyMatch will automatically discover all `.zarr` files in the specified directory and process them efficiently in parallel. Each Zarr file should contain image data with optional metadata in a corresponding `.parquet` file.

#### Multiple Zarr Files for Prediction

When running predictions on large datasets split across multiple Zarr files, AnomalyMatch automatically discovers and processes all Zarr stores in `prediction_search_dir`. Two folder structures are supported:

**Option 1: Direct Zarr files**
```
prediction_search_dir/
├── dataset_part1.zarr/
│   └── images/                      # Zarr array with shape (N, H, W, C)
├── dataset_part1_metadata.parquet
├── dataset_part2.zarr/
│   └── images/
└── dataset_part2_metadata.parquet
```

**Option 2: Batch folders with images.zarr subdirectory**
```
prediction_search_dir/
├── batch_001/
│   ├── images.zarr/
│   │   └── images/
│   └── images_metadata.parquet
└── batch_002/
    ├── images.zarr/
    │   └── images/
    └── images_metadata.parquet
```

**Metadata requirements:**
- Parquet files should contain a `filename`, `original_filename`, or `source_id` column
- For direct zarr files: `<zarr_name>_metadata.parquet` in the same directory
- For batch folders: `images_metadata.parquet` in the batch folder

**Filename handling:** To prevent collisions across zarr files, filenames are automatically prefixed with the zarr/batch folder name (e.g., `batch_001__image_000042`).
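
As an illustration, an "Option 1" store could be produced like this — a sketch assuming the zarr v2 API, with pyarrow available for `to_parquet`; array sizes and names are placeholders:

```python
import numpy as np
import pandas as pd
import zarr

# Write 100 dummy RGB images as an (N, H, W, C) array under the "images" key.
images = np.random.randint(0, 255, size=(100, 96, 96, 3), dtype=np.uint8)
root = zarr.open("prediction_search_dir/dataset_part1.zarr", mode="w")
root.create_dataset("images", data=images, chunks=(16, 96, 96, 3))

# Matching metadata parquet: one row per image, with a "filename" column.
names = [f"image_{i:06d}" for i in range(len(images))]
pd.DataFrame({"filename": names}).to_parquet(
    "prediction_search_dir/dataset_part1_metadata.parquet"
)
```

Following the prefixing rule above, these images would then appear in the results as `dataset_part1__image_000000`, `dataset_part1__image_000001`, and so on.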

### FITS File Handling

- By default, the first extension (index 0) is used when loading FITS files
- You can specify a particular extension using the `fits_extension` parameter in the configuration:
-  - Set `cfg.fits_extension` in your code to control which FITS extensions to use
+  - Set `cfg.normalisation.fits_extension` in your code to control which FITS extensions to use
  - Integer values (e.g., `0`, `1`, `2`) to access extensions by index
  - String values (e.g., `"PRIMARY"`, `"SCIENCE"`) to access extensions by name
  - List of integers or strings (e.g., `[0, 1, 2]` or `["PRIMARY", "SCIENCE", "ERROR"]`) to combine multiple extensions
@@ -198,9 +239,23 @@

When working with FITS files containing multiple images or data products, specify which extension(s) to use in the configuration.
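
For instance — a sketch where `am.get_default_cfg()` is an assumed accessor (obtain `cfg` however your setup does); only the `cfg.normalisation.fits_extension` attribute and the value forms are taken from the list above:

```python
import anomaly_match as am

cfg = am.get_default_cfg()  # assumed accessor

cfg.normalisation.fits_extension = 1            # single extension by index
cfg.normalisation.fits_extension = "SCIENCE"    # ...or by name
cfg.normalisation.fits_extension = ["PRIMARY", "SCIENCE", "ERROR"]  # ...or combined
```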

### Cutana Streaming Integration

AnomalyMatch supports streaming predictions via [Cutana](https://github.com/esa/cutana), which enables on-the-fly cutout extraction from FITS tiles. This is particularly useful for Euclid mission data, which Cutana primarily targets.

**How to use Cutana streaming:**

1. Prepare a Cutana-compatible source catalogue (CSV or Parquet) with columns for coordinates and FITS file paths
2. Set `cfg.prediction_search_dir` to a folder containing your catalogue files
3. AnomalyMatch will automatically detect the catalogues and stream cutouts via Cutana

**FITS extension configuration:** When using Cutana streaming, ensure `cfg.normalisation.fits_extension` matches the FITS extensions referenced in your catalogue. For multi-band Euclid data, this might be `["VIS", "NIR-H", "NIR-J"]` or similar, depending on your catalogue structure.

For more details on catalogue format and Cutana configuration, see the [Cutana documentation](https://github.com/esa/cutana).
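
As a purely illustrative sketch of steps 1–2 — the exact column names Cutana expects are defined in its own documentation, so `ra`, `dec`, and `fits_file` below are placeholders:

```python
from pathlib import Path

import pandas as pd

catalogue_dir = Path("catalogues")
catalogue_dir.mkdir(exist_ok=True)

# Hypothetical source catalogue: one row per cutout to extract.
catalogue = pd.DataFrame(
    {
        "ra": [150.1192, 150.2034],
        "dec": [2.2058, 2.3311],
        "fits_file": ["tiles/tile_001.fits", "tiles/tile_002.fits"],
    }
)
catalogue.to_parquet(catalogue_dir / "sources.parquet")

# AnomalyMatch then detects the catalogue and streams cutouts via Cutana:
# cfg.prediction_search_dir = "catalogues"
```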

## Normalisation and Stretching
- Normalisation can be selected in the UI via a drop-down. Alternatively, it can be changed by setting e.g.
-  `cfg.normalisation_method = am.NormalisationMethod.ZSCALE`
+  `cfg.normalisation.normalisation_method = am.NormalisationMethod.ZSCALE`
- Current options are
  - `CONVERSION_ONLY`: no normalisation
  - `LOG`: [logarithmic normalisation](https://docs.astropy.org/en/stable/api/astropy.visualization.LogStretch.html#astropy.visualization.LogStretch)
@@ -218,8 +273,9 @@
- `logLevel`: Controls verbosity of training/session logs.
- `test_ratio`: Proportion of data used for evaluation (0.0 disables test evaluation, > 0 shows AUROC/AUPRC curves).
- `size`: Dimensions to which images are resized (below 96x96 is not recommended).
-- `N_to_load`: Number of unlabeled images loaded into the training dataset at once.
+- `N_to_load`: Number of unlabeled images loaded into the training dataset at once. From this pool, `uratio` * `batch_size` * `num_train_iter` (e.g. 5 * 16 * 200 = 16,000) unlabeled images are sampled for training.
- `output_dir`: Folder for storing results (e.g., labeled_data.csv or final logs).
- `prediction_batch_size`: Batch size for prediction. If not set, AnomalyMatch automatically estimates an optimal batch size based on available GPU memory.
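
Putting a few of these together — a sketch where the attribute names come from the list above, but `am.get_default_cfg()` and the example values and types are assumptions:

```python
import anomaly_match as am

cfg = am.get_default_cfg()  # assumed accessor

cfg.test_ratio = 0.1         # > 0 enables AUROC/AUPRC evaluation
cfg.size = [96, 96]          # below 96x96 is not recommended
cfg.N_to_load = 16000        # unlabeled images held in the training dataset at once
cfg.output_dir = "results/"
# cfg.prediction_batch_size is deliberately left unset, so AnomalyMatch
# estimates an optimal batch size from available GPU memory.
```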

## Advanced CFG Parameters

@@ -249,6 +305,7 @@ The following advanced parameters can be configured:

### Additional Parameters
- `fits_extension`: Extension(s) to use for FITS files, can be int, string, or list of int/string (default: None)
- `fits_combination`: Dictionary with keys `R`, `G`, `B`, each a list with one entry per `fits_extension`, specifying how the specified FITS extensions are (linearly) mapped to the R, G, B channels (see the sketch after this list).
- `interpolation_order`: 0-5 corresponding to [skimage resize interpolation orders](https://scikit-image.org/docs/stable/api/skimage.transform.html#skimage.transform.warp) (default: 1 (Bi-linear))
- `normalisation_method`: Normalisation method applied during file loading. Can also be selected in the UI dropdown. Corresponds to an entry of the `NormalisationMethod` class (default: `NormalisationMethod.CONVERSION_ONLY`)
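
To make the `fits_combination` mapping concrete — a sketch whose extension names and weights are illustrative, and whose nesting under `cfg.normalisation` is an assumption carried over from the earlier examples:

```python
import anomaly_match as am

cfg = am.get_default_cfg()  # assumed accessor

# Three extensions, mapped linearly onto the R, G, B channels.
cfg.normalisation.fits_extension = ["VIS", "NIR-J", "NIR-H"]
cfg.normalisation.fits_combination = {
    "R": [1.0, 0.0, 0.0],  # R <- VIS only
    "G": [0.0, 1.0, 0.0],  # G <- NIR-J only
    "B": [0.0, 0.5, 0.5],  # B <- equal mix of NIR-J and NIR-H
}
```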

Expand Down