From eaf8ff9b46614854f5e2d2d84a5959afbcb2cf51 Mon Sep 17 00:00:00 2001
From: Andrii Ryzhkov
Date: Sun, 12 Apr 2026 10:02:17 +0200
Subject: [PATCH] Add model_card to config.json for all models

---
 darktable_ai/config.py                        |  3 +++
 darktable_ai/convert.py                       |  5 ++++-
 models/denoise-nafnet/model.yaml              | 11 +++++++++++
 models/denoise-nind/model.yaml                | 11 +++++++++++
 models/embed-openclip-vitb32/model.yaml       | 11 +++++++++++
 models/mask-object-sam21-base-plus/model.yaml | 11 +++++++++++
 models/mask-object-sam21-small/model.yaml     | 11 +++++++++++
 models/mask-object-sam21-tiny/model.yaml      | 11 +++++++++++
 models/mask-object-segnext-b2hq/model.yaml    | 11 +++++++++++
 models/upscale-bsrgan/model.yaml              | 11 +++++++++++
 10 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/darktable_ai/config.py b/darktable_ai/config.py
index 836bc39..e659957 100644
--- a/darktable_ai/config.py
+++ b/darktable_ai/config.py
@@ -44,6 +44,8 @@ class ModelConfig:
     dep_group: str = "core"
     skip: bool = False
 
+    model_card: dict[str, str] = field(default_factory=dict)
+
     repo: RepoConfig | None = None
     checkpoints: list[Checkpoint] = field(default_factory=list)
     convert: list[ConvertStep] = field(default_factory=list)
@@ -115,6 +117,7 @@ def load_model_config(model_dir: Path, root_dir: Path) -> ModelConfig:
         arch=data.get("arch", "generic"),
         tiling=data.get("tiling", False),
         dep_group=data.get("dep_group", "core"),
+        model_card=data.get("model_card", {}),
         skip=skip,
         repo=repo,
         checkpoints=checkpoints,
diff --git a/darktable_ai/convert.py b/darktable_ai/convert.py
index 2ea927c..8966382 100644
--- a/darktable_ai/convert.py
+++ b/darktable_ai/convert.py
@@ -46,7 +46,10 @@ def generate_config_json(config: ModelConfig) -> None:
         "tiling": config.tiling,
     }
 
-    config_file.write_text(json.dumps(data, indent=4) + "\n")
+    if config.model_card:
+        data["model_card"] = config.model_card
+
+    config_file.write_text(json.dumps(data, indent=4, ensure_ascii=False) + "\n")
 
     print(f" Generated: {config_file}")
 
diff --git a/models/denoise-nafnet/model.yaml b/models/denoise-nafnet/model.yaml
index 97184a5..196c6bd 100644
--- a/models/denoise-nafnet/model.yaml
+++ b/models/denoise-nafnet/model.yaml
@@ -7,6 +7,17 @@ tiling: true
 type: single
 dep_group: nafnet
 
+model_card:
+  long_description: "NAFNet (Nonlinear Activation Free Network) lightweight denoiser trained on the SIDD smartphone denoising dataset"
+  scope: "single-image denoising"
+  author: "Megvii Research"
+  source: "https://github.com/megvii-research/NAFNet"
+  paper: "https://arxiv.org/abs/2204.04676"
+  license: "MIT"
+  training_data: "SIDD – 30K real smartphone noisy/clean pairs captured by authors (5 devices)"
+  training_data_license: "MIT"
+  notes: "all components publicly available under permissive licenses"
+
 repo:
   submodule: vendor/NAFNet
   setup: "python setup.py develop --no_cuda_ext"
diff --git a/models/denoise-nind/model.yaml b/models/denoise-nind/model.yaml
index b6125be..393366f 100644
--- a/models/denoise-nind/model.yaml
+++ b/models/denoise-nind/model.yaml
@@ -14,6 +14,17 @@ checkpoints:
   - url: "https://github.com/trougnouf/nind-denoise/raw/master/models/nind_denoise/2019-08-03T16:14_nn_train.py_--g_network_UNet_--weight_SSIM_1_--batch_size_65_--test_reserve_ursulines-red_stefantiek_ursulines-building_MuseeL-Bobo_CourtineDeVillersDebris_MuseeL-Bobo-C500D_--train_data_datasets-train-NIND_128_96_--g_model_path_models-20/generator_280.pt"
     path: "temp/denoise-nind/generator_280.pt"
 
+model_card:
+  long_description: "Image denoiser trained on the Natural Image Noise Dataset (NIND) from Wikimedia Commons"
Wikimedia Commons" + scope: "single-image denoising" + author: "Benoit Brummer (Catholic University of Louvain)" + source: "https://github.com/trougnouf/nind-denoise" + paper: "https://arxiv.org/abs/1906.00270" + license: "GPL-3.0" + training_data: "real-world noise/clean pairs photographed by authors, published on Wikimedia Commons" + training_data_license: "CC BY 4.0 / CC0 (per-image, Wikimedia Commons)" + notes: "all components publicly available under open licenses" + convert: - script: convert.py args: diff --git a/models/embed-openclip-vitb32/model.yaml b/models/embed-openclip-vitb32/model.yaml index e13ec78..d580d02 100644 --- a/models/embed-openclip-vitb32/model.yaml +++ b/models/embed-openclip-vitb32/model.yaml @@ -6,6 +6,17 @@ version: "1.0" type: single dep_group: openclip +model_card: + long_description: "Vision Transformer (ViT-B/32) image encoder from the OpenCLIP project; produces 512-dimensional embeddings used for auto-tagging and image similarity search" + scope: "image feature extraction for tagging and similarity" + author: "LAION (Ilharco, Wortsman, Carlini et al.)" + source: "https://github.com/mlfoundations/open_clip" + paper: "https://arxiv.org/abs/2212.07143" + license: "MIT" + training_data: "LAION-2B: 2B image-text pairs from Common Crawl, filtered using CLIP for quality" + training_data_license: "CC-BY-4.0 metadata; images are web-crawled with mixed licenses" + notes: "training images are web-crawled; individual image licenses are not verified; narrow feature extractor, does not generate or modify images" + convert: - script: convert.py args: diff --git a/models/mask-object-sam21-base-plus/model.yaml b/models/mask-object-sam21-base-plus/model.yaml index beeafc3..bbf04b7 100644 --- a/models/mask-object-sam21-base-plus/model.yaml +++ b/models/mask-object-sam21-base-plus/model.yaml @@ -7,6 +7,17 @@ arch: sam2 type: split dep_group: sam21 +model_card: + long_description: "Segment Anything Model 2.1 with Hiera Base Plus encoder for interactive object segmentation" + scope: "interactive object segmentation" + author: "Meta (Facebook Research)" + source: "https://github.com/facebookresearch/sam2" + paper: "https://arxiv.org/abs/2408.00714" + license: "Apache-2.0" + training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)" + training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license" + notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta" + repo: submodule: vendor/sam2 diff --git a/models/mask-object-sam21-small/model.yaml b/models/mask-object-sam21-small/model.yaml index 717ab72..73ce510 100644 --- a/models/mask-object-sam21-small/model.yaml +++ b/models/mask-object-sam21-small/model.yaml @@ -7,6 +7,17 @@ arch: sam2 type: split dep_group: sam21 +model_card: + long_description: "Segment Anything Model 2.1 with Hiera Small encoder for interactive object segmentation" + scope: "interactive object segmentation" + author: "Meta (Facebook Research)" + source: "https://github.com/facebookresearch/sam2" + paper: "https://arxiv.org/abs/2408.00714" + license: "Apache-2.0" + training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)" + training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license" + notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta" + repo: submodule: vendor/sam2 diff --git a/models/mask-object-sam21-tiny/model.yaml b/models/mask-object-sam21-tiny/model.yaml index b6ebff3..f980dbb 100644 --- 
--- a/models/mask-object-sam21-tiny/model.yaml
+++ b/models/mask-object-sam21-tiny/model.yaml
@@ -7,6 +7,17 @@ arch: sam2
 type: split
 dep_group: sam21
 
+model_card:
+  long_description: "Segment Anything Model 2.1 with Hiera Tiny encoder for interactive object segmentation"
+  scope: "interactive object segmentation"
+  author: "Meta (Facebook Research)"
+  source: "https://github.com/facebookresearch/sam2"
+  paper: "https://arxiv.org/abs/2408.00714"
+  license: "Apache-2.0"
+  training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)"
+  training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license"
+  notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta"
+
 repo:
   submodule: vendor/sam2
 
diff --git a/models/mask-object-segnext-b2hq/model.yaml b/models/mask-object-segnext-b2hq/model.yaml
index 459ac43..29d3648 100644
--- a/models/mask-object-segnext-b2hq/model.yaml
+++ b/models/mask-object-segnext-b2hq/model.yaml
@@ -7,6 +7,17 @@ arch: segnext
 type: split
 dep_group: segnext
 
+model_card:
+  long_description: "SegNext with ViT-B backbone and SAx2 cross-attention, fine-tuned on HQSeg-44K for high-quality mask boundaries in semantic segmentation"
+  scope: "semantic object segmentation"
+  author: "UNC Chapel Hill / SysCV"
+  source: "https://github.com/uncbiag/SegNext"
+  paper: "https://arxiv.org/abs/2312.01171"
+  license: "MIT"
+  training_data: "COCO (118K images) + LVIS (100K images) + HQSeg-44K (44K images, fine-tune)"
+  training_data_license: "COCO: CC BY 4.0; LVIS: CC BY 4.0; HQSeg-44K: mixed (see datasets)"
+  notes: "HQSeg-44K aggregates multiple datasets with varying licenses; individual dataset terms apply"
+
 repo:
   submodule: vendor/SegNext
 
diff --git a/models/upscale-bsrgan/model.yaml b/models/upscale-bsrgan/model.yaml
index cbe6a8a..f8c3034 100644
--- a/models/upscale-bsrgan/model.yaml
+++ b/models/upscale-bsrgan/model.yaml
@@ -7,6 +7,17 @@ tiling: true
 type: multi
 dep_group: bsrgan
 
+model_card:
+  long_description: "BSRGAN blind image super-resolution using a practical degradation model; includes both 2x and 4x upscaling variants with RRDBNet architecture"
+  scope: "image upscaling (2x and 4x blind super-resolution)"
+  author: "Kai Zhang (ETH Zurich)"
+  source: "https://github.com/cszn/BSRGAN"
+  paper: "https://arxiv.org/abs/2103.14006"
+  license: "Apache-2.0"
+  training_data: "DIV2K, Flickr2K, WED, OST – standard SR research datasets with synthetic practical degradation"
+  training_data_license: "DIV2K: CC0; Flickr2K, WED, OST: no explicit open-source licenses"
+  notes: "training datasets Flickr2K/WED/OST do not have explicit open-source licenses"
+
 checkpoints:
   - url: "https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth"
     path: "temp/upscale-bsrgan/BSRGAN.pth"
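
Review note, not part of the patch: a minimal sketch of the round trip this change enables. It assumes PyYAML is what reads model.yaml (the loader feeding load_model_config() is not shown in this diff), and both file paths plus any payload key other than "tiling" are hypothetical; only the model_card handling mirrors generate_config_json() above.

    import json
    from pathlib import Path

    import yaml  # assumption: model.yaml files are parsed with PyYAML

    # Read one model.yaml; model_card is optional and defaults to {},
    # matching data.get("model_card", {}) in load_model_config().
    data = yaml.safe_load(Path("models/denoise-nafnet/model.yaml").read_text())
    model_card: dict[str, str] = data.get("model_card", {})

    # Hypothetical payload: only "tiling" is visible in the convert.py hunk.
    payload = {"tiling": data.get("tiling", False)}

    # Mirror generate_config_json(): emit model_card only when non-empty, so
    # config.json stays byte-identical for models that do not define a card.
    if model_card:
        payload["model_card"] = model_card

    out = Path("temp/denoise-nafnet/config.json")  # hypothetical output path
    out.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps characters such as the en dash in
    # "SIDD – 30K real smartphone noisy/clean pairs" readable in the output
    # instead of escaping them to \u2013.
    out.write_text(json.dumps(payload, indent=4, ensure_ascii=False) + "\n")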