From eaf8ff9b46614854f5e2d2d84a5959afbcb2cf51 Mon Sep 17 00:00:00 2001
From: Andrii Ryzhkov
Date: Sun, 12 Apr 2026 10:02:17 +0200
Subject: [PATCH] Add model_card to config.json for all models

---
 darktable_ai/config.py                        |  3 +++
 darktable_ai/convert.py                       |  5 ++++-
 models/denoise-nafnet/model.yaml              | 11 +++++++++++
 models/denoise-nind/model.yaml                | 11 +++++++++++
 models/embed-openclip-vitb32/model.yaml       | 11 +++++++++++
 models/mask-object-sam21-base-plus/model.yaml | 11 +++++++++++
 models/mask-object-sam21-small/model.yaml     | 11 +++++++++++
 models/mask-object-sam21-tiny/model.yaml      | 11 +++++++++++
 models/mask-object-segnext-b2hq/model.yaml    | 11 +++++++++++
 models/upscale-bsrgan/model.yaml              | 11 +++++++++++
 10 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/darktable_ai/config.py b/darktable_ai/config.py
index 836bc39..e659957 100644
--- a/darktable_ai/config.py
+++ b/darktable_ai/config.py
@@ -44,6 +44,8 @@ class ModelConfig:
     dep_group: str = "core"
     skip: bool = False
 
+    model_card: dict[str, str] = field(default_factory=dict)
+
     repo: RepoConfig | None = None
     checkpoints: list[Checkpoint] = field(default_factory=list)
     convert: list[ConvertStep] = field(default_factory=list)
@@ -115,6 +117,7 @@ def load_model_config(model_dir: Path, root_dir: Path) -> ModelConfig:
         arch=data.get("arch", "generic"),
         tiling=data.get("tiling", False),
         dep_group=data.get("dep_group", "core"),
+        model_card=data.get("model_card", {}),
         skip=skip,
         repo=repo,
         checkpoints=checkpoints,
diff --git a/darktable_ai/convert.py b/darktable_ai/convert.py
index 2ea927c..8966382 100644
--- a/darktable_ai/convert.py
+++ b/darktable_ai/convert.py
@@ -46,7 +46,10 @@ def generate_config_json(config: ModelConfig) -> None:
         "tiling": config.tiling,
     }
 
-    config_file.write_text(json.dumps(data, indent=4) + "\n")
+    if config.model_card:
+        data["model_card"] = config.model_card
+
+    config_file.write_text(json.dumps(data, indent=4, ensure_ascii=False) + "\n")
 
     print(f" Generated: {config_file}")
 
diff --git a/models/denoise-nafnet/model.yaml b/models/denoise-nafnet/model.yaml
index 97184a5..196c6bd 100644
--- a/models/denoise-nafnet/model.yaml
+++ b/models/denoise-nafnet/model.yaml
@@ -7,6 +7,17 @@ tiling: true
 type: single
 dep_group: nafnet
 
+model_card:
+  long_description: "NAFNet (Nonlinear Activation Free Network) lightweight denoiser trained on the SIDD smartphone denoising dataset"
+  scope: "single-image denoising"
+  author: "Megvii Research"
+  source: "https://github.com/megvii-research/NAFNet"
+  paper: "https://arxiv.org/abs/2204.04676"
+  license: "MIT"
+  training_data: "SIDD – 30K real smartphone noisy/clean pairs captured by authors (5 devices)"
+  training_data_license: "MIT"
+  notes: "all components publicly available under permissive licenses"
+
 repo:
   submodule: vendor/NAFNet
   setup: "python setup.py develop --no_cuda_ext"
diff --git a/models/denoise-nind/model.yaml b/models/denoise-nind/model.yaml
index b6125be..393366f 100644
--- a/models/denoise-nind/model.yaml
+++ b/models/denoise-nind/model.yaml
@@ -14,6 +14,17 @@ checkpoints:
   - url: "https://github.com/trougnouf/nind-denoise/raw/master/models/nind_denoise/2019-08-03T16:14_nn_train.py_--g_network_UNet_--weight_SSIM_1_--batch_size_65_--test_reserve_ursulines-red_stefantiek_ursulines-building_MuseeL-Bobo_CourtineDeVillersDebris_MuseeL-Bobo-C500D_--train_data_datasets-train-NIND_128_96_--g_model_path_models-20/generator_280.pt"
     path: "temp/denoise-nind/generator_280.pt"
 
+model_card:
+  long_description: "Image denoiser trained on the Natural Image Noise Dataset (NIND) from Wikimedia Commons"
Wikimedia Commons" + scope: "single-image denoising" + author: "Benoit Brummer (Catholic University of Louvain)" + source: "https://github.com/trougnouf/nind-denoise" + paper: "https://arxiv.org/abs/1906.00270" + license: "GPL-3.0" + training_data: "real-world noise/clean pairs photographed by authors, published on Wikimedia Commons" + training_data_license: "CC BY 4.0 / CC0 (per-image, Wikimedia Commons)" + notes: "all components publicly available under open licenses" + convert: - script: convert.py args: diff --git a/models/embed-openclip-vitb32/model.yaml b/models/embed-openclip-vitb32/model.yaml index e13ec78..d580d02 100644 --- a/models/embed-openclip-vitb32/model.yaml +++ b/models/embed-openclip-vitb32/model.yaml @@ -6,6 +6,17 @@ version: "1.0" type: single dep_group: openclip +model_card: + long_description: "Vision Transformer (ViT-B/32) image encoder from the OpenCLIP project; produces 512-dimensional embeddings used for auto-tagging and image similarity search" + scope: "image feature extraction for tagging and similarity" + author: "LAION (Ilharco, Wortsman, Carlini et al.)" + source: "https://github.com/mlfoundations/open_clip" + paper: "https://arxiv.org/abs/2212.07143" + license: "MIT" + training_data: "LAION-2B: 2B image-text pairs from Common Crawl, filtered using CLIP for quality" + training_data_license: "CC-BY-4.0 metadata; images are web-crawled with mixed licenses" + notes: "training images are web-crawled; individual image licenses are not verified; narrow feature extractor, does not generate or modify images" + convert: - script: convert.py args: diff --git a/models/mask-object-sam21-base-plus/model.yaml b/models/mask-object-sam21-base-plus/model.yaml index beeafc3..bbf04b7 100644 --- a/models/mask-object-sam21-base-plus/model.yaml +++ b/models/mask-object-sam21-base-plus/model.yaml @@ -7,6 +7,17 @@ arch: sam2 type: split dep_group: sam21 +model_card: + long_description: "Segment Anything Model 2.1 with Hiera Base Plus encoder for interactive object segmentation" + scope: "interactive object segmentation" + author: "Meta (Facebook Research)" + source: "https://github.com/facebookresearch/sam2" + paper: "https://arxiv.org/abs/2408.00714" + license: "Apache-2.0" + training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)" + training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license" + notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta" + repo: submodule: vendor/sam2 diff --git a/models/mask-object-sam21-small/model.yaml b/models/mask-object-sam21-small/model.yaml index 717ab72..73ce510 100644 --- a/models/mask-object-sam21-small/model.yaml +++ b/models/mask-object-sam21-small/model.yaml @@ -7,6 +7,17 @@ arch: sam2 type: split dep_group: sam21 +model_card: + long_description: "Segment Anything Model 2.1 with Hiera Small encoder for interactive object segmentation" + scope: "interactive object segmentation" + author: "Meta (Facebook Research)" + source: "https://github.com/facebookresearch/sam2" + paper: "https://arxiv.org/abs/2408.00714" + license: "Apache-2.0" + training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)" + training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license" + notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta" + repo: submodule: vendor/sam2 diff --git a/models/mask-object-sam21-tiny/model.yaml b/models/mask-object-sam21-tiny/model.yaml index b6ebff3..f980dbb 100644 --- 
--- a/models/mask-object-sam21-tiny/model.yaml
+++ b/models/mask-object-sam21-tiny/model.yaml
@@ -7,6 +7,17 @@ arch: sam2
 type: split
 dep_group: sam21
 
+model_card:
+  long_description: "Segment Anything Model 2.1 with Hiera Tiny encoder for interactive object segmentation"
+  scope: "interactive object segmentation"
+  author: "Meta (Facebook Research)"
+  source: "https://github.com/facebookresearch/sam2"
+  paper: "https://arxiv.org/abs/2408.00714"
+  license: "Apache-2.0"
+  training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)"
+  training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license"
+  notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta"
+
 repo:
   submodule: vendor/sam2
 
diff --git a/models/mask-object-segnext-b2hq/model.yaml b/models/mask-object-segnext-b2hq/model.yaml
index 459ac43..29d3648 100644
--- a/models/mask-object-segnext-b2hq/model.yaml
+++ b/models/mask-object-segnext-b2hq/model.yaml
@@ -7,6 +7,17 @@ arch: segnext
 type: split
 dep_group: segnext
 
+model_card:
+  long_description: "SegNext with ViT-B backbone and SAx2 cross-attention, fine-tuned on HQSeg-44K for high-quality mask boundaries in semantic segmentation"
+  scope: "semantic object segmentation"
+  author: "UNC Chapel Hill / SysCV"
+  source: "https://github.com/uncbiag/SegNext"
+  paper: "https://arxiv.org/abs/2312.01171"
+  license: "MIT"
+  training_data: "COCO (118K images) + LVIS (100K images) + HQSeg-44K (44K images, fine-tune)"
+  training_data_license: "COCO: CC BY 4.0; LVIS: CC BY 4.0; HQSeg-44K: mixed (see datasets)"
+  notes: "HQSeg-44K aggregates multiple datasets with varying licenses; individual dataset terms apply"
+
 repo:
   submodule: vendor/SegNext
 
diff --git a/models/upscale-bsrgan/model.yaml b/models/upscale-bsrgan/model.yaml
index cbe6a8a..f8c3034 100644
--- a/models/upscale-bsrgan/model.yaml
+++ b/models/upscale-bsrgan/model.yaml
@@ -7,6 +7,17 @@ tiling: true
 type: multi
 dep_group: bsrgan
 
+model_card:
+  long_description: "BSRGAN blind image super-resolution using a practical degradation model; includes both 2x and 4x upscaling variants with RRDBNet architecture"
+  scope: "image upscaling (2x and 4x blind super-resolution)"
+  author: "Kai Zhang (ETH Zurich)"
+  source: "https://github.com/cszn/BSRGAN"
+  paper: "https://arxiv.org/abs/2103.14006"
+  license: "Apache-2.0"
+  training_data: "DIV2K, Flickr2K, WED, OST – standard SR research datasets with synthetic practical degradation"
+  training_data_license: "DIV2K: CC0; Flickr2K, WED, OST: no explicit open-source licenses"
+  notes: "training datasets Flickr2K/WED/OST do not have explicit open-source licenses"
+
 checkpoints:
   - url: "https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth"
     path: "temp/upscale-bsrgan/BSRGAN.pth"
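
Review note, not part of the patch: a minimal sketch of the round trip this change enables. It assumes PyYAML is what reads model.yaml (the loader feeding load_model_config() is not shown in this diff), and both file paths plus any payload key other than "tiling" are hypothetical; only the model_card handling mirrors generate_config_json() above.

    import json
    from pathlib import Path

    import yaml  # assumption: model.yaml files are parsed with PyYAML

    # Read one model.yaml; model_card is optional and defaults to {},
    # matching data.get("model_card", {}) in load_model_config().
    data = yaml.safe_load(Path("models/denoise-nafnet/model.yaml").read_text())
    model_card: dict[str, str] = data.get("model_card", {})

    # Hypothetical payload: only "tiling" is visible in the convert.py hunk.
    payload = {"tiling": data.get("tiling", False)}

    # Mirror generate_config_json(): emit model_card only when non-empty, so
    # config.json stays byte-identical for models that do not define a card.
    if model_card:
        payload["model_card"] = model_card

    out = Path("temp/denoise-nafnet/config.json")  # hypothetical output path
    out.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps characters such as the en dash in
    # "SIDD – 30K real smartphone noisy/clean pairs" readable in the output
    # instead of escaping them to \u2013.
    out.write_text(json.dumps(payload, indent=4, ensure_ascii=False) + "\n")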