3 changes: 3 additions & 0 deletions darktable_ai/config.py
@@ -44,6 +44,8 @@ class ModelConfig:
dep_group: str = "core"
skip: bool = False

model_card: dict[str, str] = field(default_factory=dict)

repo: RepoConfig | None = None
checkpoints: list[Checkpoint] = field(default_factory=list)
convert: list[ConvertStep] = field(default_factory=list)
@@ -115,6 +117,7 @@ def load_model_config(model_dir: Path, root_dir: Path) -> ModelConfig:
arch=data.get("arch", "generic"),
tiling=data.get("tiling", False),
dep_group=data.get("dep_group", "core"),
model_card=data.get("model_card", {}),
skip=skip,
repo=repo,
checkpoints=checkpoints,
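A minimal sketch of what this change means for existing configs, assuming nothing beyond the hunk above; the trimmed ModelConfig and the example model names are illustrative only, not the real class:

# Illustrative sketch (not part of the diff): field(default_factory=dict) gives
# every ModelConfig its own empty dict when model.yaml has no model_card block,
# so existing configs keep working unchanged and instances never share state.
from dataclasses import dataclass, field

@dataclass
class ModelConfig:          # trimmed to the fields relevant here
    name: str
    model_card: dict[str, str] = field(default_factory=dict)

a = ModelConfig(name="denoise-nafnet")
b = ModelConfig(name="upscale-bsrgan")
a.model_card["license"] = "MIT"
print(b.model_card)         # {} -- the default dict is per-instance, not shared
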
5 changes: 4 additions & 1 deletion darktable_ai/convert.py
@@ -46,7 +46,10 @@ def generate_config_json(config: ModelConfig) -> None:
"tiling": config.tiling,
}

config_file.write_text(json.dumps(data, indent=4) + "\n")
if config.model_card:
data["model_card"] = config.model_card

config_file.write_text(json.dumps(data, indent=4, ensure_ascii=False) + "\n")
print(f" Generated: {config_file}")


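Two behavioral notes on this hunk: model_card is only written when the mapping is non-empty, and ensure_ascii=False keeps non-ASCII characters in card text (such as the en-dash in the SIDD description below) readable in the generated JSON. A small, self-contained sketch of the ensure_ascii effect; the card variable is illustrative, not the real data dict:

# Illustrative only: json.dumps escapes non-ASCII by default; ensure_ascii=False
# writes the characters literally, which is what this PR enables.
import json

card = {"training_data": "SIDD – 30K real smartphone noisy/clean pairs"}
print(json.dumps(card, indent=4))                      # en-dash escaped as \u2013
print(json.dumps(card, indent=4, ensure_ascii=False))  # en-dash written literally
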
11 changes: 11 additions & 0 deletions models/denoise-nafnet/model.yaml
@@ -7,6 +7,17 @@ tiling: true
type: single
dep_group: nafnet

model_card:
long_description: "NAFNet (Nonlinear Activation Free Network) lightweight denoiser trained on the SIDD smartphone denoising dataset"
scope: "single-image denoising"
author: "Megvii Research"
source: "https://github.com/megvii-research/NAFNet"
paper: "https://arxiv.org/abs/2204.04676"
license: "MIT"
training_data: "SIDD – 30K real smartphone noisy/clean pairs captured by authors (5 devices)"
training_data_license: "MIT"
notes: "all components publicly available under permissive licenses"

repo:
submodule: vendor/NAFNet
setup: "python setup.py develop --no_cuda_ext"
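For reference, a rough sketch of how a block like this reaches ModelConfig.model_card through load_model_config; yaml.safe_load (assuming PyYAML) and the trimmed document are illustrative assumptions, not the loader's actual code:

# Rough sketch: the model_card mapping parses to a plain dict and is picked up
# via data.get("model_card", {}), mirroring the config.py hunk above.
import yaml  # PyYAML assumed

doc = yaml.safe_load("""
model_card:
  license: "MIT"
  scope: "single-image denoising"
""")
card = doc.get("model_card", {})
print(card)  # {'license': 'MIT', 'scope': 'single-image denoising'}
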
11 changes: 11 additions & 0 deletions models/denoise-nind/model.yaml
@@ -14,6 +14,17 @@ checkpoints:
- url: "https://github.com/trougnouf/nind-denoise/raw/master/models/nind_denoise/2019-08-03T16:14_nn_train.py_--g_network_UNet_--weight_SSIM_1_--batch_size_65_--test_reserve_ursulines-red_stefantiek_ursulines-building_MuseeL-Bobo_CourtineDeVillersDebris_MuseeL-Bobo-C500D_--train_data_datasets-train-NIND_128_96_--g_model_path_models-20/generator_280.pt"
path: "temp/denoise-nind/generator_280.pt"

model_card:
long_description: "Image denoiser trained on the Natural Image Noise Dataset (NIND) from Wikimedia Commons"
scope: "single-image denoising"
author: "Benoit Brummer (Catholic University of Louvain)"
source: "https://github.com/trougnouf/nind-denoise"
paper: "https://arxiv.org/abs/1906.00270"
license: "GPL-3.0"
training_data: "real-world noise/clean pairs photographed by authors, published on Wikimedia Commons"
training_data_license: "CC BY 4.0 / CC0 (per-image, Wikimedia Commons)"
notes: "all components publicly available under open licenses"

convert:
- script: convert.py
args:
11 changes: 11 additions & 0 deletions models/embed-openclip-vitb32/model.yaml
@@ -6,6 +6,17 @@ version: "1.0"
type: single
dep_group: openclip

model_card:
long_description: "Vision Transformer (ViT-B/32) image encoder from the OpenCLIP project; produces 512-dimensional embeddings used for auto-tagging and image similarity search"
scope: "image feature extraction for tagging and similarity"
author: "LAION (Ilharco, Wortsman, Carlini et al.)"
source: "https://github.com/mlfoundations/open_clip"
paper: "https://arxiv.org/abs/2212.07143"
license: "MIT"
training_data: "LAION-2B: 2B image-text pairs from Common Crawl, filtered using CLIP for quality"
training_data_license: "CC-BY-4.0 metadata; images are web-crawled with mixed licenses"
notes: "training images are web-crawled; individual image licenses are not verified; narrow feature extractor, does not generate or modify images"

convert:
- script: convert.py
args:
11 changes: 11 additions & 0 deletions models/mask-object-sam21-base-plus/model.yaml
@@ -7,6 +7,17 @@ arch: sam2
type: split
dep_group: sam21

model_card:
long_description: "Segment Anything Model 2.1 with Hiera Base Plus encoder for interactive object segmentation"
scope: "interactive object segmentation"
author: "Meta (Facebook Research)"
source: "https://github.com/facebookresearch/sam2"
paper: "https://arxiv.org/abs/2408.00714"
license: "Apache-2.0"
training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)"
training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license"
notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta"

repo:
submodule: vendor/sam2

11 changes: 11 additions & 0 deletions models/mask-object-sam21-small/model.yaml
@@ -7,6 +7,17 @@ arch: sam2
type: split
dep_group: sam21

model_card:
long_description: "Segment Anything Model 2.1 with Hiera Small encoder for interactive object segmentation"
scope: "interactive object segmentation"
author: "Meta (Facebook Research)"
source: "https://github.com/facebookresearch/sam2"
paper: "https://arxiv.org/abs/2408.00714"
license: "Apache-2.0"
training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)"
training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license"
notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta"

repo:
submodule: vendor/sam2

11 changes: 11 additions & 0 deletions models/mask-object-sam21-tiny/model.yaml
@@ -7,6 +7,17 @@ arch: sam2
type: split
dep_group: sam21

model_card:
long_description: "Segment Anything Model 2.1 with Hiera Tiny encoder for interactive object segmentation"
scope: "interactive object segmentation"
author: "Meta (Facebook Research)"
source: "https://github.com/facebookresearch/sam2"
paper: "https://arxiv.org/abs/2408.00714"
license: "Apache-2.0"
training_data: "SA-V (50.9K videos) + SA-1B (11M stock images)"
training_data_license: "SA-V: CC BY 4.0; SA-1B: custom Meta research-only license"
notes: "model weights are Apache-2.0; SA-1B training data has a separate research-only license from Meta"

repo:
submodule: vendor/sam2

11 changes: 11 additions & 0 deletions models/mask-object-segnext-b2hq/model.yaml
@@ -7,6 +7,17 @@ arch: segnext
type: split
dep_group: segnext

model_card:
long_description: "SegNext with ViT-B backbone and SAx2 cross-attention, fine-tuned on HQSeg-44K for high-quality mask boundaries in semantic segmentation"
scope: "semantic object segmentation"
author: "UNC Chapel Hill / SysCV"
source: "https://github.com/uncbiag/SegNext"
paper: "https://arxiv.org/abs/2312.01171"
license: "MIT"
training_data: "COCO (118K images) + LVIS (100K images) + HQSeg-44K (44K images, fine-tune)"
training_data_license: "COCO: CC BY 4.0; LVIS: CC BY 4.0; HQSeg-44K: mixed (see datasets)"
notes: "HQSeg-44K aggregates multiple datasets with varying licenses; individual dataset terms apply"

repo:
submodule: vendor/SegNext

11 changes: 11 additions & 0 deletions models/upscale-bsrgan/model.yaml
@@ -7,6 +7,17 @@ tiling: true
type: multi
dep_group: bsrgan

model_card:
long_description: "BSRGAN blind image super-resolution using practical degradation model; includes both 2x and 4x upscaling variants with RRDBNet architecture"
scope: "image upscaling (2x and 4x blind super-resolution)"
author: "Kai Zhang (ETH Zurich)"
source: "https://github.com/cszn/BSRGAN"
paper: "https://arxiv.org/abs/2103.14006"
license: "Apache-2.0"
training_data: "DIV2K, Flickr2K, WED, OST – standard SR research datasets with synthetic practical degradation"
training_data_license: "DIV2K: CC0; Flickr2K, WED, OST: no explicit open-source licenses"
notes: "training datasets Flickr2K/WED/OST do not have explicit open-source licenses"

checkpoints:
- url: "https://github.com/cszn/KAIR/releases/download/v1.0/BSRGAN.pth"
path: "temp/upscale-bsrgan/BSRGAN.pth"