MatchCake · JeremieGince · Nov 3, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -30,7 +30,7 @@ jobs:
 
     - name: Build the sphinx docs
       run: |
-        uv run sphinx-apidoc -f -o ./sphinx/source ./src/bolightningpipeline
+        uv run sphinx-apidoc -f -o ./sphinx/source ./src/matchcake_opt
         uv run make -C sphinx clean
         uv run python sphinx/clean_html_files.py
         uv run make -C sphinx html

diff --git a/.gitignore b/.gitignore
@@ -305,3 +305,4 @@ coverage.json
 /notebooks/Digits2D
 /notebooks/Cifar10
 /notebooks/data
+/.tmp
diff --git a/README.md b/README.md
@@ -129,6 +129,7 @@ For more detailed examples see
 
 
 ## License
+[Apache License 2.0](LICENSE)
 
 
 ## Acknowledgements

diff --git a/notebooks/automl_pipeline_tutorial.ipynb b/notebooks/automl_pipeline_tutorial.ipynb
@@ -37,7 +37,7 @@
     ")\n",
     "from torchvision.transforms import Resize\n",
     "\n",
-    "from matchcake_opt.datasets import *\n",
+    "from matchcake_opt.datamodules.datamodule import DataModule\n",
     "from matchcake_opt.modules.classification_model import ClassificationModel\n",
     "from matchcake_opt.tr_pipeline.automl_pipeline import AutoMLPipeline"
    ],
@@ -259,7 +259,7 @@
     "checkpoint_folder = Path(job_output_folder) / \"checkpoints\"\n",
     "pipeline_args = dict(\n",
     "    max_epochs=100,  # increase at least to 256\n",
-    "    max_time=\"00:00:02:00\",  # DD:HH:MM:SS, increase at least to \"00:01:00:00\"\n",
+    "    max_time=\"00:00:01:00\",  # DD:HH:MM:SS, increase at least to \"00:01:00:00\"\n",
     ")"
    ],
    "id": "d8db16a0825411",

diff --git a/notebooks/datasets_normalisation.ipynb b/notebooks/datasets_normalisation.ipynb
@@ -0,0 +1,259 @@
+{
+ "cells": [
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "# Normalization of Datasets",
+   "id": "8b10448261733a07"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## MEDMNIST: PathMNIST",
+   "id": "822c3957d720a63b"
+  },
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2025-11-03T14:40:00.436371Z",
+     "start_time": "2025-11-03T14:39:56.065478Z"
+    }
+   },
+   "source": [
+    "from medmnist import PathMNIST\n",
+    "from torchvision.transforms import v2\n",
+    "from pathlib import Path\n",
+    "import torch\n",
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "PRECISION = 5"
+   ],
+   "outputs": [],
+   "execution_count": 1
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-11-03T14:40:16.084198Z",
+     "start_time": "2025-11-03T14:40:00.440379Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "# Per-channel mean/std of the PathMNIST training split (float32 pixels via ToDtype(scale=True)).\n",
+    "root = Path(\"./data/\") / \"PathMNIST\"\n",
+    "root.mkdir(parents=True, exist_ok=True)\n",
+    "data = PathMNIST(\n",
+    "    root=root,\n",
+    "    split=\"train\",\n",
+    "    download=True,\n",
+    "    transform=v2.Compose(\n",
+    "        [\n",
+    "            v2.ToImage(),\n",
+    "            v2.ToDtype(torch.float32, scale=True),\n",
+    "        ]\n",
+    "    ),\n",
+    ")\n",
+    "# Stack to (C, H, W, N), reorder to (N, H, W, C), then flatten to (pixels, C).\n",
+    "imgs = torch.stack([d[0] for d in data], dim=-1).permute(3, 1, 2, 0).cpu().numpy()\n",
+    "imgs = imgs.reshape(-1, data.imgs.shape[-1])\n",
+    "print(f\"PathMNIST Dataset Shape: {data.imgs.shape}\")\n",
+    "# Accumulate in float64: a float32 running sum over ~7e7 pixels stalls at 2**24,\n",
+    "# which made the previously recorded means identical for every channel\n",
+    "# (16777216 / 70556864 = 0.23778). Re-run this cell to refresh the stored output.\n",
+    "print(f\"PathMNIST Dataset Means: {np.round(imgs.mean(0, dtype=np.float64), PRECISION)}\")\n",
+    "print(f\"PathMNIST Dataset Stds: {np.round(imgs.std(0, dtype=np.float64), PRECISION)}\")"
+   ],
+   "id": "56bb1e919827063c",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PathMNIST Dataset Shape: (89996, 28, 28, 3)\n",
+      "PathMNIST Dataset Means: [0.23778 0.23778 0.23778]\n",
+      "PathMNIST Dataset Stds: [0.35807 0.3089  0.35218]\n"
+     ]
+    }
+   ],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## MEDMNIST: RetinaMNIST",
+   "id": "b51bc4fe4aca42e4"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-11-03T14:40:16.493835Z",
+     "start_time": "2025-11-03T14:40:16.145319Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from medmnist import RetinaMNIST\n",
+    "\n",
+    "\n",
+    "# Per-channel mean/std of the RetinaMNIST training split (float32 pixels via ToDtype(scale=True)).\n",
+    "root = Path(\"./data/\") / \"RetinaMNIST\"\n",
+    "root.mkdir(parents=True, exist_ok=True)\n",
+    "data = RetinaMNIST(\n",
+    "    root=root,\n",
+    "    split=\"train\",\n",
+    "    download=True,\n",
+    "    transform=v2.Compose(\n",
+    "        [\n",
+    "            v2.ToImage(),\n",
+    "            v2.ToDtype(torch.float32, scale=True),\n",
+    "        ]\n",
+    "    ),\n",
+    ")\n",
+    "# Stack to (C, H, W, N), reorder to (N, H, W, C), then flatten to (pixels, C).\n",
+    "imgs = torch.stack([d[0] for d in data], dim=-1).permute(3, 1, 2, 0).cpu().numpy()\n",
+    "imgs = imgs.reshape(-1, data.imgs.shape[-1])\n",
+    "print(f\"RetinaMNIST Dataset Shape: {data.imgs.shape}\")\n",
+    "# Accumulate in float64 so the statistics do not depend on float32 rounding\n",
+    "# of the running sum (it grows with the number of pixels).\n",
+    "print(f\"RetinaMNIST Dataset Means: {np.round(imgs.mean(0, dtype=np.float64), PRECISION)}\")\n",
+    "print(f\"RetinaMNIST Dataset Stds: {np.round(imgs.std(0, dtype=np.float64), PRECISION)}\")"
+   ],
+   "id": "159a7610d26e86bb",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RetinaMNIST Dataset Shape: (1080, 28, 28, 3)\n",
+      "RetinaMNIST Dataset Means: [0.39862 0.24519 0.15615]\n",
+      "RetinaMNIST Dataset Stds: [0.29827 0.20057 0.15053]\n"
+     ]
+    }
+   ],
+   "execution_count": 3
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## CIFAR10",
+   "id": "2fbc0f268d1be290"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-11-03T14:40:26.853063Z",
+     "start_time": "2025-11-03T14:40:16.504986Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from torchvision.datasets import CIFAR10\n",
+    "\n",
+    "# Per-channel mean/std of the CIFAR10 training split (float32 pixels via ToDtype(scale=True)).\n",
+    "root = Path(\"./data/\") / \"CIFAR10\"\n",
+    "root.mkdir(parents=True, exist_ok=True)\n",
+    "data = CIFAR10(\n",
+    "    root=root,\n",
+    "    train=True,\n",
+    "    download=True,\n",
+    "    transform=v2.Compose(\n",
+    "        [\n",
+    "            v2.ToImage(),\n",
+    "            v2.ToDtype(torch.float32, scale=True),\n",
+    "        ]\n",
+    "    ),\n",
+    ")\n",
+    "# Stack to (C, H, W, N), reorder to (N, H, W, C), then flatten to (pixels, C).\n",
+    "imgs = torch.stack([d[0] for d in data], dim=-1).permute(3, 1, 2, 0).cpu().numpy()\n",
+    "print(f\"CIFAR10 Dataset Shape: {imgs.shape}\")\n",
+    "imgs = imgs.reshape(-1, imgs.shape[-1])\n",
+    "# Accumulate in float64: a float32 running sum over ~5e7 pixels stalls at 2**24,\n",
+    "# which made the previously recorded means identical for every channel\n",
+    "# (16777216 / 51200000 = 0.32768). Re-run this cell to refresh the stored output.\n",
+    "print(f\"CIFAR10 Dataset Means: {np.round(imgs.mean(0, dtype=np.float64), PRECISION)}\")\n",
+    "print(f\"CIFAR10 Dataset Stds: {np.round(imgs.std(0, dtype=np.float64), PRECISION)}\")"
+   ],
+   "id": "b8edde9c8da4faeb",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CIFAR10 Dataset Shape: (50000, 32, 32, 3)\n",
+      "CIFAR10 Dataset Means: [0.32768 0.32768 0.32768]\n",
+      "CIFAR10 Dataset Stds: [0.27755 0.2693  0.26812]\n"
+     ]
+    }
+   ],
+   "execution_count": 4
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "## MNIST",
+   "id": "7139efe270121e2b"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-11-03T14:40:34.124994Z",
+     "start_time": "2025-11-03T14:40:26.868597Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from torchvision.datasets import MNIST\n",
+    "\n",
+    "# Mean/std of the single-channel MNIST training split (float32 pixels via ToDtype(scale=True)).\n",
+    "root = Path(\"./data/\") / \"MNIST\"\n",
+    "root.mkdir(parents=True, exist_ok=True)\n",
+    "data = MNIST(\n",
+    "    root=root,\n",
+    "    train=True,\n",
+    "    download=True,\n",
+    "    transform=v2.Compose(\n",
+    "        [\n",
+    "            v2.ToImage(),\n",
+    "            v2.ToDtype(torch.float32, scale=True),\n",
+    "        ]\n",
+    "    ),\n",
+    ")\n",
+    "# Stack to (C, H, W, N), reorder to (N, H, W, C), then flatten to (pixels, C).\n",
+    "imgs = torch.stack([d[0] for d in data], dim=-1).permute(3, 1, 2, 0).cpu().numpy()\n",
+    "print(f\"MNIST Dataset Shape: {imgs.shape}\")\n",
+    "imgs = imgs.reshape(-1, imgs.shape[-1])\n",
+    "# Accumulate in float64 so the statistics do not depend on float32 rounding\n",
+    "# of the running sum (it grows with the number of pixels).\n",
+    "print(f\"MNIST Dataset Means: {np.round(imgs.mean(0, dtype=np.float64), PRECISION)}\")\n",
+    "print(f\"MNIST Dataset Stds: {np.round(imgs.std(0, dtype=np.float64), PRECISION)}\")"
+   ],
+   "id": "2313354e9a3c61f0",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MNIST Dataset Shape: (60000, 28, 28, 1)\n",
+      "MNIST Dataset Means: [0.13066]\n",
+      "MNIST Dataset Stds: [0.30811]\n"
+     ]
+    }
+   ],
+   "execution_count": 5
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "----------------------",
+   "id": "53856f3e16fff049"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/ligthning_pipeline_tutorial.ipynb b/notebooks/ligthning_pipeline_tutorial.ipynb
@@ -34,7 +34,7 @@
     ")\n",
     "from torchvision.transforms import Resize\n",
     "\n",
-    "from matchcake_opt.datasets import *\n",
+    "from matchcake_opt.datamodules.datamodule import DataModule\n",
     "from matchcake_opt.modules.classification_model import ClassificationModel\n",
     "from matchcake_opt.tr_pipeline.lightning_pipeline import LightningPipeline"
    ],
@@ -275,7 +275,7 @@
     "    datamodule=datamodule,\n",
     "    checkpoint_folder=checkpoint_folder,\n",
     "    max_epochs=10,\n",
-    "    max_time=\"00:00:03:00\",  # DD:HH:MM:SS\n",
+    "    max_time=\"00:00:01:00\",  # DD:HH:MM:SS\n",
     "    overwrite_fit=True,\n",
     "    verbose=True,\n",
     "    **model_args,\n",

diff --git a/notebooks/nif_deep_learning.ipynb b/notebooks/nif_deep_learning.ipynb
@@ -29,7 +29,7 @@
     "from matchcake import NonInteractingFermionicDevice\n",
     "from matchcake.operations import SptmAngleEmbedding, SptmfRxRx, SptmFHH\n",
     "\n",
-    "from matchcake_opt.datasets import *\n",
+    "from matchcake_opt.datamodules.datamodule import DataModule\n",
     "from matchcake_opt.modules.classification_model import ClassificationModel\n",
     "from matchcake_opt.tr_pipeline.automl_pipeline import AutoMLPipeline\n",
     "from matchcake_opt.tr_pipeline.lightning_pipeline import LightningPipeline"
@@ -157,7 +157,7 @@
     "checkpoint_folder = Path(job_output_folder) / \"checkpoints\"\n",
     "pipeline_args = dict(\n",
     "    max_epochs=128,  # increase at least to 256\n",
-    "    max_time=\"00:00:02:00\",  # DD:HH:MM:SS, increase at least to \"00:01:00:00\"\n",
+    "    max_time=\"00:00:01:00\",  # DD:HH:MM:SS, increase at least to \"00:01:00:00\"\n",
     ")"
    ],
    "id": "412328c44c55e453",
@@ -211,7 +211,7 @@
     "    datamodule=datamodule,\n",
     "    checkpoint_folder=checkpoint_folder,\n",
     "    max_epochs=10,\n",
-    "    max_time=\"00:00:03:00\",  # DD:HH:MM:SS\n",
+    "    max_time=\"00:00:01:00\",  # DD:HH:MM:SS\n",
     "    overwrite_fit=True,\n",
     "    verbose=True,\n",
     "    **model_args,\n",
Original file line number	Diff line number	Diff line change
Expand Up		@@ -129,6 +129,7 @@ For more detailed examples see


		## License
		[Apache License 2.0](LICENSE)


		## Acknowledgements
Expand Down