From fe2c3e577cfc97d52dea5277806acda19d7a4f65 Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 11:50:22 +0100
Subject: [PATCH 01/13] ci: Switch to ruff

---
 .pre-commit-config.yaml | 30 +++++-------------------------
 pyproject.toml          | 26 ++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a01494aa..f8e10ded 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,32 +9,12 @@ repos:
     -   id: check-docstring-first  # Check a common error of defining a docstring after code.
     -   id: check-merge-conflict  # Check for files that contain merge conflict strings.
 
--   repo: https://github.com/PyCQA/flake8
-    rev: 7.3.0
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.0
     hooks:
-    -   id: flake8  # Check style and syntax. Does not modify code, issues have to be solved manually.
-        args: [
-            '--ignore=E501,E203,W503,E402',  # Ignore line length problems, space after colon problems, line break occurring before a binary operator problems, module level import not at top of file problems.
-        ]
-
--   repo: https://github.com/pycqa/isort
-    rev: 7.0.0
-    hooks:
-    -   id: isort  # Sort imports.
-        args: [
-            --multi-line=3,
-            --line-length=100,
-            --trailing-comma,
-            --force-grid-wrap=0,
-            --use-parentheses,
-            --ensure-newline-before-comments,
-        ]
-
--   repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 25.12.0
-    hooks:
-    -   id: black  # Format code.
-        args: [--line-length=100]
+      - id: ruff
+        args: [ --fix ]
+      - id: ruff-format
 
 ci:
     autoupdate_commit_msg: 'chore: Update pre-commit hooks'
diff --git a/pyproject.toml b/pyproject.toml
index c91e8ed5..60ac4492 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,6 +66,7 @@ Changelog = "https://github.com/TorchJD/torchjd/blob/main/CHANGELOG.md"
 
 [dependency-groups]
 check = [
+    "ruff>=0.14.14",
     "mypy>=1.16.0",
     "pre-commit>=2.9.2",  # isort doesn't work before 2.9.2
 ]
@@ -114,3 +115,28 @@ exclude_lines = [
     "pragma: not covered",
     "@overload",
 ]
+
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+
+[tool.ruff.lint]
+select = [
+    "E",   # pycodestyle Error
+    "F",   # Pyflakes
+    "W",   # pycodestyle Warning
+    "I",   # isort
+    "UP",  # pyupgrade
+    "B",   # flake8-bugbear
+]
+
+ignore = [
+    "E501", # line-too-long (handled by the formatter)
+    "E402", # module-import-not-at-top-of-file
+]
+
+[tool.ruff.lint.isort]
+combine-as-imports = true
+
+[tool.ruff.format]
+quote-style = "double"

From 3b9b3f6692e60a6656ff3687218e2466191067ae Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 11:50:46 +0100
Subject: [PATCH 02/13] Run pre-commit hooks

---
 src/torchjd/__init__.py                       |  3 +--
 src/torchjd/aggregation/_graddrop.py          |  2 +-
 src/torchjd/aggregation/_mgda.py              |  2 +-
 src/torchjd/aggregation/_utils/str.py         |  2 +-
 src/torchjd/autogram/_gramian_accumulator.py  |  6 ++----
 src/torchjd/autogram/_gramian_computer.py     | 10 ++++++----
 src/torchjd/autogram/_module_hook_manager.py  |  2 +-
 src/torchjd/autojac/_jac.py                   |  3 +--
 src/torchjd/autojac/_jac_to_grad.py           |  2 +-
 src/torchjd/autojac/_mtl_backward.py          |  2 +-
 .../autojac/_transform/_diagonalize.py        |  2 +-
 .../autojac/_transform/_differentiate.py      |  2 +-
 .../autojac/_transform/_materialize.py        |  2 +-
 tests/conftest.py                             |  4 ++--
 tests/doc/test_autogram.py                    |  2 +-
 tests/doc/test_rst.py                         | 18 ++++++++---------
 tests/plots/interactive_plotter.py            |  2 +-
 tests/profiling/plot_memory_timeline.py       |  4 ++--
 tests/profiling/run_profiler.py               |  2 +-
 tests/profiling/speed_grad_vs_jac_vs_gram.py  |  4 +---
 tests/unit/aggregation/_matrix_samplers.py    |  3 +--
 tests/unit/aggregation/test_nash_mtl.py       |  2 +-
 tests/unit/autogram/test_engine.py            |  2 +-
 .../autojac/_transform/test_accumulate.py     | 20 +++++++++----------
 tests/unit/autojac/test_jac.py                |  2 +-
 tests/unit/autojac/test_mtl_backward.py       |  2 +-
 tests/unit/autojac/test_utils.py              |  2 +-
 tests/utils/architectures.py                  |  3 ++-
 tests/utils/forward_backwards.py              |  4 ++--
 tests/utils/tensors.py                        |  1 +
 30 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/src/torchjd/__init__.py b/src/torchjd/__init__.py
index a74b6c78..4253561a 100644
--- a/src/torchjd/__init__.py
+++ b/src/torchjd/__init__.py
@@ -1,8 +1,7 @@
 from collections.abc import Callable
 from warnings import warn as _warn
 
-from .autojac import backward as _backward
-from .autojac import mtl_backward as _mtl_backward
+from .autojac import backward as _backward, mtl_backward as _mtl_backward
 
 _deprecated_items: dict[str, tuple[str, Callable]] = {
     "backward": ("autojac", _backward),
diff --git a/src/torchjd/aggregation/_graddrop.py b/src/torchjd/aggregation/_graddrop.py
index 6e0620ca..b6ea1327 100644
--- a/src/torchjd/aggregation/_graddrop.py
+++ b/src/torchjd/aggregation/_graddrop.py
@@ -74,5 +74,5 @@ def __str__(self) -> str:
         if self.leak is None:
             leak_str = ""
         else:
-            leak_str = f"([{', '.join(['{:.2f}'.format(l_).rstrip('0') for l_ in self.leak])}])"
+            leak_str = f"([{', '.join([f'{l_:.2f}'.rstrip('0') for l_ in self.leak])}])"
         return f"GradDrop{leak_str}"
diff --git a/src/torchjd/aggregation/_mgda.py b/src/torchjd/aggregation/_mgda.py
index f6edbf6b..99d60316 100644
--- a/src/torchjd/aggregation/_mgda.py
+++ b/src/torchjd/aggregation/_mgda.py
@@ -53,7 +53,7 @@ def forward(self, gramian: PSDMatrix) -> Tensor:
         dtype = gramian.dtype
 
         alpha = torch.ones(gramian.shape[0], device=device, dtype=dtype) / gramian.shape[0]
-        for i in range(self.max_iters):
+        for _i in range(self.max_iters):
             t = torch.argmin(gramian @ alpha)
             e_t = torch.zeros(gramian.shape[0], device=device, dtype=dtype)
             e_t[t] = 1.0
diff --git a/src/torchjd/aggregation/_utils/str.py b/src/torchjd/aggregation/_utils/str.py
index 8fda8b26..82a04540 100644
--- a/src/torchjd/aggregation/_utils/str.py
+++ b/src/torchjd/aggregation/_utils/str.py
@@ -7,5 +7,5 @@ def vector_to_str(vector: Tensor) -> str:
     `1.23, 1., ...`.
     """
 
-    weights_str = ", ".join(["{:.2f}".format(value).rstrip("0") for value in vector])
+    weights_str = ", ".join([f"{value:.2f}".rstrip("0") for value in vector])
     return weights_str
diff --git a/src/torchjd/autogram/_gramian_accumulator.py b/src/torchjd/autogram/_gramian_accumulator.py
index 91ace4e6..e9fe81f8 100644
--- a/src/torchjd/autogram/_gramian_accumulator.py
+++ b/src/torchjd/autogram/_gramian_accumulator.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 from torchjd._linalg import PSDMatrix
 
 
@@ -13,7 +11,7 @@ class GramianAccumulator:
     """
 
     def __init__(self) -> None:
-        self._gramian: Optional[PSDMatrix] = None
+        self._gramian: PSDMatrix | None = None
 
     def reset(self) -> None:
         self._gramian = None
@@ -25,7 +23,7 @@ def accumulate_gramian(self, gramian: PSDMatrix) -> None:
             self._gramian = gramian
 
     @property
-    def gramian(self) -> Optional[PSDMatrix]:
+    def gramian(self) -> PSDMatrix | None:
         """
         Get the Gramian matrix accumulated so far.
 
diff --git a/src/torchjd/autogram/_gramian_computer.py b/src/torchjd/autogram/_gramian_computer.py
index f5be882c..8c1546e0 100644
--- a/src/torchjd/autogram/_gramian_computer.py
+++ b/src/torchjd/autogram/_gramian_computer.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Optional, cast
+from typing import cast
 
 from torch import Tensor
 from torch.utils._pytree import PyTree
@@ -16,12 +16,14 @@ def __call__(
         grad_outputs: tuple[Tensor, ...],
         args: tuple[PyTree, ...],
         kwargs: dict[str, PyTree],
-    ) -> Optional[PSDMatrix]:
+    ) -> PSDMatrix | None:
         """Compute what we can for a module and optionally return the gramian if it's ready."""
 
+    @abstractmethod
     def track_forward_call(self) -> None:
         """Track that the module's forward was called. Necessary in some implementations."""
 
+    @abstractmethod
     def reset(self) -> None:
         """Reset state if any. Necessary in some implementations."""
 
@@ -40,7 +42,7 @@ class JacobianBasedGramianComputerWithCrossTerms(JacobianBasedGramianComputer):
     def __init__(self, jacobian_computer: JacobianComputer):
         super().__init__(jacobian_computer)
         self.remaining_counter = 0
-        self.summed_jacobian: Optional[Matrix] = None
+        self.summed_jacobian: Matrix | None = None
 
     def reset(self) -> None:
         self.remaining_counter = 0
@@ -55,7 +57,7 @@ def __call__(
         grad_outputs: tuple[Tensor, ...],
         args: tuple[PyTree, ...],
         kwargs: dict[str, PyTree],
-    ) -> Optional[PSDMatrix]:
+    ) -> PSDMatrix | None:
         """Compute what we can for a module and optionally return the gramian if it's ready."""
 
         jacobian_matrix = self.jacobian_computer(rg_outputs, grad_outputs, args, kwargs)
diff --git a/src/torchjd/autogram/_module_hook_manager.py b/src/torchjd/autogram/_module_hook_manager.py
index f72b2c75..21f64e09 100644
--- a/src/torchjd/autogram/_module_hook_manager.py
+++ b/src/torchjd/autogram/_module_hook_manager.py
@@ -141,7 +141,7 @@ def __call__(
             *rg_outputs,
         )
 
-        for idx, output in zip(rg_output_indices, autograd_fn_rg_outputs):
+        for idx, output in zip(rg_output_indices, autograd_fn_rg_outputs, strict=False):
             flat_outputs[idx] = output
 
         return tree_unflatten(flat_outputs, output_spec)
diff --git a/src/torchjd/autojac/_jac.py b/src/torchjd/autojac/_jac.py
index 41b5f108..1c809d2d 100644
--- a/src/torchjd/autojac/_jac.py
+++ b/src/torchjd/autojac/_jac.py
@@ -1,5 +1,4 @@
-from collections.abc import Sequence
-from typing import Iterable
+from collections.abc import Iterable, Sequence
 
 from torch import Tensor
 
diff --git a/src/torchjd/autojac/_jac_to_grad.py b/src/torchjd/autojac/_jac_to_grad.py
index 61427467..d2d4ce09 100644
--- a/src/torchjd/autojac/_jac_to_grad.py
+++ b/src/torchjd/autojac/_jac_to_grad.py
@@ -87,7 +87,7 @@ def _disunite_gradient(
     gradient_vector: Tensor, jacobians: list[Tensor], tensors: list[TensorWithJac]
 ) -> list[Tensor]:
     gradient_vectors = gradient_vector.split([t.numel() for t in tensors])
-    gradients = [g.view(t.shape) for g, t in zip(gradient_vectors, tensors)]
+    gradients = [g.view(t.shape) for g, t in zip(gradient_vectors, tensors, strict=False)]
     return gradients
 
 
diff --git a/src/torchjd/autojac/_mtl_backward.py b/src/torchjd/autojac/_mtl_backward.py
index 5755c9ee..92192139 100644
--- a/src/torchjd/autojac/_mtl_backward.py
+++ b/src/torchjd/autojac/_mtl_backward.py
@@ -132,7 +132,7 @@ def _create_transform(
             OrderedSet([loss]),
             retain_graph,
         )
-        for task_params, loss in zip(tasks_params, losses)
+        for task_params, loss in zip(tasks_params, losses, strict=False)
     ]
 
     # Transform that stacks the gradients of the losses w.r.t. the shared representations into a
diff --git a/src/torchjd/autojac/_transform/_diagonalize.py b/src/torchjd/autojac/_transform/_diagonalize.py
index 339f0bcc..9a99625b 100644
--- a/src/torchjd/autojac/_transform/_diagonalize.py
+++ b/src/torchjd/autojac/_transform/_diagonalize.py
@@ -65,7 +65,7 @@ def __call__(self, tensors: TensorDict) -> TensorDict:
         diagonal_matrix = torch.cat(flattened_considered_values).diag()
         diagonalized_tensors = {
             key: diagonal_matrix[:, begin:end].reshape((-1,) + key.shape)
-            for (begin, end), key in zip(self.indices, self.key_order)
+            for (begin, end), key in zip(self.indices, self.key_order, strict=False)
         }
         return diagonalized_tensors
 
diff --git a/src/torchjd/autojac/_transform/_differentiate.py b/src/torchjd/autojac/_transform/_differentiate.py
index ddd1f064..3ac2a86d 100644
--- a/src/torchjd/autojac/_transform/_differentiate.py
+++ b/src/torchjd/autojac/_transform/_differentiate.py
@@ -41,7 +41,7 @@ def __call__(self, tensors: TensorDict) -> TensorDict:
         tensor_outputs = [tensors[output] for output in self.outputs]
 
         differentiated_tuple = self._differentiate(tensor_outputs)
-        new_differentiations = dict(zip(self.inputs, differentiated_tuple))
+        new_differentiations = dict(zip(self.inputs, differentiated_tuple, strict=False))
         return type(tensors)(new_differentiations)
 
     @abstractmethod
diff --git a/src/torchjd/autojac/_transform/_materialize.py b/src/torchjd/autojac/_transform/_materialize.py
index 98f60e99..17d75d5d 100644
--- a/src/torchjd/autojac/_transform/_materialize.py
+++ b/src/torchjd/autojac/_transform/_materialize.py
@@ -16,7 +16,7 @@ def materialize(
     """
 
     tensors = []
-    for optional_tensor, input in zip(optional_tensors, inputs):
+    for optional_tensor, input in zip(optional_tensors, inputs, strict=False):
         if optional_tensor is None:
             tensors.append(torch.zeros_like(input))
         else:
diff --git a/tests/conftest.py b/tests/conftest.py
index 06c3d98b..5288aa1f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -53,11 +53,11 @@ def pytest_make_parametrize_id(config, val, argname):
     MAX_SIZE = 40
     optional_string = None  # Returning None means using pytest's way of making the string
 
-    if isinstance(val, (Aggregator, ModuleFactory, Weighting)):
+    if isinstance(val, Aggregator | ModuleFactory | Weighting):
         optional_string = str(val)
     elif isinstance(val, Tensor):
         optional_string = "T" + str(list(val.shape))  # T to indicate that it's a tensor
-    elif isinstance(val, (tuple, list, set)) and len(val) < 20:
+    elif isinstance(val, tuple | list | set) and len(val) < 20:
         optional_string = str(val)
     elif isinstance(val, RaisesExc):
         optional_string = " or ".join([f"{exc.__name__}" for exc in val.expected_exceptions])
diff --git a/tests/doc/test_autogram.py b/tests/doc/test_autogram.py
index 4445bc67..a07952af 100644
--- a/tests/doc/test_autogram.py
+++ b/tests/doc/test_autogram.py
@@ -22,7 +22,7 @@ def test_engine():
     # Create the engine before the backward pass, and only once.
     engine = Engine(model, batch_dim=0)
 
-    for input, target in zip(inputs, targets):
+    for input, target in zip(inputs, targets, strict=False):
         output = model(input).squeeze(dim=1)  # shape: [16]
         losses = criterion(output, target)  # shape: [16]
 
diff --git a/tests/doc/test_rst.py b/tests/doc/test_rst.py
index 6513d41c..934954b3 100644
--- a/tests/doc/test_rst.py
+++ b/tests/doc/test_rst.py
@@ -33,7 +33,7 @@ def test_amp():
     task1_targets = torch.randn(8, 16, 1)  # 8 batches of 16 targets for the first task
     task2_targets = torch.randn(8, 16, 1)  # 8 batches of 16 targets for the second task
 
-    for input, target1, target2 in zip(inputs, task1_targets, task2_targets):
+    for input, target1, target2 in zip(inputs, task1_targets, task2_targets, strict=False):
         with torch.autocast(device_type="cpu", dtype=torch.float16):
             features = shared_module(input)
             output1 = task1_module(features)
@@ -103,7 +103,7 @@ def test_iwmtl():
     task1_targets = torch.randn(8, 16)  # 8 batches of 16 targets for the first task
     task2_targets = torch.randn(8, 16)  # 8 batches of 16 targets for the second task
 
-    for input, target1, target2 in zip(inputs, task1_targets, task2_targets):
+    for input, target1, target2 in zip(inputs, task1_targets, task2_targets, strict=False):
         features = shared_module(input)  # shape: [16, 3]
         out1 = task1_module(features).squeeze(1)  # shape: [16]
         out2 = task2_module(features).squeeze(1)  # shape: [16]
@@ -138,7 +138,7 @@ def test_autograd():
         params = model.parameters()
         optimizer = SGD(params, lr=0.1)
 
-        for x, y in zip(X, Y):
+        for x, y in zip(X, Y, strict=False):
             y_hat = model(x).squeeze(dim=1)  # shape: [16]
             loss = loss_fn(y_hat, y)  # shape: [] (scalar)
             loss.backward()
@@ -163,7 +163,7 @@ def test_autojac():
         optimizer = SGD(params, lr=0.1)
         aggregator = UPGrad()
 
-        for x, y in zip(X, Y):
+        for x, y in zip(X, Y, strict=False):
             y_hat = model(x).squeeze(dim=1)  # shape: [16]
             losses = loss_fn(y_hat, y)  # shape: [16]
             backward(losses)
@@ -190,7 +190,7 @@ def test_autogram():
         weighting = UPGradWeighting()
         engine = Engine(model, batch_dim=0)
 
-        for x, y in zip(X, Y):
+        for x, y in zip(X, Y, strict=False):
             y_hat = model(x).squeeze(dim=1)  # shape: [16]
             losses = loss_fn(y_hat, y)  # shape: [16]
             gramian = engine.compute_gramian(losses)  # shape: [16, 16]
@@ -315,7 +315,7 @@ def print_gd_similarity(_, inputs: tuple[torch.Tensor, ...], aggregation: torch.
     task1_targets = torch.randn(8, 16, 1)  # 8 batches of 16 targets for the first task
     task2_targets = torch.randn(8, 16, 1)  # 8 batches of 16 targets for the second task
 
-    for input, target1, target2 in zip(inputs, task1_targets, task2_targets):
+    for input, target1, target2 in zip(inputs, task1_targets, task2_targets, strict=False):
         features = shared_module(input)
         output1 = task1_module(features)
         output2 = task2_module(features)
@@ -353,7 +353,7 @@ def test_mtl():
     task1_targets = torch.randn(8, 16, 1)  # 8 batches of 16 targets for the first task
     task2_targets = torch.randn(8, 16, 1)  # 8 batches of 16 targets for the second task
 
-    for input, target1, target2 in zip(inputs, task1_targets, task2_targets):
+    for input, target1, target2 in zip(inputs, task1_targets, task2_targets, strict=False):
         features = shared_module(input)
         output1 = task1_module(features)
         output2 = task2_module(features)
@@ -389,7 +389,7 @@ def test_partial_jd():
     params = model.parameters()
     optimizer = SGD(params, lr=0.1)
 
-    for x, y in zip(X, Y):
+    for x, y in zip(X, Y, strict=False):
         y_hat = model(x).squeeze(dim=1)  # shape: [16]
         losses = loss_fn(y_hat, y)  # shape: [16]
         gramian = engine.compute_gramian(losses)
@@ -414,7 +414,7 @@ def test_rnn():
     inputs = torch.randn(8, 5, 3, 10)  # 8 batches of 3 sequences of length 5 and of dim 10.
     targets = torch.randn(8, 5, 3, 20)  # 8 batches of 3 sequences of length 5 and of dim 20.
 
-    for input, target in zip(inputs, targets):
+    for input, target in zip(inputs, targets, strict=False):
         output, _ = rnn(input)  # output is of shape [5, 3, 20].
         losses = ((output - target) ** 2).mean(dim=[1, 2])  # 1 loss per sequence element.
 
diff --git a/tests/plots/interactive_plotter.py b/tests/plots/interactive_plotter.py
index 5a40e0b6..d24d2914 100644
--- a/tests/plots/interactive_plotter.py
+++ b/tests/plots/interactive_plotter.py
@@ -7,8 +7,8 @@
 import torch
 from dash import Dash, Input, Output, callback, dcc, html
 from plotly.graph_objs import Figure
-from plots._utils import Plotter, angle_to_coord, coord_to_angle
 
+from plots._utils import Plotter, angle_to_coord, coord_to_angle
 from torchjd.aggregation import (
     IMTLG,
     MGDA,
diff --git a/tests/profiling/plot_memory_timeline.py b/tests/profiling/plot_memory_timeline.py
index f9197101..8f0a02ba 100644
--- a/tests/profiling/plot_memory_timeline.py
+++ b/tests/profiling/plot_memory_timeline.py
@@ -28,7 +28,7 @@ def from_event(event: dict):
 
 
 def extract_memory_timeline(path: Path) -> np.ndarray:
-    with open(path, "r") as f:
+    with open(path) as f:
         data = json.load(f)
 
     events = data["traceEvents"]
@@ -53,7 +53,7 @@ def plot_memory_timelines(experiment: str, folders: list[str]) -> None:
         timelines.append(extract_memory_timeline(path))
 
     fig, ax = plt.subplots(figsize=(12, 6))
-    for folder, timeline in zip(folders, timelines):
+    for folder, timeline in zip(folders, timelines, strict=False):
         time = (timeline[:, 0] - timeline[0, 0]) // 1000  # Make time start at 0 and convert to ms.
         memory = timeline[:, 1]
         ax.plot(time, memory, label=folder, linewidth=1.5)
diff --git a/tests/profiling/run_profiler.py b/tests/profiling/run_profiler.py
index ebab7849..b143a55b 100644
--- a/tests/profiling/run_profiler.py
+++ b/tests/profiling/run_profiler.py
@@ -1,5 +1,5 @@
 import gc
-from typing import Callable
+from collections.abc import Callable
 
 import torch
 from settings import DEVICE
diff --git a/tests/profiling/speed_grad_vs_jac_vs_gram.py b/tests/profiling/speed_grad_vs_jac_vs_gram.py
index 16be875e..13b57b62 100644
--- a/tests/profiling/speed_grad_vs_jac_vs_gram.py
+++ b/tests/profiling/speed_grad_vs_jac_vs_gram.py
@@ -55,9 +55,7 @@ def compare_autograd_autojac_and_autogram_speed(factory: ModuleFactory, batch_si
     A = Mean()
     W = A.weighting
 
-    print(
-        f"\nTimes for forward + backward on {factory} with BS={batch_size}, A={A}" f" on {DEVICE}."
-    )
+    print(f"\nTimes for forward + backward on {factory} with BS={batch_size}, A={A} on {DEVICE}.")
 
     def fn_autograd():
         autograd_forward_backward(model, inputs, loss_fn)
diff --git a/tests/unit/aggregation/_matrix_samplers.py b/tests/unit/aggregation/_matrix_samplers.py
index a106e56f..8a3acd8d 100644
--- a/tests/unit/aggregation/_matrix_samplers.py
+++ b/tests/unit/aggregation/_matrix_samplers.py
@@ -31,8 +31,7 @@ def __repr__(self) -> str:
 
     def __str__(self) -> str:
         return (
-            f"{self.__class__.__name__.replace('MatrixSampler', '')}"
-            f"({self.m}x{self.n}r{self.rank})"
+            f"{self.__class__.__name__.replace('MatrixSampler', '')}({self.m}x{self.n}r{self.rank})"
         )
 
 
diff --git a/tests/unit/aggregation/test_nash_mtl.py b/tests/unit/aggregation/test_nash_mtl.py
index e4a47642..b916890e 100644
--- a/tests/unit/aggregation/test_nash_mtl.py
+++ b/tests/unit/aggregation/test_nash_mtl.py
@@ -55,7 +55,7 @@ def test_nash_mtl_reset():
     aggregator.reset()
     results = [aggregator(matrix) for matrix in matrices]
 
-    for result, expected in zip(results, expecteds):
+    for result, expected in zip(results, expecteds, strict=False):
         assert_close(result, expected)
 
 
diff --git a/tests/unit/autogram/test_engine.py b/tests/unit/autogram/test_engine.py
index 531b6f08..190c8fab 100644
--- a/tests/unit/autogram/test_engine.py
+++ b/tests/unit/autogram/test_engine.py
@@ -340,7 +340,7 @@ def test_iwrm_steps_with_autogram(factory: ModuleFactory, batch_size: int, batch
     engine = Engine(model, batch_dim=batch_dim)
     optimizer = SGD(model.parameters(), lr=1e-7)
 
-    for i in range(n_iter):
+    for _i in range(n_iter):
         inputs, targets = make_inputs_and_targets(model, batch_size)
         loss_fn = make_mse_loss_fn(targets)
         autogram_forward_backward(model, inputs, loss_fn, engine, weighting)
diff --git a/tests/unit/autojac/_transform/test_accumulate.py b/tests/unit/autojac/_transform/test_accumulate.py
index c2c1cf28..e720efd8 100644
--- a/tests/unit/autojac/_transform/test_accumulate.py
+++ b/tests/unit/autojac/_transform/test_accumulate.py
@@ -15,14 +15,14 @@ def test_single_grad_accumulation():
     shapes = [[], [1], [2, 3]]
     keys = [zeros_(shape, requires_grad=True) for shape in shapes]
     values = [ones_(shape) for shape in shapes]
-    input = dict(zip(keys, values))
+    input = dict(zip(keys, values, strict=False))
 
     accumulate = AccumulateGrad()
 
     output = accumulate(input)
     assert_tensor_dicts_are_close(output, {})
 
-    for key, value in zip(keys, values):
+    for key, value in zip(keys, values, strict=False):
         assert_grad_close(key, value)
 
 
@@ -38,12 +38,12 @@ def test_multiple_grad_accumulations(iterations: int):
     values = [ones_(shape) for shape in shapes]
     accumulate = AccumulateGrad()
 
-    for i in range(iterations):
+    for _i in range(iterations):
         # Clone values to ensure that we accumulate values that are not ever used afterwards
-        input = {key: value.clone() for key, value in zip(keys, values)}
+        input = {key: value.clone() for key, value in zip(keys, values, strict=False)}
         accumulate(input)
 
-    for key, value in zip(keys, values):
+    for key, value in zip(keys, values, strict=False):
         assert_grad_close(key, iterations * value)
 
 
@@ -98,14 +98,14 @@ def test_single_jac_accumulation():
     shapes = [[], [1], [2, 3]]
     keys = [zeros_(shape, requires_grad=True) for shape in shapes]
     values = [ones_([4] + shape) for shape in shapes]
-    input = dict(zip(keys, values))
+    input = dict(zip(keys, values, strict=False))
 
     accumulate = AccumulateJac()
 
     output = accumulate(input)
     assert_tensor_dicts_are_close(output, {})
 
-    for key, value in zip(keys, values):
+    for key, value in zip(keys, values, strict=False):
         assert_jac_close(key, value)
 
 
@@ -122,12 +122,12 @@ def test_multiple_jac_accumulations(iterations: int):
 
     accumulate = AccumulateJac()
 
-    for i in range(iterations):
+    for _i in range(iterations):
         # Clone values to ensure that we accumulate values that are not ever used afterwards
-        input = {key: value.clone() for key, value in zip(keys, values)}
+        input = {key: value.clone() for key, value in zip(keys, values, strict=False)}
         accumulate(input)
 
-    for key, value in zip(keys, values):
+    for key, value in zip(keys, values, strict=False):
         assert_jac_close(key, iterations * value)
 
 
diff --git a/tests/unit/autojac/test_jac.py b/tests/unit/autojac/test_jac.py
index 774d9ac9..501504e1 100644
--- a/tests/unit/autojac/test_jac.py
+++ b/tests/unit/autojac/test_jac.py
@@ -42,7 +42,7 @@ def test_jac():
     jacobians = jac(outputs, inputs)
 
     assert len(jacobians) == len([a1, a2])
-    for jacobian, a in zip(jacobians, [a1, a2]):
+    for jacobian, a in zip(jacobians, [a1, a2], strict=False):
         assert jacobian.shape[0] == len([y1, y2])
         assert jacobian.shape[1:] == a.shape
 
diff --git a/tests/unit/autojac/test_mtl_backward.py b/tests/unit/autojac/test_mtl_backward.py
index 3be3650a..d966dcc9 100644
--- a/tests/unit/autojac/test_mtl_backward.py
+++ b/tests/unit/autojac/test_mtl_backward.py
@@ -351,7 +351,7 @@ def test_various_feature_lists(shapes: list[tuple[int]]):
 
     features = [rand_(shape) @ p0 for shape in shapes]
 
-    y1 = sum([(f * p).sum() for f, p in zip(features, p1)])
+    y1 = sum([(f * p).sum() for f, p in zip(features, p1, strict=False)])
     y2 = (features[0] * p2).sum()
 
     mtl_backward(losses=[y1, y2], features=features)
diff --git a/tests/unit/autojac/test_utils.py b/tests/unit/autojac/test_utils.py
index a7036d7c..99c9fc56 100644
--- a/tests/unit/autojac/test_utils.py
+++ b/tests/unit/autojac/test_utils.py
@@ -152,7 +152,7 @@ def test_get_leaf_tensors_deep(depth: int):
 
     one = tensor_(1.0, requires_grad=True)
     sum_ = tensor_(0.0, requires_grad=False)
-    for i in range(depth):
+    for _i in range(depth):
         sum_ = sum_ + one
 
     leaves = get_leaf_tensors(tensors=[sum_], excluded=set())
diff --git a/tests/utils/architectures.py b/tests/utils/architectures.py
index a74c7de8..02ebae2c 100644
--- a/tests/utils/architectures.py
+++ b/tests/utils/architectures.py
@@ -6,6 +6,7 @@
 from torch import Tensor, nn
 from torch.nn import Flatten, ReLU
 from torch.utils._pytree import PyTree
+
 from utils.contexts import fork_rng
 
 
@@ -44,7 +45,7 @@ def get_in_out_shapes(module: nn.Module) -> tuple[PyTree, PyTree]:
     if isinstance(module, ShapedModule):
         return module.INPUT_SHAPES, module.OUTPUT_SHAPES
 
-    elif isinstance(module, (nn.BatchNorm2d, nn.InstanceNorm2d)):
+    elif isinstance(module, nn.BatchNorm2d | nn.InstanceNorm2d):
         HEIGHT = 6  # Arbitrary choice
         WIDTH = 6  # Arbitrary choice
         shape = (module.num_features, HEIGHT, WIDTH)
diff --git a/tests/utils/forward_backwards.py b/tests/utils/forward_backwards.py
index 14ce1e43..f8b9dfe2 100644
--- a/tests/utils/forward_backwards.py
+++ b/tests/utils/forward_backwards.py
@@ -5,14 +5,14 @@
 from torch.nn.functional import mse_loss
 from torch.utils._pytree import PyTree, tree_flatten, tree_map
 from torch.utils.hooks import RemovableHandle
-from utils.architectures import get_in_out_shapes
-from utils.contexts import fork_rng
 
 from torchjd._linalg import PSDTensor
 from torchjd.aggregation import Aggregator, Weighting
 from torchjd.autogram import Engine
 from torchjd.autojac import backward
 from torchjd.autojac._jac_to_grad import jac_to_grad
+from utils.architectures import get_in_out_shapes
+from utils.contexts import fork_rng
 
 
 def autograd_forward_backward(
diff --git a/tests/utils/tensors.py b/tests/utils/tensors.py
index 6d8066dc..6c91a08c 100644
--- a/tests/utils/tensors.py
+++ b/tests/utils/tensors.py
@@ -4,6 +4,7 @@
 from settings import DEVICE, DTYPE
 from torch import nn
 from torch.utils._pytree import PyTree, tree_map
+
 from utils.architectures import get_in_out_shapes
 from utils.contexts import fork_rng
 

From 6a89c198e2e62647806f816fccd344bb64f93ddf Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 12:01:51 +0100
Subject: [PATCH 03/13] Make `ruff` responsible for checking TODOs.

---
 .github/workflows/checks.yml | 30 ------------------------------
 pyproject.toml               |  1 +
 2 files changed, 1 insertion(+), 30 deletions(-)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 9fdd6fa4..47c91627 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -124,33 +124,3 @@ jobs:
 
       - name: Run mypy
         run: uv run mypy src/torchjd
-
-  check-todos:
-    name: Absence of TODOs
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v6
-
-      - name: Scan for TODO strings
-        run: |
-          echo "Scanning codebase for TODOs..."
-
-          git grep -nE "TODO" -- . ':(exclude).github/workflows/*' > todos_found.txt || true
-
-          if [ -s todos_found.txt ]; then
-            echo "❌ ERROR: Found TODOs in the following files:"
-            echo "-------------------------------------------"
-
-            while IFS=: read -r file line content; do
-              echo "::error file=$file,line=$line::TODO found at $file:$line - must be resolved before merge:%0A$content"
-            done < todos_found.txt
-
-            echo "-------------------------------------------"
-            echo "Please resolve these TODOs or track them in an issue before merging."
-
-            exit 1
-          else
-            echo "✅ No TODOs found. Codebase is clean!"
-            exit 0
-          fi
diff --git a/pyproject.toml b/pyproject.toml
index 60ac4492..606ded6e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -128,6 +128,7 @@ select = [
     "I",   # isort
     "UP",  # pyupgrade
     "B",   # flake8-bugbear
+    "FIX",  # flake8-fixme
 ]
 
 ignore = [

From 0aad2ae59d4e0d4b374416aa406d5733be85453d Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 12:09:31 +0100
Subject: [PATCH 04/13] Make "FIX" fail only on the CI, not locally: We want
 the users to be able to push TODOs to GitHub and detect them there.

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f8e10ded..6b8b4224 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,7 +13,7 @@ repos:
     rev: v0.9.0
     hooks:
       - id: ruff
-        args: [ --fix ]
+        args: [ --fix, --ignore, FIX ]
       - id: ruff-format
 
 ci:

From f94b3a9a2d4b2145594bcfba33d7af72299f9bbc Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 12:10:31 +0100
Subject: [PATCH 05/13] test a TODO

---
 tests/utils/contexts.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/utils/contexts.py b/tests/utils/contexts.py
index dc508130..ec3553e5 100644
--- a/tests/utils/contexts.py
+++ b/tests/utils/contexts.py
@@ -7,6 +7,8 @@
 
 ExceptionContext: TypeAlias = AbstractContextManager[Exception | None]
 
+# TODO: Test todos
+
 
 @contextmanager
 def fork_rng(seed: int = 0) -> Generator[Any, None, None]:

From ff81b3b726943519546c95f55c0341657bfeb5dc Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 12:10:49 +0100
Subject: [PATCH 06/13] Revert "test a TODO"

This reverts commit f94b3a9a2d4b2145594bcfba33d7af72299f9bbc.
---
 tests/utils/contexts.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/utils/contexts.py b/tests/utils/contexts.py
index ec3553e5..dc508130 100644
--- a/tests/utils/contexts.py
+++ b/tests/utils/contexts.py
@@ -7,8 +7,6 @@
 
 ExceptionContext: TypeAlias = AbstractContextManager[Exception | None]
 
-# TODO: Test todos
-
 
 @contextmanager
 def fork_rng(seed: int = 0) -> Generator[Any, None, None]:

From 71d340be0191bd90a8e69d52ac76a4ecd1ff916f Mon Sep 17 00:00:00 2001
From: Pierre Quinton <pierre.quinton@gmail.com>
Date: Tue, 3 Feb 2026 14:04:29 +0100
Subject: [PATCH 07/13] Add ruff to the CI checks

---
 .github/workflows/checks.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 47c91627..ae9fc9da 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -106,8 +106,8 @@ jobs:
           # This reduces false positives due to rate limits
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
-  typing:
-    name: Typing correctness
+  check-code:
+    name: Check code correctness
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
@@ -124,3 +124,6 @@ jobs:
 
       - name: Run mypy
         run: uv run mypy src/torchjd
+
+      - name: Run ruff
+        run: uv run ruff check --output-format=github

From 21bfe125a853b6228eaceb339d76aabb0e33dee8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rey?= <valerian.rey@gmail.com>
Date: Wed, 4 Feb 2026 03:22:24 +0100
Subject: [PATCH 08/13] Improve name of ruff and ty job

---
 .github/workflows/checks.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index a4f8f57e..58720967 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -109,8 +109,8 @@ jobs:
           # This reduces false positives due to rate limits
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
-  check-code:
-    name: Check code correctness
+  code-quality:
+    name: Code quality (ty and ruff)
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository

From 93ebf632ad9253965a19a101b56cfe1a4ae0d7c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rey?= <valerian.rey@gmail.com>
Date: Wed, 4 Feb 2026 03:28:38 +0100
Subject: [PATCH 09/13] Rename _i to _

---
 pyproject.toml                                   | 1 -
 src/torchjd/aggregation/_mgda.py                 | 2 +-
 tests/unit/autogram/test_engine.py               | 2 +-
 tests/unit/autojac/_transform/test_accumulate.py | 4 ++--
 tests/unit/autojac/test_utils.py                 | 2 +-
 5 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ac8e9812..06071519 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,7 +67,6 @@ Changelog = "https://github.com/TorchJD/torchjd/blob/main/CHANGELOG.md"
 [dependency-groups]
 check = [
     "ruff>=0.14.14",
-    "mypy>=1.16.0",
     "ty>=0.0.14",
     "pre-commit>=2.9.2",  # isort doesn't work before 2.9.2
 ]
diff --git a/src/torchjd/aggregation/_mgda.py b/src/torchjd/aggregation/_mgda.py
index f2081e28..8f753c2a 100644
--- a/src/torchjd/aggregation/_mgda.py
+++ b/src/torchjd/aggregation/_mgda.py
@@ -53,7 +53,7 @@ def forward(self, gramian: PSDMatrix, /) -> Tensor:
         dtype = gramian.dtype
 
         alpha = torch.ones(gramian.shape[0], device=device, dtype=dtype) / gramian.shape[0]
-        for _i in range(self.max_iters):
+        for _ in range(self.max_iters):
             t = torch.argmin(gramian @ alpha)
             e_t = torch.zeros(gramian.shape[0], device=device, dtype=dtype)
             e_t[t] = 1.0
diff --git a/tests/unit/autogram/test_engine.py b/tests/unit/autogram/test_engine.py
index cee6e87f..2461e383 100644
--- a/tests/unit/autogram/test_engine.py
+++ b/tests/unit/autogram/test_engine.py
@@ -341,7 +341,7 @@ def test_iwrm_steps_with_autogram(factory: ModuleFactory, batch_size: int, batch
     engine = Engine(model, batch_dim=batch_dim)
     optimizer = SGD(model.parameters(), lr=1e-7)
 
-    for _i in range(n_iter):
+    for _ in range(n_iter):
         inputs, targets = make_inputs_and_targets(model, batch_size)
         loss_fn = make_mse_loss_fn(targets)
         autogram_forward_backward(model, inputs, loss_fn, engine, weighting)
diff --git a/tests/unit/autojac/_transform/test_accumulate.py b/tests/unit/autojac/_transform/test_accumulate.py
index e720efd8..a4b38550 100644
--- a/tests/unit/autojac/_transform/test_accumulate.py
+++ b/tests/unit/autojac/_transform/test_accumulate.py
@@ -38,7 +38,7 @@ def test_multiple_grad_accumulations(iterations: int):
     values = [ones_(shape) for shape in shapes]
     accumulate = AccumulateGrad()
 
-    for _i in range(iterations):
+    for _ in range(iterations):
         # Clone values to ensure that we accumulate values that are not ever used afterwards
         input = {key: value.clone() for key, value in zip(keys, values, strict=False)}
         accumulate(input)
@@ -122,7 +122,7 @@ def test_multiple_jac_accumulations(iterations: int):
 
     accumulate = AccumulateJac()
 
-    for _i in range(iterations):
+    for _ in range(iterations):
         # Clone values to ensure that we accumulate values that are not ever used afterwards
         input = {key: value.clone() for key, value in zip(keys, values, strict=False)}
         accumulate(input)
diff --git a/tests/unit/autojac/test_utils.py b/tests/unit/autojac/test_utils.py
index 99c9fc56..f4dbf7a4 100644
--- a/tests/unit/autojac/test_utils.py
+++ b/tests/unit/autojac/test_utils.py
@@ -152,7 +152,7 @@ def test_get_leaf_tensors_deep(depth: int):
 
     one = tensor_(1.0, requires_grad=True)
     sum_ = tensor_(0.0, requires_grad=False)
-    for _i in range(depth):
+    for _ in range(depth):
         sum_ = sum_ + one
 
     leaves = get_leaf_tensors(tensors=[sum_], excluded=set())

From 994b0c235dd98bb9fe4c77579137d51de3c7b105 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rey?= <valerian.rey@gmail.com>
Date: Wed, 4 Feb 2026 03:35:29 +0100
Subject: [PATCH 10/13] Replace strict=False by strict=True

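The only reason we used strict=False was that it is the default, which
made it shorter to write. If we're forced to specify the `strict`
parameter explicitly, let's use `True`, so that a length mismatch between
the zipped iterables raises a ValueError instead of being silently
truncated.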
---
 src/torchjd/autogram/_module_hook_manager.py     |  2 +-
 src/torchjd/autojac/_jac_to_grad.py              |  2 +-
 src/torchjd/autojac/_mtl_backward.py             |  2 +-
 src/torchjd/autojac/_transform/_diagonalize.py   |  2 +-
 src/torchjd/autojac/_transform/_differentiate.py |  2 +-
 src/torchjd/autojac/_transform/_materialize.py   |  2 +-
 tests/doc/test_autogram.py                       |  2 +-
 tests/profiling/plot_memory_timeline.py          |  2 +-
 tests/unit/aggregation/test_nash_mtl.py          |  2 +-
 tests/unit/autojac/_transform/test_accumulate.py | 16 ++++++++--------
 tests/unit/autojac/test_jac.py                   |  2 +-
 tests/unit/autojac/test_mtl_backward.py          |  2 +-
 12 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/torchjd/autogram/_module_hook_manager.py b/src/torchjd/autogram/_module_hook_manager.py
index 7b493306..ef48b784 100644
--- a/src/torchjd/autogram/_module_hook_manager.py
+++ b/src/torchjd/autogram/_module_hook_manager.py
@@ -141,7 +141,7 @@ def __call__(
             *rg_outputs,
         )
 
-        for idx, output in zip(rg_output_indices, autograd_fn_rg_outputs, strict=False):
+        for idx, output in zip(rg_output_indices, autograd_fn_rg_outputs, strict=True):
             flat_outputs[idx] = output
 
         return tree_unflatten(flat_outputs, output_spec)
diff --git a/src/torchjd/autojac/_jac_to_grad.py b/src/torchjd/autojac/_jac_to_grad.py
index d2d4ce09..352e2655 100644
--- a/src/torchjd/autojac/_jac_to_grad.py
+++ b/src/torchjd/autojac/_jac_to_grad.py
@@ -87,7 +87,7 @@ def _disunite_gradient(
     gradient_vector: Tensor, jacobians: list[Tensor], tensors: list[TensorWithJac]
 ) -> list[Tensor]:
     gradient_vectors = gradient_vector.split([t.numel() for t in tensors])
-    gradients = [g.view(t.shape) for g, t in zip(gradient_vectors, tensors, strict=False)]
+    gradients = [g.view(t.shape) for g, t in zip(gradient_vectors, tensors, strict=True)]
     return gradients
 
 
diff --git a/src/torchjd/autojac/_mtl_backward.py b/src/torchjd/autojac/_mtl_backward.py
index 92192139..831099ed 100644
--- a/src/torchjd/autojac/_mtl_backward.py
+++ b/src/torchjd/autojac/_mtl_backward.py
@@ -132,7 +132,7 @@ def _create_transform(
             OrderedSet([loss]),
             retain_graph,
         )
-        for task_params, loss in zip(tasks_params, losses, strict=False)
+        for task_params, loss in zip(tasks_params, losses, strict=True)
     ]
 
     # Transform that stacks the gradients of the losses w.r.t. the shared representations into a
diff --git a/src/torchjd/autojac/_transform/_diagonalize.py b/src/torchjd/autojac/_transform/_diagonalize.py
index 3aa1eb84..88e5525e 100644
--- a/src/torchjd/autojac/_transform/_diagonalize.py
+++ b/src/torchjd/autojac/_transform/_diagonalize.py
@@ -65,7 +65,7 @@ def __call__(self, tensors: TensorDict, /) -> TensorDict:
         diagonal_matrix = torch.cat(flattened_considered_values).diag()
         diagonalized_tensors = {
             key: diagonal_matrix[:, begin:end].reshape((-1,) + key.shape)
-            for (begin, end), key in zip(self.indices, self.key_order, strict=False)
+            for (begin, end), key in zip(self.indices, self.key_order, strict=True)
         }
         return diagonalized_tensors
 
diff --git a/src/torchjd/autojac/_transform/_differentiate.py b/src/torchjd/autojac/_transform/_differentiate.py
index ce118627..3cec097d 100644
--- a/src/torchjd/autojac/_transform/_differentiate.py
+++ b/src/torchjd/autojac/_transform/_differentiate.py
@@ -41,7 +41,7 @@ def __call__(self, tensors: TensorDict, /) -> TensorDict:
         tensor_outputs = [tensors[output] for output in self.outputs]
 
         differentiated_tuple = self._differentiate(tensor_outputs)
-        new_differentiations = dict(zip(self.inputs, differentiated_tuple, strict=False))
+        new_differentiations = dict(zip(self.inputs, differentiated_tuple, strict=True))
         return type(tensors)(new_differentiations)
 
     @abstractmethod
diff --git a/src/torchjd/autojac/_transform/_materialize.py b/src/torchjd/autojac/_transform/_materialize.py
index 17d75d5d..89100168 100644
--- a/src/torchjd/autojac/_transform/_materialize.py
+++ b/src/torchjd/autojac/_transform/_materialize.py
@@ -16,7 +16,7 @@ def materialize(
     """
 
     tensors = []
-    for optional_tensor, input in zip(optional_tensors, inputs, strict=False):
+    for optional_tensor, input in zip(optional_tensors, inputs, strict=True):
         if optional_tensor is None:
             tensors.append(torch.zeros_like(input))
         else:
diff --git a/tests/doc/test_autogram.py b/tests/doc/test_autogram.py
index a07952af..43651824 100644
--- a/tests/doc/test_autogram.py
+++ b/tests/doc/test_autogram.py
@@ -22,7 +22,7 @@ def test_engine():
     # Create the engine before the backward pass, and only once.
     engine = Engine(model, batch_dim=0)
 
-    for input, target in zip(inputs, targets, strict=False):
+    for input, target in zip(inputs, targets, strict=True):
         output = model(input).squeeze(dim=1)  # shape: [16]
         losses = criterion(output, target)  # shape: [16]
 
diff --git a/tests/profiling/plot_memory_timeline.py b/tests/profiling/plot_memory_timeline.py
index 8f0a02ba..f7cbeec4 100644
--- a/tests/profiling/plot_memory_timeline.py
+++ b/tests/profiling/plot_memory_timeline.py
@@ -53,7 +53,7 @@ def plot_memory_timelines(experiment: str, folders: list[str]) -> None:
         timelines.append(extract_memory_timeline(path))
 
     fig, ax = plt.subplots(figsize=(12, 6))
-    for folder, timeline in zip(folders, timelines, strict=False):
+    for folder, timeline in zip(folders, timelines, strict=True):
         time = (timeline[:, 0] - timeline[0, 0]) // 1000  # Make time start at 0 and convert to ms.
         memory = timeline[:, 1]
         ax.plot(time, memory, label=folder, linewidth=1.5)
diff --git a/tests/unit/aggregation/test_nash_mtl.py b/tests/unit/aggregation/test_nash_mtl.py
index b916890e..44e15400 100644
--- a/tests/unit/aggregation/test_nash_mtl.py
+++ b/tests/unit/aggregation/test_nash_mtl.py
@@ -55,7 +55,7 @@ def test_nash_mtl_reset():
     aggregator.reset()
     results = [aggregator(matrix) for matrix in matrices]
 
-    for result, expected in zip(results, expecteds, strict=False):
+    for result, expected in zip(results, expecteds, strict=True):
         assert_close(result, expected)
 
 
diff --git a/tests/unit/autojac/_transform/test_accumulate.py b/tests/unit/autojac/_transform/test_accumulate.py
index a4b38550..eaa09549 100644
--- a/tests/unit/autojac/_transform/test_accumulate.py
+++ b/tests/unit/autojac/_transform/test_accumulate.py
@@ -15,14 +15,14 @@ def test_single_grad_accumulation():
     shapes = [[], [1], [2, 3]]
     keys = [zeros_(shape, requires_grad=True) for shape in shapes]
     values = [ones_(shape) for shape in shapes]
-    input = dict(zip(keys, values, strict=False))
+    input = dict(zip(keys, values, strict=True))
 
     accumulate = AccumulateGrad()
 
     output = accumulate(input)
     assert_tensor_dicts_are_close(output, {})
 
-    for key, value in zip(keys, values, strict=False):
+    for key, value in zip(keys, values, strict=True):
         assert_grad_close(key, value)
 
 
@@ -40,10 +40,10 @@ def test_multiple_grad_accumulations(iterations: int):
 
     for _ in range(iterations):
         # Clone values to ensure that we accumulate values that are not ever used afterwards
-        input = {key: value.clone() for key, value in zip(keys, values, strict=False)}
+        input = {key: value.clone() for key, value in zip(keys, values, strict=True)}
         accumulate(input)
 
-    for key, value in zip(keys, values, strict=False):
+    for key, value in zip(keys, values, strict=True):
         assert_grad_close(key, iterations * value)
 
 
@@ -98,14 +98,14 @@ def test_single_jac_accumulation():
     shapes = [[], [1], [2, 3]]
     keys = [zeros_(shape, requires_grad=True) for shape in shapes]
     values = [ones_([4] + shape) for shape in shapes]
-    input = dict(zip(keys, values, strict=False))
+    input = dict(zip(keys, values, strict=True))
 
     accumulate = AccumulateJac()
 
     output = accumulate(input)
     assert_tensor_dicts_are_close(output, {})
 
-    for key, value in zip(keys, values, strict=False):
+    for key, value in zip(keys, values, strict=True):
         assert_jac_close(key, value)
 
 
@@ -124,10 +124,10 @@ def test_multiple_jac_accumulations(iterations: int):
 
     for _ in range(iterations):
         # Clone values to ensure that we accumulate values that are not ever used afterwards
-        input = {key: value.clone() for key, value in zip(keys, values, strict=False)}
+        input = {key: value.clone() for key, value in zip(keys, values, strict=True)}
         accumulate(input)
 
-    for key, value in zip(keys, values, strict=False):
+    for key, value in zip(keys, values, strict=True):
         assert_jac_close(key, iterations * value)
 
 
diff --git a/tests/unit/autojac/test_jac.py b/tests/unit/autojac/test_jac.py
index 44c323a1..3a5fb9a4 100644
--- a/tests/unit/autojac/test_jac.py
+++ b/tests/unit/autojac/test_jac.py
@@ -42,7 +42,7 @@ def test_jac():
     jacobians = jac(outputs, inputs)
 
     assert len(jacobians) == len([a1, a2])
-    for jacobian, a in zip(jacobians, [a1, a2], strict=False):
+    for jacobian, a in zip(jacobians, [a1, a2], strict=True):
         assert jacobian.shape[0] == len([y1, y2])
         assert jacobian.shape[1:] == a.shape
 
diff --git a/tests/unit/autojac/test_mtl_backward.py b/tests/unit/autojac/test_mtl_backward.py
index 1e46546c..00bda738 100644
--- a/tests/unit/autojac/test_mtl_backward.py
+++ b/tests/unit/autojac/test_mtl_backward.py
@@ -351,7 +351,7 @@ def test_various_feature_lists(shapes: list[tuple[int]]):
 
     features = [rand_(shape) @ p0 for shape in shapes]
 
-    y1 = sum([(f * p).sum() for f, p in zip(features, p1, strict=False)])
+    y1 = sum([(f * p).sum() for f, p in zip(features, p1, strict=True)])
     y2 = (features[0] * p2).sum()
 
     mtl_backward(losses=[y1, y2], features=features)

From e76cae9c0af36b8707f36c6d107029a2ee042986 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rey?= <valerian.rey@gmail.com>
Date: Wed, 4 Feb 2026 03:42:13 +0100
Subject: [PATCH 11/13] Add TODO

---
 src/torchjd/autojac/_transform/_materialize.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/torchjd/autojac/_transform/_materialize.py b/src/torchjd/autojac/_transform/_materialize.py
index 89100168..2208af61 100644
--- a/src/torchjd/autojac/_transform/_materialize.py
+++ b/src/torchjd/autojac/_transform/_materialize.py
@@ -3,6 +3,8 @@
 import torch
 from torch import Tensor
 
+# TODO: test todos
+
 
 def materialize(
     optional_tensors: Sequence[Tensor | None], inputs: Sequence[Tensor]

From 15cc65b5097d3021c065f7c5ed15bd1ed24bcd20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rey?= <valerian.rey@gmail.com>
Date: Wed, 4 Feb 2026 03:47:42 +0100
Subject: [PATCH 12/13] Add non-capitalized todo

---
 src/torchjd/autojac/_transform/_materialize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/torchjd/autojac/_transform/_materialize.py b/src/torchjd/autojac/_transform/_materialize.py
index 2208af61..eb893cf3 100644
--- a/src/torchjd/autojac/_transform/_materialize.py
+++ b/src/torchjd/autojac/_transform/_materialize.py
@@ -3,7 +3,7 @@
 import torch
 from torch import Tensor
 
-# TODO: test todos
+# todo: test todo
 
 
 def materialize(

From a39de4140cfe4a12d61700fa1b9f32a761d69871 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Val=C3=A9rian=20Rey?= <valerian.rey@gmail.com>
Date: Wed, 4 Feb 2026 03:52:13 +0100
Subject: [PATCH 13/13] Remove todo

---
 src/torchjd/autojac/_transform/_materialize.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/torchjd/autojac/_transform/_materialize.py b/src/torchjd/autojac/_transform/_materialize.py
index eb893cf3..89100168 100644
--- a/src/torchjd/autojac/_transform/_materialize.py
+++ b/src/torchjd/autojac/_transform/_materialize.py
@@ -3,8 +3,6 @@
 import torch
 from torch import Tensor
 
-# todo: test todo
-
 
 def materialize(
     optional_tensors: Sequence[Tensor | None], inputs: Sequence[Tensor]