Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
9fec107
refactor(linalg): Add `PSDQuadraticForm` and `GeneralizedMatrix`.
PierreQuinton Jan 19, 2026
acf8e58
Merge branch 'main' into add-generalized-matrix-psd-matrix
PierreQuinton Jan 19, 2026
a744fa2
Sort items of `__all__` of `_linalg.__init__`
PierreQuinton Jan 19, 2026
23de54d
one line
PierreQuinton Jan 19, 2026
2bd603e
fix `is_psd_quadratic_form`
PierreQuinton Jan 19, 2026
d6f8375
remove outdated comment
PierreQuinton Jan 19, 2026
24d24bb
Add `assert_psd_quadratic_form` and TODOs for where to test it. I als…
PierreQuinton Jan 19, 2026
242cb55
fix is_psd_quadratic_form
PierreQuinton Jan 20, 2026
72a9a5f
Rename `PSDQuadraticForm` to `PSDGeneralizedMatrix`
PierreQuinton Jan 20, 2026
09df593
fix type of weighting in Flattening
PierreQuinton Jan 20, 2026
0a1d45c
Add parametrization of zero matrix for test_gramian_is_psd
PierreQuinton Jan 20, 2026
6f63182
Add test of the PSD property for functions in aggregation/_utils/gramian
PierreQuinton Jan 20, 2026
5a42ecd
rename test of equivariance accordingly
PierreQuinton Jan 20, 2026
0497f3a
Rename functions in `autogram/_gramian_utils` so that they don't incl…
PierreQuinton Jan 20, 2026
48df0a8
Test the PSD property on outputs of functions in `autogram/_gramian_u…
PierreQuinton Jan 20, 2026
40977f3
Remove internal checks of shapes of matrices
PierreQuinton Jan 20, 2026
92b975b
Remove uninformative shadowing of assertion error in assert_psd_*
PierreQuinton Jan 20, 2026
bda0a5f
Factorize `compute_gramian` from `forward_backward` by making the one…
PierreQuinton Jan 20, 2026
97bcf42
Revert "Factorize `compute_gramian` from `forward_backward` by making…
PierreQuinton Jan 20, 2026
03aebae
Generalizes `compute_gramian` to take a `GeneralizedMatrix` instead.
PierreQuinton Jan 20, 2026
ee54c09
Move `aggregation/_utils/gramian.py` to `_linalg/gramian.py`
PierreQuinton Jan 20, 2026
2b94d78
Merge branch 'main' into add-generalized-matrix-psd-matrix
ValerianRey Jan 20, 2026
e347075
Apply suggestions from code review
PierreQuinton Jan 21, 2026
3d9742c
Remove outdated comments
PierreQuinton Jan 21, 2026
f2d0d1b
Improve style
PierreQuinton Jan 21, 2026
5eafa74
Improve typing of `forward_backward.compute_gramian`
PierreQuinton Jan 21, 2026
d60e9fa
improve asserts
PierreQuinton Jan 21, 2026
f4d611b
Merge branch 'main' into add-generalized-matrix-psd-matrix
ValerianRey Jan 21, 2026
57af9f1
Merge branch 'main' into add-generalized-matrix-psd-matrix
ValerianRey Jan 21, 2026
a793693
Can parametrize number of dimensions to contract in `compute_gramian`
PierreQuinton Jan 22, 2026
7da352c
Remove GeneralizedMatrix
ValerianRey Jan 23, 2026
994932b
Rename PSDGeneralizedMatrix to PSDTensor
ValerianRey Jan 23, 2026
ab809c6
Add comment about using classes
ValerianRey Jan 23, 2026
47bf743
Remove useless overload of compute_gramian
ValerianRey Jan 23, 2026
55bc6f8
Rename matrix to t in compute_gramian
ValerianRey Jan 23, 2026
a80f3f6
Add overload for compute_gramian when t is matrix and contracted_dims…
ValerianRey Jan 23, 2026
09393cc
Stop expecting coverage for overload functions
ValerianRey Jan 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,9 @@ full = [

[tool.pytest.ini_options]
xfail_strict = true

[tool.coverage.report]
exclude_lines = [
"pragma: not covered",
"@overload",
]
16 changes: 13 additions & 3 deletions src/torchjd/_linalg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
from .gramian import compute_gramian
from .matrix import Matrix, PSDMatrix
from ._gramian import compute_gramian, normalize, regularize
from ._matrix import Matrix, PSDMatrix, PSDTensor, is_matrix, is_psd_matrix, is_psd_tensor

__all__ = ["compute_gramian", "Matrix", "PSDMatrix"]
__all__ = [
"compute_gramian",
"normalize",
"regularize",
"Matrix",
"PSDMatrix",
"PSDTensor",
"is_matrix",
"is_psd_matrix",
"is_psd_tensor",
]
70 changes: 70 additions & 0 deletions src/torchjd/_linalg/_gramian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from typing import Literal, cast, overload

import torch
from torch import Tensor

from ._matrix import Matrix, PSDMatrix, PSDTensor


@overload
def compute_gramian(t: Tensor) -> PSDMatrix:
    pass


@overload
def compute_gramian(t: Tensor, contracted_dims: Literal[-1]) -> PSDMatrix:
    pass


@overload
def compute_gramian(t: Matrix, contracted_dims: Literal[1]) -> PSDMatrix:
    pass


def compute_gramian(t: Tensor, contracted_dims: int = -1) -> PSDTensor:
    """
    Computes the `Gramian matrix <https://en.wikipedia.org/wiki/Gram_matrix>`_ of the input.

    :param t: The tensor whose gramian is computed.
    :param contracted_dims: Number of trailing dimensions to contract. A negative value instead
        counts the leading dimensions to preserve (the default ``-1`` preserves the first
        dimension and contracts all the others).
    """

    if contracted_dims < 0:
        contracted_dims += t.ndim
    n_preserved = t.ndim - contracted_dims
    # Send every preserved leading dimension to the mirrored trailing position, so that the
    # result's first half of dimensions matches its reversed second half (PSDTensor layout).
    preserved_dims = list(range(n_preserved))
    mirrored_dims = [t.ndim - 1 - dim for dim in preserved_dims]
    transposed = t.movedim(preserved_dims, mirrored_dims)
    gramian = torch.tensordot(t, transposed, dims=contracted_dims)
    return cast(PSDTensor, gramian)


def normalize(gramian: PSDMatrix, eps: float) -> PSDMatrix:
    """
    Normalizes the gramian `G=AA^T` with respect to the Frobenius norm of `A`.

    Since `G = A A^T`, the squared Frobenius norm of `A` equals the trace of `G` (the sum of its
    diagonal elements). Dividing `G` by this trace therefore yields the gramian of the
    (Frobenius-)normalized `A`.

    :param gramian: The gramian matrix to normalize.
    :param eps: Threshold under which the trace is considered zero, in which case a zero matrix
        is returned instead of dividing by a vanishing value.
    """
    trace = gramian.diagonal().sum()
    if trace < eps:
        # Dividing by a (near-)zero norm would be numerically meaningless: fall back to zeros.
        return cast(PSDMatrix, torch.zeros_like(gramian))
    return cast(PSDMatrix, gramian / trace)


def regularize(gramian: PSDMatrix, eps: float) -> PSDMatrix:
    """
    Adds a regularization term to the gramian to enforce positive definiteness.

    Numerical errors can leave `gramian` with slightly negative eigenvalue(s); adding a small
    multiple of the identity shifts every eigenvalue up by `eps`, which makes the result
    positive definite.

    :param gramian: The (square) gramian matrix to regularize.
    :param eps: Magnitude of the identity term added to the diagonal.
    """

    identity = torch.eye(gramian.shape[0], dtype=gramian.dtype, device=gramian.device)
    return cast(PSDMatrix, gramian + eps * identity)
40 changes: 40 additions & 0 deletions src/torchjd/_linalg/_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import TypeGuard

from torch import Tensor

# Note: we're using classes and inheritance instead of NewType because it's possible to have
# multiple inheritance but there is no type intersection. However, these classes should never be
# instantiated: they're only used for static type checking.


class Matrix(Tensor):
    """
    Tensor with exactly 2 dimensions.

    Never instantiated: exists only so that static type checkers can distinguish matrices from
    arbitrary tensors (narrowed via the `is_matrix` TypeGuard).
    """


class PSDTensor(Tensor):
    """
    Tensor representing a quadratic form. The first half of its dimensions matches the reversed
    second half of its dimensions (e.g. shape=[4, 3, 3, 4]), and its reshaping into a matrix should
    be positive semi-definite.

    Never instantiated: used only for static type checking (narrowed via `is_psd_tensor`).
    """


class PSDMatrix(PSDTensor, Matrix):
    """
    Positive semi-definite matrix.

    Combines `PSDTensor` and `Matrix` via multiple inheritance to emulate a type intersection;
    never instantiated (narrowed via `is_psd_matrix`).
    """


def is_matrix(t: Tensor) -> TypeGuard[Matrix]:
return t.ndim == 2


def is_psd_tensor(t: Tensor) -> TypeGuard[PSDTensor]:
half_dim = t.ndim // 2
return t.ndim % 2 == 0 and t.shape[:half_dim] == t.shape[: half_dim - 1 : -1]
# We do not check that t is PSD as it is expensive, but this must be checked in the tests of
# every function that uses this TypeGuard by using `assert_is_psd_tensor`.


def is_psd_matrix(t: Tensor) -> TypeGuard[PSDMatrix]:
return t.ndim == 2 and t.shape[0] == t.shape[1]
# We do not check that t is PSD as it is expensive, but this must be checked in the tests of
# every function that uses this TypeGuard, by using `assert_is_psd_matrix`.
9 changes: 0 additions & 9 deletions src/torchjd/_linalg/gramian.py

This file was deleted.

6 changes: 0 additions & 6 deletions src/torchjd/_linalg/matrix.py

This file was deleted.

14 changes: 6 additions & 8 deletions src/torchjd/aggregation/_aggregator_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from torch import Tensor, nn

from torchjd._linalg import Matrix, PSDMatrix, compute_gramian
from torchjd._linalg import Matrix, PSDMatrix, compute_gramian, is_matrix

from ._weighting_bases import Weighting

Expand All @@ -18,20 +18,19 @@ def __init__(self):

@staticmethod
def _check_is_matrix(matrix: Tensor) -> None:
if len(matrix.shape) != 2:
if not is_matrix(matrix):
raise ValueError(
"Parameter `matrix` should be a tensor of dimension 2. Found `matrix.shape = "
f"{matrix.shape}`."
)

@abstractmethod
def forward(self, matrix: Tensor) -> Tensor:
def forward(self, matrix: Matrix) -> Tensor:
"""Computes the aggregation from the input matrix."""

# Override to make type hints and documentation more specific
def __call__(self, matrix: Tensor) -> Tensor:
"""Computes the aggregation from the input matrix and applies all registered hooks."""

Aggregator._check_is_matrix(matrix)
return super().__call__(matrix)

def __repr__(self) -> str:
Expand All @@ -54,7 +53,7 @@ def __init__(self, weighting: Weighting[Matrix]):
self.weighting = weighting

@staticmethod
def combine(matrix: Tensor, weights: Tensor) -> Tensor:
def combine(matrix: Matrix, weights: Tensor) -> Tensor:
"""
Aggregates a matrix by making a linear combination of its rows, using the provided vector of
weights.
Expand All @@ -63,8 +62,7 @@ def combine(matrix: Tensor, weights: Tensor) -> Tensor:
vector = weights @ matrix
return vector

def forward(self, matrix: Tensor) -> Tensor:
self._check_is_matrix(matrix)
def forward(self, matrix: Matrix) -> Tensor:
weights = self.weighting(matrix)
vector = self.combine(matrix, weights)
return vector
Expand Down
3 changes: 2 additions & 1 deletion src/torchjd/aggregation/_cagrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
import torch
from torch import Tensor

from torchjd._linalg import normalize

from ._aggregator_bases import GramianWeightedAggregator
from ._utils.gramian import normalize
from ._utils.non_differentiable import raise_non_differentiable_error


Expand Down
4 changes: 3 additions & 1 deletion src/torchjd/aggregation/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import torch
from torch import Tensor

from torchjd._linalg import Matrix

from ._aggregator_bases import Aggregator
from ._sum import SumWeighting
from ._utils.non_differentiable import raise_non_differentiable_error
Expand Down Expand Up @@ -56,7 +58,7 @@ def __init__(self, pref_vector: Tensor | None = None):
# This prevents computing gradients that can be very wrong.
self.register_full_backward_pre_hook(raise_non_differentiable_error)

def forward(self, matrix: Tensor) -> Tensor:
def forward(self, matrix: Matrix) -> Tensor:
weights = self.weighting(matrix)
units = torch.nan_to_num((matrix / (matrix.norm(dim=1)).unsqueeze(1)), 0.0)
best_direction = torch.linalg.pinv(units) @ weights
Expand Down
3 changes: 1 addition & 2 deletions src/torchjd/aggregation/_dualproj.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

from torch import Tensor

from torchjd._linalg import PSDMatrix
from torchjd._linalg import PSDMatrix, normalize, regularize

from ._aggregator_bases import GramianWeightedAggregator
from ._mean import MeanWeighting
from ._utils.dual_cone import project_weights
from ._utils.gramian import normalize, regularize
from ._utils.non_differentiable import raise_non_differentiable_error
from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting
from ._weighting_bases import Weighting
Expand Down
13 changes: 5 additions & 8 deletions src/torchjd/aggregation/_flattening.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from math import prod

from torch import Tensor

from torchjd._linalg.matrix import PSDMatrix
from torchjd._linalg import PSDTensor
from torchjd.aggregation._weighting_bases import GeneralizedWeighting, Weighting
from torchjd.autogram._gramian_utils import reshape_gramian
from torchjd.autogram._gramian_utils import flatten


class Flattening(GeneralizedWeighting):
Expand All @@ -22,15 +20,14 @@ class Flattening(GeneralizedWeighting):
:param weighting: The weighting to apply to the Gramian matrix.
"""

def __init__(self, weighting: Weighting[PSDMatrix]):
def __init__(self, weighting: Weighting):
super().__init__()
self.weighting = weighting

def forward(self, generalized_gramian: Tensor) -> Tensor:
def forward(self, generalized_gramian: PSDTensor) -> Tensor:
    """
    Computes a tensor of weights from the generalized gramian.

    The generalized gramian is flattened into a square gramian matrix, the wrapped weighting
    produces a weight vector from it, and that vector is reshaped back to the leading
    dimensions of the generalized gramian.
    """
    k = generalized_gramian.ndim // 2
    shape = generalized_gramian.shape[:k]
    # Removed the dead `m = prod(shape)` assignment: `flatten` takes no size argument, so `m`
    # was computed and never used (and `prod` is no longer imported by this module).
    square_gramian = flatten(generalized_gramian)
    weights_vector = self.weighting(square_gramian)
    weights = weights_vector.reshape(shape)
    return weights
5 changes: 3 additions & 2 deletions src/torchjd/aggregation/_graddrop.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import torch
from torch import Tensor

from torchjd._linalg import Matrix

from ._aggregator_bases import Aggregator
from ._utils.non_differentiable import raise_non_differentiable_error

Expand Down Expand Up @@ -38,8 +40,7 @@ def __init__(self, f: Callable = _identity, leak: Tensor | None = None):
# This prevents computing gradients that can be very wrong.
self.register_full_backward_pre_hook(raise_non_differentiable_error)

def forward(self, matrix: Tensor) -> Tensor:
self._check_is_matrix(matrix)
def forward(self, matrix: Matrix) -> Tensor:
self._check_matrix_has_enough_rows(matrix)

if matrix.shape[0] == 0 or matrix.shape[1] == 0:
Expand Down
4 changes: 3 additions & 1 deletion src/torchjd/aggregation/_pcgrad.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import cast

import torch
from torch import Tensor

Expand Down Expand Up @@ -32,7 +34,7 @@ def forward(self, gramian: PSDMatrix) -> Tensor:
device = gramian.device
dtype = gramian.dtype
cpu = torch.device("cpu")
gramian = gramian.to(device=cpu)
gramian = cast(PSDMatrix, gramian.to(device=cpu))

dimension = gramian.shape[0]
weights = torch.zeros(dimension, device=cpu, dtype=dtype)
Expand Down
1 change: 0 additions & 1 deletion src/torchjd/aggregation/_trimmed_mean.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __init__(self, trim_number: int):
self.trim_number = trim_number

def forward(self, matrix: Tensor) -> Tensor:
self._check_is_matrix(matrix)
self._check_matrix_has_enough_rows(matrix)

n_rows = matrix.shape[0]
Expand Down
3 changes: 1 addition & 2 deletions src/torchjd/aggregation/_upgrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
import torch
from torch import Tensor

from torchjd._linalg import PSDMatrix
from torchjd._linalg import PSDMatrix, normalize, regularize

from ._aggregator_bases import GramianWeightedAggregator
from ._mean import MeanWeighting
from ._utils.dual_cone import project_weights
from ._utils.gramian import normalize, regularize
from ._utils.non_differentiable import raise_non_differentiable_error
from ._utils.pref_vector import pref_vector_to_str_suffix, pref_vector_to_weighting
from ._weighting_bases import Weighting
Expand Down
33 changes: 0 additions & 33 deletions src/torchjd/aggregation/_utils/gramian.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/torchjd/aggregation/_utils/pref_vector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from torch import Tensor

from torchjd._linalg.matrix import Matrix
from torchjd._linalg import Matrix
from torchjd.aggregation._constant import ConstantWeighting
from torchjd.aggregation._weighting_bases import Weighting

Expand Down
Loading
Loading