Commit 0786e4f

Extract frechet_mean into functional API and rename LieBN.py to liebn.py
Add frechet_mean() to spd_learn.functional.batchnorm, unifying the duplicated Karcher flow logic from SPDBatchNormMean, SPDBatchNormMeanVar, SPDBatchNormLie, and the SPDIM tutorial into a single reusable function. Rename LieBN.py to liebn.py for snake_case consistency with all other module files, and rename karcher_steps to n_iter in SPDBatchNormLie to match the other batchnorm modules.
1 parent 175369c commit 0786e4f
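
For context, a minimal usage sketch of the new unified entry point (the random SPD batch below is illustrative, not part of the commit):

    import torch
    from spd_learn.functional import frechet_mean

    # Illustrative SPD batch: A @ A^T plus a small ridge is symmetric positive definite.
    A = torch.randn(32, 8, 8, dtype=torch.float64)
    X = A @ A.transpose(-1, -2) + 1e-3 * torch.eye(8, dtype=torch.float64)

    mean = frechet_mean(X, max_iter=50)  # shape (1, 8, 8)
    mean, dists = frechet_mean(X, max_iter=50, return_distances=True)  # dists: shape (32,)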

7 files changed: 100 additions & 105 deletions

examples/applied_examples/plot_source_free_domain.py

Lines changed: 4 additions & 68 deletions
@@ -96,75 +96,10 @@
 # SPDIM Geometric Operations
 # --------------------------
 #
-# We define two core geometric operations needed for the SPDIM pipeline.
-# These will be included in a future release of ``spd_learn.functional``.
+# The Fréchet mean and geodesic distances used by SPDIM are available
+# directly from ``spd_learn.functional``.
 #
 
-from spd_learn.functional import (
-    get_epsilon,
-    matrix_exp,
-    matrix_log,
-    matrix_sqrt_inv,
-)
-
-
-def frechet_mean(X, max_iter=50, return_distances=False):
-    r"""Compute the Fréchet mean under the AIRM.
-
-    .. math::
-
-        \bar{X} = \arg\min_{G \in \mathcal{S}_{++}^n}
-        \sum_{i=1}^{N} d_{\text{AIRM}}^2(G, X_i)
-
-    Uses adaptive step-size Karcher flow.
-    """
-    eps = get_epsilon(X.dtype, "eigval_log")
-    n_samples = X.shape[0]
-
-    if n_samples == 1:
-        mean = X[:1]
-        if return_distances:
-            return mean, torch.zeros(X.shape[:-2], dtype=X.dtype, device=X.device)
-        return mean
-
-    w = torch.ones((*X.shape[:-2], 1, 1), dtype=X.dtype, device=X.device)
-    w = w / n_samples
-    G = (X * w).sum(dim=0, keepdim=True)
-
-    nu = 1.0
-    tau = float("inf")
-
-    for _ in range(max_iter):
-        G_sqrt, G_invsqrt = matrix_sqrt_inv.apply(G)
-        X_tangent = matrix_log.apply(G_invsqrt @ X @ G_invsqrt)
-        G_tangent = (X_tangent * w).sum(dim=0, keepdim=True)
-
-        crit = torch.norm(G_tangent, p="fro", dim=(-2, -1)).max().item()
-        if crit <= eps:
-            break
-
-        G = G_sqrt @ matrix_exp.apply(nu * G_tangent) @ G_sqrt
-
-        h = nu * crit
-        if h < tau:
-            nu = 0.95 * nu
-            tau = h
-        else:
-            nu = 0.5 * nu
-
-        if nu <= eps:
-            break
-
-    if return_distances:
-        G_sqrt, G_invsqrt = matrix_sqrt_inv.apply(G)
-        X_tangent = matrix_log.apply(G_invsqrt @ X @ G_invsqrt)
-        G_tangent = (X_tangent * w).sum(dim=0, keepdim=True)
-        distances = torch.norm(X_tangent - G_tangent, p="fro", dim=(-2, -1))
-        return G, distances
-
-    return G
-
-
 ######################################################################
 # Loading the Dataset
 # -------------------
@@ -179,12 +114,13 @@ def frechet_mean(X, max_iter=50, return_distances=False):
 # - **Source domain**: Session A (training with labels)
 # - **Target domain**: Session B (adaptation without labels)
 #
-
 from braindecode.datasets import create_from_X_y
 from moabb.datasets import BNCI2015_001
 from moabb.paradigms import MotorImagery
 from sklearn.preprocessing import LabelEncoder
 
+from spd_learn.functional import frechet_mean
+
 
 dataset = BNCI2015_001()
 paradigm = MotorImagery(
spd_learn/functional/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -16,6 +16,7 @@
 
 from .autograd import modeig_backward, modeig_forward
 from .batchnorm import (
+    frechet_mean,
     karcher_mean_iteration,
     lie_group_variance,
     spd_centering,
@@ -157,6 +158,7 @@
     "ledoit_wolf",
     "shrinkage_covariance",
     # Batch normalization
+    "frechet_mean",
     "karcher_mean_iteration",
     "lie_group_variance",
     "spd_centering",

spd_learn/functional/batchnorm.py

Lines changed: 78 additions & 1 deletion
@@ -9,6 +9,8 @@
 
 Functions
 ---------
+frechet_mean
+    Fréchet mean of SPD matrices under the AIRM via Karcher flow.
 karcher_mean_iteration
     Single iteration of the Karcher (Fréchet) mean algorithm.
 spd_centering
@@ -26,7 +28,7 @@
 :class:`~spd_learn.modules.SPDBatchNormMeanVar` : Full Riemannian batch normalization.
 """
 
-from typing import Tuple, Union
+from typing import Optional, Tuple, Union
 
 import torch
 
@@ -103,6 +105,80 @@ def karcher_mean_iteration(
     return new_mean
 
 
+def frechet_mean(
+    X: torch.Tensor,
+    max_iter: int = 1,
+    weights: Optional[torch.Tensor] = None,
+    return_distances: bool = False,
+) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+    r"""Fréchet mean of SPD matrices under the AIRM via Karcher flow.
+
+    Computes the minimizer of the sum of squared geodesic distances:
+
+    .. math::
+
+        \bar{X} = \arg\min_{G \in \mathcal{S}_{++}^n}
+        \sum_{i=1}^{N} w_i \, d_{\text{AIRM}}^2(G, X_i)
+
+    using iterative Karcher flow initialized from the (weighted) Euclidean mean.
+
+    Parameters
+    ----------
+    X : torch.Tensor
+        Batch of SPD matrices with shape ``(batch_size, ..., n, n)``.
+    max_iter : int, default=1
+        Number of Karcher flow iterations. A single iteration is often
+        sufficient for batch normalization; use more (e.g. 50) when a
+        high-accuracy mean is needed.
+    weights : torch.Tensor, optional
+        Per-sample weights with shape broadcastable to ``X``. When ``None``,
+        uniform weights ``1/N`` are used.
+    return_distances : bool, default=False
+        If True, also returns the geodesic distances from each sample to
+        the mean.
+
+    Returns
+    -------
+    mean : torch.Tensor
+        Fréchet mean with shape ``(1, ..., n, n)``.
+    distances : torch.Tensor
+        Only returned when ``return_distances=True``. Geodesic distances
+        from each sample to the mean, with shape ``(batch_size, ...)``.
+
+    See Also
+    --------
+    :func:`karcher_mean_iteration` : Single Karcher step (lower-level).
+    :func:`~spd_learn.functional.airm_distance` : Pairwise AIRM distance.
+
+    References
+    ----------
+    See :cite:p:`pennec2006riemannian` for details on Karcher mean computation.
+    """
+    batch = X.detach()
+
+    if weights is None:
+        mean = batch.mean(dim=0, keepdim=True)
+    else:
+        mean = (batch * weights).sum(dim=0, keepdim=True)
+
+    for _ in range(max_iter):
+        mean_sqrt, mean_invsqrt = matrix_sqrt_inv.apply(mean)
+        X_tangent = matrix_log.apply(mean_invsqrt @ batch @ mean_invsqrt)
+        if weights is None:
+            mean_tangent = X_tangent.mean(dim=0, keepdim=True)
+        else:
+            mean_tangent = (X_tangent * weights).sum(dim=0, keepdim=True)
+        mean = mean_sqrt @ matrix_exp.apply(mean_tangent) @ mean_sqrt
+
+    if return_distances:
+        mean_sqrt, mean_invsqrt = matrix_sqrt_inv.apply(mean)
+        X_tangent = matrix_log.apply(mean_invsqrt @ batch @ mean_invsqrt)
+        distances = torch.norm(X_tangent, p="fro", dim=(-2, -1))
+        return mean, distances
+
+    return mean
+
+
 def spd_centering(
     X: torch.Tensor,
     mean_invsqrt: torch.Tensor,
@@ -339,6 +415,7 @@ def lie_group_variance(
 
 
 __all__ = [
+    "frechet_mean",
     "karcher_mean_iteration",
     "lie_group_variance",
     "spd_centering",

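The ``weights`` parameter is new relative to the tutorial helper this replaces; a short sketch of the weighted form (the weight values here are arbitrary placeholders):

    import torch
    from spd_learn.functional import frechet_mean

    A = torch.randn(16, 4, 4, dtype=torch.float64)
    X = A @ A.transpose(-1, -2) + 1e-3 * torch.eye(4, dtype=torch.float64)

    # Per-sample weights, shape (batch_size, 1, 1), normalized to sum to 1
    # so they broadcast against X inside the weighted tangent-space average.
    w = torch.rand(16, 1, 1, dtype=torch.float64)
    w = w / w.sum()

    weighted_mean = frechet_mean(X, max_iter=50, weights=w)  # shape (1, 4, 4)
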
spd_learn/modules/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 from .bilinear import BiMap, BiMapIncreaseDim
 from .covariance import CovLayer
 from .dropout import SPDDropout
-from .LieBN import SPDBatchNormLie
+from .liebn import SPDBatchNormLie
 from .manifold import PositiveDefiniteScalar, SymmetricPositiveDefinite
 from .modeig import ExpEig, LogEig, ReEig
 from .regularize import Shrinkage, TraceNorm

spd_learn/modules/batchnorm.py

Lines changed: 3 additions & 12 deletions
@@ -13,7 +13,7 @@
     matrix_sqrt,
 )
 from ..functional.batchnorm import (
-    karcher_mean_iteration,
+    frechet_mean,
     spd_centering,
     spd_rebiasing,
     tangent_space_variance,
@@ -194,10 +194,7 @@ def forward(self, input):
 
         """
        if self.training:
-            mean = input.mean(dim=0, keepdim=True)
-            if input.shape[0] > 1:
-                for _ in range(self.n_iter):
-                    mean = karcher_mean_iteration(input, mean)
+            mean = frechet_mean(input, max_iter=self.n_iter)
             with torch.no_grad():
                 self.running_mean = airm_geodesic(
                     self.running_mean, mean, self.momentum
@@ -478,14 +475,8 @@ def forward(self, input):
             Normalized tensor of the same shape as the input.
 
         """
-        n_samples = input.shape[0]
         if self.training:
-            # Kobler et al. SPDMBN/SPDBN: estimate batch Fréchet mean via Karcher step
-            batch_mean = input.mean(dim=0, keepdim=True)
-            if n_samples > 1:
-                for _ in range(self.n_iter):
-                    # Kobler et al. (Eq. 4): P2 L132-145; Karcher flow note: P2 L163-165
-                    batch_mean = karcher_mean_iteration(input, batch_mean)
+            batch_mean = frechet_mean(input, max_iter=self.n_iter)
 
             # Scalar dispersion: mean squared Frobenius norm of log at the mean (a single scalar, not variance matrix)
             mean_inv_sqrt = matrix_inv_sqrt.apply(batch_mean)
spd_learn/modules/{LieBN.py → liebn.py}

Lines changed: 6 additions & 15 deletions
@@ -26,7 +26,7 @@
     matrix_sqrt,
 )
 from ..functional.batchnorm import (
-    karcher_mean_iteration,
+    frechet_mean,
     lie_group_variance,
     spd_centering,
     spd_cholesky_congruence,
@@ -63,9 +63,8 @@ class SPDBatchNormLie(nn.Module):
         Running statistics momentum.
     eps : float, default=1e-5
         Numerical stability constant for variance normalization.
-    karcher_steps : int, default=1
-        Number of Karcher flow iterations used by the AIM mean. Iterations
-        stop early when the tangent update norm falls below ``1e-5``.
+    n_iter : int, default=1
+        Number of Karcher flow iterations used by the AIM mean.
     congruence : {"cholesky", "eig"}, default="cholesky"
         Implementation of the AIM congruence action (centering/biasing).
         ``"cholesky"`` uses the Cholesky factor :math:`L` of :math:`P` to
@@ -91,7 +90,7 @@ def __init__(
         beta=0.0,
         momentum=0.1,
         eps=1e-5,
-        karcher_steps=1,
+        n_iter=1,
         congruence="cholesky",
         device=None,
         dtype=None,
@@ -113,7 +112,7 @@ def __init__(
         self.beta = beta
         self.momentum = momentum
         self.eps = eps
-        self.karcher_steps = karcher_steps
+        self.n_iter = n_iter
         self.congruence = congruence
 
         self.bias = nn.Parameter(torch.empty(1, n, n, device=device, dtype=dtype))
@@ -182,15 +181,7 @@ def _translate(self, X, P, inverse=False):
     def _frechet_mean(self, X_def):
         """Fréchet mean in the deformed space."""
         if self.metric == "AIM":
-            batch = X_def.detach()
-            mean = batch.mean(dim=0, keepdim=True)
-            for _ in range(self.karcher_steps):
-                mean, mean_tangent = karcher_mean_iteration(
-                    batch, mean, detach=True, return_tangent=True
-                )
-                if mean_tangent.norm(dim=(-1, -2)).max() < 1e-5:
-                    break
-            return mean
+            return frechet_mean(X_def, max_iter=self.n_iter)
         return X_def.detach().mean(dim=0, keepdim=True)
 
     def _scale(self, X, var):
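
After the rename, callers pass ``n_iter`` where they previously passed ``karcher_steps``; a usage sketch mirroring the tests (dimension, metric value, and data are placeholders — ``"AIM"`` is the metric branch shown in this diff):

    import torch
    from spd_learn.modules import SPDBatchNormLie

    layer = SPDBatchNormLie(8, metric="AIM", n_iter=64, dtype=torch.float64)
    layer.train()

    A = torch.randn(32, 8, 8, dtype=torch.float64)
    X = A @ A.transpose(-1, -2) + 1e-3 * torch.eye(8, dtype=torch.float64)
    out = layer(X)  # normalized batch with the same shape as X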

tests/test_liebn.py

Lines changed: 6 additions & 8 deletions
@@ -97,7 +97,7 @@ def test_post_normalization_mean(simulated_data, metric, congruence):
     """
     x, _, ndim, nobs = simulated_data
     layer = SPDBatchNormLie(
-        ndim, metric=metric, karcher_steps=64, congruence=congruence, dtype=DTYPE
+        ndim, metric=metric, n_iter=64, congruence=congruence, dtype=DTYPE
     )
     layer.train()
 
@@ -135,7 +135,7 @@ def test_post_normalization_variance(simulated_data, metric):
     this is close to 1.0.
     """
     x, _, ndim, nobs = simulated_data
-    layer = SPDBatchNormLie(ndim, metric=metric, karcher_steps=64, dtype=DTYPE)
+    layer = SPDBatchNormLie(ndim, metric=metric, n_iter=64, dtype=DTYPE)
     layer.train()
 
     with torch.no_grad():
@@ -170,9 +170,7 @@ def test_post_normalization_variance(simulated_data, metric):
 def test_running_stats_single_batch(simulated_data, metric):
     """With momentum=1.0, running stats should match batch stats exactly."""
     x, _, ndim, nobs = simulated_data
-    layer = SPDBatchNormLie(
-        ndim, metric=metric, momentum=1.0, karcher_steps=64, dtype=DTYPE
-    )
+    layer = SPDBatchNormLie(ndim, metric=metric, momentum=1.0, n_iter=64, dtype=DTYPE)
     layer.train()
 
     with torch.no_grad():
@@ -216,12 +214,12 @@ def test_running_stats_single_batch(simulated_data, metric):
 def test_running_stats_convergence(simulated_data, metric):
     """Running stats should converge to population stats over mini-batches."""
     x, _, ndim, nobs = simulated_data
-    layer = SPDBatchNormLie(ndim, metric=metric, karcher_steps=1, dtype=DTYPE)
+    layer = SPDBatchNormLie(ndim, metric=metric, n_iter=1, dtype=DTYPE)
 
     # Full-batch reference statistics (high precision)
     with torch.no_grad():
         ref_layer = SPDBatchNormLie(
-            ndim, metric=metric, momentum=1.0, karcher_steps=64, dtype=DTYPE
+            ndim, metric=metric, momentum=1.0, n_iter=64, dtype=DTYPE
         )
         ref_layer.train()
         ref_layer(x)
@@ -258,7 +256,7 @@ def test_gradient_flow(simulated_data, metric):
     # Use a small batch to keep computation fast
     x_small = x[:8].clone().requires_grad_(True)
 
-    layer = SPDBatchNormLie(ndim, metric=metric, karcher_steps=1, dtype=DTYPE)
+    layer = SPDBatchNormLie(ndim, metric=metric, n_iter=1, dtype=DTYPE)
     layer.train()
 
     output = layer(x_small)