Merge pull request #16 from timnaher/fix/cuda-device-mismatch

bruAristimunha · web-flow · commit 24e0209f252b · 2026-03-02T16:25:43.000+01:00
Fix CUDA device mismatch in WaveletConv and ledoit_wolf
diff --git a/spd_learn/functional/regularize.py b/spd_learn/functional/regularize.py
@@ -79,7 +79,8 @@ def ledoit_wolf(
     alpha = torch.sigmoid(shrinkage)
 
     return (1 - alpha)[..., None, None] * covariances + alpha[..., None, None] * (
-        mu[..., None] * shrink_mat
+        mu[..., None]
+        * shrink_mat.to(device=covariances.device, dtype=covariances.dtype)
     )
 
 
diff --git a/spd_learn/modules/wavelet.py b/spd_learn/modules/wavelet.py
@@ -134,22 +134,24 @@ def __init__(
         tmax = kernel_width_s / 2.0
         tmin = -tmax
         kernel_length = int(kernel_width_s * sfreq)
-        self.register_buffer("tt", torch.linspace(tmin, tmax, kernel_length))
+        self.register_buffer(
+            "tt", torch.linspace(tmin, tmax, kernel_length, device=device)
+        )
 
         # Convert foi_init to tensor if needed
         if isinstance(foi_init, Tensor):
-            foi_tensor = foi_init.detach().clone()
+            foi_tensor = foi_init.detach().clone().to(device=device)
         else:
-            foi_tensor = torch.tensor(foi_init)
+            foi_tensor = torch.tensor(foi_init, device=device)
 
         # Generate default fwhm_init if not provided, then convert to tensor
         if fwhm_init is None:
             # Default: FWHM decreases with frequency (negative values in log scale)
             fwhm_tensor = -foi_tensor
         elif isinstance(fwhm_init, Tensor):
-            fwhm_tensor = fwhm_init.detach().clone()
+            fwhm_tensor = fwhm_init.detach().clone().to(device=device)
         else:
-            fwhm_tensor = torch.tensor(fwhm_init)
+            fwhm_tensor = torch.tensor(fwhm_init, device=device)
 
         self.foi = nn.Parameter(foi_tensor, requires_grad=True)
         self.fwhm = nn.Parameter(fwhm_tensor, requires_grad=True)
@@ -203,4 +205,4 @@ def forward(self, X: Tensor) -> Tensor:
             n_batch, n_freqs, n_sensors, n_epochs, n_times = X_conv.shape
             X_conv = X_conv.view(n_batch, n_freqs, n_sensors, n_epochs * n_times)
 
-        return X_conv.to(device=self.device, dtype=self.dtype)
+        return X_conv.to(device=X.device, dtype=self.dtype)
diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -30,18 +30,24 @@
     ),
 }
 
+DEVICES = [
+    "cpu",
+    pytest.param(
+        "cuda",
+        marks=pytest.mark.skipif(
+            not torch.cuda.is_available(), reason="CUDA not available"
+        ),
+    ),
+]
+
 
 @pytest.mark.parametrize("model_name", model_list)
 def test_integration(model_name):
     model_class = getattr(spd_learn.models, model_name)
 
-    params = {}
+    params = {"sfreq": 125} if model_name == "Green" else {}
     if model_name == "TensorCSPNet":
-        # TensorCSPNet requires a different input shape
         x = torch.randn(2, 9, 22, 1000)
-    elif model_name == "Green":
-        params = {"sfreq": 125}
-        x = torch.randn(2, 22, 1000)
     else:
         x = torch.randn(2, 22, 1000)
 
@@ -89,20 +95,7 @@ def test_module_expose_device_dtype(module_name):
     assert layer is not None
 
 
-# Test that all parameters of the module are on the expected device.
-@pytest.mark.parametrize(
-    "device",
-    [
-        "cpu",
-        pytest.param(
-            "cuda",
-            marks=pytest.mark.skipif(
-                not torch.cuda.is_available(), reason="CUDA not available"
-            ),
-        ),
-        # pytest.param("mps", marks=pytest.mark.skipif(not torch.backends.mps.is_available(), reason="MPS not available (MAC only)"))
-    ],
-)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("module_name", module_list)
 def test_module_parameters_on_device(module_name, device):
     """Instantiate the module on the given device and verify that each parameter is located on that device."""
@@ -119,29 +112,7 @@ def test_module_parameters_on_device(module_name, device):
         )
 
 
-# Optionally, test that all submodules’ parameters are on the expected device.
-@pytest.mark.parametrize(
-    "device", ["cpu"]
-)  # if you want to test submodules only on CPU in CI, or parameterize as above
-@pytest.mark.parametrize("module_name", module_list)
-def test_module_submodules_on_device(module_name, device):
-    """Verify that for each submodule in the module, its parameters are on the correct device."""
-    module_class = getattr(spd_learn.modules, module_name)
-    dtype = torch.float32
-    mandatory_param = mandatory_parameters_per_module.get(module_name, {})
-
-    module = module_class(device=device, dtype=dtype, **mandatory_param)
-    for submodule in module.modules():
-        for name, param in submodule.named_parameters(recurse=False):
-            assert param.device.type == device, (
-                f"Submodule parameter '{name}' in {submodule} is on {param.device} but expected {device}"
-            )
-
-
-# Optionally, test that all buffers are on the expected device.
-@pytest.mark.parametrize(
-    "device", ["cpu"]
-)  # if you want to test buffers only on CPU in CI, or parameterize as above
+@pytest.mark.parametrize("device", ["cpu"])
 @pytest.mark.parametrize("module_name", module_list)
 def test_module_buffers_on_device(module_name, device):
     """Verify that all buffers in the module are on the correct device."""
@@ -156,18 +127,7 @@ def test_module_buffers_on_device(module_name, device):
         )
 
 
-@pytest.mark.parametrize(
-    "device",
-    [
-        "cpu",
-        pytest.param(
-            "cuda",
-            marks=pytest.mark.skipif(
-                not torch.cuda.is_available(), reason="CUDA not available"
-            ),
-        ),
-    ],
-)
+@pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize(
     "dtype",
     [torch.float32, torch.float64, torch.complex64, torch.complex128],
@@ -213,18 +173,69 @@ def test_module_dtype(module_name, dtype, device):
         x = torch.randn(2, 10, 1000, dtype=dtype)
         x = CovLayer(device=device, dtype=dtype)(x)
 
-        # checking if torch.linalg.eigh is available
-        if dtype == torch.float16:
-            with pytest.raises(RuntimeError):
-                with torch.no_grad():
-                    out = module(x)
-
         with torch.no_grad():
             out = module(x)
 
     assert out.dtype == dtype
 
 
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("module_name", module_list)
+def test_module_output_device(module_name, device):
+    """Run a forward pass and verify the output tensor is on the expected device."""
+    if module_name == "PositiveDefiniteScalar":
+        pytest.skip(
+            "PositiveDefiniteScalar is a scalar parametrization, not a matrix layer."
+        )
+
+    dtype = torch.float32
+    module_class = getattr(spd_learn.modules, module_name)
+    mandatory_param = mandatory_parameters_per_module.get(module_name, {})
+    module = module_class(device=device, dtype=dtype, **mandatory_param)
+
+    if module_name in ("CovLayer", "WaveletConv"):
+        x = torch.randn(2, 10, 1000, device=device, dtype=dtype)
+    elif module_name == "LogEuclideanResidual":
+        raw = torch.randn(2, 10, 1000, device=device, dtype=dtype)
+        cov = CovLayer(device=device, dtype=dtype)
+        x = cov(raw)
+        y = cov(torch.randn(2, 10, 1000, device=device, dtype=dtype))
+        with torch.no_grad():
+            out = module(x, y)
+        assert out.device.type == device, (
+            f"Output is on {out.device} but expected {device}"
+        )
+        return
+    else:
+        raw = torch.randn(2, 10, 1000, device=device, dtype=dtype)
+        x = CovLayer(device=device, dtype=dtype)(raw)
+
+    with torch.no_grad():
+        out = module(x)
+
+    assert out.device.type == device, f"Output is on {out.device} but expected {device}"
+
+
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("model_name", model_list)
+def test_integration_on_device(model_name, device):
+    """Create a model, move it to the target device, and verify output shape and device."""
+    params = {"sfreq": 125} if model_name == "Green" else {}
+    if model_name == "TensorCSPNet":
+        x = torch.randn(2, 9, 22, 1000, device=device)
+    else:
+        x = torch.randn(2, 22, 1000, device=device)
+
+    model = getattr(spd_learn.models, model_name)(n_chans=22, n_outputs=2, **params)
+    model.to(device)
+
+    with torch.no_grad():
+        out = model(x)
+
+    assert out.shape == (2, 2), f"Expected shape (2, 2) but got {out.shape}"
+    assert out.device.type == device, f"Output is on {out.device} but expected {device}"
+
+
 # Batch shapes to test broadcast compatibility
 @pytest.mark.parametrize(
     "extra_dim",

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -79,7 +79,8 @@ def ledoit_wolf(` |
| `79` | `79` | `alpha = torch.sigmoid(shrinkage)` |
| `80` | `80` |  |
| `81` | `81` | `return (1 - alpha)[..., None, None] * covariances + alpha[..., None, None] * (` |
| `82` |  | `- mu[..., None] * shrink_mat` |
|  | `82` | `+ mu[..., None]` |
|  | `83` | `+ * shrink_mat.to(device=covariances.device, dtype=covariances.dtype)` |
| `83` | `84` | `)` |
| `84` | `85` |  |
| `85` | `86` |  |