Commit 8a0094a

Added double backward CPU function to jax TP conv.
1 parent e140c07 · commit 8a0094a

3 files changed: 77 additions & 57 deletions

openequivariance/openequivariance/benchmark/random_buffer_utils.py

Lines changed: 41 additions & 0 deletions
@@ -181,3 +181,44 @@ def get_random_buffers_backward_conv(
     return in1, in2, out_grad, weights, weights_grad, in1_grad, in2_grad
 
 
+def get_random_buffers_double_backward_conv(
+    tpp: TPProblem, node_count: int, edge_count: int, prng_seed: int
+):
+    rng = np.random.default_rng(prng_seed)
+    in1 = np.array(
+        rng.uniform(size=(node_count, tpp.irreps_in1.dim)), dtype=tpp.irrep_dtype
+    )
+    in2 = np.array(
+        rng.uniform(size=(edge_count, tpp.irreps_in2.dim)), dtype=tpp.irrep_dtype
+    )
+    out_grad = np.array(
+        rng.uniform(size=(node_count, tpp.irreps_out.dim)), dtype=tpp.irrep_dtype
+    )
+
+    weights_size = (
+        tuple([tpp.weight_numel])
+        if tpp.shared_weights
+        else tuple([edge_count, tpp.weight_numel])
+    )
+
+    weights = np.array(rng.uniform(size=weights_size), dtype=tpp.irrep_dtype)
+    weights_grad = np.array(rng.uniform(size=weights_size), dtype=tpp.irrep_dtype)
+    in1_grad = np.array(
+        rng.uniform(size=(node_count, tpp.irreps_in1.dim)), dtype=tpp.irrep_dtype
+    )
+    in2_grad = np.array(
+        rng.uniform(size=(edge_count, tpp.irreps_in2.dim)), dtype=tpp.irrep_dtype
+    )
+    out_double_grad = np.array(
+        rng.uniform(size=(node_count, tpp.irreps_out.dim)), dtype=tpp.irrep_dtype
+    )
+    return (
+        in1,
+        in2,
+        out_grad,
+        weights,
+        weights_grad,
+        in1_grad,
+        in2_grad,
+        out_double_grad,
+    )
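
The helper returns eight buffers whose positional order the test harness below relies on. A minimal usage sketch, not part of this commit, assuming an existing `TPProblem` instance `tpp` plus `node_count` and `edge_count` for the graph:

```python
# Usage sketch (illustrative, not from the commit): unpack the eight
# buffers in the order the test harness expects. Positions 5-7 are
# consumed as second-order cotangents; the last buffer is discarded.
from openequivariance.benchmark.random_buffer_utils import (
    get_random_buffers_double_backward_conv,
)

(
    in1,              # node features, shape (node_count, irreps_in1.dim)
    in2,              # edge features, shape (edge_count, irreps_in2.dim)
    out_grad,         # upstream gradient of the forward output
    weights,          # TP weights, shared or per-edge
    weights_dgrad,    # cotangent fed in for the weight gradient
    in1_dgrad,        # cotangent fed in for the in1 gradient
    in2_dgrad,        # cotangent fed in for the in2 gradient
    out_double_grad,  # output-shaped buffer, unused by the harness
) = get_random_buffers_double_backward_conv(tpp, node_count, edge_count, prng_seed=0)
```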

openequivariance/openequivariance/core/ConvolutionBase.py

Lines changed: 15 additions & 56 deletions
@@ -3,6 +3,7 @@
 from openequivariance.benchmark.random_buffer_utils import (
     get_random_buffers_forward_conv,
     get_random_buffers_backward_conv,
+    get_random_buffers_double_backward_conv,
 )
 
 from openequivariance.benchmark.logging_utils import getLogger, bcolors
@@ -13,7 +14,6 @@
 
 logger = getLogger()
 
-
 def flops_data_per_tp(config, direction):
     """
     Assumes all interactions are "uvu" for now
@@ -549,20 +549,12 @@ def test_correctness_double_backward(
         reference_implementation=None,
         high_precision_ref=False,
     ):
-        global torch
-        import torch
-
-        assert self.torch_op
-        buffers = get_random_buffers_backward_conv(
-            self.config, graph.node_count, graph.nnz, prng_seed
-        )
-
-        rng = np.random.default_rng(seed=prng_seed * 2)
-        dummy_grad_value = rng.standard_normal(1)[0]
+        buffers = get_random_buffers_double_backward_conv(
+            self.config, graph.node_count, graph.nnz, prng_seed
+        )
 
         if reference_implementation is None:
             from openequivariance.impl_torch.E3NNConv import E3NNConv
-
             reference_implementation = E3NNConv
 
         reference_problem = self.config
@@ -576,63 +568,30 @@ def test_correctness_double_backward(
         result = {"thresh": thresh}
         tensors = []
         for i, tp in enumerate([self, reference_tp]):
-            in1, in2, out_grad, weights, _, _, _ = [buf.copy() for buf in buffers]
+            buffers_copy = [buf.copy() for buf in buffers]
 
             if i == 1 and high_precision_ref:
-                in1, in2, out_grad, weights, _, _, _ = [
+                buffers_copy = [
                     np.array(el, dtype=np.float64) for el in buffers
                 ]
 
-            in1_torch = torch.tensor(in1, device="cuda", requires_grad=True)
-            in2_torch = torch.tensor(in2, device="cuda", requires_grad=True)
+            in1, in2, out_grad, weights, weights_dgrad, in1_dgrad, in2_dgrad, _ = buffers_copy
 
             weights_reordered = tp.reorder_weights_from_e3nn(
                 weights, not self.config.shared_weights
             )
-
-            weights_torch = torch.tensor(
-                weights_reordered, device="cuda", requires_grad=True
+            weights_dgrad_reordered = tp.reorder_weights_from_e3nn(
+                weights_dgrad, not self.config.shared_weights
             )
 
-            torch_rows = torch.tensor(graph.rows, device="cuda")
-            torch_cols = torch.tensor(graph.cols, device="cuda")
-            torch_transpose_perm = torch.tensor(graph.transpose_perm, device="cuda")
-
-            fwd_args = [in1_torch, in2_torch, weights_torch, torch_rows, torch_cols]
-            if tp.deterministic:
-                fwd_args.append(torch_transpose_perm)
-
-            out_torch = tp.forward(*fwd_args)
-            out_grad_torch = torch.tensor(out_grad, device="cuda", requires_grad=True)
-
-            in1_grad, in2_grad, w_grad = torch.autograd.grad(
-                outputs=[out_torch],
-                inputs=[in1_torch, in2_torch, weights_torch],
-                grad_outputs=[out_grad_torch],
-                create_graph=True,
-            )
-
-            dummy = torch.norm(in1_grad) + torch.norm(in2_grad) + torch.norm(w_grad)
-            dummy_grad = torch.tensor(
-                float(dummy_grad_value), device="cuda", requires_grad=True
-            )
-            dummy.backward(
-                dummy_grad, inputs=[out_grad_torch, in1_torch, in2_torch, weights_torch]
-            )
-
-            weights_grad = weights_torch.grad.detach().cpu().numpy()
-            weights_grad = tp.reorder_weights_to_e3nn(
-                weights_grad, not self.config.shared_weights
-            )
+            in1_grad, in2_grad, weights_grad, out_dgrad = tp.double_backward_cpu(in1, in2, out_grad, weights_reordered, weights_dgrad_reordered, in1_dgrad, in2_dgrad, graph)
 
             tensors.append(
-                (
-                    out_grad_torch.grad.detach().cpu().numpy().copy(),
-                    in1_torch.grad.detach().cpu().numpy().copy(),
-                    in2_torch.grad.detach().cpu().numpy().copy(),
-                    weights_grad.copy(),
-                )
-            )
+                (out_dgrad,
+                 in1_grad,
+                 in2_grad,
+                 tp.reorder_weights_to_e3nn(weights_grad, has_batch_dim=not self.config.shared_weights),
+                ))
 
         for name, to_check, ground_truth in [
            ("output_grad", tensors[0][0], tensors[1][0]),

openequivariance/openequivariance/impl_jax/TensorProductConv.py

Lines changed: 21 additions & 1 deletion
@@ -220,4 +220,24 @@ def backward_cpu(
         L1_grad[:] = np.asarray(L1_grad_jax)
         L2_grad[:] = np.asarray(L2_grad_jax)
         weights_grad[:] = np.asarray(weights_grad_jax)
-        weights_grad[:] = self.reorder_weights_to_e3nn(weights_grad, has_batch_dim=not self.config.shared_weights)
+        weights_grad[:] = self.reorder_weights_to_e3nn(weights_grad, has_batch_dim=not self.config.shared_weights)
+
+    def double_backward_cpu(self, in1, in2, out_grad, weights, weights_dgrad, in1_dgrad, in2_dgrad, graph):
+        in1_jax = jax.numpy.asarray(in1)
+        in2_jax = jax.numpy.asarray(in2)
+        weights_jax = jax.numpy.asarray(weights)
+        out_grad_jax = jax.numpy.asarray(out_grad)
+        in1_dgrad_jax = jax.numpy.asarray(in1_dgrad)
+        in2_dgrad_jax = jax.numpy.asarray(in2_dgrad)
+        weights_dgrad_jax = jax.numpy.asarray(weights_dgrad)
+
+        rows_jax = jax.numpy.asarray(graph.rows.astype(self.idx_dtype))
+        cols_jax = jax.numpy.asarray(graph.cols.astype(self.idx_dtype))
+        sender_perm_jax = jax.numpy.asarray(graph.transpose_perm.astype(self.idx_dtype))
+
+        in1_grad, in2_grad, weights_grad, out_dgrad = jax.vjp(
+            lambda x, y, w, o: jax.vjp(lambda a, b, c: self.forward(a, b, c, rows_jax, cols_jax, sender_perm_jax), x, y, w)[1](o),
+            in1_jax, in2_jax, weights_jax, out_grad_jax
+        )[1]((in1_dgrad_jax, in2_dgrad_jax, weights_dgrad_jax))
+
+        return in1_grad, in2_grad, weights_grad, out_dgrad
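
The nested `jax.vjp` is dense: the inner `jax.vjp` builds the first backward pass (inputs and output cotangent in, input gradients out), and the outer `jax.vjp` differentiates that backward pass itself. An expanded sketch of the same pattern with the lambdas named, using a toy bilinear `forward` as a stand-in for the convolution (illustrative assumption, not the library's API):

```python
# Expanded form of the nested-vjp pattern in double_backward_cpu.
import jax
import jax.numpy as jnp

def forward(in1, in2, weights):
    return weights * in1 * in2  # toy stand-in for the TP convolution

def backward(in1, in2, weights, out_grad):
    # First backward pass: VJP of forward, evaluated at out_grad.
    _, vjp_fn = jax.vjp(forward, in1, in2, weights)
    return vjp_fn(out_grad)  # (in1_grad, in2_grad, weights_grad)

def double_backward(in1, in2, weights, out_grad,
                    in1_dgrad, in2_dgrad, weights_dgrad):
    # Second backward pass: VJP of `backward` itself. The cotangents match
    # backward's tuple output, and the result is ordered like backward's
    # positional inputs -- hence the unpack order in the commit.
    _, vjp_fn = jax.vjp(backward, in1, in2, weights, out_grad)
    return vjp_fn((in1_dgrad, in2_dgrad, weights_dgrad))

key = jax.random.PRNGKey(0)
in1, in2, w, og, d1, d2, dw = jax.random.normal(key, (7, 4))
in1_grad, in2_grad, weights_grad, out_dgrad = double_backward(
    in1, in2, w, og, d1, d2, dw
)
```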
