Commit 79522a1

Wrote double backward function for JAX.
1 parent 4dc31dc commit 79522a1

2 files changed: 22 additions & 4 deletions


openequivariance/openequivariance/benchmark/correctness_utils.py

Lines changed: 4 additions & 3 deletions
@@ -207,16 +207,17 @@ def correctness_double_backward(
     tensors = []
     for _, impl in enumerate([test_implementation, reference_implementation]):
         tp = instantiate_implementation(impl, problem)
+        weights_reordered = tp.reorder_weights_from_e3nn(weights, has_batch_dim=not problem.shared_weights)

         if impl == CUETensorProduct and problem.shared_weights:
-            weights = weights[np.newaxis, :]
+            weights_reordered = weights_reordered[np.newaxis, :]

-        in1_grad, in2_grad, weights_grad, out_dgrad = tp.double_backward_cpu(in1, in2, out_grad, weights, weights_dgrad, in1_dgrad, in2_dgrad)
+        in1_grad, in2_grad, weights_grad, out_dgrad = tp.double_backward_cpu(in1, in2, out_grad, weights_reordered, weights_dgrad, in1_dgrad, in2_dgrad)
         tensors.append(
             ( out_dgrad,
               in1_grad,
               in2_grad,
-              weights_grad
+              tp.reorder_weights_to_e3nn(weights_grad, has_batch_dim=not problem.shared_weights)
             ))

     for name, to_check, ground_truth in [
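
The round trip through the reorder helpers is what keeps the comparison fair: each implementation receives weights in its native layout, but the weight gradients it produces live in that same native layout and must be mapped back before they are checked against the e3nn-layout reference. A self-contained sketch of that invariant in JAX, with a fixed permutation standing in for reorder_weights_from_e3nn / reorder_weights_to_e3nn (the permutation and loss_native below are hypothetical, for illustration only):

import jax
import jax.numpy as jnp

# Hypothetical stand-ins: a fixed permutation plays the role of
# reorder_weights_from_e3nn, and its inverse plays reorder_weights_to_e3nn.
perm = jnp.array([2, 0, 1])
inv_perm = jnp.argsort(perm)

def loss_native(w_native):
    # Some scalar loss computed in the kernel's native weight layout.
    return jnp.sum(w_native ** 2 * jnp.array([1.0, 2.0, 3.0]))

w_e3nn = jnp.array([0.5, -1.0, 2.0])

# A gradient taken in the native layout ...
g_native = jax.grad(loss_native)(w_e3nn[perm])
# ... must go back through the inverse reordering before comparison.
g_e3nn = g_native[inv_perm]

assert jnp.allclose(g_e3nn, jax.grad(lambda w: loss_native(w[perm]))(w_e3nn))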

openequivariance/openequivariance/impl_jax/TensorProduct.py

Lines changed: 18 additions & 1 deletion
@@ -114,4 +114,21 @@ def backward_cpu(
         L1_grad[:] = np.asarray(L1_grad_jax)
         L2_grad[:] = np.asarray(L2_grad_jax)
         weights_grad[:] = np.asarray(weights_grad_jax)
-        weights_grad[:] = self.reorder_weights_to_e3nn(weights_grad, has_batch_dim=not self.config.shared_weights)
+        weights_grad[:] = self.reorder_weights_to_e3nn(weights_grad, has_batch_dim=not self.config.shared_weights)
+
+
+    def double_backward_cpu(self, in1, in2, out_grad, weights, weights_dgrad, in1_dgrad, in2_dgrad):
+        in1_jax = jax.numpy.asarray(in1)
+        in2_jax = jax.numpy.asarray(in2)
+        weights_jax = jax.numpy.asarray(weights)
+        out_grad_jax = jax.numpy.asarray(out_grad)
+        in1_dgrad_jax = jax.numpy.asarray(in1_dgrad)
+        in2_dgrad_jax = jax.numpy.asarray(in2_dgrad)
+        weights_dgrad_jax = jax.numpy.asarray(weights_dgrad)
+
+        in1_grad, in2_grad, weights_grad, out_dgrad = jax.vjp(
+            lambda x, y, w, g: jax.vjp(lambda a, b, c: self.forward(a, b, c), x, y, w)[1](g),
+            in1_jax, in2_jax, weights_jax, out_grad_jax
+        )[1]((in1_dgrad_jax, in2_dgrad_jax, weights_dgrad_jax))
+
+        return in1_grad, in2_grad, weights_grad, out_dgrad
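
The nested jax.vjp above is the standard double-backward pattern: the inner vjp builds the backward pass of forward, and the outer vjp differentiates that backward pass with respect to all four of its inputs (in1, in2, weights, out_grad), so four cotangents come back. A self-contained toy version of the same pattern, with a hypothetical elementwise bilinear forward standing in for the real tensor-product kernel:

import jax
import jax.numpy as jnp

def forward(x, y, w):
    # Hypothetical stand-in for the tensor-product kernel.
    return w * x * y

def backward(x, y, w, g):
    # First backward pass: VJP of forward, contracted with cotangent g.
    return jax.vjp(forward, x, y, w)[1](g)

x = jnp.array([1.0, 2.0])
y = jnp.array([3.0, 4.0])
w = jnp.array([0.5])
out_grad = jnp.array([1.0, 1.0])   # cotangent on the forward output
x_dgrad = jnp.array([0.1, 0.2])    # upstream grads on the three
y_dgrad = jnp.array([0.3, 0.4])    # outputs of the backward pass
w_dgrad = jnp.array([0.7])

# Double backward: VJP of the backward pass itself, one cotangent per
# input, including out_grad.
x_grad, y_grad, w_grad, out_dgrad = jax.vjp(
    backward, x, y, w, out_grad
)[1]((x_dgrad, y_dgrad, w_dgrad))

# Analytic check: backward returns (w*y*g, w*x*g, sum(x*y*g)), so
# out_dgrad = w*y*x_dgrad + w*x*y_dgrad + x*y*w_dgrad.
assert jnp.allclose(out_dgrad, w * y * x_dgrad + w * x * y_dgrad + x * y * w_dgrad)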
