
Commit 379fd28

Convolution test is failing.

1 parent: 85e988f

5 files changed: 130 additions & 33 deletions


openequivariance/openequivariance/core/ComputationSchedule.py

Lines changed: 4 additions & 5 deletions
@@ -29,6 +29,10 @@ def __init__(self, src_irreps, src_views, idxs):
         src_ranges = [src_irreps.slices()[idx] for idx in self.src_dst_map]
         dst_ranges = [self.dst_irreps.slices()[i] for i in self.src_dst_map.values()]

+        self.storeback_procedure = {idx: "write" for idx in self.idxs}
+        self.persist_load = False
+        self.persist_store = False
+
         if src_views[0].layout == "ir_mul":
             return

@@ -55,11 +59,6 @@ def __init__(self, src_irreps, src_views, idxs):
             self.dst_ranges.append(slice(dst_start, dst_end))
         self.copy_ranges = list(zip(self.src_ranges, self.dst_ranges))

-        self.persist_load = False
-        self.persist_store = False
-
-        self.storeback_procedure = {idx: "write" for idx in self.idxs}
-

 class CGTensor:
     def __init__(self, l1, l2, l3, normalization_factor, dtype):
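
Moving these three assignments above the early return matters: `__init__` bails out for the "ir_mul" layout, so anything assigned below the `return` never exists on instances that take that branch. A minimal sketch of the hazard (hypothetical class, not the real schedule code):

```python
# Hypothetical illustration of the bug class this hunk addresses: attributes
# assigned after an early `return` in __init__ are skipped entirely on the
# early-exit branch, so later reads raise AttributeError.
class TransposeMap:
    def __init__(self, layout):
        if layout == "ir_mul":
            return  # early exit: nothing below runs for this layout
        self.storeback_procedure = {}

try:
    TransposeMap("ir_mul").storeback_procedure
except AttributeError as err:
    print(err)  # 'TransposeMap' object has no attribute 'storeback_procedure'
```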

openequivariance/openequivariance/core/ConvolutionBase.py

Lines changed: 102 additions & 17 deletions
@@ -1,15 +1,16 @@
 import copy
+
 import numpy as np
+
+from openequivariance.benchmark.correctness_utils import check_similiarity
+from openequivariance.benchmark.logging_utils import bcolors, getLogger
 from openequivariance.benchmark.random_buffer_utils import (
-    get_random_buffers_forward_conv,
     get_random_buffers_backward_conv,
     get_random_buffers_double_backward_conv,
+    get_random_buffers_forward_conv,
 )
-
-from openequivariance.benchmark.logging_utils import getLogger, bcolors
-from openequivariance.benchmark.correctness_utils import check_similiarity
 from openequivariance.core.e3nn_lite import wigner_3j
-from openequivariance.core.utils import benchmark
+from openequivariance.core.utils import IrrepLayoutUtils, benchmark

 logger = getLogger()

@@ -143,6 +144,13 @@ def test_correctness_forward(
         check_reproducible=True,
         high_precision_ref=False,
     ):
+        def maybe_transpose_input_for_test_impl(x, irreps):
+            if self.config.layout == "ir_mul":
+                return IrrepLayoutUtils.transpose_irrep_layout(
+                    x, irreps, "mul_ir", "ir_mul"
+                )
+            return x
+
         if reference_implementation is None:
             from openequivariance._torch.E3NNConv import E3NNConv

@@ -186,13 +194,22 @@ def test_correctness_forward(

         test_out = out.copy()
         self.forward_cpu(
-            L1_in=in1.copy(),
-            L2_in=in2.copy(),
+            L1_in=maybe_transpose_input_for_test_impl(
+                in1.copy(), self.config.irreps_in1
+            ),
+            L2_in=maybe_transpose_input_for_test_impl(
+                in2.copy(), self.config.irreps_in2
+            ),
             weights=weights.copy(),
             L3_out=test_out,
             graph=graph,
         )

+        if self.config.layout == "ir_mul":
+            test_out = IrrepLayoutUtils.transpose_irrep_layout(
+                test_out, self.config.irreps_out, "ir_mul", "mul_ir"
+            )
+
         for name, to_check, ground_truth in [("output", ref_out, test_out)]:
             result[name] = check_similiarity(name, to_check, ground_truth, thresh)

@@ -205,13 +222,22 @@ def test_correctness_forward(
             for i in range(num_trials):
                 repeated_run = out.copy()
                 self.forward_cpu(
-                    L1_in=in1.copy(),
-                    L2_in=in2.copy(),
+                    L1_in=maybe_transpose_input_for_test_impl(
+                        in1.copy(), self.config.irreps_in1
+                    ),
+                    L2_in=maybe_transpose_input_for_test_impl(
+                        in2.copy(), self.config.irreps_in2
+                    ),
                     weights=weights.copy(),
                     L3_out=repeated_run,
                     graph=graph,
                 )

+                if self.config.layout == "ir_mul":
+                    repeated_run = IrrepLayoutUtils.transpose_irrep_layout(
+                        repeated_run, self.config.irreps_out, "ir_mul", "mul_ir"
+                    )
+
                 for name, to_check, ground_truth in [
                     ("output", repeated_run, test_out)
                 ]:
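
Some context on the round trip above: `mul_ir` stores each irrep block multiplicity-major and `ir_mul` stores it component-major, so the test transposes inputs into the kernel's layout and transposes the kernel's output back to `mul_ir` before comparing against the reference. A minimal numpy sketch of the per-block transposition, assuming a simplified `[(mul, ir_dim), ...]` irreps description in place of the real `Irreps` object that `IrrepLayoutUtils.transpose_irrep_layout` consumes:

```python
import numpy as np

def transpose_layout(x, irreps, src, dst):
    """Per-irrep-block transpose between mul_ir and ir_mul layouts.

    Illustrative stand-in for IrrepLayoutUtils.transpose_irrep_layout;
    irreps is a list of (multiplicity, irrep_dimension) pairs.
    """
    assert {src, dst} == {"mul_ir", "ir_mul"}
    out, offset = np.empty_like(x), 0
    for mul, ir_dim in irreps:
        n = mul * ir_dim
        # mul_ir flattens a (mul, ir_dim) block row-major; ir_mul flattens
        # the (ir_dim, mul) transpose. Swapping the last two axes converts.
        shape = (mul, ir_dim) if src == "mul_ir" else (ir_dim, mul)
        block = x[..., offset : offset + n].reshape(*x.shape[:-1], *shape)
        out[..., offset : offset + n] = np.swapaxes(block, -1, -2).reshape(
            *x.shape[:-1], n
        )
        offset += n
    return out
```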
@@ -387,6 +413,13 @@ def test_correctness_backward(
         reference_implementation=None,
         high_precision_ref=False,
     ):
+        def maybe_transpose_input_for_test_impl(x, irreps):
+            if self.config.layout == "ir_mul":
+                return IrrepLayoutUtils.transpose_irrep_layout(
+                    x, irreps, "mul_ir", "ir_mul"
+                )
+            return x
+
         if reference_implementation is None:
             from openequivariance._torch.E3NNConv import E3NNConv

@@ -436,17 +469,35 @@
         test_in1_grad = in1_grad.copy()
         test_in2_grad = in2_grad.copy()

+        test_L3_grad = out_grad.copy()
+        if self.config.layout == "ir_mul":
+            test_L3_grad = IrrepLayoutUtils.transpose_irrep_layout(
+                test_L3_grad, self.config.irreps_out, "mul_ir", "ir_mul"
+            )
+
         self.backward_cpu(
-            L1_in=in1.copy(),
+            L1_in=maybe_transpose_input_for_test_impl(
+                in1.copy(), self.config.irreps_in1
+            ),
             L1_grad=test_in1_grad,
-            L2_in=in2.copy(),
+            L2_in=maybe_transpose_input_for_test_impl(
+                in2.copy(), self.config.irreps_in2
+            ),
             L2_grad=test_in2_grad,
-            L3_grad=out_grad.copy(),
+            L3_grad=test_L3_grad,
             weights=weights.copy(),
             weights_grad=test_weights_grad,
             graph=graph,
         )

+        if self.config.layout == "ir_mul":
+            test_in1_grad = IrrepLayoutUtils.transpose_irrep_layout(
+                test_in1_grad, self.config.irreps_in1, "ir_mul", "mul_ir"
+            )
+            test_in2_grad = IrrepLayoutUtils.transpose_irrep_layout(
+                test_in2_grad, self.config.irreps_in2, "ir_mul", "mul_ir"
+            )
+
         for name, to_check, ground_truth, threshold in [
             ("weight_grad", test_weights_grad, ref_weights_grad, thresh),
             ("in1_grad", test_in1_grad, ref_in1_grad, thresh),
@@ -464,6 +515,13 @@ def test_correctness_double_backward(
         reference_implementation=None,
         high_precision_ref=False,
     ):
+        def maybe_transpose_input_for_test_impl(tp, x, irreps):
+            if tp is self and tp.config.layout == "ir_mul":
+                return IrrepLayoutUtils.transpose_irrep_layout(
+                    x, irreps, "mul_ir", "ir_mul"
+                )
+            return x
+
         buffers = get_random_buffers_double_backward_conv(
             self.config, graph.node_count, graph.nnz, prng_seed
         )
@@ -500,17 +558,44 @@
                 weights_dgrad, not tp.config.shared_weights
             )

+            db_in1 = maybe_transpose_input_for_test_impl(tp, in1, tp.config.irreps_in1)
+            db_in2 = maybe_transpose_input_for_test_impl(tp, in2, tp.config.irreps_in2)
+            db_out_grad = out_grad
+            db_in1_dgrad = in1_dgrad
+            db_in2_dgrad = in2_dgrad
+            if tp is self and tp.config.layout == "ir_mul":
+                db_out_grad = IrrepLayoutUtils.transpose_irrep_layout(
+                    out_grad, tp.config.irreps_out, "mul_ir", "ir_mul"
+                )
+                db_in1_dgrad = IrrepLayoutUtils.transpose_irrep_layout(
+                    in1_dgrad, tp.config.irreps_in1, "mul_ir", "ir_mul"
+                )
+                db_in2_dgrad = IrrepLayoutUtils.transpose_irrep_layout(
+                    in2_dgrad, tp.config.irreps_in2, "mul_ir", "ir_mul"
+                )
+
             in1_grad, in2_grad, weights_grad, out_dgrad = tp.double_backward_cpu(
-                in1,
-                in2,
-                out_grad,
+                db_in1,
+                db_in2,
+                db_out_grad,
                 weights_reordered,
                 weights_dgrad_reordered,
-                in1_dgrad,
-                in2_dgrad,
+                db_in1_dgrad,
+                db_in2_dgrad,
                 graph,
             )

+            if tp is self and tp.config.layout == "ir_mul":
+                out_dgrad = IrrepLayoutUtils.transpose_irrep_layout(
+                    out_dgrad, tp.config.irreps_out, "ir_mul", "mul_ir"
+                )
+                in1_grad = IrrepLayoutUtils.transpose_irrep_layout(
+                    in1_grad, tp.config.irreps_in1, "ir_mul", "mul_ir"
+                )
+                in2_grad = IrrepLayoutUtils.transpose_irrep_layout(
+                    in2_grad, tp.config.irreps_in2, "ir_mul", "mul_ir"
+                )
+
             tensors.append(
                 (
                     out_dgrad,
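
Note the `tp is self` guard on every transposition block in this hunk: `tp` is whichever implementation runs `double_backward_cpu`, the one under test or the reference, and only the implementation under test can carry the "ir_mul" layout, so the reference path must keep its buffers in `mul_ir`.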

openequivariance/openequivariance/core/LoopUnrollConv.py

Lines changed: 8 additions & 6 deletions
@@ -1,18 +1,18 @@
-import numpy as np
 import json

-from openequivariance.core.ConvolutionBase import ConvolutionBase
+import numpy as np
+
 from openequivariance.core.ComputationSchedule import (
     ComputationSchedule,
     SMEMCapacityException,
 )
-
-from openequivariance.templates.jinja_utils import get_jinja_environment
+from openequivariance.core.ConvolutionBase import ConvolutionBase
 from openequivariance.core.utils import (
-    filter_and_analyze_problem,
     dtype_to_enum,
+    filter_and_analyze_problem,
     hash_str_64,
 )
+from openequivariance.templates.jinja_utils import get_jinja_environment


 class LoopUnrollConv(ConvolutionBase):
@@ -114,9 +114,11 @@ def generate_double_backward_schedule(warps_per_block):
             except SMEMCapacityException:
                 warp_count -= 1
                 if warp_count == 0:
-                    raise SMEMCapacityException(
+                    raise RuntimeError(
                         "Tensor product schedule generation failed, shared memory inadequate!"
                     )
+            except Exception:
+                raise

         if not deterministic:
             for segment in self.forward_schedule.segments:
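
For context, `generate_double_backward_schedule` shrinks the warp count and retries whenever the schedule overflows shared memory; this hunk changes the terminal failure from re-raising `SMEMCapacityException` to a `RuntimeError`. A minimal sketch of that backoff pattern, with a stand-in exception class and a hypothetical `build` callback:

```python
class SMEMCapacityException(Exception):
    """Stand-in for the real exception in openequivariance.core.ComputationSchedule."""

def build_schedule_with_backoff(build, max_warps):
    # Shrink the warp count until the schedule fits in shared memory;
    # surface a RuntimeError once no configuration fits, as the commit now does.
    warp_count = max_warps
    while warp_count > 0:
        try:
            return build(warp_count)
        except SMEMCapacityException:
            warp_count -= 1
    raise RuntimeError(
        "Tensor product schedule generation failed, shared memory inadequate!"
    )
```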

openequivariance/openequivariance/templates/macros.jinja

Lines changed: 5 additions & 5 deletions
@@ -91,7 +91,7 @@ Keys map to lists of tuples with (name, dtype, num_elements) of each subarray.
 {%- set dim = src_mul_ir.ir.dim %}
 {%- set mul = src_mul_ir.mul %}
 {%- for i in range(dim) %}
-ROW_OPERATION({{mul}}, {{loop_var}}, {{smem_ptr}}[{{dst_rng.start + loop_var + i * mul}} + lane_id] = {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}];)
+ROW_OPERATION({{mul}}, {{loop_var}}, {{smem_ptr}}[{{dst_rng.start + i * mul}} + {{loop_var}} + lane_id] = {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}];)
 {%- endfor %}
 {%- endfor %}
 {%- endif %}
@@ -113,7 +113,7 @@ Keys map to lists of tuples with (name, dtype, num_elements) of each subarray.
 {%- set dim = src_mul_ir.ir.dim %}
 {%- set mul = src_mul_ir.mul %}
 {%- for i in range(dim) %}
-ROW_OPERATION({{mul}}, {{loop_var}}, {{smem_ptr}}[{{dst_rng.start + loop_var + i * mul}} + lane_id] = {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}];)
+ROW_OPERATION({{mul}}, {{loop_var}}, {{smem_ptr}}[{{dst_rng.start + i * mul}} + {{loop_var}} + lane_id] = {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}];)
 {%- endfor %}
 {%- endfor %}
 {%- endif %}
@@ -144,15 +144,15 @@ Keys map to lists of tuples with (name, dtype, num_elements) of each subarray.
 {%- set mul = src_mul_ir.mul %}
 {%- if map.storeback_procedure[idx] == "write" %}
 {%- for i in range(dim) %}
-ROW_OPERATION({{mul}}, {{loop_var}}, {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}] = {{smem_ptr}}[{{dst_rng.start + loop_var + i * mul}} + lane_id];)
+ROW_OPERATION({{mul}}, {{loop_var}}, {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}] = {{smem_ptr}}[{{dst_rng.start + i * mul}} + {{loop_var}} + lane_id];)
 {%- endfor %}
 {%- elif map.storeback_procedure[idx] == "accumulate" %}
 {%- for i in range(dim) %}
-ROW_OPERATION({{mul}}, {{loop_var}}, {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}] += {{smem_ptr}}[{{dst_rng.start + loop_var + i * mul}} + lane_id];)
+ROW_OPERATION({{mul}}, {{loop_var}}, {{glb_ptr_shft}}[{{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}] += {{smem_ptr}}[{{dst_rng.start + i * mul}} + {{loop_var}} + lane_id];)
 {%- endfor %}
 {%- elif map.storeback_procedure[idx] == "atomic_accumulate" %}
 {%- for i in range(dim) %}
-ROW_OPERATION({{mul}}, {{loop_var}}, atomicAdd({{glb_ptr_shft}} + {{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}, {{smem_ptr}}[{{dst_rng.start + loop_var + i * mul}} + lane_id]);)
+ROW_OPERATION({{mul}}, {{loop_var}}, atomicAdd({{glb_ptr_shft}} + {{src_view.ir_mul_offset + i * src_view.ir_mul_stride}} + {{loop_var}}, {{smem_ptr}}[{{dst_rng.start + i * mul}} + {{loop_var}} + lane_id]);)
 {%- endfor %}
 {%- endif %}
 {%- endfor %}
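
The indexing fix in all five ROW_OPERATION lines is the same: judging from how the corrected lines emit it, `loop_var` holds the name of the generated kernel's loop variable, i.e. a string, so it cannot take part in template-time integer arithmetic inside a single `{{ ... }}` expression. The fix keeps only render-time constants (`dst_rng.start + i * mul`) inside the Jinja expression and appends `+ {{loop_var}}` as runtime index code. A small demonstration with hypothetical names and values:

```python
from jinja2 import Template

ctx = {"start": 8, "i": 1, "mul": 4, "loop_var": "j"}

# Corrected form: constants fold at render time; the loop variable is added
# in the generated code, at kernel runtime.
fixed = Template("smem[{{start + i * mul}} + {{loop_var}} + lane_id]")
print(fixed.render(**ctx))  # smem[12 + j + lane_id]

# Old form: mixing the string "j" into template-time integer arithmetic
# fails when the expression is evaluated.
broken = Template("smem[{{start + loop_var + i * mul}} + lane_id]")
try:
    broken.render(**ctx)
except TypeError as err:
    print(f"render failed: {err}")
```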

tests/conv_test.py

Lines changed: 11 additions & 0 deletions
@@ -284,6 +284,17 @@ def conv_object(self, request, problem, extra_conv_constructor_args):
         return module.to(switch_map[problem.irrep_dtype])


+class TestIrMulLayout(ConvCorrectness):
+    production_model_tpps = mace_problems()
+
+    @pytest.fixture(params=production_model_tpps, ids=lambda x: x.label, scope="class")
+    def problem(self, request, dtype):
+        problem = request.param.clone()
+        problem.irrep_dtype, problem.weight_dtype = dtype, dtype
+        problem.layout = "ir_mul"
+        return problem
+
+
 class TestTorchToSubmodule:
     """Test that TensorProductConv works as a submodule when parent's .to() is called"""
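
`TestIrMulLayout` inherits every check from `ConvCorrectness` and overrides only the `problem` fixture, so the full correctness suite reruns against the MACE problems with `layout = "ir_mul"`. A minimal sketch of this pytest fixture-override pattern (hypothetical classes, not the real suite):

```python
import pytest

class BaseSuite:
    # Not collected directly by pytest (name lacks the Test prefix); it only
    # supplies tests and a default fixture for subclasses.
    @pytest.fixture(params=["mul_ir"])
    def layout(self, request):
        return request.param

    def test_layout_is_supported(self, layout):
        assert layout in ("mul_ir", "ir_mul")

class TestIrMul(BaseSuite):
    # Overriding the fixture reparametrizes every test inherited from BaseSuite.
    @pytest.fixture(params=["ir_mul"])
    def layout(self, request):
        return request.param
```

Assuming standard pytest discovery, something like `pytest tests/conv_test.py -k TestIrMulLayout` should run only the new suite.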
