Commit 5a5080a

Even more refactoring.

1 parent aa9bbf6 commit 5a5080a

20 files changed: 267 additions & 432 deletions

openequivariance/openequivariance/_torch/CUETensorProduct.py

Lines changed: 2 additions & 54 deletions
@@ -6,13 +6,12 @@

 from openequivariance.core.TensorProductBase import TensorProductBase
 from openequivariance.core.e3nn_lite import TPProblem
-from openequivariance.benchmark.logging import getLogger
-from openequivariance.benchmark.tpp_creation_utils import (
+from openequivariance.core.logging import getLogger
+from openequivariance.benchmark.problems import (
     ChannelwiseTPP,
     FullyConnectedTPProblem,
     SingleInstruction,
 )
-from openequivariance.core.utils import count_cg_non_zero

 os.environ["CUEQUIVARIANCE_OPS_USE_JIT"] = "1"

@@ -235,57 +234,6 @@ def benchmark_backward(
             kernel_names=self.kernel_names,
         )

-    # Copied over from loop unroller to match arithmetic intensity on roofline plots
-    def calculate_flops_forward(self, batch_size: int) -> dict:
-        if self.is_uvw:
-            return super().calculate_flops_forward(batch_size)
-        else:
-            tpp = self.config
-            flop_count = {
-                "CG_decomposition": 0,
-                "linear_combination": 0,
-                "outer_products": 0,
-            }
-            for ins in tpp.instructions:
-                l1, l2, l3 = (
-                    tpp.irreps_in1[ins.i_in1].ir.l,
-                    tpp.irreps_in2[ins.i_in2].ir.l,
-                    tpp.irreps_out[ins.i_out].ir.l,
-                )
-                flop_count["CG_decomposition"] += count_cg_non_zero(l1, l2, l3) * (
-                    ins.path_shape[0] * ins.path_shape[1]
-                )
-                flop_count["linear_combination"] += (
-                    (2 * l3 + 1) * np.prod(ins.path_shape) if ins.has_weight else 0
-                )
-
-            flop_count["CG_decomposition"] *= 3 * batch_size
-            flop_count["linear_combination"] *= (
-                batch_size  # Weights do not require FMA here
-            )
-            flop_count["total"] = sum(flop_count.values())
-            return flop_count
-
-    def calculate_flops_backward(self, batch_size: int) -> dict:
-        if self.is_uvw:
-            return super().calculate_flops_backward(batch_size)
-        else:
-            tpp = self.config
-            flop_count = {"backward": 0}
-            for ins in tpp.instructions:
-                l1, l2, l3 = (
-                    tpp.irreps_in1[ins.i_in1].ir.l,
-                    tpp.irreps_in2[ins.i_in2].ir.l,
-                    tpp.irreps_out[ins.i_out].ir.l,
-                )
-                flop_count["backward"] += count_cg_non_zero(l1, l2, l3) * (
-                    ins.path_shape[0] * ins.path_shape[1]
-                )
-
-            flop_count["backward"] *= 9 * batch_size
-            flop_count["total"] = sum(flop_count.values())
-            return flop_count
-

     @staticmethod
     def name():
         return "CUETensorProduct"

openequivariance/openequivariance/_torch/E3NNTensorProduct.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@

 from openequivariance.core.TensorProductBase import TensorProductBase
 from openequivariance.core.e3nn_lite import TPProblem
-from openequivariance.benchmark.logging import getLogger
+from openequivariance.core.logging import getLogger
 from openequivariance._torch.NPDoubleBackwardMixin import NumpyDoubleBackwardMixin

 TORCH_COMPILE_AUTOTUNING_DIR = pathlib.Path("triton_autotuning")
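This file and several below make the same mechanical change: getLogger (plus bcolors, where used) now comes from openequivariance.core.logging instead of openequivariance.benchmark.logging, so core and _torch modules no longer import from the benchmark package. A sketch of the resulting call-site pattern, assuming getLogger keeps returning a standard logging.Logger as the unchanged call sites suggest:

from openequivariance.core.logging import getLogger

logger = getLogger()
logger.info("core no longer depends on the benchmark package for logging")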

openequivariance/openequivariance/_torch/TensorProduct.py

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 from openequivariance._torch import extlib
 import torch
 from openequivariance.core.utils import torch_to_oeq_dtype, dtype_to_enum
-from openequivariance.benchmark.logging import getLogger
+from openequivariance.core.logging import getLogger
 from openequivariance._torch.utils import (
     reorder_torch,
     string_to_tensor,

openequivariance/openequivariance/_torch/TensorProductConv.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@
     enum_to_torch_dtype,
 )

-from openequivariance.benchmark.logging import getLogger
+from openequivariance.core.logging import getLogger
 from openequivariance._torch.NPDoubleBackwardMixin import NumpyDoubleBackwardMixinConv

 logger = getLogger()

openequivariance/openequivariance/_torch/extlib/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@

 import torch

-from openequivariance.benchmark.logging import getLogger
+from openequivariance.core.logging import getLogger

 oeq_root = str(Path(__file__).parent.parent.parent)

openequivariance/openequivariance/benchmark/ConvBenchmarkSuite.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
     correctness_double_backward_conv,
     correctness_forward_conv,
 )
-from openequivariance.benchmark.logging import getLogger
+from openequivariance.core.logging import getLogger
 from openequivariance.core.ConvolutionBase import CoordGraph
 from openequivariance.benchmark.benchmark_utils import NpEncoder

openequivariance/openequivariance/benchmark/TestBenchmarkSuite.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 from openequivariance._torch.extlib import DeviceProp
 from openequivariance.core.TensorProductBase import TensorProductBase

-from openequivariance.benchmark.logging import getLogger, bcolors
+from openequivariance.core.logging import getLogger, bcolors
 from openequivariance.core.e3nn_lite import TPProblem
 from openequivariance.benchmark.correctness import (
     correctness_forward,

openequivariance/openequivariance/benchmark/benchmark_utils.py

Lines changed: 19 additions & 66 deletions
@@ -1,21 +1,22 @@
 import json
 import numpy as np

-from openequivariance.benchmark.random_buffer_utils import (
+from openequivariance.benchmark.test_buffers import (
     get_random_buffers_forward,
     get_random_buffers_backward,
     get_random_buffers_double_backward,
 )
-from openequivariance.benchmark.perf_metrics_utils import (
-    calculate_minimum_flops_forward,
-    calculate_minimum_memory_streamed_forward,
-    calculate_minimum_memory_streamed_backward,
+from openequivariance.benchmark.metrics import (
+    flops_forward,
+    flops_backward,
+    memory_streamed_forward,
+    memory_streamed_backward,
 )
 from openequivariance.core.utils import calculate_total_nnz
 from openequivariance.core.TensorProductBase import TensorProductBase
 from openequivariance.core.e3nn_lite import TPProblem
 from openequivariance._torch.CUETensorProduct import CUETensorProduct
-from openequivariance.benchmark.logging import getLogger, bcolors
+from openequivariance.core.logging import getLogger, bcolors

 logger = getLogger()

@@ -110,24 +111,12 @@ def benchmark_forward(
     time_millis = np.full(shape=num_iter, fill_value=-1)

     # FLOPS
-    try:
-        flops = tp.calculate_flops_forward(batch_size=batch_size)
-    except NotImplementedError:
-        logger.warning(
-            "Actual flop count not calculated, so minimum values are being used"
-        )
-        flops = calculate_minimum_flops_forward(problem, batch_size=batch_size)
+    flops = flops_forward(problem, batch_size=batch_size)

     # DATA
-    try:
-        memory_streamed = tp.calculate_memory_streamed_backward(batch_size=batch_size)
-    except NotImplementedError:
-        logger.warning(
-            "Actual memory streamed not calculated, so minimum values are being used"
-        )
-        memory_streamed = calculate_minimum_memory_streamed_forward(
-            problem, batch_size=batch_size
-        )
+    memory_streamed = memory_streamed_forward(
+        problem, batch_size=batch_size
+    )

     result |= calculate_performance_statistics(
         problem=problem,
@@ -181,29 +170,11 @@ def benchmark_backward(
     )
     time_millis = np.full(shape=num_iter, fill_value=-1)

-    try:
-        flops = tp.calculate_flops_backward(batch_size=batch_size)
-    except NotImplementedError:
-        try:
-            flops = calculate_minimum_flops_forward(tpp=problem, batch_size=batch_size)
-            logger.warning(
-                "Actual flops was not calculated, so minimum values are being used"
-            )
-        except NotImplementedError:
-            logger.warning(
-                "Minimum Backwards flops calculations are not implemented, -1 is a placeholder"
-            )
-            flops = {"total": -1}
+    flops = flops_backward(tpp=problem, batch_size=batch_size)

-    try:
-        memory_streamed = tp.calculate_memory_streamed_backward(batch_size=batch_size)
-    except NotImplementedError:
-        logger.warning(
-            "Actual memory streamed was not calculated, so minimum values are being"
-        )
-        memory_streamed = calculate_minimum_memory_streamed_backward(
-            tpp=problem, batch_size=batch_size
-        )
+    memory_streamed = memory_streamed_backward(
+        tpp=problem, batch_size=batch_size
+    )

     result |= calculate_performance_statistics(
         problem=problem,
@@ -258,29 +229,11 @@ def benchmark_double_backward(
     )
     time_millis = np.full(shape=num_iter, fill_value=-1)

-    try:
-        flops = tp.calculate_flops_backward(batch_size=batch_size)
-    except NotImplementedError:
-        try:
-            flops = calculate_minimum_flops_forward(tpp=problem, batch_size=batch_size)
-            logger.warning(
-                "Actual flops was not calculated, so minimum values are being used"
-            )
-        except NotImplementedError:
-            logger.warning(
-                "Minimum Backwards flops calculations are not implemented, -1 is a placeholder"
-            )
-            flops = {"total": -1}
+    flops = flops_backward(tpp=problem, batch_size=batch_size)

-    try:
-        memory_streamed = tp.calculate_memory_streamed_backward(batch_size=batch_size)
-    except NotImplementedError:
-        logger.warning(
-            "Actual memory streamed was not calculated, so minimum values are being"
-        )
-        memory_streamed = calculate_minimum_memory_streamed_backward(
-            tpp=problem, batch_size=batch_size
-        )
+    memory_streamed = memory_streamed_backward(
+        tpp=problem, batch_size=batch_size
+    )

     result |= calculate_performance_statistics(
         problem=problem,
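The net effect in benchmark_utils.py: FLOP and byte estimates are now pure functions of the TPProblem, so the per-implementation try/except probing, the fallback warnings, and the {"total": -1} placeholder all disappear. A condensed sketch of the new metric path for a roofline point, assuming the memory dictionaries expose a "total" key alongside the per-source entries (the FLOP dictionaries do, per metrics.py below):

def roofline_point(problem, batch_size):
    # Estimates depend only on the problem description, never on the kernel.
    flops = flops_backward(tpp=problem, batch_size=batch_size)
    mem = memory_streamed_backward(tpp=problem, batch_size=batch_size)
    # Arithmetic intensity (FLOPs per byte); derived here for illustration,
    # not computed in this diff.
    return flops["total"], flops["total"] / mem["total"]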

openequivariance/openequivariance/benchmark/correctness.py

Lines changed: 2 additions & 2 deletions
@@ -5,8 +5,8 @@
 import numpy.linalg as la

 from openequivariance._torch.CUETensorProduct import CUETensorProduct
-from openequivariance.benchmark.logging import bcolors, getLogger
-from openequivariance.benchmark.random_buffer_utils import (
+from openequivariance.core.logging import bcolors, getLogger
+from openequivariance.benchmark.test_buffers import (
     get_random_buffers_backward_conv,
     get_random_buffers_backward,
     get_random_buffers_double_backward_conv,
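Here the rename openequivariance.benchmark.random_buffer_utils -> openequivariance.benchmark.test_buffers surfaces again; only the module path changes, the helper names are untouched. A call-site sketch in which the argument and return shapes are assumptions for illustration (this diff shows only the import):

from openequivariance.benchmark.test_buffers import get_random_buffers_forward

# Hypothetical usage; the commit renames the module, not the functions.
buffers = get_random_buffers_forward(problem, batch_size=128)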

openequivariance/openequivariance/benchmark/perf_metrics_utils.py renamed to openequivariance/openequivariance/benchmark/metrics.py

Lines changed: 30 additions & 28 deletions
@@ -1,18 +1,15 @@
-import math
-
 from openequivariance.core.utils import (
     count_cg_non_zero,
-    sparse_outer_product_work,
 )

-from openequivariance.core.e3nn_lite import TPProblem, wigner_3j
-from openequivariance.benchmark.logging import getLogger
+from openequivariance.core.e3nn_lite import TPProblem
+from openequivariance.core.logging import getLogger
 import numpy as np

 logger = getLogger()


-def calculate_minimum_memory_streamed_forward(
+def memory_streamed_forward(
     tpp: TPProblem, batch_size: int
 ) -> dict[str, int]:
     """
@@ -31,7 +28,7 @@ def calculate_minimum_memory_streamed_forward(
     return data_size


-def calculate_minimum_memory_streamed_backward(tpp: TPProblem, batch_size: int) -> dict:
+def memory_streamed_backward(tpp: TPProblem, batch_size: int) -> dict:
     """
     This represents an absolute minimum amount of memory that could be streamed on an ideal machine
     It returns the number of bytes streamed total and from each source
@@ -51,46 +48,51 @@ def calculate_minimum_memory_streamed_backward(tpp: TPProblem, batch_size: int)
     return data_size


-def calculate_minimum_flops_forward(tpp: TPProblem, batch_size: int) -> dict:
+def flops_forward(tpp: TPProblem, batch_size: int) -> dict:
     """
-    This is not actually calcuating the minimum value.
-    Ideally you might share the outer product values between two inputs across multiple inputs.
-    This is assuming that you form those values and reuse them once per CG decomp.
+    Default FLOP estimate aligned with LoopUnrollTP's forward FLOP accounting.
     """
-    logger.warning("Minimum flops Calculation is not the true minimum")
-    flops_count = {}
-    flops_count["outer_products"] = 0
-    flops_count["CG_decomposition"] = 0
-    flops_count["linear_combination"] = 0
+    flops_count = {"CG_decomposition": 0, "linear_combination": 0, "outer_products": 0}
+
     for ins in tpp.instructions:  # type : Instruction
         l1, l2, l3 = (
             tpp.irreps_in1[ins.i_in1].ir.l,
             tpp.irreps_in2[ins.i_in2].ir.l,
             tpp.irreps_out[ins.i_out].ir.l,
         )

-        flops_count["outer_products"] += sparse_outer_product_work(
-            wigner_3j(l1, l2, l3)
-        )
         flops_count["CG_decomposition"] += count_cg_non_zero(l1, l2, l3) * (
             ins.path_shape[0] * ins.path_shape[1]
         )
         flops_count["linear_combination"] += (
-            (2 * l3 + 1) * math.prod(ins.path_shape) if ins.has_weight else 0
+            (2 * l3 + 1) * np.prod(ins.path_shape) if ins.has_weight else 0
         )

-    flops_count["outer_products"] *= batch_size
-    flops_count["CG_decomposition"] *= 2 * batch_size
-    flops_count["linear_combination"] *= 2 * batch_size
+    flops_count["CG_decomposition"] *= 3 * batch_size
+    flops_count["linear_combination"] *= (
+        batch_size  # Weights do not require FMA here
+    )

     flops_count["total"] = sum(flops_count.values())
     return flops_count


-def calculate_minimum_flops_backward(tpp: TPProblem, batch_size: int) -> dict:
+def flops_backward(tpp: TPProblem, batch_size: int) -> dict:
     """
-    This is not actually calcuating the minumum value.
-    Ideally you might share the outer product values between two inputs across multiple inputs.
-    This is assuming that you form those values and reuse them once per CG decomp.
+    Default FLOP estimate aligned with LoopUnrollTP's backward FLOP accounting.
     """
-    raise NotImplementedError("this needs to be implemented properly")
+    flops_count = {"backward": 0}
+
+    for ins in tpp.instructions:  # type : Instruction
+        l1, l2, l3 = (
+            tpp.irreps_in1[ins.i_in1].ir.l,
+            tpp.irreps_in2[ins.i_in2].ir.l,
+            tpp.irreps_out[ins.i_out].ir.l,
+        )
+        flops_count["backward"] += count_cg_non_zero(l1, l2, l3) * (
+            ins.path_shape[0] * ins.path_shape[1]
+        )
+
+    flops_count["backward"] *= 9 * batch_size
+    flops_count["total"] = sum(flops_count.values())
+    return flops_count
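A quick usage sketch of the renamed estimators, assuming a TPProblem instance problem built elsewhere (construction is not part of this diff):

from openequivariance.benchmark.metrics import flops_forward, flops_backward

fwd = flops_forward(problem, batch_size=1000)
bwd = flops_backward(tpp=problem, batch_size=1000)

# From the code above: the backward CG term is scaled by 9 * batch_size versus
# 3 * batch_size forward, so bwd["backward"] == 3 * fwd["CG_decomposition"].
print(fwd["total"], bwd["total"])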
