
Commit 593657f

Merge branch 'dev' into worktree-fix-nested-compose-map-items
2 parents 46aecf2 + 65beb58 commit 593657f

7 files changed: 237 additions & 19 deletions


monai/apps/detection/utils/anchor_utils.py

Lines changed: 1 addition & 1 deletion

@@ -253,7 +253,7 @@ def grid_anchors(self, grid_sizes: list[list[int]], strides: list[list[Tensor]])
             # compute anchor centers regarding to the image.
             # shifts_centers is [x_center, y_center] or [x_center, y_center, z_center]
             shifts_centers = [
-                torch.arange(0, size[axis], dtype=torch.int32, device=device) * stride[axis]
+                torch.arange(0, size[axis], dtype=torch.int32, device=device) * stride[axis] + stride[axis] // 2
                 for axis in range(self.spatial_dims)
             ]
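
A standalone sketch of what this centering change does (the size and stride below are hypothetical, chosen for illustration):

import torch

# Hypothetical grid axis: 4 cells, stride 8.
size, stride = 4, 8

# Before this change, anchor centers sat on cell corners:
corners = torch.arange(0, size, dtype=torch.int32) * stride               # tensor([ 0,  8, 16, 24])

# After, each center is shifted by half a stride into its cell:
centered = torch.arange(0, size, dtype=torch.int32) * stride + stride // 2  # tensor([ 4, 12, 20, 28])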

monai/apps/nuclick/transforms.py

Lines changed: 2 additions & 2 deletions

@@ -367,14 +367,14 @@ def inclusion_map(self, mask, dtype):
 
     def exclusion_map(self, others, dtype, jitter_range, drop_rate):
         point_mask = torch.zeros_like(others, dtype=dtype)
-        if np.random.choice([True, False], p=[drop_rate, 1 - drop_rate]):
+        if self.R.choice([True, False], p=[drop_rate, 1 - drop_rate]):
             return point_mask
 
         max_x = point_mask.shape[0] - 1
         max_y = point_mask.shape[1] - 1
         stats = measure.regionprops(convert_to_numpy(others))
         for stat in stats:
-            if np.random.choice([True, False], p=[drop_rate, 1 - drop_rate]):
+            if self.R.choice([True, False], p=[drop_rate, 1 - drop_rate]):
                 continue
 
             # random jitter
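
Why `self.R` rather than `np.random`: MONAI's randomizable transforms draw from a per-transform `np.random.RandomState` (exposed as `self.R`) that `set_random_state` can seed, so draws are reproducible and independent of the global NumPy state. A minimal sketch, using a hypothetical `Randomizable` subclass:

import numpy as np
from monai.transforms import Randomizable

class DropCoin(Randomizable):
    """Hypothetical transform: returns True with probability drop_rate."""

    def __call__(self, drop_rate: float = 0.5) -> bool:
        # self.R is a np.random.RandomState owned by this transform instance.
        return bool(self.R.choice([True, False], p=[drop_rate, 1 - drop_rate]))

t = DropCoin()
t.set_random_state(seed=0)
first = [t() for _ in range(5)]
t.set_random_state(seed=0)
assert [t() for _ in range(5)] == first  # same seed, same draws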

monai/losses/image_dissimilarity.py

Lines changed: 8 additions & 4 deletions

@@ -111,14 +111,16 @@ def __init__(
             raise ValueError(f"kernel_size must be odd, got {self.kernel_size}")
 
         _kernel = look_up_option(kernel_type, kernel_dict)
-        self.kernel = _kernel(self.kernel_size)
-        self.kernel.require_grads = False
-        self.kernel_vol = self.get_kernel_vol()
+        self.kernel: torch.Tensor
+        self.kernel_vol: torch.Tensor
+        self.register_buffer("kernel", _kernel(self.kernel_size), persistent=False)
+        self.register_buffer("kernel_vol", self.get_kernel_vol(), persistent=False)
 
         self.smooth_nr = float(smooth_nr)
         self.smooth_dr = float(smooth_dr)
 
-    def get_kernel_vol(self):
+    def get_kernel_vol(self) -> torch.Tensor:
+        assert self.kernel is not None
         vol = self.kernel
         for _ in range(self.ndim - 1):
             vol = torch.matmul(vol.unsqueeze(-1), self.kernel.unsqueeze(0))
@@ -138,6 +140,8 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
             raise ValueError(f"ground truth has differing shape ({target.shape}) from pred ({pred.shape})")
 
         t2, p2, tp = target * target, pred * pred, target * pred
+        assert self.kernel is not None
+        assert self.kernel_vol is not None
         kernel, kernel_vol = self.kernel.to(pred), self.kernel_vol.to(pred)
         kernels = [kernel] * self.ndim
         # sum over kernel
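
The motivation for `register_buffer` here: buffers move and convert with the module on `.to()`/`.cuda()`, and `persistent=False` keeps them out of the `state_dict`, so existing checkpoints still load. A minimal standalone sketch:

import torch
from torch import nn

class WithKernel(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # A plain tensor attribute is not tracked by nn.Module; a buffer is.
        # persistent=False excludes it from state_dict, keeping checkpoints unchanged.
        self.register_buffer("kernel", torch.ones(3), persistent=False)

m = WithKernel()
assert "kernel" not in m.state_dict()  # not serialized
m.to(torch.float64)                    # but converted/moved with the module
print(m.kernel.dtype)                  # torch.float64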

monai/metrics/meandice.py

Lines changed: 98 additions & 2 deletions

@@ -13,11 +13,17 @@
 
 import torch
 
-from monai.metrics.utils import do_metric_reduction
+from monai.metrics.utils import compute_voronoi_regions_fast, do_metric_reduction
 from monai.utils import MetricReduction, deprecated_arg
+from monai.utils.module import optional_import
 
 from .metric import CumulativeIterationMetric
 
+scipy_ndimage, has_scipy_ndimage = optional_import("scipy.ndimage")
+cupy, has_cupy = optional_import("cupy")
+cupy_ndimage, has_cupy_ndimage = optional_import("cupyx.scipy.ndimage")
+
 __all__ = ["DiceMetric", "compute_dice", "DiceHelper"]

@@ -41,6 +47,18 @@ class DiceMetric(CumulativeIterationMetric):
     image size they can get overwhelmed by the signal from the background. This assumes the shape of both prediction
     and ground truth is BCHW[D].
 
+    The `per_component=True` approach computes the Dice metric on a per-connected component basis in the ground truth segmentation,
+    ensuring equal weighting for each component regardless of its size. This method eliminates biases in traditional metrics,
+    providing a more balanced evaluation, particularly in scenarios where object size does not correlate with clinical relevance.
+    This provides a more granular evaluation of segmentation quality, especially useful when dealing with fragmented or
+    disconnected objects in the foreground.
+    Note:
+        - The input prediction (`y_pred`) and ground truth (`y`) must both have 2 channels (foreground/background),
+          with binary segmentation (0 for background, 1 for foreground). That is, this assumes the shape of both prediction
+          and ground truth is B2HW[D].
+        - This method cannot be used with multiclass segmentation.
+    For more information, refer to the original paper: https://arxiv.org/abs/2410.18684
+
     The typical execution steps of this metric class follows :py:class:`monai.metrics.metric.Cumulative`.
 
     Further information can be found in the official
@@ -95,6 +113,9 @@ class DiceMetric(CumulativeIterationMetric):
             If `True`, use "label_{index}" as the key corresponding to C channels; if ``include_background`` is True,
             the index begins at "0", otherwise at "1". It can also take a list of label names.
             The outcome will then be returned as a dictionary.
+        per_component: whether to compute the Dice metric per connected component. If `True`, the metric will be
+            computed for each connected component in the ground truth, and then averaged. This requires binary
+            segmentations with 2 channels (background + foreground) as input. This is a more fine-grained computation.
 
     """
@@ -106,6 +127,7 @@ def __init__(
         ignore_empty: bool = True,
         num_classes: int | None = None,
         return_with_label: bool | list[str] = False,
+        per_component: bool = False,
     ) -> None:
         super().__init__()
         self.include_background = include_background
@@ -114,13 +136,15 @@ def __init__(
         self.ignore_empty = ignore_empty
         self.num_classes = num_classes
         self.return_with_label = return_with_label
+        self.per_component = per_component
         self.dice_helper = DiceHelper(
             include_background=self.include_background,
             reduction=MetricReduction.NONE,
             get_not_nans=False,
             apply_argmax=False,
             ignore_empty=self.ignore_empty,
             num_classes=self.num_classes,
+            per_component=self.per_component,
         )
 
     def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
@@ -175,6 +199,7 @@ def compute_dice(
     include_background: bool = True,
     ignore_empty: bool = True,
     num_classes: int | None = None,
+    per_component: bool = False,
 ) -> torch.Tensor:
     """
     Computes Dice score metric for a batch of predictions. This performs the same computation as
@@ -192,6 +217,9 @@ def compute_dice(
         num_classes: number of input channels (always including the background). When this is ``None``,
             ``y_pred.shape[1]`` will be used. This option is useful when both ``y_pred`` and ``y`` are
             single-channel class indices and the number of classes is not automatically inferred from data.
+        per_component: whether to compute the Dice metric per connected component. If `True`, the metric will be
+            computed for each connected component in the ground truth, and then averaged. This requires binary
+            segmentations with 2 channels (background + foreground) as input. This is a more fine-grained computation.
 
     Returns:
         Dice scores per batch and per class, (shape: [batch_size, num_classes]).
@@ -204,6 +232,7 @@ def compute_dice(
         apply_argmax=False,
         ignore_empty=ignore_empty,
         num_classes=num_classes,
+        per_component=per_component,
     )(y_pred=y_pred, y=y)
 
 
@@ -246,6 +275,9 @@ class DiceHelper:
         num_classes: number of input channels (always including the background). When this is ``None``,
             ``y_pred.shape[1]`` will be used. This option is useful when both ``y_pred`` and ``y`` are
             single-channel class indices and the number of classes is not automatically inferred from data.
+        per_component: whether to compute the Dice metric per connected component. If `True`, the metric will be
+            computed for each connected component in the ground truth, and then averaged. This requires binary
+            segmentations with 2 channels (background + foreground) as input. This is a more fine-grained computation.
     """
 
     @deprecated_arg("softmax", "1.5", "1.7", "Use `apply_argmax` instead.", new_name="apply_argmax")
@@ -262,6 +294,7 @@ def __init__(
         num_classes: int | None = None,
         sigmoid: bool | None = None,
         softmax: bool | None = None,
+        per_component: bool = False,
     ) -> None:
         # handling deprecated arguments
         if sigmoid is not None:
@@ -277,6 +310,50 @@ def __init__(
         self.activate = activate
         self.ignore_empty = ignore_empty
         self.num_classes = num_classes
+        self.per_component = per_component
+
+    def compute_cc_dice(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        """
+        Compute per-component Dice for a single batch item.
+
+        Args:
+            y_pred (torch.Tensor): Predictions with shape (1, 2, D, H, W) or (1, 2, H, W).
+            y (torch.Tensor): Ground truth with shape (1, 2, D, H, W) or (1, 2, H, W).
+
+        Returns:
+            torch.Tensor: Mean Dice over connected components.
+        """
+        if y_pred.ndim == y.ndim:
+            y_pred_idx = torch.argmax(y_pred, dim=1)
+            y_idx = torch.argmax(y, dim=1)
+        else:
+            y_pred_idx = y_pred
+            y_idx = y
+        if y_idx[0].sum() == 0:
+            if self.ignore_empty:
+                data = torch.tensor(float("nan"), device=y_idx.device)
+            elif y_pred_idx.sum() == 0:
+                data = torch.tensor(1.0, device=y_idx.device)
+            else:
+                data = torch.tensor(0.0, device=y_idx.device)
+        else:
+            cc_assignment = compute_voronoi_regions_fast(y_idx[0])
+            if cc_assignment.device != y_idx.device:
+                cc_assignment = cc_assignment.to(y_idx.device)
+            uniq, inv = torch.unique(cc_assignment.view(-1), return_inverse=True)
+            nof_components = uniq.numel()
+            code = (y_idx.view(-1) << 1) | y_pred_idx.view(-1)
+            idx = (inv << 2) | code
+            hist = torch.bincount(idx, minlength=nof_components * 4).reshape(-1, 4)
+            _, fp, fn, tp = hist[:, 0], hist[:, 1], hist[:, 2], hist[:, 3]
+            denom = 2 * tp + fp + fn
+            dice_scores = torch.where(
+                denom > 0, (2 * tp).float() / denom.float(), torch.tensor(1.0, device=denom.device)
+            )
+            data = dice_scores.unsqueeze(-1)
+        data = torch.nan_to_num(data)
+        data = data.reshape(-1, 1)
+        return torch.stack([data.mean()])
 
     def compute_channel(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         """
@@ -305,6 +382,9 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
             y_pred: input predictions with shape (batch_size, num_classes or 1, spatial_dims...).
                 the number of channels is inferred from ``y_pred.shape[1]`` when ``num_classes is None``.
             y: ground truth with shape (batch_size, num_classes or 1, spatial_dims...).
+
+        Raises:
+            ValueError: when the shapes of `y_pred` and `y` are not compatible for the per-component computation.
         """
         _apply_argmax, _threshold = self.apply_argmax, self.threshold
         if self.num_classes is None:
@@ -322,15 +402,31 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
                 y_pred = torch.sigmoid(y_pred)
             y_pred = y_pred > 0.5
 
-        first_ch = 0 if self.include_background else 1
+        if self.per_component:
+            if y_pred.ndim not in (4, 5) or y.ndim not in (4, 5) or y_pred.shape[1] != 2 or y.shape[1] != 2:
+                same_rank = y_pred.ndim == y.ndim and y_pred.ndim in (4, 5)
+                binary_channels = y_pred.shape[1] == 2 and y.shape[1] == 2
+                same_shape = y_pred.shape == y.shape
+                if not (same_rank and binary_channels and same_shape):
+                    raise ValueError(
+                        "per_component requires matching 4D/5D binary tensors "
+                        "(B, 2, H, W) or (B, 2, D, H, W). "
+                        f"Got y_pred={tuple(y_pred.shape)}, y={tuple(y.shape)}."
+                    )
+
+        first_ch = 0 if self.include_background and not self.per_component else 1
         data = []
         for b in range(y_pred.shape[0]):
+            if self.per_component:
+                data.append(self.compute_cc_dice(y_pred=y_pred[b].unsqueeze(0), y=y[b].unsqueeze(0)).reshape(-1))
+                continue
             c_list = []
             for c in range(first_ch, n_pred_ch) if n_pred_ch > 1 else [1]:
                 x_pred = (y_pred[b, 0] == c) if (y_pred.shape[1] == 1) else y_pred[b, c].bool()
                 x = (y[b, 0] == c) if (y.shape[1] == 1) else y[b, c]
                 c_list.append(self.compute_channel(x_pred, x))
             data.append(torch.stack(c_list))
+
         data = torch.stack(data, dim=0).contiguous()  # type: ignore
 
         f, not_nans = do_metric_reduction(data, self.reduction)  # type: ignore
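
Taken together, a usage sketch of the flag introduced in this diff (input shapes and values are illustrative; the expected output is reasoned from the per-component definition above, with each component weighted equally regardless of size):

import torch
from monai.metrics import DiceMetric

# One-hot binary input: batch 1, 2 channels (background, foreground), 8x8.
y = torch.zeros(1, 2, 8, 8)
y[0, 1, 1:3, 1:3] = 1   # small component (4 voxels)
y[0, 1, 5:8, 5:8] = 1   # larger component (9 voxels)
y[0, 0] = 1 - y[0, 1]

y_pred = y.clone()
y_pred[0, 1, 5:8, 5:8] = 0   # completely miss the larger component
y_pred[0, 0] = 1 - y_pred[0, 1]

metric = DiceMetric(per_component=True)
metric(y_pred=y_pred, y=y)
print(metric.aggregate())    # (1.0 + 0.0) / 2, i.e. ~0.5, despite the size imbalance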

monai/metrics/utils.py

Lines changed: 57 additions & 0 deletions

@@ -39,6 +39,10 @@
 distance_transform_edt, _ = optional_import("scipy.ndimage", name="distance_transform_edt")
 distance_transform_cdt, _ = optional_import("scipy.ndimage", name="distance_transform_cdt")
 
+scipy_ndimage, has_scipy_ndimage = optional_import("scipy.ndimage")
+cupy, has_cupy = optional_import("cupy")
+cupy_ndimage, has_cupy_ndimage = optional_import("cupyx.scipy.ndimage")
+
 __all__ = [
     "ignore_background",
     "do_metric_reduction",
@@ -462,6 +466,59 @@ def prepare_spacing(
     )
 
 
+def compute_voronoi_regions_fast(labels: np.ndarray | torch.Tensor) -> torch.Tensor:
+    """
+    Voronoi assignment to connected components (CPU, single EDT) without cc3d.
+    Returns the ID of the nearest component for each voxel.
+
+    Args:
+        labels (np.ndarray | torch.Tensor): Label map where values > 0 are seeds.
+
+    Raises:
+        RuntimeError: when `scipy.ndimage` is not available.
+        ValueError: when `labels` has fewer than two dimensions.
+
+    Returns:
+        torch.Tensor: Voronoi region IDs (int32) on CPU.
+    """
+    if isinstance(labels, torch.Tensor) and labels.is_cuda and has_cupy and has_cupy_ndimage:
+        xp = cupy
+        nd_distance_transform_edt = cupy_ndimage.distance_transform_edt
+        nd_generate_binary_structure = cupy_ndimage.generate_binary_structure
+        nd_label = cupy_ndimage.label
+        x = cupy.asarray(labels.detach())
+    else:
+        xp = np
+        nd_distance_transform_edt = scipy_ndimage.distance_transform_edt
+        nd_generate_binary_structure = scipy_ndimage.generate_binary_structure
+        nd_label = scipy_ndimage.label
+
+        if not has_scipy_ndimage:
+            raise RuntimeError("scipy.ndimage is required for per_component Dice computation.")
+
+        if isinstance(labels, torch.Tensor):
+            warnings.warn(
+                "Voronoi computation is running on CPU. "
+                "To accelerate, move the input tensor to GPU and ensure 'cupy' with 'cupyx.scipy.ndimage' is installed."
+            )
+            x = labels.cpu().numpy()
+        else:
+            x = np.asarray(labels)
+    rank = conn_rank = x.ndim
+    structure = nd_generate_binary_structure(rank=rank, connectivity=conn_rank)
+    cc, num = nd_label(x > 0, structure=structure)
+    if num == 0:
+        return torch.zeros_like(torch.from_numpy(x), dtype=torch.int32)
+    edt_input = xp.ones(cc.shape, dtype=xp.uint8)
+    edt_input[cc > 0] = 0
+    indices = nd_distance_transform_edt(edt_input, sampling=None, return_distances=False, return_indices=True)
+    voronoi = cc[tuple(indices)]
+    if xp is cupy:
+        return torch.as_tensor(cupy.asnumpy(voronoi), dtype=torch.int32)
+    else:
+        return torch.as_tensor(voronoi, dtype=torch.int32)
+
+
 ENCODING_KERNEL = {2: [[8, 4], [2, 1]], 3: [[[128, 64], [32, 16]], [[8, 4], [2, 1]]]}
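
The core trick in `compute_voronoi_regions_fast` is a single EDT call: label the foreground, ask the distance transform of the background for the index of each voxel's nearest foreground voxel, and read the component id there. A small scipy-only sketch:

import numpy as np
from scipy import ndimage

mask = np.array([[1, 0, 0, 0],
                 [0, 0, 0, 0],
                 [0, 0, 0, 1]])
cc, num = ndimage.label(mask > 0)      # two components: ids 1 and 2

background = np.ones(cc.shape, dtype=np.uint8)
background[cc > 0] = 0                 # EDT measures distance to the seeds
indices = ndimage.distance_transform_edt(
    background, return_distances=False, return_indices=True
)
voronoi = cc[tuple(indices)]           # nearest-component id for every voxel
print(voronoi)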

tests/apps/detection/utils/test_anchor_box.py

Lines changed: 13 additions & 10 deletions

@@ -44,9 +44,9 @@ class TestAnchorGenerator(unittest.TestCase):
     @parameterized.expand(TEST_CASES_2D)
     def test_anchor_2d(self, input_param, image_shape, feature_maps_shapes):
         torch_anchor_utils, _ = optional_import("torchvision.models.detection.anchor_utils")
-        image_list, _ = optional_import("torchvision.models.detection.image_list")
 
-        # test it behaves the same with torchvision for 2d
+        # test it behaves for new functionality of centered anchors
+        # pytorch does not follow this functionality
         anchor = AnchorGenerator(**input_param, indexing="xy")
         anchor_ref = torch_anchor_utils.AnchorGenerator(**input_param)
         for a, a_f in zip(anchor.cell_anchors, anchor_ref.cell_anchors):
@@ -56,15 +56,18 @@ def test_anchor_2d(self, input_param, image_shape, feature_maps_shapes):
 
         grid_sizes = [[2, 2], [1, 1]]
         strides = [[torch.tensor(1), torch.tensor(2)], [torch.tensor(2), torch.tensor(4)]]
-        for a, a_f in zip(anchor.grid_anchors(grid_sizes, strides), anchor_ref.grid_anchors(grid_sizes, strides)):
-            assert_allclose(a, a_f, type_test=True, device_test=False, atol=1e-3)
 
-        images = torch.rand(image_shape)
-        feature_maps = tuple(torch.rand(fs) for fs in feature_maps_shapes)
-        result = anchor(images, feature_maps)
-        result_ref = anchor_ref(image_list.ImageList(images, ([123, 122],)), feature_maps)
-        for a, a_f in zip(result, result_ref):
-            assert_allclose(a, a_f, type_test=True, device_test=False, atol=0.1)
+        monai_anchors = anchor.grid_anchors(grid_sizes, strides)
+        torchvision_anchors = anchor_ref.grid_anchors(grid_sizes, strides)
+
+        for a, a_f, s in zip(monai_anchors, torchvision_anchors, strides):
+            stride_y, stride_x = s
+
+            offset_x = stride_x // 2
+            offset_y = stride_y // 2
+            offset = torch.tensor([offset_x, offset_y, offset_x, offset_y], dtype=a_f.dtype, device=a_f.device)
+
+            assert_allclose(a, a_f + offset, type_test=True, device_test=False, atol=1e-3)
 
     @parameterized.expand(TEST_CASES_2D)
     def test_script_2d(self, input_param, image_shape, feature_maps_shapes):
