Skip to content

Commit a09d2a2

Browse files
5717 Enhance hovernet to use standard resnet50's weights (#5688)
Signed-off-by: Yiheng Wang <vennw@nvidia.com> Fixes #5717 . ### Description This PR adds support for loading torchvision's resnet50 weights. ### Types of changes <!--- Put an `x` in all the boxes that apply, and remove the not applicable items --> - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [ ] Breaking change (fix or new feature that would cause existing functionality to change). - [ ] New tests added to cover the changes. - [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [ ] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. - [ ] In-line docstrings updated. - [ ] Documentation updated, tested `make html` command in the `docs/` folder. Signed-off-by: Yiheng Wang <vennw@nvidia.com>
1 parent f170e0f commit a09d2a2

1 file changed

Lines changed: 79 additions & 9 deletions

File tree

monai/networks/nets/hovernet.py

Lines changed: 79 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,7 @@ class HoVerNet(nn.Module):
414414
Medical Image Analysis 2019
415415
416416
https://github.com/vqdang/hover_net
417+
https://pytorch.org/vision/main/models/generated/torchvision.models.resnet50.html
417418
418419
Args:
419420
mode: use original implementation (`HoVerNetMODE.ORIGINAL` or "original") or
@@ -429,10 +430,16 @@ class HoVerNet(nn.Module):
429430
Please note that to get consistent output size, `HoVerNetMode.FAST` mode should be employed.
430431
dropout_prob: dropout rate after each dense layer.
431432
pretrained_url: if specified, will load the pretrained weights downloaded from the url.
432-
The weights should be ImageNet pretrained preact-resnet50 weights coming from the referred hover_net
433+
There are two supported forms of weights:
434+
1. preact-resnet50 weights coming from the referred hover_net
433435
repository, each user is responsible for checking the content of model/datasets and the applicable licenses
434436
and determining if suitable for the intended use. Please check the following link for more details:
435437
https://github.com/vqdang/hover_net#data-format
438+
2. standard resnet50 weights of torchvision. Please check the following link for more details:
439+
https://pytorch.org/vision/main/_modules/torchvision/models/resnet.html#ResNet50_Weights
440+
adapt_standard_resnet: if the pretrained weights of the encoder follow the original format (preact-resnet50), this
441+
value should be `False`. If using the pretrained weights that follow torchvision's standard resnet50 format,
442+
this value should be `True`.
436443
freeze_encoder: whether to freeze the encoder of the network.
437444
"""
438445

@@ -450,6 +457,7 @@ def __init__(
450457
decoder_padding: bool = False,
451458
dropout_prob: float = 0.0,
452459
pretrained_url: Optional[str] = None,
460+
adapt_standard_resnet: bool = False,
453461
freeze_encoder: bool = False,
454462
) -> None:
455463

@@ -555,7 +563,11 @@ def __init__(
555563
nn.init.constant_(torch.as_tensor(m.bias), 0)
556564

557565
if pretrained_url is not None:
558-
_load_pretrained_encoder(self, pretrained_url)
566+
if adapt_standard_resnet:
567+
weights = _remap_standard_resnet_model(pretrained_url)
568+
else:
569+
weights = _remap_preact_resnet_model(pretrained_url)
570+
_load_pretrained_encoder(self, weights)
559571

560572
def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
561573

@@ -588,7 +600,18 @@ def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
588600
return output
589601

590602

591-
def _load_pretrained_encoder(model: nn.Module, model_url: str):
603+
def _load_pretrained_encoder(model: nn.Module, state_dict: Union[OrderedDict, Dict]):
604+
605+
model_dict = model.state_dict()
606+
state_dict = {
607+
k: v for k, v in state_dict.items() if (k in model_dict) and (model_dict[k].shape == state_dict[k].shape)
608+
}
609+
610+
model_dict.update(state_dict)
611+
model.load_state_dict(model_dict)
612+
613+
614+
def _remap_preact_resnet_model(model_url: str):
592615

593616
pattern_conv0 = re.compile(r"^(conv0\.\/)(.+)$")
594617
pattern_block = re.compile(r"^(d\d+)\.(.+)$")
@@ -614,12 +637,59 @@ def _load_pretrained_encoder(model: nn.Module, model_url: str):
614637
if "upsample2x" in key:
615638
del state_dict[key]
616639

617-
model_dict = model.state_dict()
618-
state_dict = {
619-
k: v for k, v in state_dict.items() if (k in model_dict) and (model_dict[k].shape == state_dict[k].shape)
620-
}
621-
model_dict.update(state_dict)
622-
model.load_state_dict(model_dict)
640+
return state_dict
641+
642+
643+
def _remap_standard_resnet_model(model_url: str):
644+
645+
pattern_conv0 = re.compile(r"^conv1\.(.+)$")
646+
pattern_bn1 = re.compile(r"^bn1\.(.+)$")
647+
pattern_block = re.compile(r"^layer(\d+)\.(\d+)\.(.+)$")
648+
# bn3 to next denselayer's preact/bn
649+
pattern_block_bn3 = re.compile(r"^(res_blocks.d\d+\.layers\.denselayer_)(\d+)\.layers\.bn3\.(.+)$")
650+
# bn1, bn2 to conv1/bn, conv2/bn
651+
pattern_block_bn = re.compile(r"^(res_blocks.d\d+\.layers\.denselayer_\d+\.layers)\.bn(\d+)\.(.+)$")
652+
pattern_downsample0 = re.compile(r"^(res_blocks.d\d+).+\.downsample\.0\.(.+)")
653+
pattern_downsample1 = re.compile(r"^(res_blocks.d\d+).+\.downsample\.1\.(.+)")
654+
# download the pretrained weights into torch hub's default dir
655+
weights_dir = os.path.join(torch.hub.get_dir(), "resnet50.pth")
656+
download_url(model_url, fuzzy=True, filepath=weights_dir, progress=False)
657+
state_dict = torch.load(weights_dir, map_location=None)
658+
659+
for key in list(state_dict.keys()):
660+
new_key = None
661+
if pattern_conv0.match(key):
662+
new_key = re.sub(pattern_conv0, r"conv0.conv.\1", key)
663+
elif pattern_bn1.match(key):
664+
new_key = re.sub(pattern_bn1, r"conv0.bn.\1", key)
665+
elif pattern_block.match(key):
666+
new_key = re.sub(
667+
pattern_block,
668+
lambda s: "res_blocks.d"
669+
+ str(int(s.group(1)) - 1)
670+
+ ".layers.denselayer_"
671+
+ s.group(2)
672+
+ ".layers."
673+
+ s.group(3),
674+
key,
675+
)
676+
if pattern_block_bn3.match(new_key):
677+
new_key = re.sub(
678+
pattern_block_bn3,
679+
lambda s: s.group(1) + str(int(s.group(2)) + 1) + ".layers.preact/bn." + s.group(3),
680+
new_key,
681+
)
682+
elif pattern_block_bn.match(new_key):
683+
new_key = re.sub(pattern_block_bn, r"\1.conv\2/bn.\3", new_key)
684+
elif pattern_downsample0.match(new_key):
685+
new_key = re.sub(pattern_downsample0, r"\1.shortcut.\2", new_key)
686+
elif pattern_downsample1.match(new_key):
687+
new_key = re.sub(pattern_downsample1, r"\1.bna_block.bn.\2", new_key)
688+
if new_key:
689+
state_dict[new_key] = state_dict[key]
690+
del state_dict[key]
691+
692+
return state_dict
623693

624694

625695
Hovernet = HoVernet = HoverNet = HoVerNet

0 commit comments

Comments
 (0)