Skip to content

Commit 4f584a9

Browse files
mingxin-zheng and wyli authored
Reduce Auto3DSeg tests memory footprint and running time (#5689)
Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com> Fixes #5604 also fixes #5697 by 5b1865e ### Description - Update the Auto3DSeg templates - SwinUNETR optionally uses pretrain weights - Fix bug when multiple GPUs are used in a tiny datasets, `step_size` will be zero. - Change image size in the test dataset from (64, 64, 64) to (24, 24, 24) - Change `num_epochs` from 2 to 1 - Remove an unnecessary unit test in test_auto3dseg_hpo (check the algorithm can run in another folder, which is not very useful and may bring bugs in the future). ### Types of changes <!--- Put an `x` in all the boxes that apply, and remove the not applicable items --> - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [x] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`. - [x] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`. Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com> Signed-off-by: Wenqi Li <wenqil@nvidia.com> Co-authored-by: Wenqi Li <831580+wyli@users.noreply.github.com> Co-authored-by: Wenqi Li <wenqil@nvidia.com>
1 parent af44576 commit 4f584a9

5 files changed

Lines changed: 78 additions & 93 deletions

File tree

monai/apps/auto3dseg/bundle_gen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from monai.utils import ensure_tuple
3333

3434
logger = get_logger(module_name=__name__)
35-
ALGO_HASH = os.environ.get("MONAI_ALGO_HASH", "d7bf36c")
35+
ALGO_HASH = os.environ.get("MONAI_ALGO_HASH", "5895e1b")
3636

3737
__all__ = ["BundleAlgo", "BundleGen"]
3838

tests/test_auto3dseg_ensemble.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import nibabel as nib
1818
import numpy as np
19+
import torch
1920

2021
from monai.apps.auto3dseg import AlgoEnsembleBestByFold, AlgoEnsembleBestN, AlgoEnsembleBuilder, BundleGen, DataAnalyzer
2122
from monai.bundle.config_parser import ConfigParser
@@ -44,14 +45,21 @@
4445
],
4546
}
4647

47-
train_param = {
48-
"CUDA_VISIBLE_DEVICES": [0],
49-
"num_iterations": 8,
50-
"num_iterations_per_validation": 4,
51-
"num_images_per_batch": 2,
52-
"num_epochs": 2,
53-
"num_warmup_iterations": 4,
54-
}
48+
num_gpus = 4 if torch.cuda.device_count() > 4 else torch.cuda.device_count()
49+
train_param = (
50+
{
51+
"CUDA_VISIBLE_DEVICES": list(range(num_gpus)),
52+
"num_iterations": int(4 / num_gpus),
53+
"num_iterations_per_validation": int(4 / num_gpus),
54+
"num_images_per_batch": 2,
55+
"num_epochs": 1,
56+
"num_warmup_iterations": int(4 / num_gpus),
57+
"use_pretrain": False,
58+
"pretrained_path": "",
59+
}
60+
if torch.cuda.is_available()
61+
else {}
62+
)
5563

5664
pred_param = {"files_slices": slice(0, 1), "mode": "mean", "sigmoid": True}
5765

@@ -81,7 +89,7 @@ def test_ensemble(self) -> None:
8189

8290
# Generate a fake dataset
8391
for d in fake_datalist["testing"] + fake_datalist["training"]:
84-
im, seg = create_test_image_3d(64, 64, 64, rad_max=10, num_seg_classes=1)
92+
im, seg = create_test_image_3d(24, 24, 24, rad_max=10, num_seg_classes=1)
8593
nib_image = nib.Nifti1Image(im, affine=np.eye(4))
8694
image_fpath = os.path.join(dataroot, d["image"])
8795
nib.save(nib_image, image_fpath)
@@ -114,7 +122,7 @@ def test_ensemble(self) -> None:
114122
bundle_generator = BundleGen(
115123
algo_path=work_dir, data_stats_filename=da_output_yaml, data_src_cfg_name=data_src_cfg
116124
)
117-
bundle_generator.generate(work_dir, num_fold=2)
125+
bundle_generator.generate(work_dir, num_fold=1)
118126
history = bundle_generator.get_history()
119127

120128
for h in history:
@@ -126,9 +134,9 @@ def test_ensemble(self) -> None:
126134
builder.set_ensemble_method(AlgoEnsembleBestN(n_best=2))
127135
ensemble = builder.get_ensemble()
128136
preds = ensemble(pred_param)
129-
self.assertTupleEqual(preds[0].shape, (2, 64, 64, 64))
137+
self.assertTupleEqual(preds[0].shape, (2, 24, 24, 24))
130138

131-
builder.set_ensemble_method(AlgoEnsembleBestByFold(2))
139+
builder.set_ensemble_method(AlgoEnsembleBestByFold(1))
132140
ensemble = builder.get_ensemble()
133141
for algo in ensemble.get_algo_ensemble():
134142
print(algo[AlgoEnsembleKeys.ID])

tests/test_auto3dseg_hpo.py

Lines changed: 20 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
# limitations under the License.
1111

1212
import os
13-
import shutil
1413
import tempfile
1514
import unittest
1615
from functools import partial
1716
from typing import Dict, List
1817

1918
import nibabel as nib
2019
import numpy as np
20+
import torch
2121

2222
from monai.apps.auto3dseg import BundleGen, DataAnalyzer, NNIGen, OptunaGen, import_bundle_algo_history
2323
from monai.bundle.config_parser import ConfigParser
@@ -28,6 +28,23 @@
2828
_, has_tb = optional_import("torch.utils.tensorboard", name="SummaryWriter")
2929
optuna, has_optuna = optional_import("optuna")
3030

31+
num_gpus = 4 if torch.cuda.device_count() > 4 else torch.cuda.device_count()
32+
33+
override_param = (
34+
{
35+
"CUDA_VISIBLE_DEVICES": list(range(num_gpus)),
36+
"num_iterations": int(4 / num_gpus),
37+
"num_iterations_per_validation": int(4 / num_gpus),
38+
"num_images_per_batch": 2,
39+
"num_epochs": 1,
40+
"num_warmup_iterations": int(4 / num_gpus),
41+
"use_pretrain": False,
42+
"pretrained_path": "",
43+
}
44+
if torch.cuda.is_available()
45+
else {}
46+
)
47+
3148

3249
def skip_if_no_optuna(obj):
3350
"""
@@ -76,7 +93,7 @@ def setUp(self) -> None:
7693

7794
# Generate a fake dataset
7895
for d in fake_datalist["testing"] + fake_datalist["training"]:
79-
im, seg = create_test_image_3d(64, 64, 64, rad_max=10, num_seg_classes=1)
96+
im, seg = create_test_image_3d(24, 24, 24, rad_max=10, num_seg_classes=1)
8097
nib_image = nib.Nifti1Image(im, affine=np.eye(4))
8198
image_fpath = os.path.join(dataroot, d["image"])
8299
nib.save(nib_image, image_fpath)
@@ -108,21 +125,14 @@ def setUp(self) -> None:
108125
bundle_generator = BundleGen(
109126
algo_path=work_dir, data_stats_filename=da_output_yaml, data_src_cfg_name=data_src_cfg
110127
)
111-
bundle_generator.generate(work_dir, num_fold=2)
128+
bundle_generator.generate(work_dir, num_fold=1)
112129

113130
self.history = bundle_generator.get_history()
114131
self.work_dir = work_dir
115132
self.test_path = test_path
116133

117134
@skip_if_no_cuda
118135
def test_run_algo(self) -> None:
119-
override_param = {
120-
"num_iterations": 8,
121-
"num_iterations_per_validation": 4,
122-
"num_images_per_batch": 2,
123-
"num_epochs": 2,
124-
"num_warmup_iterations": 4,
125-
}
126136

127137
algo_dict = self.history[0]
128138
algo_name = list(algo_dict.keys())[0]
@@ -135,14 +145,6 @@ def test_run_algo(self) -> None:
135145
@skip_if_no_cuda
136146
@skip_if_no_optuna
137147
def test_run_optuna(self) -> None:
138-
override_param = {
139-
"num_iterations": 8,
140-
"num_iterations_per_validation": 4,
141-
"num_images_per_batch": 2,
142-
"num_epochs": 2,
143-
"num_warmup_iterations": 4,
144-
}
145-
146148
algo_dict = self.history[0]
147149
algo_name = list(algo_dict.keys())[0]
148150
algo = algo_dict[algo_name]
@@ -164,45 +166,8 @@ def get_hyperparameters(self):
164166
)
165167
print(f"Best value: {study.best_value} (params: {study.best_params})\n")
166168

167-
@skip_if_no_cuda
168-
def test_run_algo_after_move_files(self) -> None:
169-
override_param = {
170-
"num_iterations": 8,
171-
"num_iterations_per_validation": 4,
172-
"num_images_per_batch": 2,
173-
"num_epochs": 2,
174-
"num_warmup_iterations": 4,
175-
}
176-
177-
algo_dict = self.history[0]
178-
algo_name = list(algo_dict.keys())[0]
179-
algo = algo_dict[algo_name]
180-
nni_gen = NNIGen(algo=algo, params=override_param)
181-
obj_filename = nni_gen.get_obj_filename()
182-
183-
work_dir_2 = os.path.join(self.test_path, "workdir2")
184-
os.makedirs(work_dir_2)
185-
algorithm_template = os.path.join(self.work_dir, "algorithm_templates")
186-
algorithm_templates_2 = os.path.join(work_dir_2, "algorithm_templates")
187-
algo_dir = os.path.dirname(obj_filename)
188-
algo_dir_2 = os.path.join(work_dir_2, os.path.basename(algo_dir))
189-
190-
obj_filename_2 = os.path.join(algo_dir_2, "algo_object.pkl")
191-
shutil.copytree(algorithm_template, algorithm_templates_2)
192-
shutil.copytree(algo_dir, algo_dir_2)
193-
# this function will be used in HPO via Python Fire in remote
194-
NNIGen().run_algo(obj_filename_2, work_dir_2, template_path=algorithm_templates_2)
195-
196169
@skip_if_no_cuda
197170
def test_get_history(self) -> None:
198-
override_param = {
199-
"num_iterations": 8,
200-
"num_iterations_per_validation": 4,
201-
"num_images_per_batch": 2,
202-
"num_epochs": 2,
203-
"num_warmup_iterations": 4,
204-
}
205-
206171
algo_dict = self.history[0]
207172
algo_name = list(algo_dict.keys())[0]
208173
algo = algo_dict[algo_name]

tests/test_integration_autorunner.py

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import nibabel as nib
1818
import numpy as np
19+
import torch
1920

2021
from monai.apps.auto3dseg import AutoRunner
2122
from monai.bundle.config_parser import ConfigParser
@@ -44,14 +45,21 @@
4445
],
4546
}
4647

47-
train_param = {
48-
"CUDA_VISIBLE_DEVICES": [0],
49-
"num_iterations": 8,
50-
"num_iterations_per_validation": 4,
51-
"num_images_per_batch": 2,
52-
"num_epochs": 2,
53-
"num_warmup_iterations": 4,
54-
}
48+
num_gpus = 4 if torch.cuda.device_count() > 4 else torch.cuda.device_count()
49+
train_param = (
50+
{
51+
"CUDA_VISIBLE_DEVICES": list(range(num_gpus)),
52+
"num_iterations": int(4 / num_gpus),
53+
"num_iterations_per_validation": int(4 / num_gpus),
54+
"num_images_per_batch": 2,
55+
"num_epochs": 1,
56+
"num_warmup_iterations": int(4 / num_gpus),
57+
"use_pretrain": False,
58+
"pretrained_path": "",
59+
}
60+
if torch.cuda.is_available()
61+
else {}
62+
)
5563

5664
pred_param = {"files_slices": slice(0, 1), "mode": "mean", "sigmoid": True}
5765

@@ -70,7 +78,7 @@ def setUp(self) -> None:
7078

7179
# Generate a fake dataset
7280
for d in sim_datalist["testing"] + sim_datalist["training"]:
73-
im, seg = create_test_image_3d(64, 64, 64, rad_max=10, num_seg_classes=1)
81+
im, seg = create_test_image_3d(24, 24, 24, rad_max=10, num_seg_classes=1)
7482
nib_image = nib.Nifti1Image(im, affine=np.eye(4))
7583
image_fpath = os.path.join(sim_dataroot, d["image"])
7684
nib.save(nib_image, image_fpath)
@@ -123,22 +131,25 @@ def test_autorunner_hpo(self) -> None:
123131
work_dir = os.path.join(self.test_path, "work_dir")
124132
runner = AutoRunner(work_dir=work_dir, input=self.data_src_cfg, hpo=True, ensemble=False)
125133
hpo_param = {
126-
"num_iterations": 8,
127-
"num_iterations_per_validation": 4,
128-
"num_images_per_batch": 2,
129-
"num_epochs": 2,
130-
"num_warmup_iterations": 4,
134+
"CUDA_VISIBLE_DEVICES": train_param["CUDA_VISIBLE_DEVICES"],
135+
"num_iterations": train_param["num_iterations"],
136+
"num_iterations_per_validation": train_param["num_iterations_per_validation"],
137+
"num_images_per_batch": train_param["num_images_per_batch"],
138+
"num_epochs": train_param["num_epochs"],
139+
"num_warmup_iterations": train_param["num_warmup_iterations"],
140+
"use_pretrain": train_param["use_pretrain"],
141+
"pretrained_path": train_param["pretrained_path"],
131142
# below are to shorten the time for dints
132-
"training#num_iterations": 8,
133-
"training#num_iterations_per_validation": 4,
134-
"training#num_images_per_batch": 2,
135-
"training#num_epochs": 2,
136-
"training#num_warmup_iterations": 4,
137-
"searching#num_iterations": 8,
138-
"searching#num_iterations_per_validation": 4,
139-
"searching#num_images_per_batch": 2,
140-
"searching#num_epochs": 2,
141-
"searching#num_warmup_iterations": 4,
143+
"training#num_iterations": train_param["num_iterations"],
144+
"training#num_iterations_per_validation": train_param["num_iterations_per_validation"],
145+
"training#num_images_per_batch": train_param["num_images_per_batch"],
146+
"training#num_epochs": train_param["num_epochs"],
147+
"training#num_warmup_iterations": train_param["num_warmup_iterations"],
148+
"searching#num_iterations": train_param["num_iterations"],
149+
"searching#num_iterations_per_validation": train_param["num_iterations_per_validation"],
150+
"searching#num_images_per_batch": train_param["num_images_per_batch"],
151+
"searching#num_epochs": train_param["num_epochs"],
152+
"searching#num_warmup_iterations": train_param["num_warmup_iterations"],
142153
"nni_dry_run": True,
143154
}
144155
search_space = {"learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]}}

tests/test_weighted_random_sampler_dist.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
import torch.distributed as dist
1717

1818
from monai.data import DistributedWeightedRandomSampler
19-
from tests.utils import DistCall, DistTestCase, skip_if_windows
19+
from tests.utils import DistCall, DistTestCase, skip_if_darwin, skip_if_windows
2020

2121

2222
@skip_if_windows
23+
@skip_if_darwin
2324
class DistributedWeightedRandomSamplerTest(DistTestCase):
2425
@DistCall(nnodes=1, nproc_per_node=2)
2526
def test_sampling(self):

0 commit comments

Comments (0)