Skip to content

Commit b533a53

Browse files
mingxin-zheng and wyli authored
Add dryrun mode for AutoRunner HPO to fix 5568 (#5681)
Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com>

Fixes #5568.

### Description
- Add the "dryrun" mode for HPO NNI so that users and our test systems can skip starting the NNI server.
- Move the printing of the NNI instructions out of the init function of the class.

### Types of changes
<!--- Put an `x` in all the boxes that apply, and remove the not applicable items -->
- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [x] New tests added to cover the changes.
- [x] Integration tests passed locally by running `python tests/test_integration_autorunner.py`.
- [x] In-line docstrings updated.

Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com>
Co-authored-by: Wenqi Li <831580+wyli@users.noreply.github.com>
1 parent a05c202 commit b533a53

3 files changed

Lines changed: 24 additions & 3 deletions

File tree

monai/apps/auto3dseg/auto_runner.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -410,9 +410,15 @@ def set_hpo_params(self, params: Optional[Dict[str, Any]] = None):
410410
- "tuner"
411411
- "trainingService"
412412
413+
and (3) enable the dry-run mode if the user wants to generate the NNI configs without starting the NNI service.
414+
413415
Args:
414416
params: a dict that defines the overriding key-value pairs during instantiation of the algo. For
415417
BundleAlgo, it will override the template config filling.
418+
419+
Notes:
420+
Users can set ``nni_dry_run`` to ``True`` in the ``params`` to enable the dry-run mode for the NNI backend.
421+
416422
"""
417423
if params is None:
418424
self.hpo_params = self.train_params
@@ -538,6 +544,7 @@ def _train_algo_in_nni(self, history):
538544
}
539545

540546
last_total_tasks = len(import_bundle_algo_history(self.work_dir, only_trained=True))
547+
mode_dry_run = self.hpo_params.pop("nni_dry_run", False)
541548
for task in history:
542549
for name, algo in task.items():
543550
nni_gen = NNIGen(algo=algo, params=self.hpo_params)
@@ -551,11 +558,16 @@ def _train_algo_in_nni(self, history):
551558
nni_config.update({"search_space": self.search_space})
552559
trial_cmd = "python -m monai.apps.auto3dseg NNIGen run_algo " + obj_filename + " " + self.work_dir
553560
nni_config.update({"trialCommand": trial_cmd})
554-
nni_config_filename = os.path.abspath(os.path.join(self.work_dir, "nni_config.yaml"))
561+
nni_config_filename = os.path.abspath(os.path.join(self.work_dir, f"{name}_nni_config.yaml"))
555562
ConfigParser.export_config_file(nni_config, nni_config_filename, fmt="yaml", default_flow_style=None)
556563

557564
max_trial = min(self.hpo_tasks, default_nni_config["maxTrialNumber"])
558565
cmd = "nnictl create --config " + nni_config_filename + " --port 8088"
566+
567+
if mode_dry_run:
568+
logger.info(f"AutoRunner HPO is in dry-run mode. Please manually launch: {cmd}")
569+
continue
570+
559571
subprocess.run(cmd.split(), check=True)
560572

561573
n_trainings = len(import_bundle_algo_history(self.work_dir, only_trained=True))

monai/apps/auto3dseg/hpo_gen.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,15 @@ class NNIGen(HPOGen):
9191
├── model_fold0
9292
└── scripts
9393
94+
.. code-block:: python
95+
# Bundle Algorithms are already generated by BundleGen in work_dir
96+
history = import_bundle_algo_history(work_dir, only_trained=False)
97+
algo_dict = history[0] # pick the first algorithm
98+
algo_name = list(algo_dict.keys())[0]
99+
onealgo = algo_dict[algo_name]
100+
nni_gen = NNIGen(algo=onealgo)
101+
nni_gen.print_bundle_algo_instruction()
102+
94103
Notes:
95104
The NNIGen will prepare the algorithms in a folder and suggest a command to replace trialCommand in the experiment
96105
config. However, NNIGen will not trigger NNI. User needs to write their NNI experiment configs, and then run the
@@ -118,7 +127,6 @@ def __init__(self, algo: Optional[Algo] = None, params=None):
118127

119128
if isinstance(self.algo, BundleAlgo):
120129
self.obj_filename = algo_to_pickle(self.algo, template_path=self.algo.template_path)
121-
self.print_bundle_algo_instruction()
122130
else:
123131
self.obj_filename = algo_to_pickle(self.algo)
124132
# nni instruction unknown

tests/test_integration_autorunner.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ def test_autorunner_ensemble(self) -> None:
121121
@unittest.skipIf(not has_nni, "nni required")
122122
def test_autorunner_hpo(self) -> None:
123123
work_dir = os.path.join(self.test_path, "work_dir")
124-
runner = AutoRunner(work_dir=work_dir, input=self.data_src_cfg, hpo=True)
124+
runner = AutoRunner(work_dir=work_dir, input=self.data_src_cfg, hpo=True, ensemble=False)
125125
hpo_param = {
126126
"num_iterations": 8,
127127
"num_iterations_per_validation": 4,
@@ -139,6 +139,7 @@ def test_autorunner_hpo(self) -> None:
139139
"searching#num_images_per_batch": 2,
140140
"searching#num_epochs": 2,
141141
"searching#num_warmup_iterations": 4,
142+
"nni_dry_run": True,
142143
}
143144
search_space = {"learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]}}
144145
runner.set_num_fold(1)

0 commit comments

Comments
 (0)