Add dryrun mode for AutoRunner HPO to fix 5568 (#5681)

mingxin-zheng · wyli · web-flow · commit b533a538abd6 · 2022-12-08T10:04:21.000Z
Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com> Fixes #5568. ### Description - Add a "dryrun" mode for HPO NNI so that users and our test systems can skip starting the NNI server. - Move the NNI instruction printing out of the class's init function. ### Types of changes - [x] Non-breaking change (fix or new feature that would not break existing functionality). - [x] New tests added to cover the changes. - [x] Integration tests passed locally by running `python tests/test_integration_autorunner.py`. - [x] In-line docstrings updated. Signed-off-by: Mingxin Zheng <18563433+mingxin-zheng@users.noreply.github.com> Co-authored-by: Wenqi Li <831580+wyli@users.noreply.github.com>
diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py
@@ -410,9 +410,15 @@ def set_hpo_params(self, params: Optional[Dict[str, Any]] = None):
             - "tuner"
             - "trainingService"
 
+        and (3) enable the dry-run mode if the user wants to generate the NNI configs without starting the NNI service.
+
         Args:
             params: a dict that defines the overriding key-value pairs during instantiation of the algo. For
                 BundleAlgo, it will override the template config filling.
+
+        Notes:
+            Users can set ``nni_dry_run`` to ``True`` in the ``params`` to enable the dry-run mode for the NNI backend.
+
         """
         if params is None:
             self.hpo_params = self.train_params
@@ -538,6 +544,7 @@ def _train_algo_in_nni(self, history):
         }
 
         last_total_tasks = len(import_bundle_algo_history(self.work_dir, only_trained=True))
+        mode_dry_run = self.hpo_params.pop("nni_dry_run", False)
         for task in history:
             for name, algo in task.items():
                 nni_gen = NNIGen(algo=algo, params=self.hpo_params)
@@ -551,11 +558,16 @@ def _train_algo_in_nni(self, history):
                 nni_config.update({"search_space": self.search_space})
                 trial_cmd = "python -m monai.apps.auto3dseg NNIGen run_algo " + obj_filename + " " + self.work_dir
                 nni_config.update({"trialCommand": trial_cmd})
-                nni_config_filename = os.path.abspath(os.path.join(self.work_dir, "nni_config.yaml"))
+                nni_config_filename = os.path.abspath(os.path.join(self.work_dir, f"{name}_nni_config.yaml"))
                 ConfigParser.export_config_file(nni_config, nni_config_filename, fmt="yaml", default_flow_style=None)
 
                 max_trial = min(self.hpo_tasks, default_nni_config["maxTrialNumber"])
                 cmd = "nnictl create --config " + nni_config_filename + " --port 8088"
+
+                if mode_dry_run:
+                    logger.info(f"AutoRunner HPO is in dry-run mode. Please manually launch: {cmd}")
+                    continue
+
                 subprocess.run(cmd.split(), check=True)
 
                 n_trainings = len(import_bundle_algo_history(self.work_dir, only_trained=True))
diff --git a/monai/apps/auto3dseg/hpo_gen.py b/monai/apps/auto3dseg/hpo_gen.py
@@ -91,6 +91,15 @@ class NNIGen(HPOGen):
             ├── model_fold0
             └── scripts
 
+        .. code-block:: python
+            # Bundle Algorithms are already generated by BundleGen in work_dir
+            history = import_bundle_algo_history(work_dir, only_trained=False)
+            algo_dict = history[0]  # pick the first algorithm
+            algo_name = list(algo_dict.keys())[0]
+            onealgo = algo_dict[algo_name]
+            nni_gen = NNIGen(algo=onealgo)
+            nni_gen.print_bundle_algo_instruction()
+
     Notes:
         The NNIGen will prepare the algorithms in a folder and suggest a command to replace trialCommand in the experiment
         config. However, NNIGen will not trigger NNI. User needs to write their NNI experiment configs, and then run the
@@ -118,7 +127,6 @@ def __init__(self, algo: Optional[Algo] = None, params=None):
 
             if isinstance(self.algo, BundleAlgo):
                 self.obj_filename = algo_to_pickle(self.algo, template_path=self.algo.template_path)
-                self.print_bundle_algo_instruction()
             else:
                 self.obj_filename = algo_to_pickle(self.algo)
                 # nni instruction unknown
diff --git a/tests/test_integration_autorunner.py b/tests/test_integration_autorunner.py
@@ -121,7 +121,7 @@ def test_autorunner_ensemble(self) -> None:
     @unittest.skipIf(not has_nni, "nni required")
     def test_autorunner_hpo(self) -> None:
         work_dir = os.path.join(self.test_path, "work_dir")
-        runner = AutoRunner(work_dir=work_dir, input=self.data_src_cfg, hpo=True)
+        runner = AutoRunner(work_dir=work_dir, input=self.data_src_cfg, hpo=True, ensemble=False)
         hpo_param = {
             "num_iterations": 8,
             "num_iterations_per_validation": 4,
@@ -139,6 +139,7 @@ def test_autorunner_hpo(self) -> None:
             "searching#num_images_per_batch": 2,
             "searching#num_epochs": 2,
             "searching#num_warmup_iterations": 4,
+            "nni_dry_run": True,
         }
         search_space = {"learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]}}
         runner.set_num_fold(1)