@@ -83,16 +83,19 @@ def gen_auto_mask_table(
8383 Output directory to write the mask table.
8484 """
8585
86- ds_topog = xr .open_dataset (topo_file_path )
87- ny , nx = ds_topog .mask .shape
88-
8986 ibuf = 2
9087 jbuf = 2
9188 num_masked_blocks = 0
9289
93- mask = np .zeros ((ny + 2 * jbuf , nx + 2 * ibuf ))
94-
95- mask [jbuf : ny + jbuf , ibuf : nx + ibuf ] = ds_topog .mask .data
90+ ds_topog = xr .open_dataset (topo_file_path )
91+ if "mask" in ds_topog :
92+ ny , nx = ds_topog .mask .shape
93+ mask = np .zeros ((ny + 2 * jbuf , nx + 2 * ibuf ))
94+ mask [jbuf : ny + jbuf , ibuf : nx + ibuf ] = ds_topog .mask .data
95+ elif "wet" in ds_topog :
96+ ny , nx = ds_topog .wet .shape
97+ mask = np .zeros ((ny + 2 * jbuf , nx + 2 * ibuf ))
98+ mask [jbuf : ny + jbuf , ibuf : nx + ibuf ] = ds_topog .wet .data
9699
97100 # fill in buffer cells
98101 if reentrant_x :
@@ -123,37 +126,98 @@ def gen_auto_mask_table(
123126 # ratio of ocean cells to total number of cells
124127 glob_ocn_frac = mask [jbuf : ny + jbuf , ibuf : nx + ibuf ].sum () / (ny * nx )
125128
129+ pfrac = 0.01
130+ max_feasible_p = 0
131+ target_io_pes = args .tiopes
132+ found_feasible_layout = False
133+
126134 # Iteratively check for all possible division counts starting from the upper bound of npes/glob_ocn_frac,
127- # which is over-optimistic for realistic domains, but may be satisfied with idealized domains.
128- for p in range (int (np .ceil (npes / glob_ocn_frac )), npes , - 1 ):
129-
130- # compute the layout for the current division count, p
131- idiv , jdiv = MOM_define_layout (nx , ny , p )
132-
133- # don't bother checking this p if the aspect ratio is extreme
134- r_p = (nx / idiv ) / (ny / jdiv )
135- if r_p * r_extreme < 1.0 or r_extreme < r_p :
136- continue
137-
138- # Get the number of masked_blocks for this particular division count
139- mask_table = determine_land_blocks (mask , nx , ny , idiv , jdiv , ibuf , jbuf )
140-
141- # If we can eliminate enough blocks to reach the target npes, adopt
142- # this p (and the associated layout) and terminate the iteration.
143- num_masked_blocks = len (mask_table )
144- if p - num_masked_blocks <= npes :
145- print ("Found the optimum layout for auto-masking. Terminating iteration..." )
146- print (f"\t new ndivs: { p } , num_masked_blocks: { p - npes } " )
135+ # which is over-optimistic for realistic domains, but may be satisfied with idealized domains. The first encountered
136+ # feasible division count is stored in max_feasible_p. If the target_io_pes is not achievable with this layout,
137+ # the iteration continues until max_feasible_p * (1 - pfrac) is reached or the target_io_pes is satisfiable.
138+ # If not, the target_io_pes is decremented and the iteration is re-done from max_feasible_p to max_feasible_p * (1 - pfrac).
139+
140+ for i in range (target_io_pes , 0 , - 1 ):
141+
142+ if found_feasible_layout :
147143 break
148144
145+ if max_feasible_p == 0 : # first iteration
146+ p_up = int (np .ceil (npes / glob_ocn_frac ))
147+ else :
148+ p_up = max_feasible_p
149+
150+ for p in range (p_up , npes , - 1 ):
151+
152+ # compute the layout for the current division count, p
153+ idiv , jdiv = MOM_define_layout (nx , ny , p )
154+
155+ # don't bother checking this p if the aspect ratio is extreme
156+ ar = (nx / idiv ) / (ny / jdiv )
157+ if ar * r_extreme < 1.0 or r_extreme < ar :
158+ continue
159+
160+ # Get the number of masked_blocks for this particular division count
161+ mask_table = determine_land_blocks (mask , nx , ny , idiv , jdiv , ibuf , jbuf )
162+
163+ # If we can eliminate enough blocks to reach the target npes, adopt
164+ # this p (and the associated layout) and terminate the iteration.
165+ num_masked_blocks = len (mask_table )
166+
167+ if p - num_masked_blocks <= npes :
168+ print (
169+ f"ndivs: { p } , masked_blocks: { num_masked_blocks } " ,
170+ " idiv: " ,
171+ idiv ,
172+ "jdiv" ,
173+ jdiv ,
174+ )
175+
176+ if max_feasible_p == 0 :
177+ print ("^^^^^^^^^^^^^^^ first feasible layout ^^^^^^^^^^^^^^^" )
178+ max_feasible_p = p
179+ if (idiv * jdiv ) % i == 0 :
180+ idiv_io , jdiv_io = determine_io_layout (idiv , jdiv , i )
181+ # if the io layout ratio is extreme, skip this layout
182+ ar = (idiv / idiv_io ) / (jdiv / jdiv_io )
183+ if ar * r_extreme < 1.0 or r_extreme < ar :
184+ continue
185+ print (f"IO layout: { idiv_io } x { jdiv_io } " )
186+ print (
187+ "Found the optimum layout for auto-masking. Terminating iteration."
188+ )
189+ found_feasible_layout = True
190+ break
191+
192+ if p <= max_feasible_p * (1 - pfrac ):
193+ break
194+
149195 if num_masked_blocks == 0 :
150196 raise RuntimeError (
151197 "Couldn't auto-eliminate any land blocks. Try to increase the number"
152198 )
153199
154200 # Call determine_land_blocks once again, this time to retrieve and write out the mask_table.
155201 mask_table = determine_land_blocks (mask , nx , ny , idiv , jdiv , ibuf , jbuf )
156- write_auto_mask_file (mask_table , idiv , jdiv , npes , output_dir )
202+
203+
def determine_io_layout(idiv, jdiv, nio):
    """Determine the I/O layout for a given domain decomposition.

    Every factor pair ``(idiv_io, jdiv_io)`` of ``nio`` is considered. A pair
    is admissible only if ``idiv_io`` divides ``idiv`` and ``jdiv_io`` divides
    ``jdiv``. Among admissible pairs, the one whose ratio
    ``idiv_io / jdiv_io`` is closest to ``idiv / jdiv`` is selected; on ties
    the pair with the smallest ``idiv_io`` wins.

    Parameters
    ----------
    idiv : int
        Number of partitions in the x direction.
    jdiv : int
        Number of partitions in the y direction.
    nio : int
        Number of I/O PEs to distribute over the two directions.

    Returns
    -------
    tuple of int
        ``(idiv_io, jdiv_io)`` with ``idiv_io * jdiv_io == nio``. If no
        admissible pair exists, the fallback ``(1, nio)`` is returned and a
        ``RuntimeWarning`` is issued, since that fallback does not evenly
        divide the layout.
    """
    import warnings

    best_layout = None
    smallest_gap = float("inf")

    for idiv_io in range(1, nio + 1):
        # Only exact factorizations of nio are candidate I/O layouts.
        if nio % idiv_io != 0:
            continue
        jdiv_io = nio // idiv_io

        # The I/O layout must evenly tile the compute layout in both directions.
        if idiv % idiv_io != 0 or jdiv % jdiv_io != 0:
            continue

        gap = abs((idiv_io / jdiv_io) - (idiv / jdiv))
        # Strict comparison keeps the first (smallest idiv_io) pair on ties.
        if gap < smallest_gap:
            smallest_gap = gap
            best_layout = (idiv_io, jdiv_io)

    if best_layout is None:
        # Preserve the historical fallback value, but make the problem visible
        # instead of silently handing back a layout that cannot be used.
        warnings.warn(
            f"No I/O layout with {nio} PEs evenly divides the "
            f"{idiv} x {jdiv} layout; falling back to (1, {nio}).",
            RuntimeWarning,
            stacklevel=2,
        )
        return 1, nio

    return best_layout
157221
158222
159223def write_auto_mask_file (
@@ -209,6 +273,13 @@ def write_auto_mask_file(
209273 required = True ,
210274 help = "Number of MOM6 PEs (NTASKS_OCN)" ,
211275 )
276+ parser .add_argument (
277+ "--tiopes" ,
278+ default = 1 ,
279+ type = int ,
280+ required = False ,
281+ help = "Number of target I/O PEs (NTASKS_IO) (default: 1)" ,
282+ )
212283 parser .add_argument (
213284 "-rx" ,
214285 default = False ,
0 commit comments