@@ -16,12 +16,14 @@ def extract_segments_wrapper(entry):
1616
1717@gu .gui_runtime_error_handler
1818def _extract_segments (
19- audio_dir , result_dir , output_dir , min_conf , num_seq , audio_speed , seq_length , threads , progress = gr .Progress ()
19+ audio_dir , result_dir , output_dir , min_conf , max_conf , num_seq , audio_speed , seq_length , threads , collection_mode , progress = gr .Progress ()
2020):
2121 from birdnet_analyzer .segments .utils import parse_files , parse_folders
2222
2323 gu .validate (audio_dir , loc .localize ("validation-no-audio-directory-selected" ))
2424
25+ gu .validate (max_conf > min_conf , loc .localize ("validation-max-confidence-lower-than-min-confidence" ))
26+
2527 if not result_dir :
2628 result_dir = audio_dir
2729
@@ -43,8 +45,11 @@ def _extract_segments(
4345 # Set confidence threshold
4446 cfg .MIN_CONFIDENCE = max (0.01 , min (0.99 , min_conf ))
4547
48+ # Set maximum confidence threshold
49+ cfg .MAX_CONFIDENCE = max (0.01 , min (1.0 , max_conf ))
50+
4651 # Parse file list and make list of segments
47- cfg .FILE_LIST = parse_files (cfg .FILE_LIST , max (1 , int (num_seq )))
52+ cfg .FILE_LIST = parse_files (cfg .FILE_LIST , max (1 , int (num_seq )), collection_mode )
4853
4954 # Audio speed
5055 cfg .AUDIO_SPEED = max (0.1 , 1.0 / (audio_speed * - 1 )) if audio_speed < 0 else max (1.0 , float (audio_speed ))
@@ -127,40 +132,73 @@ def select_directory_to_state_and_tb(state_key):
127132 show_progress = "hidden" ,
128133 )
129134
130- min_conf_slider = gr .Slider (
131- minimum = 0.1 ,
132- maximum = 0.99 ,
133- step = 0.01 ,
134- value = cfg .MIN_CONFIDENCE ,
135- label = loc .localize ("segments-tab-min-confidence-slider-label" ),
136- info = loc .localize ("segments-tab-min-confidence-slider-info" ),
137- )
138- num_seq_number = gr .Number (
139- 100 ,
140- label = loc .localize ("segments-tab-max-seq-number-label" ),
141- info = loc .localize ("segments-tab-max-seq-number-info" ),
142- minimum = 1 ,
143- )
144- audio_speed_slider = gr .Slider (
145- minimum = - 10 ,
146- maximum = 10 ,
147- value = cfg .AUDIO_SPEED ,
148- step = 1 ,
149- label = loc .localize ("inference-settings-audio-speed-slider-label" ),
150- info = loc .localize ("inference-settings-audio-speed-slider-info" ),
151- )
152- seq_length_number = gr .Number (
153- cfg .SIG_LENGTH ,
154- label = loc .localize ("segments-tab-seq-length-number-label" ),
155- info = loc .localize ("segments-tab-seq-length-number-info" ),
156- minimum = 0.1 ,
157- )
158- threads_number = gr .Number (
159- 4 ,
160- label = loc .localize ("segments-tab-threads-number-label" ),
161- info = loc .localize ("segments-tab-threads-number-info" ),
162- minimum = 1 ,
163- )
135+ with gr .Group ():
136+ with gr .Row ():
137+ min_conf_slider = gr .Slider (
138+ minimum = 0.1 ,
139+ maximum = 0.99 ,
140+ step = 0.01 ,
141+ value = cfg .MIN_CONFIDENCE ,
142+ label = loc .localize ("segments-tab-min-confidence-slider-label" ),
143+ info = loc .localize ("segments-tab-min-confidence-slider-info" ),
144+ )
145+ max_conf_slider = gr .Slider (
146+ minimum = 0.1 ,
147+ maximum = 1.0 ,
148+ step = 0.01 ,
149+ value = cfg .MAX_CONFIDENCE ,
150+ label = loc .localize ("segments-tab-max-confidence-slider-label" ),
151+ info = loc .localize ("segments-tab-max-confidence-slider-info" ),
152+ )
153+
154+ with gr .Row ():
155+ collection_mode_radio = gr .Radio (
156+ choices = [
157+ (loc .localize ("segments-tab-collection-mode-radio-option-random" ), "random" ),
158+ (loc .localize ("segments-tab-collection-mode-radio-option-confidence" ), "confidence" ),
159+ (loc .localize ("segments-tab-collection-mode-radio-option-balanced" ), "balanced" ),
160+ ],
161+ value = cfg .SEGMENTS_COLLECTION_MODE ,
162+ label = loc .localize ("segments-tab-collection-mode-label" ),
163+ info = loc .localize ("segments-tab-collection-mode-info" ),
164+ interactive = True ,
165+ )
166+
167+ num_bins = gr .Number (
168+ cfg .BALANCED_COLLECTION_BINS ,
169+ label = loc .localize ("segments-tab-n-bins-label" ),
170+ info = loc .localize ("segments-tab-n-bins-info" ),
171+ minimum = 2 ,
172+ step = 1 ,
173+ visible = False ,
174+ interactive = True )
175+
176+ num_seq_number = gr .Number (
177+ 100 ,
178+ label = loc .localize ("segments-tab-max-seq-number-label" ),
179+ info = loc .localize ("segments-tab-max-seq-number-info" ),
180+ minimum = 1 ,
181+ )
182+ audio_speed_slider = gr .Slider (
183+ minimum = - 10 ,
184+ maximum = 10 ,
185+ value = cfg .AUDIO_SPEED ,
186+ step = 1 ,
187+ label = loc .localize ("inference-settings-audio-speed-slider-label" ),
188+ info = loc .localize ("inference-settings-audio-speed-slider-info" ),
189+ )
190+ seq_length_number = gr .Number (
191+ cfg .SIG_LENGTH ,
192+ label = loc .localize ("segments-tab-seq-length-number-label" ),
193+ info = loc .localize ("segments-tab-seq-length-number-info" ),
194+ minimum = 0.1 ,
195+ )
196+ threads_number = gr .Number (
197+ 4 ,
198+ label = loc .localize ("segments-tab-threads-number-label" ),
199+ info = loc .localize ("segments-tab-threads-number-info" ),
200+ minimum = 1 ,
201+ )
164202
165203 extract_segments_btn = gr .Button (loc .localize ("segments-tab-extract-button-label" ), variant = "huggingface" )
166204
@@ -178,14 +216,24 @@ def select_directory_to_state_and_tb(state_key):
178216 result_directory_state ,
179217 output_directory_state ,
180218 min_conf_slider ,
219+ max_conf_slider ,
181220 num_seq_number ,
182221 audio_speed_slider ,
183222 seq_length_number ,
184223 threads_number ,
224+ collection_mode_radio
185225 ],
186226 outputs = result_grid ,
187227 )
188228
229+ def on_collection_mode_change (collection_mode ):  # Handler for the collection-mode radio: toggles visibility of the bin-count field.
230+ return gr .Number (visible = collection_mode == "balanced" )  # Visible only when the selected mode is "balanced".
231+
232+ collection_mode_radio .change (  # Re-run the handler whenever the radio value changes.
233+ on_collection_mode_change ,
234+ inputs = collection_mode_radio ,
235+ outputs = num_bins ,  # NOTE(review): num_bins is shown/hidden here, but it is not among the event inputs visible in this diff and _extract_segments has no bins parameter - confirm how its value is consumed.
236+ )
189237
190238if __name__ == "__main__" :  # Only when this module is executed directly as a script.
191239 gu .open_window (build_segments_tab )  # Passes the tab builder to gu.open_window.
0 commit comments