Skip to content

Commit c404277

Browse files
Merge pull request #731 from birdnet-team/segments-collection-mode
Segments collection mode
2 parents ad98398 + d1250ee commit c404277

19 files changed

Lines changed: 331 additions & 61 deletions

File tree

birdnet_analyzer/cli.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,28 @@ def segments_parser():
541541
help="Minimum length of extracted segments in seconds. If a segment is shorter than this value, it will be padded with audio from the source file.",
542542
)
543543

544+
parser.add_argument(
545+
"--max_conf",
546+
default=cfg.MAX_CONFIDENCE,
547+
type=lambda a: max(0.00001, min(1.0, float(a))),
548+
help="Maximum confidence threshold. Values in [0.00001, 1.0].",
549+
)
550+
551+
parser.add_argument(
552+
"--collection_mode",
553+
default=cfg.SEGMENTS_COLLECTION_MODE,
554+
choices=["random", "confidence", "balanced"],
555+
help="Collection mode for selecting the segments. Can be 'random', 'confidence' or 'balanced'.",
556+
)
557+
558+
parser.add_argument(
559+
"--n_bins",
560+
type=lambda a: max(2, int(a)),
561+
default=10,
562+
help="Number of bins to use for the balanced collection mode. Values below 2 are raised to 2.",
563+
)
564+
565+
544566
return parser
545567

546568

birdnet_analyzer/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@
9090
# probabilities and needs to be adjusted)
9191
MIN_CONFIDENCE: float = 0.25
9292

93+
# Maximum confidence score for the segments feature.
94+
MAX_CONFIDENCE: float = 1.0
95+
96+
# How segments are selected from the result files.
97+
SEGMENTS_COLLECTION_MODE: str = "random"
98+
99+
# Number of bins for the balanced collection mode
100+
BALANCED_COLLECTION_BINS: int = 10
101+
93102
# Number of consecutive detections for one species to merge into one
94103
# If set to 1 or 0, no merging will be done
95104
# If set to None, all detections will be included

birdnet_analyzer/gui/segments.py

Lines changed: 84 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@ def extract_segments_wrapper(entry):
1616

1717
@gu.gui_runtime_error_handler
1818
def _extract_segments(
19-
audio_dir, result_dir, output_dir, min_conf, num_seq, audio_speed, seq_length, threads, progress=gr.Progress()
19+
audio_dir, result_dir, output_dir, min_conf, max_conf, num_seq, audio_speed, seq_length, threads, collection_mode, progress=gr.Progress()
2020
):
2121
from birdnet_analyzer.segments.utils import parse_files, parse_folders
2222

2323
gu.validate(audio_dir, loc.localize("validation-no-audio-directory-selected"))
2424

25+
gu.validate(max_conf > min_conf, loc.localize("validation-max-confidence-lower-than-min-confidence"))
26+
2527
if not result_dir:
2628
result_dir = audio_dir
2729

@@ -43,8 +45,11 @@ def _extract_segments(
4345
# Set confidence threshold
4446
cfg.MIN_CONFIDENCE = max(0.01, min(0.99, min_conf))
4547

48+
# Set maximum confidence threshold
49+
cfg.MAX_CONFIDENCE = max(0.01, min(1.0, max_conf))
50+
4651
# Parse file list and make list of segments
47-
cfg.FILE_LIST = parse_files(cfg.FILE_LIST, max(1, int(num_seq)))
52+
cfg.FILE_LIST = parse_files(cfg.FILE_LIST, max(1, int(num_seq)), collection_mode)
4853

4954
# Audio speed
5055
cfg.AUDIO_SPEED = max(0.1, 1.0 / (audio_speed * -1)) if audio_speed < 0 else max(1.0, float(audio_speed))
@@ -127,40 +132,73 @@ def select_directory_to_state_and_tb(state_key):
127132
show_progress="hidden",
128133
)
129134

130-
min_conf_slider = gr.Slider(
131-
minimum=0.1,
132-
maximum=0.99,
133-
step=0.01,
134-
value=cfg.MIN_CONFIDENCE,
135-
label=loc.localize("segments-tab-min-confidence-slider-label"),
136-
info=loc.localize("segments-tab-min-confidence-slider-info"),
137-
)
138-
num_seq_number = gr.Number(
139-
100,
140-
label=loc.localize("segments-tab-max-seq-number-label"),
141-
info=loc.localize("segments-tab-max-seq-number-info"),
142-
minimum=1,
143-
)
144-
audio_speed_slider = gr.Slider(
145-
minimum=-10,
146-
maximum=10,
147-
value=cfg.AUDIO_SPEED,
148-
step=1,
149-
label=loc.localize("inference-settings-audio-speed-slider-label"),
150-
info=loc.localize("inference-settings-audio-speed-slider-info"),
151-
)
152-
seq_length_number = gr.Number(
153-
cfg.SIG_LENGTH,
154-
label=loc.localize("segments-tab-seq-length-number-label"),
155-
info=loc.localize("segments-tab-seq-length-number-info"),
156-
minimum=0.1,
157-
)
158-
threads_number = gr.Number(
159-
4,
160-
label=loc.localize("segments-tab-threads-number-label"),
161-
info=loc.localize("segments-tab-threads-number-info"),
162-
minimum=1,
163-
)
135+
with gr.Group():
136+
with gr.Row():
137+
min_conf_slider = gr.Slider(
138+
minimum=0.1,
139+
maximum=0.99,
140+
step=0.01,
141+
value=cfg.MIN_CONFIDENCE,
142+
label=loc.localize("segments-tab-min-confidence-slider-label"),
143+
info=loc.localize("segments-tab-min-confidence-slider-info"),
144+
)
145+
max_conf_slider = gr.Slider(
146+
minimum=0.1,
147+
maximum=1.0,
148+
step=0.01,
149+
value=cfg.MAX_CONFIDENCE,
150+
label=loc.localize("segments-tab-max-confidence-slider-label"),
151+
info=loc.localize("segments-tab-max-confidence-slider-info"),
152+
)
153+
154+
with gr.Row():
155+
collection_mode_radio = gr.Radio(
156+
choices=[
157+
(loc.localize("segments-tab-collection-mode-radio-option-random"), "random"),
158+
(loc.localize("segments-tab-collection-mode-radio-option-confidence"), "confidence"),
159+
(loc.localize("segments-tab-collection-mode-radio-option-balanced"), "balanced"),
160+
],
161+
value=cfg.SEGMENTS_COLLECTION_MODE,
162+
label=loc.localize("segments-tab-collection-mode-label"),
163+
info=loc.localize("segments-tab-collection-mode-info"),
164+
interactive=True,
165+
)
166+
167+
num_bins = gr.Number(
168+
cfg.BALANCED_COLLECTION_BINS,
169+
label=loc.localize("segments-tab-n-bins-label"),
170+
info=loc.localize("segments-tab-n-bins-info"),
171+
minimum=2,
172+
step=1,
173+
visible=False,
174+
interactive=True)
175+
176+
num_seq_number = gr.Number(
177+
100,
178+
label=loc.localize("segments-tab-max-seq-number-label"),
179+
info=loc.localize("segments-tab-max-seq-number-info"),
180+
minimum=1,
181+
)
182+
audio_speed_slider = gr.Slider(
183+
minimum=-10,
184+
maximum=10,
185+
value=cfg.AUDIO_SPEED,
186+
step=1,
187+
label=loc.localize("inference-settings-audio-speed-slider-label"),
188+
info=loc.localize("inference-settings-audio-speed-slider-info"),
189+
)
190+
seq_length_number = gr.Number(
191+
cfg.SIG_LENGTH,
192+
label=loc.localize("segments-tab-seq-length-number-label"),
193+
info=loc.localize("segments-tab-seq-length-number-info"),
194+
minimum=0.1,
195+
)
196+
threads_number = gr.Number(
197+
4,
198+
label=loc.localize("segments-tab-threads-number-label"),
199+
info=loc.localize("segments-tab-threads-number-info"),
200+
minimum=1,
201+
)
164202

165203
extract_segments_btn = gr.Button(loc.localize("segments-tab-extract-button-label"), variant="huggingface")
166204

@@ -178,14 +216,24 @@ def select_directory_to_state_and_tb(state_key):
178216
result_directory_state,
179217
output_directory_state,
180218
min_conf_slider,
219+
max_conf_slider,
181220
num_seq_number,
182221
audio_speed_slider,
183222
seq_length_number,
184223
threads_number,
224+
collection_mode_radio
185225
],
186226
outputs=result_grid,
187227
)
188228

229+
def on_collection_mode_change(collection_mode):
230+
return gr.Number(visible=collection_mode == "balanced")
231+
232+
collection_mode_radio.change(
233+
on_collection_mode_change,
234+
inputs=collection_mode_radio,
235+
outputs=num_bins,
236+
)
189237

190238
if __name__ == "__main__":
191239
gu.open_window(build_segments_tab)

birdnet_analyzer/lang/de.json

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,16 @@
194194
"segments-tab-max-seq-number-info": "Maximale Anzahl von zufällig extrahierten Segmenten pro Art.",
195195
"segments-tab-max-seq-number-label": "Maximale Anzahl von Segmenten",
196196
"segments-tab-min-confidence-slider-info": "Wählt nur Segmente mit einem Wert über diesem Schwellenwert aus.",
197-
"segments-tab-min-confidence-slider-label": "Konfidenz-Schwellenwert",
197+
"segments-tab-min-confidence-slider-label": "Minimaler Konfidenz-Schwellenwert",
198+
"segments-tab-max-confidence-slider-info": "Wählt nur Segmente mit einem Wert unter diesem Schwellenwert aus.",
199+
"segments-tab-max-confidence-slider-label": "Maximaler Konfidenz-Schwellenwert",
200+
"segments-tab-collection-mode-label": "Sammelmodus",
201+
"segments-tab-collection-mode-info": "Gibt an, wie die Segmente aus den Ergebnisdateien ausgewählt werden.",
202+
"segments-tab-n-bins-label": "Anzahl der Konfidenz-Behälter",
203+
"segments-tab-n-bins-info": "Setzt die Anzahl der Konfidenz-Behälter für den ausgeglichenen Sammelmodus.",
204+
"segments-tab-collection-mode-radio-option-random": "zufällig",
205+
"segments-tab-collection-mode-radio-option-confidence": "nach Konfidenz",
206+
"segments-tab-collection-mode-radio-option-balanced": "ausgeglichen",
198207
"segments-tab-output-selection-button-label": "Wählen Sie das Ausgabeverzeichnis",
199208
"segments-tab-output-selection-textbox-placeholder": "Gleich wie das Audioverzeichnis, wenn nicht ausgewählt",
200209
"segments-tab-result-dataframe-column-execution-header": "Ausführung",
@@ -338,5 +347,6 @@
338347
"validation-no-valid-frequency": "Bitte geben Sie eine gültige Frequenz an",
339348
"validation-no-valid-learning-rate": "Bitte geben Sie eine gültige Lernrate an.",
340349
"validation-non-event-samples-required-in-binary-classification": "Für binäre Klassifikation müssen Hintergrundsamples vorhanden sein.",
341-
"validation-only-repeat-upsampling-for-multi-label": "Mit Multi-Label Beispielen kann nur 'wiederholen' als Upsampling-Modus verwendet werden."
350+
"validation-only-repeat-upsampling-for-multi-label": "Mit Multi-Label Beispielen kann nur 'wiederholen' als Upsampling-Modus verwendet werden.",
351+
"validation-max-confidence-lower-than-min-confidence": "Maximaler Konfidenz-Schwellenwert muss größer sein als der minimale Schwellenwert."
342352
}

birdnet_analyzer/lang/en.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@
195195
"segments-tab-max-seq-number-label": "Maximum number of segments",
196196
"segments-tab-min-confidence-slider-info": "Select only segments with a confidence above this threshold.",
197197
"segments-tab-min-confidence-slider-label": "Minimum confidence",
198+
"segments-tab-max-confidence-slider-info": "Select only segments with a confidence below this threshold.",
199+
"segments-tab-max-confidence-slider-label": "Maximum confidence",
200+
"segments-tab-collection-mode-label": "Collection mode",
201+
"segments-tab-collection-mode-info": "Determines how segments are selected from the result files.",
202+
"segments-tab-n-bins-label": "Number of confidence bins",
203+
"segments-tab-n-bins-info": "Sets the number of confidence bins for the balanced collection mode.",
204+
"segments-tab-collection-mode-radio-option-random": "random",
205+
"segments-tab-collection-mode-radio-option-confidence": "confidence",
206+
"segments-tab-collection-mode-radio-option-balanced": "balanced",
198207
"segments-tab-output-selection-button-label": "Select output directory",
199208
"segments-tab-output-selection-textbox-placeholder": "Same as audio directory if not selected",
200209
"segments-tab-result-dataframe-column-execution-header": "Execution",
@@ -338,5 +347,6 @@
338347
"validation-no-valid-frequency": "Please enter a valid frequency in",
339348
"validation-no-valid-learning-rate": "Please enter a valid learning rate.",
340349
"validation-non-event-samples-required-in-binary-classification": "Non-event samples are required for binary classification",
341-
"validation-only-repeat-upsampling-for-multi-label": "Only repeat-upsampling is available for multi-label"
350+
"validation-only-repeat-upsampling-for-multi-label": "Only repeat-upsampling is available for multi-label",
351+
"validation-max-confidence-lower-than-min-confidence": "Maximum confidence must be greater than minimum confidence"
342352
}

birdnet_analyzer/lang/fi.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@
195195
"segments-tab-max-seq-number-label": "Segmenttien enimmäismäärä",
196196
"segments-tab-min-confidence-slider-info": "Valitse vain segmentit, joiden luotettavuus on tätä raja-arvoa korkeampi.",
197197
"segments-tab-min-confidence-slider-label": "Vähimmäisluotettavuus",
198+
"segments-tab-max-confidence-slider-info": "Valitse vain segmentit, joiden luotettavuus on tätä raja-arvoa alhaisempi.",
199+
"segments-tab-max-confidence-slider-label": "Enimmäisluotettavuus",
200+
"segments-tab-collection-mode-label": "Keräystila",
201+
"segments-tab-collection-mode-info": "Määrittää, miten segmentit valitaan tulostiedostoista.",
202+
"segments-tab-n-bins-label": "Luotettavuusluokkien määrä",
203+
"segments-tab-n-bins-info": "Asettaa luotettavuusluokkien määrän tasapainotetussa keräystilassa.",
204+
"segments-tab-collection-mode-radio-option-random": "satunnainen",
205+
"segments-tab-collection-mode-radio-option-confidence": "luotettavuus",
206+
"segments-tab-collection-mode-radio-option-balanced": "tasapainotettu",
198207
"segments-tab-output-selection-button-label": "Valitse tulostehakemisto",
199208
"segments-tab-output-selection-textbox-placeholder": "Sama kuin äänihakemisto, jos ei valittu",
200209
"segments-tab-result-dataframe-column-execution-header": "Suoritus",
@@ -338,5 +347,6 @@
338347
"validation-no-valid-frequency": "Anna kelvollinen taajuus",
339348
"validation-no-valid-learning-rate": "Anna kelvollinen oppimistahti.",
340349
"validation-non-event-samples-required-in-binary-classification": "Binääriluokittelussa vaaditaan ei-tapahtuma-näytteitä",
341-
"validation-only-repeat-upsampling-for-multi-label": "Moninimiöisessä luokittelussa vain 'toista'-ylösnäytteistys on käytettävissä"
350+
"validation-only-repeat-upsampling-for-multi-label": "Moninimiöisessä luokittelussa vain 'toista'-ylösnäytteistys on käytettävissä",
351+
"validation-max-confidence-lower-than-min-confidence": "Enimmäisluotettavuuden on oltava suurempi kuin vähimmäisluotettavuus"
342352
}

birdnet_analyzer/lang/fr.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@
195195
"segments-tab-max-seq-number-label": "Nombre maximal de segments",
196196
"segments-tab-min-confidence-slider-info": "Ne sélectionner que les segments dont la confiance est supérieure à ce seuil.",
197197
"segments-tab-min-confidence-slider-label": "Confiance minimale",
198+
"segments-tab-max-confidence-slider-info": "Ne sélectionner que les segments dont la confiance est inférieure à ce seuil.",
199+
"segments-tab-max-confidence-slider-label": "Confiance maximale",
200+
"segments-tab-collection-mode-label": "Mode de collecte",
201+
"segments-tab-collection-mode-info": "Détermine comment les segments sont sélectionnés à partir des fichiers de résultats.",
202+
"segments-tab-n-bins-label": "Nombre de classes de confiance",
203+
"segments-tab-n-bins-info": "Définit le nombre de classes de confiance pour le mode de collecte équilibré.",
204+
"segments-tab-collection-mode-radio-option-random": "aléatoire",
205+
"segments-tab-collection-mode-radio-option-confidence": "confiance",
206+
"segments-tab-collection-mode-radio-option-balanced": "équilibré",
198207
"segments-tab-output-selection-button-label": "Sélectionner le répertoire de sortie",
199208
"segments-tab-output-selection-textbox-placeholder": "Identique au répertoire audio s'il n'est pas sélectionné",
200209
"segments-tab-result-dataframe-column-execution-header": "Execution",
@@ -338,5 +347,6 @@
338347
"validation-no-valid-frequency": "Veuillez saisir une fréquence valide dans",
339348
"validation-no-valid-learning-rate": "Veuillez saisir un taux d'apprentissage valide.",
340349
"validation-non-event-samples-required-in-binary-classification": "Des échantillons sans événement sont nécessaires pour la classification binaire.",
341-
"validation-only-repeat-upsampling-for-multi-label": "Seul l'échantillonnage ascendant répété est disponible pour les étiquettes multiples."
350+
"validation-only-repeat-upsampling-for-multi-label": "Seul l'échantillonnage ascendant répété est disponible pour les étiquettes multiples.",
351+
"validation-max-confidence-lower-than-min-confidence": "La confiance maximale doit être supérieure à la confiance minimale"
342352
}

birdnet_analyzer/lang/id.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@
195195
"segments-tab-max-seq-number-label": "Jumlah maksimum segmen",
196196
"segments-tab-min-confidence-slider-info": "Pilih hanya segmen dengan kepercayaan di atas ambang batas ini.",
197197
"segments-tab-min-confidence-slider-label": "Kepercayaan minimum",
198+
"segments-tab-max-confidence-slider-info": "Pilih hanya segmen dengan kepercayaan di bawah ambang batas ini.",
199+
"segments-tab-max-confidence-slider-label": "Kepercayaan maksimum",
200+
"segments-tab-collection-mode-label": "Mode koleksi",
201+
"segments-tab-collection-mode-info": "Menentukan bagaimana segmen dipilih dari file hasil.",
202+
"segments-tab-n-bins-label": "Jumlah bin kepercayaan",
203+
"segments-tab-n-bins-info": "Mengatur jumlah bin kepercayaan untuk mode koleksi seimbang.",
204+
"segments-tab-collection-mode-radio-option-random": "acak",
205+
"segments-tab-collection-mode-radio-option-confidence": "kepercayaan",
206+
"segments-tab-collection-mode-radio-option-balanced": "seimbang",
198207
"segments-tab-output-selection-button-label": "Pilih output direktori",
199208
"segments-tab-output-selection-textbox-placeholder": "Sama dengan direktori audio jika tidak dipilih",
200209
"segments-tab-result-dataframe-column-execution-header": "Eksekusi",
@@ -338,5 +347,6 @@
338347
"validation-no-valid-frequency": "Silakan masukkan frekuensi yang valid dalam",
339348
"validation-no-valid-learning-rate": "Silakan masukkan laju pembelajaran yang valid.",
340349
"validation-non-event-samples-required-in-binary-classification": "Sampel non-pertanyaan diperlukan untuk klasifikasi biner",
341-
"validation-only-repeat-upsampling-for-multi-label": "Hanya 'repeat-upsampling' yang tersedia untuk 'multi-label'"
350+
"validation-only-repeat-upsampling-for-multi-label": "Hanya 'repeat-upsampling' yang tersedia untuk 'multi-label'",
351+
"validation-max-confidence-lower-than-min-confidence": "Kepercayaan maksimum harus lebih besar dari kepercayaan minimum"
342352
}

0 commit comments

Comments
 (0)