Move to two-digit precision in results to improve accuracy when using overlap together with audio speed (#730)

Josef-Haupt · web-flow · commit 26085c1d3191 · 2025-06-23T16:57:01.000+02:00
* Move to two-digit precision in results to improve accuracy when using overlap together with audio speed

* new ruff version

* .
diff --git a/birdnet_analyzer/analyze/utils.py b/birdnet_analyzer/analyze/utils.py
@@ -365,8 +365,7 @@ def combine_kaleidoscope_files(saved_results: list[str]):
                         continue
 
                     # skip header and add to file
-                    for line in lines[1:]:
-                        f.write(line)
+                    f.writelines(lines[1:])
 
                 except Exception as ex:
                     print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
@@ -545,13 +544,12 @@ def iterate_audio_chunks(fpath: str, embeddings: bool = False):
             break
 
         for chunk_index, chunk in enumerate(chunks):
+            t_start = start + (chunk_index * (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED)
+            end = min(t_start + cfg.SIG_LENGTH * cfg.AUDIO_SPEED, fileLengthSeconds)
+
             # Add to batch
             samples.append(chunk)
-            timestamps.append([round(start, 1), round(end, 1)])
-
-            # Advance start and end
-            start += (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED
-            end = min(start + cfg.SIG_LENGTH * cfg.AUDIO_SPEED, fileLengthSeconds)
+            timestamps.append([round(t_start, 2), round(end, 2)])
 
             # Check if batch is full or last chunk
             if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
@@ -571,6 +569,8 @@ def iterate_audio_chunks(fpath: str, embeddings: bool = False):
             samples = []
             timestamps = []
 
+        start += len(chunks) * (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED
+
 
 def predict(samples):
     """Predicts the classes for the given samples.
diff --git a/birdnet_analyzer/gui/evaluation.py b/birdnet_analyzer/gui/evaluation.py
@@ -379,7 +379,7 @@ def select_directory_on_empty():  # Nishant - Function modified for For Folder s
 
                 if folder:
                     files = get_selection_tables(folder)
-                    files_to_display = files[:100] + [["..."]] if len(files) > 100 else files
+                    files_to_display = [*files[:100], ["..."]] if len(files) > 100 else files
                     return [files, files_to_display, gr.update(visible=True), *on_select(files)]
 
                 return ["", [[loc.localize("eval-tab-no-files-found")]]]
diff --git a/birdnet_analyzer/gui/multi_file.py b/birdnet_analyzer/gui/multi_file.py
@@ -119,7 +119,7 @@ def select_directory_on_empty():  # Nishant - Function modified for For Folder s
                     if folder:
                         files_and_durations = gu.get_audio_files_and_durations(folder)
                         if len(files_and_durations) > 100:
-                            return [folder, files_and_durations[:100] + [["..."]]]  # hopefully fixes issue#272
+                            return [folder, [*files_and_durations[:100], ["..."]]]  # keep the two-item [folder, rows] shape; hopefully fixes issue#272
                         return [folder, files_and_durations]
 
                     return ["", [[loc.localize("multi-tab-samples-dataframe-no-files-found")]]]
diff --git a/birdnet_analyzer/model.py b/birdnet_analyzer/model.py
@@ -872,8 +872,7 @@ def save_linear_classifier(classifier, model_path: str, labels: list[str], mode=
 
     # Save labels
     with open(model_path.replace(".tflite", "_Labels.txt"), "w", encoding="utf-8") as f:
-        for label in labels:
-            f.write(label + "\n")
+        f.writelines(label + "\n" for label in labels)
 
     save_model_params(model_path.replace(".tflite", "_Params.csv"))
 
diff --git a/birdnet_analyzer/species/utils.py b/birdnet_analyzer/species/utils.py
@@ -70,5 +70,4 @@ def run(output_path, lat, lon, week, threshold, sortby):
 
     # Save species list
     with open(cfg.OUTPUT_PATH, "w") as f:
-        for s in species_list:
-            f.write(s + "\n")
+        f.writelines(s + "\n" for s in species_list)
diff --git a/birdnet_analyzer/translate.py b/birdnet_analyzer/translate.py
@@ -119,8 +119,7 @@ def save_labels_file(labels: list[str], locale: str):
         cfg.TRANSLATED_LABELS_PATH, "{}_{}.txt".format(os.path.basename(cfg.LABELS_FILE).rsplit(".", 1)[0], locale)
     )
     with open(fpath, "w", encoding="utf-8") as f:
-        for label in labels:
-            f.write(label + "\n")
+        f.writelines(label + "\n" for label in labels)
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
@@ -144,5 +144,6 @@ ignore = [
     "PLR0915",
     "PLR0912",
     "PLC0206",
+    "PLC0415",
     "RUF015",
 ]
diff --git a/tests/analyze/test_analyze.py b/tests/analyze/test_analyze.py
@@ -254,6 +254,7 @@ def test_analyze_with_custom_species_list(mock_analyze_file: MagicMock, mock_set
     _, kwargs = mock_set_params.call_args
     assert kwargs["slist"] == species_list
 
+
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_negative_speed(setup_test_environment):
     """Test analyzing with negative speed."""
@@ -267,6 +268,7 @@ def test_analyze_with_negative_speed(setup_test_environment):
     with pytest.raises(ValueError, match="Audio speed must be a positive value."):
         analyze(soundscape_path, env["output_dir"], audio_speed=-1.0, top_n=1, min_conf=0)
 
+
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_zero_speed(setup_test_environment):
     """Test analyzing with zero speed."""
@@ -280,6 +282,7 @@ def test_analyze_with_zero_speed(setup_test_environment):
     with pytest.raises(ValueError, match="Audio speed must be a positive value."):
         analyze(soundscape_path, env["output_dir"], audio_speed=0.0, top_n=1, min_conf=0)
 
+
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_invalid_audio_speed(setup_test_environment):
     """Test analyzing with invalid audio speed."""
@@ -293,6 +296,7 @@ def test_analyze_with_invalid_audio_speed(setup_test_environment):
     with pytest.raises(ValueError, match="Audio speed must be a numeric value."):
         analyze(soundscape_path, env["output_dir"], audio_speed="fast", top_n=1, min_conf=0)
 
+
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_negative_overlap(setup_test_environment):
     """Test analyzing with invalid overlap."""
@@ -306,6 +310,7 @@ def test_analyze_with_negative_overlap(setup_test_environment):
     with pytest.raises(ValueError, match="Overlap must be a non-negative value."):
         analyze(soundscape_path, env["output_dir"], audio_speed=1.0, top_n=1, overlap=-1)
 
+
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_invalid_overlap(setup_test_environment):
     """Test analyzing with invalid overlap."""
@@ -319,6 +324,7 @@ def test_analyze_with_invalid_overlap(setup_test_environment):
     with pytest.raises(ValueError, match="Overlap must be a numeric value."):
         analyze(soundscape_path, env["output_dir"], audio_speed=1.0, top_n=1, overlap="high")
 
+
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_too_high_overlap(setup_test_environment):
     """Test analyzing with too high overlap."""
@@ -332,9 +338,10 @@ def test_analyze_with_too_high_overlap(setup_test_environment):
     with pytest.raises(ValueError, match=f"Overlap must be less than {cfg.SIG_LENGTH} seconds."):
         analyze(soundscape_path, env["output_dir"], audio_speed=1.0, top_n=1, overlap=3.0)
 
+
 @pytest.mark.parametrize(
     ("audio_speed", "overlap"),
-    [(10, 1), (5, 2), (5, 0), (0.1, 1), (0.2, 0)],
+    [(10, 1), (5, 2), (5, 0), (0.1, 1), (0.2, 0), (0.3, 0.7)],
 )
 def test_analyze_with_speed_up_and_overlap(setup_test_environment, audio_speed, overlap):
     """Test analyzing with speed up."""
@@ -344,9 +351,10 @@ def test_analyze_with_speed_up_and_overlap(setup_test_environment, audio_speed,
 
     assert os.path.exists(soundscape_path), "Soundscape file does not exist"
     file_length = 120
-    step_size = round(3 * audio_speed - overlap * audio_speed, 1)
-    expected_start_timestamps = [e / 10 for e in range(0, int(file_length * 10), int(step_size * 10))]
-    expected_end_timestamps = [e / 10 for e in range(int(3 * audio_speed * 10), int(file_length) * 10 + 1, int(step_size * 10))]
+    precision = 100
+    step_size = round((3 - overlap) * audio_speed, precision // 10)
+    expected_start_timestamps = [e / precision for e in range(0, int(file_length * precision), int(step_size * precision))]
+    expected_end_timestamps = [e / precision for e in range(round(3 * audio_speed * precision), int(file_length * precision) + 1, int(step_size * precision))]
 
     while len(expected_end_timestamps) < len(expected_start_timestamps):
         if file_length - expected_start_timestamps[-1] >= 1 * audio_speed:

Original file line number	Diff line number	Diff line change
`@@ -119,8 +119,7 @@ def save_labels_file(labels: list[str], locale: str):`
`119`	`119`	`cfg.TRANSLATED_LABELS_PATH, "{}_{}.txt".format(os.path.basename(cfg.LABELS_FILE).rsplit(".", 1)[0], locale)`
`120`	`120`	`)`
`121`	`121`	`with open(fpath, "w", encoding="utf-8") as f:`
`122`		`- for label in labels:`
`123`		`- f.write(label + "\n")`
	`122`	`+ f.writelines(label + "\n" for label in labels)`
`124`	`123`
`125`	`124`
`126`	`125`	`if __name__ == "__main__":`
Original file line number	Diff line number	Diff line change
`@@ -144,5 +144,6 @@ ignore = [`
`144`	`144`	`"PLR0915",`
`145`	`145`	`"PLR0912",`
`146`	`146`	`"PLC0206",`
	`147`	`+ "PLC0415",`
`147`	`148`	`"RUF015",`
`148`	`149`	`]`