fixed timestamps not respecting overlap when using different audio speed (#722)

Josef-Haupt · web-flow · commit 928bf9f73724 · 2025-06-16T17:13:24.000+02:00
* fixed timestamps not respecting overlap when using different audio speed

* fixed offset overtaking start timestamp

* slight change to account for floating-point imprecision

* speed up and overlap tests

* slow down test

* parameterized test cases for overlap + speed

* .
diff --git a/birdnet_analyzer/analyze/core.py b/birdnet_analyzer/analyze/core.py
@@ -160,6 +160,21 @@ def _set_params(
     from birdnet_analyzer.species.utils import get_species_list
     from birdnet_analyzer.utils import collect_audio_files, read_lines
 
+    if not isinstance(overlap, int | float):
+        raise ValueError("Overlap must be a numeric value.")
+
+    if overlap < 0:
+        raise ValueError("Overlap must be a non-negative value.")
+
+    if overlap >= cfg.SIG_LENGTH:
+        raise ValueError(f"Overlap must be less than {cfg.SIG_LENGTH} seconds.")
+
+    if not isinstance(audio_speed, int | float):
+        raise ValueError("Audio speed must be a numeric value.")
+
+    if audio_speed <= 0:
+        raise ValueError("Audio speed must be a positive value.")
+
     cfg.CODES = load_codes()
     cfg.LABELS = read_lines(labels_file if labels_file else cfg.LABELS_FILE)
     cfg.SKIP_EXISTING_RESULTS = skip_existing_results
diff --git a/birdnet_analyzer/analyze/utils.py b/birdnet_analyzer/analyze/utils.py
@@ -600,9 +600,8 @@ def analyze_file(item) -> dict[str, str] | None:
 
     # Start time
     start_time = datetime.datetime.now()
-    offset = 0
     duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)
-    start, end = 0, cfg.SIG_LENGTH
+    start, end = 0, cfg.SIG_LENGTH * cfg.AUDIO_SPEED
     results = {}
 
     # Status
@@ -619,19 +618,19 @@ def analyze_file(item) -> dict[str, str] | None:
 
     # Process each chunk
     try:
-        while offset < fileLengthSeconds:
-            chunks = get_raw_audio_from_file(fpath, offset, duration)
+        while start < fileLengthSeconds and not np.isclose(start, fileLengthSeconds):
+            chunks = get_raw_audio_from_file(fpath, start, duration)
             samples = []
             timestamps = []
 
             for chunk_index, chunk in enumerate(chunks):
                 # Add to batch
                 samples.append(chunk)
-                timestamps.append([round(start * cfg.AUDIO_SPEED, 1), round(end * cfg.AUDIO_SPEED, 1)])
+                timestamps.append([round(start, 1), round(end, 1)])
 
                 # Advance start and end
-                start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
-                end = start + cfg.SIG_LENGTH
+                start += (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED
+                end = min(start + cfg.SIG_LENGTH * cfg.AUDIO_SPEED, fileLengthSeconds)
 
                 # Check if batch is full or last chunk
                 if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
@@ -671,7 +670,6 @@ def analyze_file(item) -> dict[str, str] | None:
                 # Clear batch
                 samples = []
                 timestamps = []
-            offset = offset + duration
 
     except Exception as ex:
         # Write error log
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,7 +45,7 @@ gui = [
 embeddings = ["perch-hoplite"]
 all = ["birdnet-analyzer[server,gui]"]
 docs = ["sphinx", "sphinx-rtd-theme", "sphinx-argparse"]
-tests = ["pytest"]
+tests = ["pytest", "pytest-timeout"]
 dev = ["birdnet_analyzer[tests]", "birdnet_analyzer[docs]", "ruff"]
 
 [project.scripts]
@@ -93,6 +93,7 @@ birdnet_analyzer = [
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 pythonpath = ["birdnet_analyzer"]
+timeout = 120
 
 [tool.ruff]
 exclude = ["conf.py"]
diff --git a/tests/analyze/test_analyze.py b/tests/analyze/test_analyze.py
@@ -4,7 +4,6 @@
 import tempfile
 from unittest.mock import MagicMock, patch
 
-import numpy as np
 import pytest
 
 import birdnet_analyzer.config as cfg
@@ -32,9 +31,7 @@ def setup_test_environment():
         f.write(b"more dummy audio data")
 
     # Store original config values
-    original_config = {
-        attr: getattr(cfg, attr) for attr in dir(cfg) if not attr.startswith("_") and not callable(getattr(cfg, attr))
-    }
+    original_config = {attr: getattr(cfg, attr) for attr in dir(cfg) if not attr.startswith("_") and not callable(getattr(cfg, attr))}
 
     yield {
         "test_dir": test_dir,
@@ -88,9 +85,7 @@ def test_analyze_single_file(
 @patch("birdnet_analyzer.analyze.core._set_params")
 @patch("multiprocessing.Pool")
 @patch("birdnet_analyzer.analyze.utils.save_analysis_params")
-def test_analyze_directory_multiprocess(
-    mock_save_params: MagicMock, mock_pool, mock_set_params: MagicMock, mock_ensure_model: MagicMock, setup_test_environment
-):
+def test_analyze_directory_multiprocess(mock_save_params: MagicMock, mock_pool, mock_set_params: MagicMock, mock_ensure_model: MagicMock, setup_test_environment):
     """Test analyzing multiple files with multiprocessing."""
     env = setup_test_environment
 
@@ -180,6 +175,7 @@ def test_analyze_with_location_filtering(mock_analyze_file: MagicMock, mock_set_
 
     # Verify parameter passing
     mock_set_params.assert_called_once()
+    mock_ensure_model.assert_called_once()
     _, kwargs = mock_set_params.call_args
     assert kwargs["lat"] == 42.5
     assert kwargs["lon"] == -76.45
@@ -207,16 +203,15 @@ def test_analyze_with_custom_classifier(mock_analyze_file: MagicMock, mock_set_p
 
     # Verify parameter passing
     mock_set_params.assert_called_once()
+    mock_ensure_model.assert_called_once()
     _, kwargs = mock_set_params.call_args
     assert kwargs["custom_classifier"] == custom_classifier
 
 
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 @patch("birdnet_analyzer.analyze.core._set_params")
 @patch("birdnet_analyzer.analyze.utils.analyze_file")
-def test_analyze_with_multiple_result_types(
-    mock_analyze_file: MagicMock, mock_set_params: MagicMock, mock_ensure_model: MagicMock, setup_test_environment
-):
+def test_analyze_with_multiple_result_types(mock_analyze_file: MagicMock, mock_set_params: MagicMock, mock_ensure_model: MagicMock, setup_test_environment):
     """Test analyzing with multiple output result types."""
     env = setup_test_environment
 
@@ -229,16 +224,15 @@ def test_analyze_with_multiple_result_types(
 
     # Verify parameter passing
     mock_set_params.assert_called_once()
+    mock_ensure_model.assert_called_once()
     _, kwargs = mock_set_params.call_args
     assert kwargs["rtype"] == ["table", "csv", "audacity"]
 
 
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 @patch("birdnet_analyzer.analyze.core._set_params")
 @patch("birdnet_analyzer.analyze.utils.analyze_file")
-def test_analyze_with_custom_species_list(
-    mock_analyze_file: MagicMock, mock_set_params: MagicMock, mock_ensure_model: MagicMock, setup_test_environment
-):
+def test_analyze_with_custom_species_list(mock_analyze_file: MagicMock, mock_set_params: MagicMock, mock_ensure_model: MagicMock, setup_test_environment):
     """Test analyzing with a custom species list."""
     env = setup_test_environment
 
@@ -256,59 +250,126 @@ def test_analyze_with_custom_species_list(
 
     # Verify parameter passing
     mock_set_params.assert_called_once()
+    mock_ensure_model.assert_called_once()
     _, kwargs = mock_set_params.call_args
     assert kwargs["slist"] == species_list
 
-def test_analyze_with_speed_up(setup_test_environment):
-    """Test analyzing with speed up."""
+@patch("birdnet_analyzer.utils.ensure_model_exists")
+def test_analyze_with_negative_speed(mock_ensure_model, setup_test_environment):
+    """Test that a negative audio speed is rejected with a ValueError."""
     env = setup_test_environment
 
     soundscape_path = "birdnet_analyzer/example/soundscape.wav"
 
     assert os.path.exists(soundscape_path), "Soundscape file does not exist"
 
     # Call function under test
-    analyze(soundscape_path, env["output_dir"], audio_speed=5.0, top_n=1, min_conf=0)
+    with pytest.raises(ValueError, match="Audio speed must be a positive value."):
+        analyze(soundscape_path, env["output_dir"], audio_speed=-1.0, top_n=1, min_conf=0)
 
-    output_file = os.path.join(env["output_dir"], "soundscape.BirdNET.selection.table.txt")
-    assert os.path.exists(output_file)
+@patch("birdnet_analyzer.utils.ensure_model_exists")
+def test_analyze_with_zero_speed(mock_ensure_model, setup_test_environment):
+    """Test that an audio speed of zero is rejected with a ValueError."""
+    env = setup_test_environment
 
-    with open(output_file) as f:
-        lines = f.readlines()[1:]
-        assert len(lines) == 8, "Number of predicted segments does not match"
+    soundscape_path = "birdnet_analyzer/example/soundscape.wav"
 
-        for index, line in enumerate(lines):
-            parts = line.strip().split("\t")
-            start = float(parts[3])
-            end = float(parts[4])
-            assert np.isclose(start, index * 15), "Start time does not match expected value"
-            assert np.isclose(end, (index + 1) * 15), "End time does not match expected value"
+    assert os.path.exists(soundscape_path), "Soundscape file does not exist"
+
+    # Call function under test
+    with pytest.raises(ValueError, match="Audio speed must be a positive value."):
+        analyze(soundscape_path, env["output_dir"], audio_speed=0.0, top_n=1, min_conf=0)
+
+@patch("birdnet_analyzer.utils.ensure_model_exists")
+def test_analyze_with_invalid_audio_speed(mock_ensure_model, setup_test_environment):
+    """Test that a non-numeric audio speed is rejected with a ValueError."""
+    env = setup_test_environment
+
+    soundscape_path = "birdnet_analyzer/example/soundscape.wav"
+
+    assert os.path.exists(soundscape_path), "Soundscape file does not exist"
+
+    # Call function under test
+    with pytest.raises(ValueError, match="Audio speed must be a numeric value."):
+        analyze(soundscape_path, env["output_dir"], audio_speed="fast", top_n=1, min_conf=0)
+
+@patch("birdnet_analyzer.utils.ensure_model_exists")
+def test_analyze_with_negative_overlap(mock_ensure_model, setup_test_environment):
+    """Test that a negative overlap is rejected with a ValueError."""
+    env = setup_test_environment
 
+    soundscape_path = "birdnet_analyzer/example/soundscape.wav"
+
+    assert os.path.exists(soundscape_path), "Soundscape file does not exist"
 
-def test_analyze_with_slow_down(setup_test_environment):
+    # Call function under test
+    with pytest.raises(ValueError, match="Overlap must be a non-negative value."):
+        analyze(soundscape_path, env["output_dir"], audio_speed=1.0, top_n=1, overlap=-1)
+
+@patch("birdnet_analyzer.utils.ensure_model_exists")
+def test_analyze_with_invalid_overlap(mock_ensure_model, setup_test_environment):
+    """Test that a non-numeric overlap is rejected with a ValueError."""
+    env = setup_test_environment
+
+    soundscape_path = "birdnet_analyzer/example/soundscape.wav"
+
+    assert os.path.exists(soundscape_path), "Soundscape file does not exist"
+
+    # Call function under test
+    with pytest.raises(ValueError, match="Overlap must be a numeric value."):
+        analyze(soundscape_path, env["output_dir"], audio_speed=1.0, top_n=1, overlap="high")
+
+@patch("birdnet_analyzer.utils.ensure_model_exists")
+def test_analyze_with_too_high_overlap(mock_ensure_model, setup_test_environment):
+    """Test that an overlap not below cfg.SIG_LENGTH is rejected with a ValueError."""
+    env = setup_test_environment
+
+    soundscape_path = "birdnet_analyzer/example/soundscape.wav"
+
+    assert os.path.exists(soundscape_path), "Soundscape file does not exist"
+
+    # Call function under test
+    with pytest.raises(ValueError, match=f"Overlap must be less than {cfg.SIG_LENGTH} seconds."):
+        analyze(soundscape_path, env["output_dir"], audio_speed=1.0, top_n=1, overlap=3.0)
+
+@pytest.mark.parametrize(
+    ("audio_speed", "overlap"),
+    [(10, 1), (5, 2), (5, 0), (0.1, 1), (0.2, 0)],
+)
+def test_analyze_with_speed_up_and_overlap(setup_test_environment, audio_speed, overlap):
     """Test analyzing with speed up."""
     env = setup_test_environment
 
     soundscape_path = "birdnet_analyzer/example/soundscape.wav"
 
     assert os.path.exists(soundscape_path), "Soundscape file does not exist"
+    file_length = 120
+    step_size = round(3 * audio_speed - overlap * audio_speed, 1)
+    expected_start_timestamps = [e / 10 for e in range(0, int(file_length * 10), int(step_size * 10))]
+    expected_end_timestamps = [e / 10 for e in range(int(3 * audio_speed * 10), int(file_length) * 10 + 1, int(step_size * 10))]
+
+    while len(expected_end_timestamps) < len(expected_start_timestamps):
+        if file_length - expected_start_timestamps[-1] >= 1 * audio_speed:
+            expected_end_timestamps.append(file_length)
+        else:
+            expected_start_timestamps.pop()
 
     # Call function under test
-    analyze(soundscape_path, env["output_dir"], audio_speed=0.2, top_n=1, min_conf=0)
+    analyze(soundscape_path, env["output_dir"], audio_speed=audio_speed, top_n=1, overlap=overlap)
 
     output_file = os.path.join(env["output_dir"], "soundscape.BirdNET.selection.table.txt")
     assert os.path.exists(output_file)
 
     with open(output_file) as f:
         lines = f.readlines()[1:]
-        assert len(lines) == 200, "Number of predicted segments does not match"
 
-        for index, line in enumerate(lines):
+        for expected_start, expected_end, line in zip(expected_start_timestamps, expected_end_timestamps, lines, strict=True):
             parts = line.strip().split("\t")
-            start = float(parts[3])
-            end = float(parts[4])
-            assert np.isclose(start, index * 0.6), "Start time does not match expected value"
-            assert np.isclose(end, (index + 1) * 0.6), "End time does not match expected value"
+            actual_start = float(parts[3])
+            actual_end = float(parts[4])
+            assert float(actual_start) == expected_start, "Start time does not match expected value"
+            assert float(actual_end) == expected_end, "End time does not match expected value"
+
 
 @patch("birdnet_analyzer.utils.ensure_model_exists")
 def test_analyze_with_additional_columns(mock_ensure_model, setup_test_environment):
@@ -332,6 +393,7 @@ def test_analyze_with_additional_columns(mock_ensure_model, setup_test_environme
         rtype=["csv"],
     )
 
+    mock_ensure_model.assert_called_once()
     output_file = os.path.join(env["output_dir"], "soundscape.BirdNET.results.csv")
     assert os.path.exists(output_file)