Merge pull request #817 from birdnet-team/removed-embedding-normalization

max-mauermann · web-flow · commit a62d4ce03f84 · 2025-10-20T13:59:08.000+02:00
Removed the L2 normalization of embeddings that was applied after data loading during training
diff --git a/birdnet_analyzer/train/utils.py b/birdnet_analyzer/train/utils.py
@@ -263,29 +263,6 @@ def load_data(data_path, allowed_folders):
     # Return only the valid labels for further use
     return x_train, y_train, x_test, y_test, valid_labels
 
-
-def normalize_embeddings(embeddings):
-    """
-    Normalize embeddings to improve training stability and performance.
-
-    This applies L2 normalization to each embedding vector, which can help
-    with convergence and model performance, especially when training on
-    embeddings from different sources or domains.
-
-    Args:
-        embeddings: numpy array of embedding vectors
-
-    Returns:
-        Normalized embeddings array
-    """
-    # Calculate L2 norm of each embedding vector
-    norms = np.sqrt(np.sum(embeddings**2, axis=1, keepdims=True))
-    # Avoid division by zero
-    norms[norms == 0] = 1.0
-    # Normalize each embedding vector
-    return embeddings / norms
-
-
 def train_model(on_epoch_end=None, on_trial_result=None, on_data_load_end=None, autotune_directory="autotune"):
     """Trains a custom classifier.
 
@@ -310,12 +287,6 @@ def train_model(on_epoch_end=None, on_trial_result=None, on_data_load_end=None,
     if len(x_test) > 0:
         print(f"...Loaded {x_test.shape[0]} test samples.", flush=True)
 
-    # Normalize embeddings
-    print("Normalizing embeddings...", flush=True)
-    x_train = normalize_embeddings(x_train)
-    if len(x_test) > 0:
-        x_test = normalize_embeddings(x_test)
-
     if cfg.AUTOTUNE:
         import gc