@@ -396,7 +396,7 @@ def combine_csv_files(saved_results: list[str]):
396396 f .write (out_string )
397397
398398
399- def combine_results (saved_results : Sequence [dict [str , str ]| None ]):
399+ def combine_results (saved_results : Sequence [dict [str , str ] | None ]):
400400 """
401401 Combines various types of result files based on the configuration settings.
402402 This function checks the types of results specified in the configuration
@@ -522,6 +522,56 @@ def get_raw_audio_from_file(fpath: str, offset, duration):
522522 return audio .split_signal (sig , rate , cfg .SIG_LENGTH , cfg .SIG_OVERLAP , cfg .SIG_MINLEN )
523523
524524
def iterate_audio_chunks(fpath: str, embeddings: bool = False):
    """Iterates over the audio chunks of a file and yields the model output for each.

    The file is read in segments of ``cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED``
    (truncated to an int), each segment is split into chunks by
    ``get_raw_audio_from_file`` and the chunks are passed to the model in
    batches of at most ``cfg.BATCH_SIZE``.

    This is a generator: nothing is read until iteration starts, so any
    exception raised while determining the file length or reading audio
    surfaces in the consuming loop, not at call time.

    Args:
        fpath: Path to the audio file.
        embeddings: If True, the output of ``model.embeddings`` is yielded for
            each chunk; otherwise the output of ``predict`` is yielded.

    Yields:
        Tuples ``(start, end, result)`` where ``start`` and ``end`` are the
        chunk timestamps rounded to one decimal place and ``result`` is the
        model output for that chunk.
    """
    file_length_seconds = audio.get_audio_file_length(fpath)
    start, end = 0, cfg.SIG_LENGTH * cfg.AUDIO_SPEED
    duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)

    # np.isclose guards against float accumulation leaving `start` a hair
    # below the file length, which would otherwise trigger one more read.
    while start < file_length_seconds and not np.isclose(start, file_length_seconds):
        chunks = get_raw_audio_from_file(fpath, start, duration)

        # Nothing could be read from this position: stop instead of looping
        # forever, since `start` only advances per processed chunk.
        if not chunks:
            break

        samples = []
        timestamps = []
        last_index = len(chunks) - 1

        for chunk_index, chunk in enumerate(chunks):
            # Add to batch
            samples.append(chunk)
            timestamps.append([round(start, 1), round(end, 1)])

            # Advance start and end; the end is clipped to the file length
            start += (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED
            end = min(start + cfg.SIG_LENGTH * cfg.AUDIO_SPEED, file_length_seconds)

            # Keep collecting until the batch is full or this is the last chunk
            if len(samples) < cfg.BATCH_SIZE and chunk_index < last_index:
                continue

            # Run the model on the whole batch
            p = model.embeddings(samples) if embeddings else predict(samples)

            # Emit one result per chunk together with its timestamps
            for i, (s_start, s_end) in enumerate(timestamps):
                yield s_start, s_end, p[i]

            # Clear batch
            samples = []
            timestamps = []
573+
574+
525575def predict (samples ):
526576 """Predicts the classes for the given samples.
527577
@@ -600,76 +650,31 @@ def analyze_file(item) -> dict[str, str] | None:
600650
601651 # Start time
602652 start_time = datetime .datetime .now ()
603- duration = int (cfg .FILE_SPLITTING_DURATION / cfg .AUDIO_SPEED )
604- start , end = 0 , cfg .SIG_LENGTH * cfg .AUDIO_SPEED
605653 results = {}
606654
607655 # Status
608656 print (f"Analyzing { fpath } " , flush = True )
609657
610- try :
611- fileLengthSeconds = audio .get_audio_file_length (fpath )
612- except Exception as ex :
613- # Write error log
614- print (f"Error: Cannot analyze audio file { fpath } . File corrupt?\n " , flush = True )
615- utils .write_error_log (ex )
616-
617- return None
618-
619658 # Process each chunk
620659 try :
621- while start < fileLengthSeconds and not np .isclose (start , fileLengthSeconds ):
622- chunks = get_raw_audio_from_file (fpath , start , duration )
623- samples = []
624- timestamps = []
625-
626- for chunk_index , chunk in enumerate (chunks ):
627- # Add to batch
628- samples .append (chunk )
629- timestamps .append ([round (start , 1 ), round (end , 1 )])
630-
631- # Advance start and end
632- start += (cfg .SIG_LENGTH - cfg .SIG_OVERLAP ) * cfg .AUDIO_SPEED
633- end = min (start + cfg .SIG_LENGTH * cfg .AUDIO_SPEED , fileLengthSeconds )
634-
635- # Check if batch is full or last chunk
636- if len (samples ) < cfg .BATCH_SIZE and chunk_index < len (chunks ) - 1 :
637- continue
638-
639- # Predict
640- p = predict (samples )
641-
642- # Add to results
643- for i in range (len (samples )):
644- # Get timestamp
645- s_start , s_end = timestamps [i ]
646-
647- # Get prediction
648- pred = p [i ]
649-
650- if not cfg .LABELS :
651- cfg .LABELS = [f"Species-{ i } _Species-{ i } " for i in range (len (pred ))]
652-
653- # Assign scores to labels
654- p_labels = [
655- p
656- for p in zip (cfg .LABELS , pred , strict = True )
657- if (cfg .TOP_N or p [1 ] >= cfg .MIN_CONFIDENCE ) and (not cfg .SPECIES_LIST or p [0 ] in cfg .SPECIES_LIST )
658- ]
660+ for s_start , s_end , pred in iterate_audio_chunks (fpath ):
661+ if not cfg .LABELS :
662+ cfg .LABELS = [f"Species-{ i } _Species-{ i } " for i in range (len (pred ))]
659663
660- # Sort by score
661- p_sorted = sorted (p_labels , key = operator .itemgetter (1 ), reverse = True )
664+ # Assign scores to labels
665+ p_labels = [
666+ p for p in zip (cfg .LABELS , pred , strict = True ) if (cfg .TOP_N or p [1 ] >= cfg .MIN_CONFIDENCE ) and (not cfg .SPECIES_LIST or p [0 ] in cfg .SPECIES_LIST )
667+ ]
662668
663- if cfg . TOP_N :
664- p_sorted = p_sorted [: cfg . TOP_N ]
669+ # Sort by score
670+ p_sorted = sorted ( p_labels , key = operator . itemgetter ( 1 ), reverse = True )
665671
666- # TODO: hier schon top n oder min conf raussortieren
667- # Store top 5 results and advance indices
668- results [str (s_start ) + "-" + str (s_end )] = p_sorted
672+ if cfg .TOP_N :
673+ p_sorted = p_sorted [: cfg .TOP_N ]
669674
670- # Clear batch
671- samples = []
672- timestamps = []
675+ # TODO: already filter by top n or min conf here
676+ # Store top 5 results and advance indices
677+ results [ str ( s_start ) + "-" + str ( s_end )] = p_sorted
673678
674679 except Exception as ex :
675680 # Write error log
0 commit comments