1111import birdnet_analyzer .config as cfg
1212from birdnet_analyzer import audio , model , utils
1313
14- RAVEN_TABLE_HEADER = "Selection\t View\t Channel\t Begin Time (s)\t End Time (s)\t Low Freq (Hz)\t High Freq (Hz)\t Common Name\t Species Code\t Confidence\t Begin Path\t File Offset (s)\n " # noqa: E501
14+ RAVEN_TABLE_HEADER = (
15+ "Selection\t View\t Channel\t Begin Time (s)\t End Time (s)\t Low Freq (Hz)\t High Freq (Hz)\t Common Name\t Species Code\t Confidence\t Begin Path\t File Offset (s)\n "
16+ )
1517KALEIDOSCOPE_HEADER = "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n "
1618CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n "
1719SCRIPT_DIR = os .path .abspath (os .path .dirname (__file__ ))
@@ -53,13 +55,11 @@ def load_codes():
5355 Returns:
5456 A dictionary containing the eBird codes.
5557 """
56- with open (os .path .join (SCRIPT_DIR , cfg .CODES_FILE )) as cfile :
58+ with open (os .path .join (SCRIPT_DIR , cfg .CODES_FILE ), encoding = "utf-8" ) as cfile :
5759 return json .load (cfile )
5860
5961
60- def generate_raven_table (
61- timestamps : list [str ], result : dict [str , list ], afile_path : str , result_path : str
62- ):
62+ def generate_raven_table (timestamps : list [str ], result : dict [str , list ], afile_path : str , result_path : str ):
6363 """
6464 Generates a Raven selection table from the given timestamps and prediction results.
6565
@@ -90,11 +90,7 @@ def generate_raven_table(
9090
9191 for c in result [timestamp ]:
9292 selection_id += 1
93- label = (
94- cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])]
95- if cfg .TRANSLATED_LABELS
96- else c [0 ]
97- )
93+ label = cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])] if cfg .TRANSLATED_LABELS else c [0 ]
9894 code = cfg .CODES [c [0 ]] if c [0 ] in cfg .CODES else c [0 ]
9995 lbl = label if cfg .USE_PERCH else label .split ("_" , 1 )[- 1 ]
10096 rstring += f"{ selection_id } \t Spectrogram 1\t 1\t { start } \t { end } \t { low_freq } \t { high_freq } \t { lbl } \t { code } \t { c [1 ]:.4f} \t { afile_path } \t { start } \n "
@@ -131,11 +127,7 @@ def generate_audacity(timestamps: list[str], result: dict[str, list], result_pat
131127 rstring = ""
132128
133129 for c in result [timestamp ]:
134- label = (
135- cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])]
136- if cfg .TRANSLATED_LABELS
137- else c [0 ]
138- )
130+ label = cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])] if cfg .TRANSLATED_LABELS else c [0 ]
139131 ts = timestamp .replace ("-" , "\t " )
140132 lbl = label if cfg .USE_PERCH else label .replace ("_" , ", " )
141133 rstring += f"{ ts } \t { lbl } \t { c [1 ]:.4f} \n "
@@ -146,9 +138,7 @@ def generate_audacity(timestamps: list[str], result: dict[str, list], result_pat
146138 utils .save_result_file (result_path , out_string )
147139
148140
149- def generate_kaleidoscope (
150- timestamps : list [str ], result : dict [str , list ], afile_path : str , result_path : str
151- ):
141+ def generate_kaleidoscope (timestamps : list [str ], result : dict [str , list ], afile_path : str , result_path : str ):
152142 """
153143 Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.
154144
@@ -172,11 +162,7 @@ def generate_kaleidoscope(
172162 start , end = timestamp .split ("-" , 1 )
173163
174164 for c in result [timestamp ]:
175- label = (
176- cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])]
177- if cfg .TRANSLATED_LABELS
178- else c [0 ]
179- )
165+ label = cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])] if cfg .TRANSLATED_LABELS else c [0 ]
180166
181167 if cfg .USE_PERCH :
182168 common = scientific = label
@@ -206,9 +192,7 @@ def generate_kaleidoscope(
206192 utils .save_result_file (result_path , out_string )
207193
208194
209- def generate_csv (
210- timestamps : list [str ], result : dict [str , list ], afile_path : str , result_path : str
211- ):
195+ def generate_csv (timestamps : list [str ], result : dict [str , list ], afile_path : str , result_path : str ):
212196 """
213197 Generates a CSV file from the given timestamps and results.
214198
@@ -240,11 +224,7 @@ def generate_csv(
240224
241225 for c in result [timestamp ]:
242226 start , end = timestamp .split ("-" , 1 )
243- label = (
244- cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])]
245- if cfg .TRANSLATED_LABELS
246- else c [0 ]
247- )
227+ label = cfg .TRANSLATED_LABELS [cfg .LABELS .index (c [0 ])] if cfg .TRANSLATED_LABELS else c [0 ]
248228
249229 if cfg .USE_PERCH :
250230 common = scientific = label
@@ -265,9 +245,7 @@ def generate_csv(
265245 utils .save_result_file (result_path , out_string )
266246
267247
268- def save_result_files (
269- r : dict [str , list ], result_files : dict [str , str ], afile_path : str
270- ):
248+ def save_result_files (r : dict [str , list ], result_files : dict [str , str ], afile_path : str ):
271249 """
272250 Saves the result files in various formats based on the provided configuration.
273251
@@ -298,9 +276,7 @@ def save_result_files(
298276 # generate_rtable(timestamps, r, afile_path, result_files["r"])
299277
300278 if "kaleidoscope" in cfg .RESULT_TYPES :
301- generate_kaleidoscope (
302- timestamps , r_merged , afile_path , result_files ["kaleidoscope" ]
303- )
279+ generate_kaleidoscope (timestamps , r_merged , afile_path , result_files ["kaleidoscope" ])
304280
305281 if "csv" in cfg .RESULT_TYPES :
306282 generate_csv (timestamps , r_merged , afile_path , result_files ["csv" ])
@@ -321,9 +297,7 @@ def combine_raven_tables(saved_results: list[str]):
321297 time_offset = 0
322298 audiofiles = []
323299
324- with open (
325- os .path .join (cfg .OUTPUT_PATH , cfg .OUTPUT_RAVEN_FILENAME ), "w" , encoding = "utf-8"
326- ) as f :
300+ with open (os .path .join (cfg .OUTPUT_PATH , cfg .OUTPUT_RAVEN_FILENAME ), "w" , encoding = "utf-8" ) as f :
327301 f .write (RAVEN_TABLE_HEADER )
328302
329303 for rfile in saved_results :
@@ -350,10 +324,7 @@ def combine_raven_tables(saved_results: list[str]):
350324
351325 # Is species code and common name == 'nocall'?
352326 # If so, that's a dummy line and we can skip it
353- if (
354- line .split ("\t " )[7 ] == "nocall"
355- and line .split ("\t " )[8 ] == "nocall"
356- ):
327+ if line .split ("\t " )[7 ] == "nocall" and line .split ("\t " )[8 ] == "nocall" :
357328 continue
358329
359330 # adjust selection id
@@ -435,9 +406,7 @@ def combine_csv_files(saved_results: list[str]):
435406 print (f"Error: Cannot combine results from { rfile } .\n " , flush = True )
436407 utils .write_error_log (ex )
437408
438- with open (
439- os .path .join (cfg .OUTPUT_PATH , cfg .OUTPUT_CSV_FILENAME ), "w" , encoding = "utf-8"
440- ) as f :
409+ with open (os .path .join (cfg .OUTPUT_PATH , cfg .OUTPUT_CSV_FILENAME ), "w" , encoding = "utf-8" ) as f :
441410 f .write (out_string )
442411
443412
@@ -465,9 +434,7 @@ def combine_results(saved_results: Sequence[dict[str, str] | str]):
465434 combine_csv_files ([f ["csv" ] for f in saved_results if isinstance (f , dict )])
466435
467436
468- def merge_consecutive_detections (
469- results : dict [str , list ], max_consecutive : int | None = None
470- ):
437+ def merge_consecutive_detections (results : dict [str , list ], max_consecutive : int | None = None ):
471438 """Merges consecutive detections of the same species.
472439 Uses the mean of the top-3 highest scoring predictions as
473440 confidence score for the merged detection.
@@ -513,9 +480,7 @@ def merge_consecutive_detections(
513480 merged_scores = [timestamps [i ][1 ], timestamps [i + 1 ][1 ]]
514481 timestamps .pop (i )
515482
516- while i < len (timestamps ) - 1 and float (next_end ) >= float (
517- timestamps [i + 1 ][0 ].split ("-" , 1 )[0 ]
518- ):
483+ while i < len (timestamps ) - 1 and float (next_end ) >= float (timestamps [i + 1 ][0 ].split ("-" , 1 )[0 ]):
519484 if max_consecutive and len (merged_scores ) >= max_consecutive :
520485 break
521486 merged_scores .append (timestamps [i + 1 ][1 ])
@@ -576,9 +541,7 @@ def get_raw_audio_from_file(fpath: str, offset, duration):
576541 )
577542
578543 # Split into raw audio chunks
579- return audio .split_signal (
580- sig , rate , cfg .SIG_LENGTH , cfg .SIG_OVERLAP , cfg .SIG_MINLEN
581- )
544+ return audio .split_signal (sig , rate , cfg .SIG_LENGTH , cfg .SIG_OVERLAP , cfg .SIG_MINLEN )
582545
583546
584547def iterate_audio_chunks (fpath : str , embeddings : bool = False ):
@@ -604,9 +567,7 @@ def iterate_audio_chunks(fpath: str, embeddings: bool = False):
604567 break
605568
606569 for chunk_index , chunk in enumerate (chunks ):
607- t_start = start + (
608- chunk_index * (cfg .SIG_LENGTH - cfg .SIG_OVERLAP ) * cfg .AUDIO_SPEED
609- )
570+ t_start = start + (chunk_index * (cfg .SIG_LENGTH - cfg .SIG_OVERLAP ) * cfg .AUDIO_SPEED )
610571 end = min (t_start + cfg .SIG_LENGTH * cfg .AUDIO_SPEED , fileLengthSeconds )
611572
612573 # Add to batch
@@ -649,9 +610,7 @@ def predict(samples):
649610
650611 # Logits or sigmoid activations?
651612 if cfg .APPLY_SIGMOID and not cfg .USE_PERCH :
652- prediction = model .flat_sigmoid (
653- np .array (prediction ), sensitivity = - 1 , bias = cfg .SIGMOID_SENSITIVITY
654- )
613+ prediction = model .flat_sigmoid (np .array (prediction ), sensitivity = - 1 , bias = cfg .SIGMOID_SENSITIVITY )
655614
656615 return prediction
657616
@@ -671,32 +630,20 @@ def get_result_file_names(fpath: str):
671630
672631 rpath = fpath .replace (cfg .INPUT_PATH , "" )
673632
674- rpath = (
675- (rpath [1 :] if rpath [0 ] in ["/" , "\\ " ] else rpath )
676- if rpath
677- else os .path .basename (fpath )
678- )
633+ rpath = (rpath [1 :] if rpath [0 ] in ["/" , "\\ " ] else rpath ) if rpath else os .path .basename (fpath )
679634
680635 file_shorthand = rpath .rsplit ("." , 1 )[0 ]
681636
682637 if "table" in cfg .RESULT_TYPES :
683- result_names ["table" ] = os .path .join (
684- cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.selection.table.txt"
685- )
638+ result_names ["table" ] = os .path .join (cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.selection.table.txt" )
686639 if "audacity" in cfg .RESULT_TYPES :
687- result_names ["audacity" ] = os .path .join (
688- cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.results.txt"
689- )
640+ result_names ["audacity" ] = os .path .join (cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.results.txt" )
690641 # if "r" in cfg.RESULT_TYPES:
691642 # result_names["r"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.r.csv")
692643 if "kaleidoscope" in cfg .RESULT_TYPES :
693- result_names ["kaleidoscope" ] = os .path .join (
694- cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.results.kaleidoscope.csv"
695- )
644+ result_names ["kaleidoscope" ] = os .path .join (cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.results.kaleidoscope.csv" )
696645 if "csv" in cfg .RESULT_TYPES :
697- result_names ["csv" ] = os .path .join (
698- cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.results.csv"
699- )
646+ result_names ["csv" ] = os .path .join (cfg .OUTPUT_PATH , file_shorthand + ".BirdNET.results.csv" )
700647
701648 return result_names
702649
@@ -720,9 +667,7 @@ def analyze_file(item) -> dict[str, str] | None:
720667
721668 result_file_names = get_result_file_names (fpath )
722669
723- if cfg .SKIP_EXISTING_RESULTS and all (
724- os .path .exists (f ) for f in result_file_names .values ()
725- ):
670+ if cfg .SKIP_EXISTING_RESULTS and all (os .path .exists (f ) for f in result_file_names .values ()):
726671 print (f"Skipping { fpath } as it has already been analyzed" , flush = True )
727672 return None # or return path to combine later? TODO
728673
@@ -741,10 +686,7 @@ def analyze_file(item) -> dict[str, str] | None:
741686
742687 # Assign scores to labels
743688 p_labels = [
744- p
745- for p in zip (cfg .LABELS , pred , strict = True )
746- if (cfg .TOP_N or p [1 ] >= cfg .MIN_CONFIDENCE )
747- and (not cfg .SPECIES_LIST or p [0 ] in cfg .SPECIES_LIST )
689+ p for p in zip (cfg .LABELS , pred , strict = True ) if (cfg .TOP_N or p [1 ] >= cfg .MIN_CONFIDENCE ) and (not cfg .SPECIES_LIST or p [0 ] in cfg .SPECIES_LIST )
748690 ]
749691
750692 # Sort by score
0 commit comments