Skip to content

Commit 1f1aa42

Browse files
authored
Use utf-8 to open eBird codes file (#831)
* Use utf-8 to open eBird codes file
* .
1 parent c2c52fb commit 1f1aa42

1 file changed

Lines changed: 28 additions & 86 deletions

File tree

birdnet_analyzer/analyze/utils.py

Lines changed: 28 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
import birdnet_analyzer.config as cfg
1212
from birdnet_analyzer import audio, model, utils
1313

14-
RAVEN_TABLE_HEADER = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n" # noqa: E501
14+
RAVEN_TABLE_HEADER = (
15+
"Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n"
16+
)
1517
KALEIDOSCOPE_HEADER = "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n"
1618
CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n"
1719
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
@@ -53,13 +55,11 @@ def load_codes():
5355
Returns:
5456
A dictionary containing the eBird codes.
5557
"""
56-
with open(os.path.join(SCRIPT_DIR, cfg.CODES_FILE)) as cfile:
58+
with open(os.path.join(SCRIPT_DIR, cfg.CODES_FILE), encoding="utf-8") as cfile:
5759
return json.load(cfile)
5860

5961

60-
def generate_raven_table(
61-
timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str
62-
):
62+
def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
6363
"""
6464
Generates a Raven selection table from the given timestamps and prediction results.
6565
@@ -90,11 +90,7 @@ def generate_raven_table(
9090

9191
for c in result[timestamp]:
9292
selection_id += 1
93-
label = (
94-
cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
95-
if cfg.TRANSLATED_LABELS
96-
else c[0]
97-
)
93+
label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
9894
code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
9995
lbl = label if cfg.USE_PERCH else label.split("_", 1)[-1]
10096
rstring += f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{lbl}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n"
@@ -131,11 +127,7 @@ def generate_audacity(timestamps: list[str], result: dict[str, list], result_pat
131127
rstring = ""
132128

133129
for c in result[timestamp]:
134-
label = (
135-
cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
136-
if cfg.TRANSLATED_LABELS
137-
else c[0]
138-
)
130+
label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
139131
ts = timestamp.replace("-", "\t")
140132
lbl = label if cfg.USE_PERCH else label.replace("_", ", ")
141133
rstring += f"{ts}\t{lbl}\t{c[1]:.4f}\n"
@@ -146,9 +138,7 @@ def generate_audacity(timestamps: list[str], result: dict[str, list], result_pat
146138
utils.save_result_file(result_path, out_string)
147139

148140

149-
def generate_kaleidoscope(
150-
timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str
151-
):
141+
def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
152142
"""
153143
Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.
154144
@@ -172,11 +162,7 @@ def generate_kaleidoscope(
172162
start, end = timestamp.split("-", 1)
173163

174164
for c in result[timestamp]:
175-
label = (
176-
cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
177-
if cfg.TRANSLATED_LABELS
178-
else c[0]
179-
)
165+
label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
180166

181167
if cfg.USE_PERCH:
182168
common = scientific = label
@@ -206,9 +192,7 @@ def generate_kaleidoscope(
206192
utils.save_result_file(result_path, out_string)
207193

208194

209-
def generate_csv(
210-
timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str
211-
):
195+
def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
212196
"""
213197
Generates a CSV file from the given timestamps and results.
214198
@@ -240,11 +224,7 @@ def generate_csv(
240224

241225
for c in result[timestamp]:
242226
start, end = timestamp.split("-", 1)
243-
label = (
244-
cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
245-
if cfg.TRANSLATED_LABELS
246-
else c[0]
247-
)
227+
label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])] if cfg.TRANSLATED_LABELS else c[0]
248228

249229
if cfg.USE_PERCH:
250230
common = scientific = label
@@ -265,9 +245,7 @@ def generate_csv(
265245
utils.save_result_file(result_path, out_string)
266246

267247

268-
def save_result_files(
269-
r: dict[str, list], result_files: dict[str, str], afile_path: str
270-
):
248+
def save_result_files(r: dict[str, list], result_files: dict[str, str], afile_path: str):
271249
"""
272250
Saves the result files in various formats based on the provided configuration.
273251
@@ -298,9 +276,7 @@ def save_result_files(
298276
# generate_rtable(timestamps, r, afile_path, result_files["r"])
299277

300278
if "kaleidoscope" in cfg.RESULT_TYPES:
301-
generate_kaleidoscope(
302-
timestamps, r_merged, afile_path, result_files["kaleidoscope"]
303-
)
279+
generate_kaleidoscope(timestamps, r_merged, afile_path, result_files["kaleidoscope"])
304280

305281
if "csv" in cfg.RESULT_TYPES:
306282
generate_csv(timestamps, r_merged, afile_path, result_files["csv"])
@@ -321,9 +297,7 @@ def combine_raven_tables(saved_results: list[str]):
321297
time_offset = 0
322298
audiofiles = []
323299

324-
with open(
325-
os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8"
326-
) as f:
300+
with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8") as f:
327301
f.write(RAVEN_TABLE_HEADER)
328302

329303
for rfile in saved_results:
@@ -350,10 +324,7 @@ def combine_raven_tables(saved_results: list[str]):
350324

351325
# Is species code and common name == 'nocall'?
352326
# If so, that's a dummy line and we can skip it
353-
if (
354-
line.split("\t")[7] == "nocall"
355-
and line.split("\t")[8] == "nocall"
356-
):
327+
if line.split("\t")[7] == "nocall" and line.split("\t")[8] == "nocall":
357328
continue
358329

359330
# adjust selection id
@@ -435,9 +406,7 @@ def combine_csv_files(saved_results: list[str]):
435406
print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
436407
utils.write_error_log(ex)
437408

438-
with open(
439-
os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8"
440-
) as f:
409+
with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8") as f:
441410
f.write(out_string)
442411

443412

@@ -465,9 +434,7 @@ def combine_results(saved_results: Sequence[dict[str, str] | str]):
465434
combine_csv_files([f["csv"] for f in saved_results if isinstance(f, dict)])
466435

467436

468-
def merge_consecutive_detections(
469-
results: dict[str, list], max_consecutive: int | None = None
470-
):
437+
def merge_consecutive_detections(results: dict[str, list], max_consecutive: int | None = None):
471438
"""Merges consecutive detections of the same species.
472439
Uses the mean of the top-3 highest scoring predictions as
473440
confidence score for the merged detection.
@@ -513,9 +480,7 @@ def merge_consecutive_detections(
513480
merged_scores = [timestamps[i][1], timestamps[i + 1][1]]
514481
timestamps.pop(i)
515482

516-
while i < len(timestamps) - 1 and float(next_end) >= float(
517-
timestamps[i + 1][0].split("-", 1)[0]
518-
):
483+
while i < len(timestamps) - 1 and float(next_end) >= float(timestamps[i + 1][0].split("-", 1)[0]):
519484
if max_consecutive and len(merged_scores) >= max_consecutive:
520485
break
521486
merged_scores.append(timestamps[i + 1][1])
@@ -576,9 +541,7 @@ def get_raw_audio_from_file(fpath: str, offset, duration):
576541
)
577542

578543
# Split into raw audio chunks
579-
return audio.split_signal(
580-
sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN
581-
)
544+
return audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
582545

583546

584547
def iterate_audio_chunks(fpath: str, embeddings: bool = False):
@@ -604,9 +567,7 @@ def iterate_audio_chunks(fpath: str, embeddings: bool = False):
604567
break
605568

606569
for chunk_index, chunk in enumerate(chunks):
607-
t_start = start + (
608-
chunk_index * (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED
609-
)
570+
t_start = start + (chunk_index * (cfg.SIG_LENGTH - cfg.SIG_OVERLAP) * cfg.AUDIO_SPEED)
610571
end = min(t_start + cfg.SIG_LENGTH * cfg.AUDIO_SPEED, fileLengthSeconds)
611572

612573
# Add to batch
@@ -649,9 +610,7 @@ def predict(samples):
649610

650611
# Logits or sigmoid activations?
651612
if cfg.APPLY_SIGMOID and not cfg.USE_PERCH:
652-
prediction = model.flat_sigmoid(
653-
np.array(prediction), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY
654-
)
613+
prediction = model.flat_sigmoid(np.array(prediction), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY)
655614

656615
return prediction
657616

@@ -671,32 +630,20 @@ def get_result_file_names(fpath: str):
671630

672631
rpath = fpath.replace(cfg.INPUT_PATH, "")
673632

674-
rpath = (
675-
(rpath[1:] if rpath[0] in ["/", "\\"] else rpath)
676-
if rpath
677-
else os.path.basename(fpath)
678-
)
633+
rpath = (rpath[1:] if rpath[0] in ["/", "\\"] else rpath) if rpath else os.path.basename(fpath)
679634

680635
file_shorthand = rpath.rsplit(".", 1)[0]
681636

682637
if "table" in cfg.RESULT_TYPES:
683-
result_names["table"] = os.path.join(
684-
cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.selection.table.txt"
685-
)
638+
result_names["table"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.selection.table.txt")
686639
if "audacity" in cfg.RESULT_TYPES:
687-
result_names["audacity"] = os.path.join(
688-
cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.txt"
689-
)
640+
result_names["audacity"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.txt")
690641
# if "r" in cfg.RESULT_TYPES:
691642
# result_names["r"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.r.csv")
692643
if "kaleidoscope" in cfg.RESULT_TYPES:
693-
result_names["kaleidoscope"] = os.path.join(
694-
cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.kaleidoscope.csv"
695-
)
644+
result_names["kaleidoscope"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.kaleidoscope.csv")
696645
if "csv" in cfg.RESULT_TYPES:
697-
result_names["csv"] = os.path.join(
698-
cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.csv"
699-
)
646+
result_names["csv"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.csv")
700647

701648
return result_names
702649

@@ -720,9 +667,7 @@ def analyze_file(item) -> dict[str, str] | None:
720667

721668
result_file_names = get_result_file_names(fpath)
722669

723-
if cfg.SKIP_EXISTING_RESULTS and all(
724-
os.path.exists(f) for f in result_file_names.values()
725-
):
670+
if cfg.SKIP_EXISTING_RESULTS and all(os.path.exists(f) for f in result_file_names.values()):
726671
print(f"Skipping {fpath} as it has already been analyzed", flush=True)
727672
return None # or return path to combine later? TODO
728673

@@ -741,10 +686,7 @@ def analyze_file(item) -> dict[str, str] | None:
741686

742687
# Assign scores to labels
743688
p_labels = [
744-
p
745-
for p in zip(cfg.LABELS, pred, strict=True)
746-
if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE)
747-
and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
689+
p for p in zip(cfg.LABELS, pred, strict=True) if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE) and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
748690
]
749691

750692
# Sort by score

0 commit comments

Comments (0)