
2025
- Rauch, Lukas, Raphael Schwinger, Moritz Wirth, René Heinrich, Denis Huseljic, Marek Herde, Jonas Lange, et al. “BirdSet: A Large-Scale Dataset for Audio Classification in Avian Bioacoustics”. In International Conference on Learning Representations (ICLR). ICLR, 2025. https://iclr.cc/.
@inproceedings{rauch2024birdset,
author = {Rauch, Lukas and Schwinger, Raphael and Wirth, Moritz and Heinrich, René and Huseljic, Denis and Herde, Marek and Lange, Jonas and Kahl, Stefan and Sick, Bernhard and Tomforde, Sven and Scholz, Christoph},
booktitle = {International Conference on Learning Representations (ICLR)},
keywords = {deepbirddetect},
publisher = {ICLR},
title = {BirdSet: A Large-Scale Dataset for Audio Classification in Avian Bioacoustics},
year = 2025
}
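For readers who want to work with the benchmark, the following is a minimal sketch of loading one BirdSet evaluation configuration with the Hugging Face datasets library. The repository id DBD-research-group/BirdSet, the HSN (High Sierra Nevada) configuration name, and the need for trust_remote_code are assumptions to verify against the dataset's hub page.

# Minimal sketch: load a BirdSet test configuration from the Hugging Face Hub.
# Repository id and configuration name are assumed -- verify before use.
from datasets import load_dataset

ds = load_dataset("DBD-research-group/BirdSet", "HSN", trust_remote_code=True)
print(ds)                   # shows the available splits and their features
split = next(iter(ds))      # take whichever split comes first
print(ds[split][0].keys())  # inspect one example's fields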
- Heinrich, René, Lukas Rauch, Bernhard Sick, and Christoph Scholz. “AudioProtoPNet: An Interpretable Deep Learning Model for Bird Sound Classification”. Ecological Informatics (2025): 103081. doi:10.1016/j.ecoinf.2025.103081.

  Deep learning models have significantly advanced acoustic bird monitoring by recognizing numerous bird species based on their vocalizations. However, traditional deep learning models are black boxes that provide no insight into their underlying computations, limiting their usefulness to ornithologists and machine learning engineers. Explainable models could facilitate debugging, knowledge discovery, trust, and interdisciplinary collaboration. We introduce AudioProtoPNet, an adaptation of the Prototypical Part Network (ProtoPNet) for multi-label bird sound classification. It is inherently interpretable, leveraging a ConvNeXt backbone to extract embeddings and a prototype learning classifier trained on these embeddings. The classifier learns prototypical patterns of each bird species’ vocalizations from spectrograms of instances in the training data. During inference, recordings are classified by comparing them to learned prototypes in the embedding space, providing explanations for the model’s decisions and insights into the most informative embeddings of each bird species. The model was trained on the BirdSet training dataset, which consists of 9734 bird species and over 6800 h of recordings. Its performance was evaluated on the seven BirdSet test datasets, covering different geographical regions. AudioProtoPNet outperformed the state-of-the-art bird sound classification model Perch, which is superior to the more popular BirdNet, achieving an average AUROC of 0.90 and a cmAP of 0.42, with relative improvements of 7.1% and 16.7% over Perch, respectively. These results demonstrate that even for the challenging task of multi-label bird sound classification, it is possible to develop powerful yet interpretable deep learning models that provide valuable insights for professionals in ornithology and machine learning.
@article{HEINRICH2025103081,
author = {Heinrich, René and Rauch, Lukas and Sick, Bernhard and Scholz, Christoph},
doi = {10.1016/j.ecoinf.2025.103081},
journal = {Ecological Informatics},
keywords = {2025},
pages = {103081},
title = {AudioProtoPNet: An interpretable deep learning model for bird sound classification},
year = 2025
}
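To make the prototype mechanism described in the abstract concrete, here is a minimal PyTorch sketch of a ProtoPNet-style classifier head: patch embeddings from a backbone (e.g., a ConvNeXt) are compared to learnable prototype vectors, the per-prototype maximum over patches serves as an interpretable activation, and a linear layer maps those activations to multi-label logits. The shapes, the use of cosine similarity, and the prototypes-per-class count are illustrative assumptions, not the paper's exact design.

import torch
import torch.nn.functional as F

class PrototypeClassifier(torch.nn.Module):
    """ProtoPNet-style head: compares patch embeddings to learned prototypes."""

    def __init__(self, embed_dim: int, num_classes: int, protos_per_class: int = 5):
        super().__init__()
        n_protos = num_classes * protos_per_class
        # Learnable prototype vectors living in the backbone's embedding space.
        self.prototypes = torch.nn.Parameter(torch.randn(n_protos, embed_dim))
        # Linear layer turning prototype activations into multi-label logits.
        self.head = torch.nn.Linear(n_protos, num_classes)

    def forward(self, patch_embeddings: torch.Tensor) -> torch.Tensor:
        # patch_embeddings: (batch, patches, embed_dim), e.g. flattened backbone features.
        sims = F.cosine_similarity(
            patch_embeddings.unsqueeze(2),              # (B, P, 1, D)
            self.prototypes.unsqueeze(0).unsqueeze(0),  # (1, 1, N, D)
            dim=-1,
        )                                               # (B, P, N)
        # Max over patches: how strongly each prototype fires anywhere in the
        # spectrogram; these activations are what makes the decision inspectable.
        proto_activations = sims.max(dim=1).values      # (B, N)
        return self.head(proto_activations)             # (B, num_classes)

logits = PrototypeClassifier(embed_dim=768, num_classes=4)(torch.randn(2, 49, 768))
print(logits.shape)  # torch.Size([2, 4])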
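The two evaluation metrics quoted in the abstract can be reproduced in miniature with scikit-learn. cmAP is read here as class-wise (macro-averaged) mean average precision, which may differ in detail from the benchmark's implementation.

import numpy as np
from sklearn.metrics import average_precision_score, roc_auc_score

# Toy multi-label targets and scores for three recordings and three species.
y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])
y_score = np.array([[0.9, 0.2, 0.7], [0.1, 0.8, 0.3], [0.6, 0.7, 0.2]])

auroc = roc_auc_score(y_true, y_score, average="macro")
cmap = average_precision_score(y_true, y_score, average="macro")
print(f"macro AUROC = {auroc:.2f}, cmAP = {cmap:.2f}")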
2024
- Wood, Connor M., Felix Günther, Angela Rex, Daniel F. Hofstadter, Hendrik Reers, Stefan Kahl, M. Zachariah Peery, and Holger Klinck. “Real-Time Acoustic Monitoring Facilitates the Proactive Management of Biological Invasions”. Biological Invasions 26, no. 12 (December 1, 2024): 3989–3996. doi:10.1007/s10530-024-03426-y.

  Biological surveillance at an invasion front is hindered by low population densities and, among animals, high mobility of target species. Using the barred owl (Strix varia) invasion of western North American forests as a test case, we tested real-time autonomous recording units (the ecoPi, OekoFor GbR, Freiburg, Germany) by deploying them in an area known to be occupied by the target species. The ecoPi passively record audio, analyze it onboard with the BirdNET algorithm, and transmit audio clips with identifiable sounds via cellular network to a web interface where users can listen to audio to manually vet the results. We successfully detected and lethally removed three barred owls, demonstrating that real-time acoustic monitoring can be used to support rapid interventions at the forefront of an ongoing invasion in which proactive management may be essential to the protection of an iconic native species, the spotted owl (S. occidentalis). This approach has the potential to make a significant contribution to global biodiversity conservation efforts by massively increasing the speed at which biological invasions by acoustically active species, and other time-sensitive conservation challenges, can be managed.
@article{Wood2024,
author = {Wood, Connor M. and Günther, Felix and Rex, Angela and Hofstadter, Daniel F. and Reers, Hendrik and Kahl, Stefan and Peery, M. Zachariah and Klinck, Holger},
doi = {10.1007/s10530-024-03426-y},
journal = {Biological Invasions},
keywords = {deepbirddetect},
month = dec,
number = 12,
pages = {3989--3996},
title = {Real-time acoustic monitoring facilitates the proactive management of biological invasions},
volume = 26,
year = 2024
}
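The detect-and-transmit workflow the abstract describes reduces to a small loop: record a chunk, run the classifier onboard, and upload any high-confidence clip of the target species for human vetting. In the sketch below, record_chunk, classify, and upload_clip are hypothetical stand-ins rather than the ecoPi or BirdNET APIs, and the 0.85 threshold is an assumed value.

import random
import time

TARGET = "Barred Owl"
THRESHOLD = 0.85  # assumed confidence cutoff, not a value from the paper

def record_chunk(seconds: int) -> bytes:
    """Hypothetical stand-in for the unit's audio capture."""
    return bytes(seconds)

def classify(chunk: bytes) -> list[tuple[str, float]]:
    """Hypothetical stand-in for onboard BirdNET inference."""
    return [(TARGET, random.random())]

def upload_clip(chunk: bytes, species: str, confidence: float) -> None:
    """Hypothetical stand-in for the cellular upload to the review interface."""
    print(f"flagged clip: {species} at confidence {confidence:.2f}")

for _ in range(5):  # one short monitoring cycle per iteration
    chunk = record_chunk(seconds=3)
    for species, confidence in classify(chunk):
        if species == TARGET and confidence >= THRESHOLD:
            upload_clip(chunk, species, confidence)  # queued for human vetting
    time.sleep(0.1)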
- Rauch, Lukas, Denis Huseljic, Moritz Wirth, Jens Decke, Bernhard Sick, and Christoph Scholz. “Towards Deep Active Learning in Avian Bioacoustics”. CoRR abs/2406.18621 (2024). http://dblp.uni-trier.de/db/journals/corr/corr2406.html#abs-2406-18621.
@article{journals/corr/abs-2406-18621,
author = {Rauch, Lukas and Huseljic, Denis and Wirth, Moritz and Decke, Jens and Sick, Bernhard and Scholz, Christoph},
journal = {CoRR},
keywords = {deepbirddetect},
title = {Towards Deep Active Learning in Avian Bioacoustics},
volume = {abs/2406.18621},
year = 2024
}