% Journal article: Neurocomputing, volume 650 (2025).
% Maintenance notes for this entry:
%  - Author names are stored in "von Last, First" form so the particle
%    "van der" is parsed as the von part of the surname.
%  - The two author biographies were originally stored in the note field
%    under a "Funding Information:" label. They are kept in the non-standard
%    authorbio field, which standard styles ignore, so that they are not
%    printed as part of the bibliography entry.
%  - Garbled backslash-u escapes (U+2013 en dash, U+00FC u-umlaut) were
%    restored to the LaTeX forms of the characters they encode.
@article{92296d5f68044cd2af11c08ba7aa5753,
  author    = {Kuang, Sheng and Shi, Jie and van der Heijden, Kiki and Mehrkanoon, Siamak},
  title     = {{BAST-Mamba}: {Binaural Audio Spectrogram Mamba Transformer} for binaural sound localization},
  journal   = {Neurocomputing},
  volume    = {650},
  year      = {2025},
  month     = oct,
  day       = {14},
  doi       = {10.1016/j.neucom.2025.130804},
  issn      = {0925-2312},
  publisher = {Elsevier B.V.},
  language  = {English},
  keywords  = {Binaural integration, Sound localization, Transformer},
  abstract  = {Accurate sound localization in reverberant environments is essential for human auditory perception. Recently, Convolutional Neural Networks (CNNs) have been used to model the binaural human auditory pathway. However, CNNs face limitations in capturing global acoustic features. To address this issue, we propose a novel end-to-end Binaural Audio Spectrogram Mamba Transformer (BAST-Mamba) model to predict sound azimuth in both anechoic and reverberant conditions. We explore two implementation modes: BAST-Mamba-SP and BAST-Mamba-NSP, which correspond to shared and non-shared parameter configurations, respectively. Our best model BAST-Mamba-SP, equipped with subtraction-based interaural integration and a hybrid loss function, achieves a state-of-the-art angular distance (AD) error of 0.89{\textdegree} and mean squared error of 0.0004, significantly outperforming baseline models. The model demonstrates generalization across acoustic environments, robust hemifield symmetry and high accurate real-time localization performance ($<$4{\textdegree} AD at 300 ms). Moderate noise augmentation at 30 dB SNR yields the strongest noise resilience. Explainability analyses highlight consistent frequency focus in the 2--3 kHz and 5.5--6.5 kHz bands, aligning with known neurophysiological cues. These results validate the potential of neurobiologically inspired Transformer for robust, high-precision sound localization and offer new insights into human sound localization.},
  note      = {Publisher Copyright: {\textcopyright} 2025 The Authors},
  authorbio = {Siamak Mehrkanoon earned two Ph.D. degrees in Numerical Analysis and Machine Learning from Universiti Putra Malaysia, Seri Kembangan, Malaysia, and Katholieke Universiteit Leuven (KU Leuven), Leuven, Belgium, in 2011 and 2015, respectively. He has held research positions internationally, including Visiting Researcher at the Department of Automation, Tsinghua University, Beijing, China (2014); Postdoctoral Fellow at the University of Waterloo, Waterloo, ON, Canada (2015--2016); and Visiting Postdoctoral Researcher at the Cognitive Systems Laboratory, University of T{\"u}bingen, Germany (2016). From 2016 to 2018, he was an FWO Postdoctoral Research Fellow at the STADIUS Center for Dynamical Systems, Signal Processing, and Data Analytics, KU Leuven. He served as an Assistant Professor in the Department of Data Science and Knowledge Engineering (DKE) at Maastricht University, the Netherlands (2018--2022). He is currently an Assistant Professor in the Department of Information and Computing Sciences at Utrecht University. His research interests include deep learning, neural networks, kernel-based models, numerical algorithms, optimization, and computational science. He has received several grants and fellowships, including the Postdoctoral Mandate (PDM) from KU Leuven and the prestigious Research Fellowship from the Research Foundation -- Flanders (FWO).
    Kiki van der Heijden received a B.A. in Cultural Sciences from Maastricht University (The Netherlands) in 2006, a M.A. in Media and Communications Management from Middlesex University (London, United Kingdom) in 2007, and a M.Sc. in Cognitive Neuroscience from Maastricht University (The Netherlands) in 2012. She conducted her Ph.D. research at Maastricht University and Georgetown University (United States) and was awarded the Ph.D. degree in 2017. After completing her Ph.D., she worked as a Post-Doctoral Research Fellow at the Cognitive Neuroscience Department at Maastricht University, and the Ear-, Nose and Throat (ENT) Department of the Maastricht University Medical Center. She is currently a Research Fellow at the Donders Institute at Radboud University (Nijmegen, Netherlands) and a Visiting Research Fellow at Columbia University (New York, United States). In her research, she uses an interdisciplinary approach combining cognitive neuroscience, computational modeling (focusing on deep neural network models) and clinical audiology to unravel the computational mechanisms underlying neural sound encoding in normal and hearing-impaired listeners. She has been awarded several research grants, including a Marie-Curie Individual Global Fellowship by the European Commission in 2019, and a NWO Veni by the Dutch Scientific Council (NWO) in 2020.},
}