% TISMIR 2024 journal article on the BPSD dataset.
% The url-* fields are non-standard and kept verbatim; presumably a publication-list tool reads them (confirm).
@article{ZeitlerWAM24_BeethovenPianoSonatas_TISMIR,
  author   = {Johannes Zeitler and Christof Wei{\ss} and Vlora Arifi-M{\"u}ller and Meinard M{\"u}ller},
  title    = {{BPSD}: A coherent multi-version dataset for analyzing the first movements of {Beethoven's} piano sonatas},
  journal  = {Transactions of the International Society for Music Information Retrieval {(TISMIR)}},
  year     = {2024},
  volume   = {7},
  number   = {1},
  pages    = {195--212},
  url-pdf  = {https://transactions.ismir.net/articles/196/files/66ec15a20b3a3.pdf},
  url-doi  = {10.5334/tismir.196},
  url-demo = {https://zenodo.org/doi/10.5281/zenodo.10847702}
}
% ISMIR 2024 conference paper on audio synchronization.
@inproceedings{ZeitlerMM24_Synchronization_ISMIR,
  title     = {Robust and Accurate Audio Synchronization Using Raw Features from Transcription Models},
  author    = {Johannes Zeitler and Ben Maman and Meinard M{\"u}ller},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  address   = {San Francisco, USA},
  year      = {2024}
}
% DAGA 2024 conference paper on downbeat tracking for Beethoven piano sonatas.
@inproceedings{ChiuZAM24_DownbeatBeethoven_DAGA,
  title     = {Downbeat Tracking for {W}estern Classical Music Recordings: {A} Case Study for {B}eethoven Piano Sonatas},
  author    = {Ching-Yu Chiu and Johannes Zeitler and Vlora Arifi-M{\"u}ller and Meinard M{\"u}ller},
  booktitle = {Proceedings of the {D}eutsche {J}ahrestagung f{\"u}r {A}kustik ({DAGA})},
  year      = {2024},
  pages     = {1--4},
  address   = {Hannover, Germany}
}
% ICASSP 2024 conference paper on performance-conditioned music synthesis.
% faupublication and peerreviewed are non-standard export fields, kept verbatim.
@inproceedings{faucris.318741779,
  author         = {Ben Maman and Meinard M{\"u}ller and Johannes Zeitler and Amit Bermano},
  title          = {Performance Conditioning for Diffusion-Based Multi-Instrument Music Synthesis},
  booktitle      = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  address        = {Seoul, Korea},
  year           = {2024},
  faupublication = {yes},
  peerreviewed   = {Yes}
}
% ICASSP 2024 conference paper on soft dynamic time warping with variable step weights.
% faupublication, peerreviewed, and the url-* fields are non-standard and kept verbatim.
@inproceedings{faucris.318554622,
  author         = {Johannes Zeitler and Michael Krause and Meinard M{\"u}ller},
  title          = {Soft Dynamic Time Warping with Variable Step Weights},
  booktitle      = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  address        = {Seoul, Korea},
  year           = {2024},
  faupublication = {yes},
  peerreviewed   = {Yes},
  url-demo       = {https://github.com/groupmm/weightedSDTW},
  url-doi        = {10.1109/icassp48485.2024.10446578}
}
% NOTE(review): this entry was truncated right after the abstract; the abstract brace and the
% entry itself were never closed, which made a parser swallow every entry that follows.
% Both are closed here so the rest of the file parses again. Author, title, booktitle, and
% year are still missing and must be restored from the publication record.
@inproceedings{faucris.307044861,
  abstract = {Soft dynamic time warping (SDTW) is a differentiable loss function that allows for training neural networks from weakly aligned data. Typically, SDTW is used to iteratively compute and refine soft alignments that compensate for temporal deviations between the training data and its weakly annotated targets. One major problem is that a mismatch between the estimated soft alignments and the reference alignments in the early training stage leads to incorrect parameter updates, making the overall training procedure unstable. In this paper, we investigate such stability issues by considering the task of pitch class estimation from music recordings as an illustrative case study. In particular, we introduce and discuss three conceptually different strategies (a hyperparameter scheduling, a diagonal prior, and a sequence unfolding strategy) with the objective of stabilizing intermediate soft alignment results. Finally, we report on experiments that demonstrate the effectiveness of the strategies and discuss efficiency and implementation issues.}
}
% ICASSP 2022 conference paper on VAE-supported estimation of relative transfer functions.
% The conference date range is stored in eventdate so that it is not read as the publication date.
% faupublication and peerreviewed are non-standard export fields, kept verbatim.
@inproceedings{faucris.268675201,
  author         = {Andreas Brendel and Johannes Zeitler and Walter Kellermann},
  title          = {Manifold learning-supported estimation of relative transfer functions for spatial filtering},
  booktitle      = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing ({ICASSP})},
  year           = {2022},
  eventdate      = {2022-05-07/2022-05-27},
  venue          = {Singapore},
  doi            = {10.1109/icassp43922.2022.9746045},
  faupublication = {yes},
  peerreviewed   = {Yes},
  abstract       = {Many spatial filtering algorithms used for voice capture in, e.g., teleconferencing applications, can benefit from or even rely on knowledge of Relative Transfer Functions (RTFs). Accordingly, many RTF estimators have been proposed which, however, suffer from performance degradation under acoustically adverse conditions or need prior knowledge on the properties of the interfering sources. While state-of-the-art RTF estimators ignore prior knowledge about the acoustic enclosure, audio signal processing algorithms for teleconferencing equipment are often operating in the same or at least a similar acoustic enclosure, e.g., a car or an office, such that training data can be collected. In this contribution, we use such data to train Variational Autoencoders (VAEs) in an unsupervised manner and apply the trained VAEs to enhance imprecise RTF estimates. Furthermore, a hybrid between classic RTF estimation and the trained VAE is investigated. Comprehensive experiments with real-world data confirm the efficacy for the proposed method.}
}
% IEEE Transactions on Nanobioscience article (2021) on receptor saturation in synaptic molecular communication.
% NOTE(review): pages 1--1 looks like an early-access placeholder; volume, number, and final pages are not given here.
% faupublication and peerreviewed are non-standard export fields, kept verbatim.
@article{faucris.261835966,
  author         = {Sebastian Lotter and Maximilian Sch{\"a}fer and Johannes Zeitler and Robert Schober},
  title          = {Saturating Receiver and Receptor Competition in Synaptic {DMC}: Deterministic and Statistical Signal Models},
  journal        = {IEEE Transactions on Nanobioscience},
  year           = {2021},
  pages          = {1--1},
  doi            = {10.1109/TNB.2021.3092279},
  keywords       = {Receivers; Mathematical model; Analytical models; Computational modeling; Numerical models; Monte Carlo methods; Degradation},
  faupublication = {yes},
  peerreviewed   = {Yes},
  abstract       = {Synaptic communication is based on a biological Molecular Communication (MC) system which may serve as a blueprint for the design of synthetic MC systems. However, the physical modeling of synaptic MC is complicated by the possible saturation of the molecular receiver caused by the competition of neurotransmitters (NTs) for postsynaptic receptors. Receiver saturation renders the system behavior nonlinear in the number of released NTs and is commonly neglected in existing analytical models. Furthermore, due to the ligands' competition for receptors (and vice versa), the individual binding events at the molecular receiver are in general not statistically independent and the commonly used binomial model for the statistics of the received signal does not apply. Hence, in this work, we propose a novel deterministic model for receptor saturation in terms of a state-space description based on an eigenfunction expansion of Fick's diffusion equation. The presented solution is numerically stable and computationally efficient. Employing the proposed deterministic model, we show that saturation at the molecular receiver effectively reduces the peak-value of the expected received signal and accelerates the clearance of NTs as compared to the case when receptor occupancy is neglected. We further derive a statistical model for the received signal in terms of the hypergeometric distribution which accounts for the competition of NTs for receptors and the competition of receptors for NTs. The proposed statistical model reveals how the signal statistics are shaped by the number of released NTs, the number of receptors, and the binding kinetics of the receptors, respectively, in the presence of competition. In particular, we show that the impact of these parameters on the signal variance is qualitatively different depending on the relative numbers of NTs and receptors. Finally, the accuracy of the proposed deterministic and statistical models is verified by particle-based computer simulations.}
}
% IEEE ICC 2021 conference paper on receptor saturation modeling for synaptic molecular communication.
% IEEE is recorded as publisher (the export had it as editor); the conference date range is stored in eventdate.
% faupublication and peerreviewed are non-standard export fields, kept verbatim.
@inproceedings{faucris.262786628,
  author         = {Sebastian Lotter and Maximilian Sch{\"a}fer and Johannes Zeitler and Robert Schober},
  title          = {Receptor Saturation Modeling for Synaptic {DMC}},
  booktitle      = {Proceedings of the {IEEE} International Conference on Communications ({ICC})},
  publisher      = {IEEE},
  year           = {2021},
  eventdate      = {2021-07-14/2021-07-23},
  venue          = {Montreal, QC},
  doi            = {10.1109/ICC42927.2021.9500809},
  isbn           = {9781728171227},
  keywords       = {Degradation; Neurotransmitters; Computational modeling; Biological system modeling; Stochastic processes; Receivers; Brain modeling},
  faupublication = {yes},
  peerreviewed   = {unknown},
  abstract       = {Synaptic communication is a natural Molecular Communication (MC) system which may serve as a blueprint for the design of synthetic MC systems. In particular, it features highly specialized mechanisms to enable inter-symbol interference (ISI)-free and energy efficient communication. The understanding of synaptic MC is furthermore critical for disruptive innovations in the context of brain-machine interfaces. However, the physical modeling of synaptic MC is complicated by the possible saturation of the molecular receiver arising from the competition of postsynaptic receptors for neurotransmitters. Saturation renders the system behavior nonlinear and is commonly neglected in existing analytical models. In this work, we propose a novel model for receptor saturation in terms of a nonlinear, state-dependent boundary condition for Fick's diffusion equation. We solve the resulting boundary-value problem using an eigenfunction expansion of the Laplace operator and the incorporation of the receiver memory as feedback system into the corresponding state-space description. The presented solution is numerically stable and computationally efficient. Furthermore, the proposed model is validated with particle-based stochastic computer simulations.}
}
% ISMIR 2021 conference paper on learning pitch-class representations from score-audio pairs.
% Names in the author list are separated by " and " only; a comma inside a name would switch
% BibTeX to "Last, First" parsing. The conference date range is stored in eventdate.
% faupublication and peerreviewed are non-standard export fields, kept verbatim.
@inproceedings{faucris.266302262,
  author         = {Christof Wei{\ss} and Johannes Zeitler and Tim Zunner and Florian Schuberth and Meinard M{\"u}ller},
  title          = {Learning Pitch-Class Representations from Score-Audio Pairs of Classical Music},
  booktitle      = {Proceedings of the International Society for Music Information Retrieval Conference ({ISMIR})},
  year           = {2021},
  eventdate      = {2021-11-07/2021-11-12},
  venue          = {online},
  doi            = {10.5281/zenodo.5624549},
  faupublication = {yes},
  peerreviewed   = {Yes},
  abstract       = {Chroma or pitch-class representations of audio recordings are an essential tool in music information retrieval. Traditional chroma features relying on signal processing are often influenced by timbral properties such as overtones or vibrato and, thus, only roughly correspond to the pitch classes indicated by a score. Deep learning provides a promising possibility to overcome such problems but requires large annotated datasets. Previous approaches therefore use either synthetic audio, MIDI-piano recordings, or chord annotations for training. Since these strategies have different limitations, we propose to learn transcription-like pitch-class representations using pre-synchronized score-audio pairs of classical music. We train several CNNs with musically inspired architectures and evaluate their pitch-class estimates for various instrumentations including orchestra, piano, chamber music, and singing. Moreover, we illustrate the learned features' behavior when used as input to a chord recognition system. In all our experiments, we compare cross-validation with cross-dataset evaluation. Obtaining promising results, our strategy shows how to leverage the power of deep learning for constructing robust but interpretable tonal representation}
}