I recently completed my PhD in the Explainable Machine Learning group at the University of Tübingen and the International Max Planck Research School for Intelligent Systems (IMPRS-IS), supervised by Prof. Zeynep Akata.
My research centres on multi-modal learning at the intersection of vision, language, and audio. During my PhD, I worked on audio-visual zero-shot and few-shot learning and on language-guided video retrieval, including fine-grained video retrieval and egocentric video benchmarking. I also spent time as a research intern at Sony AI in Zurich, where I developed a CLAP-like audio-language alignment model that enables semantic sound effects search, as well as text and audio conditioning for sound effects generation. The model is now integrated as a search engine in Audiokinetic's Wwise 2025.1 and as the CLAP backbone of Woosh, Sony AI's sound effects foundation model.
Before my PhD, I received my master's degree in Intelligent Adaptive Systems from the University of Hamburg (2019) and my bachelor's degree in Bioprocess Informatics from the Weihenstephan-Triesdorf University of Applied Sciences (2015).

% arXiv preprint, not a proceedings paper: typed as misc so that styles do not print "In arXiv.org".
% The booktitle field is ignored by standard misc styles; it is kept for tools that read it directly.
% TODO: add the eprint (arXiv identifier) and eprinttype fields once the identifier is confirmed.
@misc{hadjeres2026ARXIV,
  author       = {Gaetan Hadjeres and Marc Ferras and Khaled Koutini and Benno Weck and Alexandre Bittar and Thomas Hummel and Zineb Lahrichi and Hakim Missoum and Joan Serrà and Yuki Mitsufuji},
  title        = {Woosh: A Sound Effects Foundation Model},
  howpublished = {arXiv preprint},
  booktitle    = {arXiv.org},
  year         = {2026},
}
@inproceedings{hummel2024ECCV,
  author    = {Thomas Hummel and Shyamgopal Karthik and Mariana-Iuliana Georgescu and Zeynep Akata},
  title     = {EgoCVR: An Egocentric Benchmark for Fine-Grained Composed Video Retrieval},
  booktitle = {Proc. of the European Conf. on Computer Vision (ECCV)},
  year      = {2024},
}
@inproceedings{hummel2023BMVC,
  author    = {Thomas Hummel and Otniel-Bogdan Mercea and A. Sophia Koepke and Zeynep Akata},
  title     = {Video-adverb retrieval with compositional adverb-action embeddings},
  booktitle = {British Machine Vision Conference (BMVC)},
  year      = {2023},
}
@inproceedings{mercea2023GCPR,
  author    = {Otniel-Bogdan Mercea and Thomas Hummel and A. Sophia Koepke and Zeynep Akata},
  title     = {Text-to-feature diffusion for audio-visual few-shot learning},
  booktitle = {DAGM German Conference on Pattern Recognition (GCPR)},
  year      = {2023},
}
@inproceedings{mercea2022ECCV,
  author    = {Otniel-Bogdan Mercea and Thomas Hummel and A. Sophia Koepke and Zeynep Akata},
  title     = {Temporal and cross-modal attention for audio-visual zero-shot learning},
  booktitle = {Proc. of the European Conf. on Computer Vision (ECCV)},
  year      = {2022},
}
@inproceedings{alaniz2022DGM4HSD,
  author    = {Stephan Alaniz and Thomas Hummel and Zeynep Akata},
  title     = {Semantic Image Synthesis with Semantically Coupled VQ-Model},
  booktitle = {ICLR Workshop on Deep Generative Models for Highly Structured Data (DGM4HSD)},
  year      = {2022},
}
% Journal article: Frontiers in Neurorobotics is a journal, so the entry is typed as article with a journal field.
% The booktitle field repeats the venue for tools that read it directly; standard article styles ignore it.
@article{heinrich2020FRONTIERS,
  author    = {Stefan Heinrich and Yuan Yao and Tobias Hinz and Zhiyuan Liu and Thomas Hummel and Matthias Kerzel and Cornelius Weber and Stefan Wermter},
  title     = {Crossmodal Language Grounding in an Embodied Neurocognitive Model},
  journal   = {Frontiers in Neurorobotics},
  booktitle = {Frontiers in Neurorobotics},
  year      = {2020},
}
@inproceedings{churamani2017HAI,
  author    = {Nikhil Churamani and Paul Anton and Marc Brügger and Erik Fließwasser and Thomas Hummel and Julius Mayer and Waleed Mustafa and Hwei Geok Ng and Thi Linh Chi Nguyen and Quan Nguyen and Marcus Soll and Sebastian Springenberg and Sascha Griffiths and Stefan Heinrich and Nicolás Navarro-Guerrero and Erik Strahl and Johannes Twiefel and Cornelius Weber and Stefan Wermter},
  title     = {The impact of personalisation on human-robot interaction in learning scenarios},
  booktitle = {Proceedings of the 5th International Conference on Human Agent Interaction},
  year      = {2017},
}
@inproceedings{ng2017ROMAN,
  author    = {Hwei Geok Ng and Paul Anton and Marc Brügger and Nikhil Churamani and Erik Fließwasser and Thomas Hummel and Julius Mayer and Waleed Mustafa and Thi Linh Chi Nguyen and Quan Nguyen and Marcus Soll and Sebastian Springenberg and Sascha Griffiths and Stefan Heinrich and Nicolás Navarro-Guerrero and Erik Strahl and Johannes Twiefel and Cornelius Weber and Stefan Wermter},
  title     = {Hey Robot, Why Don't You Talk To Me?},
  booktitle = {Proc. of the IEEE International Symposium on Robot and Human Interactive Communication (RO-MAN)},
  year      = {2017},
}
% Journal article: BMC Bioinformatics is a journal, so the entry is typed as article with a journal field.
% The booktitle field repeats the venue for tools that read it directly; standard article styles ignore it.
@article{Riemenschneider2016SHIVA,
  author    = {Mona Riemenschneider and Thomas Hummel and Dominik Heider},
  title     = {SHIVA - a web application for drug resistance and tropism testing in HIV},
  journal   = {BMC Bioinformatics},
  booktitle = {BMC Bioinformatics},
  year      = {2016},
}
This website is based on a great template from Michael Niemeyer