2024
|
Amit Meghanani; Thomas Hain: SCORE: Self-supervised Correspondence Fine-Tuning for Improved Content Representations. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Forthcoming. @inproceedings{meghanani2024icassp,
  author    = {Amit Meghanani and Thomas Hain},
  title     = {SCORE: Self-supervised Correspondence Fine-Tuning for Improved Content Representations},
  year      = {2024},
  date      = {2024},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
Rehan Ahmad; Muhammad Umar Farooq; Thomas Hain: Progressive Unsupervised Domain Adaptation for ASR Using Ensemble Models and Multi-stage Training. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Forthcoming. @inproceedings{ahmad2024icassp,
  author    = {Rehan Ahmad and Muhammad Umar Farooq and Thomas Hain},
  title     = {Progressive Unsupervised Domain Adaptation for ASR Using Ensemble Models and Multi-stage Training},
  year      = {2024},
  date      = {2024},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Combining Conformer and Dual-Path-Transformer Networks for Single Channel Noisy Reverberant Speech Separation. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Forthcoming. @inproceedings{ravenscroft2024icassp,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Combining Conformer and Dual-Path-Transformer Networks for Single Channel Noisy Reverberant Speech Separation},
  year      = {2024},
  date      = {2024},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
George Close; William Ravenscroft; Thomas Hain; Stefan Goetze: MULTI-CMGAN+/+: Leveraging Multi-Objective Speech Quality Metric Prediction for Speech Enhancement. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Forthcoming. @inproceedings{close2024icassp,
  author    = {George Close and William Ravenscroft and Thomas Hain and Stefan Goetze},
  title     = {MULTI-CMGAN+/+: Leveraging Multi-Objective Speech Quality Metric Prediction for Speech Enhancement},
  year      = {2024},
  date      = {2024},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
Rhiannon Mogridge; George Close; Robert Sutherland; Thomas Hain; Jon Barker; Stefan Goetze; Anton Ragni: Non-Intrusive Speech Intelligibility Prediction for Hearing-Impaired Users Using Intermediate ASR Features and Human Memory Models. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Forthcoming. @inproceedings{mogridge2024icassp,
  author    = {Rhiannon Mogridge and George Close and Robert Sutherland and Thomas Hain and Jon Barker and Stefan Goetze and Anton Ragni},
  title     = {Non-Intrusive Speech Intelligibility Prediction for Hearing-Impaired Users Using Intermediate ASR Features and Human Memory Models},
  year      = {2024},
  date      = {2024},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
Amit Meghanani; Thomas Hain: Improving Acoustic Word Embeddings through Correspondence Training of Self-supervised Speech Representations. In: Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2024), Forthcoming. @inproceedings{amit2024eacl,
  author    = {Amit Meghanani and Thomas Hain},
  title     = {Improving Acoustic Word Embeddings through Correspondence Training of Self-supervised Speech Representations},
  year      = {2024},
  date      = {2024},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (EACL 2024)},
  keywords  = {},
  pubstate  = {forthcoming},
  tppubtype = {inproceedings}
}
|
2023
|
Will Ravenscroft; Stefan Goetze; Thomas Hain: On Time Domain Conformer Models for Monaural Speech Separation in Noisy Reverberant Acoustic Environments. In: IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), 2023. @inproceedings{ravenscroft2023asru,
  author    = {Will Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {On Time Domain Conformer Models for Monaural Speech Separation in Noisy Reverberant Acoustic Environments},
  doi       = {10.1109/ASRU57964.2023.10389669},
  year      = {2023},
  date      = {2023-12-17},
  urldate   = {2023-12-17},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Elaf Islam; Thomas Hain; Protima Nomo Sudro: Simulation of Teacher-Learner Interaction in English Language Pronunciation Learning. In: IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), 2023. @inproceedings{islam2023asru,
  author    = {Elaf Islam and Thomas Hain and Protima Nomo Sudro},
  title     = {Simulation of Teacher-Learner Interaction in English Language Pronunciation Learning},
  doi       = {10.1109/ASRU57964.2023.10389639},
  year      = {2023},
  date      = {2023-12-17},
  urldate   = {2023-12-17},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Amit Meghanani; Thomas Hain: Deriving Translational Acoustic Sub-Word Embeddings. In: IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), 2023. @inproceedings{meghanani2023asru,
  author    = {Amit Meghanani and Thomas Hain},
  title     = {Deriving Translational Acoustic Sub-Word Embeddings},
  doi       = {10.1109/ASRU57964.2023.10389747},
  year      = {2023},
  date      = {2023-12-17},
  urldate   = {2023-12-17},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Muhammad Umar Farooq; Rehan Ahmad; Thomas Hain: MUST: A Multilingual Student-Teacher Approach for Low-Resource Speech Recognition. In: IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), 2023. @inproceedings{farooq2023asru,
  author    = {Muhammad Umar Farooq and Rehan Ahmad and Thomas Hain},
  title     = {MUST: A Multilingual Student-Teacher Approach for Low-Resource Speech Recognition},
  url       = {https://arxiv.org/pdf/2310.18865.pdf},
  doi       = {10.1109/ASRU57964.2023.10389636},
  year      = {2023},
  date      = {2023-12-17},
  urldate   = {2023-12-17},
  booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
George Close; Thomas Hain; Stefan Goetze: The Effect of Spoken Language on Speech Enhancement Using Self-Supervised Speech Representation Loss Functions. In: 2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), 2023. @inproceedings{close2023waspaa,
  author    = {George Close and Thomas Hain and Stefan Goetze},
  title     = {The Effect of Spoken Language on Speech Enhancement Using Self-Supervised Speech Representation Loss Functions},
  url       = {https://ieeexplore.ieee.org/document/10248166},
  doi       = {10.1109/WASPAA58266.2023.10248166},
  year      = {2023},
  date      = {2023-10-20},
  urldate   = {2023-10-20},
  booktitle = {2023 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
William Ravenscroft; Stefan Goetze; Thomas Hain: On Data Sampling Strategies for Training Neural Network Speech Separation Models. In: 31st European Signal Processing Conference (EUSIPCO), 2023. @inproceedings{ravenscroft2023eusipco,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {On Data Sampling Strategies for Training Neural Network Speech Separation Models},
  url       = {https://arxiv.org/abs/2304.07142},
  doi       = {10.23919/EUSIPCO58844.2023.10289800},
  year      = {2023},
  date      = {2023-09-04},
  urldate   = {2023-09-04},
  booktitle = {31st European Signal Processing Conference (EUSIPCO)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Protima Nomo Sudro; Anton Ragni; Thomas Hain: Adapting pretrained models for adult to child voice conversion. In: 31st European Signal Processing Conference (EUSIPCO), 2023. @inproceedings{sudro2023eusipco,
  author    = {Protima Nomo Sudro and Anton Ragni and Thomas Hain},
  title     = {Adapting pretrained models for adult to child voice conversion},
  doi       = {10.23919/EUSIPCO58844.2023.10289993},
  year      = {2023},
  date      = {2023-09-04},
  urldate   = {2023-09-04},
  booktitle = {31st European Signal Processing Conference (EUSIPCO)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Anna Ollerenshaw; Md Asif Jalal; Thomas Hain: Probing Statistical Representations for End-to-End ASR. In: 31st European Signal Processing Conference (EUSIPCO), 2023. @inproceedings{ollerenshaw2023eusipco,
  author    = {Anna Ollerenshaw and Md Asif Jalal and Thomas Hain},
  title     = {Probing Statistical Representations for End-to-End ASR},
  doi       = {10.23919/EUSIPCO58844.2023.10290070},
  year      = {2023},
  date      = {2023-09-04},
  booktitle = {31st European Signal Processing Conference (EUSIPCO)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Muhammad Umar Farooq; Thomas Hain: Learning Cross-lingual Mappings for Data Augmentation to Improve Low-Resource Speech Recognition. In: Interspeech 2023, 2023. @inproceedings{farooq2023dataAug,
  author    = {Muhammad Umar Farooq and Thomas Hain},
  title     = {Learning Cross-lingual Mappings for Data Augmentation to Improve Low-Resource Speech Recognition},
  url       = {https://arxiv.org/pdf/2306.08577.pdf},
  doi       = {10.21437/Interspeech.2023-1613},
  year      = {2023},
  date      = {2023-08-24},
  urldate   = {2023-08-24},
  booktitle = {Interspeech 2023},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
George Close; William Ravenscroft; Thomas Hain; Stefan Goetze: The University of Sheffield CHiME-7 UDASE Challenge Speech Enhancement System. 2023. @techreport{close2023chime,
  author       = {George Close and William Ravenscroft and Thomas Hain and Stefan Goetze},
  title        = {The University of Sheffield CHiME-7 UDASE Challenge Speech Enhancement System},
  url          = {https://www.chimechallenge.org/challenges/chime7/task2/documents/Close_CMGAN++.pdf},
  year         = {2023},
  date         = {2023-08-20},
  urldate      = {2023-08-20},
  howpublished = {Computational Hearing in Multisource Environments (CHiME), 2023},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {techreport}
}
|
Elaf Islam; Chanho Park; Thomas Hain: Exploring Speech Representations for Proficiency Assessment in Language Learning. In: 9th Workshop on Speech and Language Technology in Education (SLaTE), 2023. @inproceedings{islam2023slate,
  author    = {Elaf Islam and Chanho Park and Thomas Hain},
  title     = {Exploring Speech Representations for Proficiency Assessment in Language Learning},
  url       = {https://www.isca-speech.org/archive/slate_2023/islam23_slate.html},
  doi       = {10.21437/slate.2023-29},
  year      = {2023},
  date      = {2023-08-19},
  urldate   = {2023-08-19},
  booktitle = {9th Workshop on Speech and Language Technology in Education (SLaTE)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Rehan Ahmad; Md Asif Jalal; Muhammad Umar Farooq; Anna Ollerenshaw; Thomas Hain: Towards Domain Generalisation in ASR with Elitist Sampling and Ensemble Knowledge Distillation. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), IEEE, 2023. @inproceedings{ahmad2023kd,
  author    = {Rehan Ahmad and Md Asif Jalal and Muhammad Umar Farooq and Anna Ollerenshaw and Thomas Hain},
  title     = {Towards Domain Generalisation in ASR with Elitist Sampling and Ensemble Knowledge Distillation},
  url       = {https://arxiv.org/pdf/2303.00550.pdf},
  doi       = {10.1109/ICASSP49357.2023.10095746},
  year      = {2023},
  date      = {2023-05-05},
  urldate   = {2023-05-05},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2022
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Utterance Weighted Multi-Dilation Temporal Convolutional Networks for Monaural Speech Dereverberation. In: 2022 17th International Workshop on Acoustic Signal Enhancement (IWAENC), 2022. @inproceedings{ravenscroft2022b,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Utterance Weighted Multi-Dilation Temporal Convolutional Networks for Monaural Speech Dereverberation},
  url       = {https://arxiv.org/abs/2205.08455},
  doi       = {10.48550/ARXIV.2205.08455},
  year      = {2022},
  date      = {2022-09-01},
  booktitle = {2022 17th International Workshop on Acoustic Signal Enhancement (IWAENC)},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Madina Hasan; Nicholas Jefferson; Thomas Hain; Jeremy Dawson: Automatic detection of behavioural codes in team interactions. In: Comput. Speech Lang., vol. 74, pp. 101339, 2022. @article{DBLP:journals/csl/HasanJHD22,
  author    = {Madina Hasan and Nicholas Jefferson and Thomas Hain and Jeremy Dawson},
  title     = {Automatic detection of behavioural codes in team interactions},
  url       = {https://doi.org/10.1016/j.csl.2021.101339},
  doi       = {10.1016/j.csl.2021.101339},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {Comput. Speech Lang.},
  volume    = {74},
  pages     = {101339},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Chanho Park; Rehan Ahmad; Thomas Hain: Unsupervised Data Selection for Speech Recognition with Contrastive Loss Ratios. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8587–8591, 2022. @inproceedings{9747390,
  author    = {Chanho Park and Rehan Ahmad and Thomas Hain},
  title     = {Unsupervised Data Selection for Speech Recognition with Contrastive Loss Ratios},
  doi       = {10.1109/ICASSP43922.2022.9747390},
  year      = {2022},
  date      = {2022-01-01},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {8587--8591},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Jose Antonio Lopez Saenz; Thomas Hain: A Model for Assessor Bias in Automatic Pronunciation Assessment. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7267–7271, 2022. @inproceedings{9746720,
  author    = {Jose Antonio Lopez Saenz and Thomas Hain},
  title     = {A Model for Assessor Bias in Automatic Pronunciation Assessment},
  doi       = {10.1109/ICASSP43922.2022.9746720},
  year      = {2022},
  date      = {2022-01-01},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {7267--7271},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
George Close; Thomas Hain; Stefan Goetze: MetricGAN+/-: Increasing Robustness of Noise Reduction on Unseen Data. In: 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade,
Serbia, August 29 - September 2, 2022, IEEE, 2022. @inproceedings{Close2022,
  author    = {George Close and Thomas Hain and Stefan Goetze},
  title     = {MetricGAN+/-: Increasing Robustness of Noise Reduction on Unseen Data},
  doi       = {10.48550/arXiv.2203.12369},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  booktitle = {30th European Signal Processing Conference, EUSIPCO 2022, Belgrade,
Serbia, August 29 - September 2, 2022},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Muhammad Umar Farooq; Thomas Hain: Investigating the Impact of Cross-lingual Acoustic-Phonetic Similarities on Multilingual Speech Recognition. In: Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022, ISCA, 2022. @inproceedings{Farooq2022crosslingual,
  author    = {Muhammad Umar Farooq and Thomas Hain},
  title     = {Investigating the Impact of Cross-lingual Acoustic-Phonetic Similarities on Multilingual Speech Recognition},
  doi       = {10.48550/arXiv.2207.03390},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022},
  publisher = {ISCA},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Muhammad Umar Farooq; Darshan Adiga Haniya Narayana; Thomas Hain: Non-Linear Pairwise Language Mappings for Low-Resource Multilingual Acoustic Model Fusion. In: Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022, ISCA, 2022. @inproceedings{Farooq2022lowresource,
  author    = {Muhammad Umar Farooq and Darshan Adiga Haniya Narayana and Thomas Hain},
  title     = {Non-Linear Pairwise Language Mappings for Low-Resource Multilingual Acoustic Model Fusion},
  doi       = {10.48550/arXiv.2207.03391},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022},
  publisher = {ISCA},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Anna Ollerenshaw; Md Asif Jalal; Thomas Hain: Insights of Neural Representations in Multi-Banded and Multi-Channel Convolutional Transformers for End-to-End ASR. In: IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022, 2022. @inproceedings{Ollerenshaw2022,
  author    = {Anna Ollerenshaw and Md Asif Jalal and Thomas Hain},
  title     = {Insights of Neural Representations in Multi-Banded and Multi-Channel Convolutional Transformers for End-to-End ASR},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  booktitle = {IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Receptive Field Analysis of Temporal Convolutional Networks for Monaural Speech Dereverberation. In: IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022, 2022. @inproceedings{Ravenscroft2022eusipco,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Receptive Field Analysis of Temporal Convolutional Networks for Monaural Speech Dereverberation},
  doi       = {10.48550/arXiv.2204.06439},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  booktitle = {IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Att-TasNet: Attending to Encodings in Time-Domain Audio Speech Separation of Noisy, Reverberant Speech Mixtures. In: Frontiers in Signal Processing, 2022. @article{Ravenscroft2022frontiers,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Att-TasNet: Attending to Encodings in Time-Domain Audio Speech Separation of Noisy, Reverberant Speech Mixtures},
  doi       = {10.3389/frsip.2022.856968},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  journal   = {Frontiers in Signal Processing},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2021
|
Jose Antonio Lopez Saenz; Md Asif Jalal; Rosanna Milner; Thomas Hain: Attention Based Model for Segmental Pronunciation Error Detection. In: IEEE Automatic Speech Recognition and Understanding Workshop, ASRU
2021, Cartagena, Colombia, December 13-17, 2021, pp. 725–732, IEEE, 2021. @inproceedings{DBLP:conf/asru/SaenzJMH21,
  author    = {Jose Antonio Lopez Saenz and Md Asif Jalal and Rosanna Milner and Thomas Hain},
  title     = {Attention Based Model for Segmental Pronunciation Error Detection},
  url       = {https://doi.org/10.1109/ASRU51503.2021.9687993},
  doi       = {10.1109/ASRU51503.2021.9687993},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop, ASRU
2021, Cartagena, Colombia, December 13-17, 2021},
  pages     = {725--732},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: H-VECTORS: Improving the robustness in utterance-level speaker embeddings
using a hierarchical attention model. In: Neural Networks, vol. 142, pp. 329–339, 2021. @article{DBLP:journals/nn/ShiHH21,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {H-VECTORS: Improving the robustness in utterance-level speaker embeddings
using a hierarchical attention model},
  url       = {https://doi.org/10.1016/j.neunet.2021.05.024},
  doi       = {10.1016/j.neunet.2021.05.024},
  year      = {2021},
  date      = {2021-01-01},
  journal   = {Neural Networks},
  volume    = {142},
  pages     = {329--339},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Korbinian Friedl; Georgios Rizos; Lukas Stappen; Madina Hasan; Lucia Specia; Thomas Hain; Björn W. Schuller: Uncertainty Aware Review Hallucination for Science Article Classification. In: Zong, Chengqing; Xia, Fei; Li, Wenjie; Navigli, Roberto (Ed.): Findings of the Association for Computational Linguistics: ACL/IJCNLP
2021, Online Event, August 1-6, 2021, pp. 5004–5009, Association for Computational Linguistics, 2021. @inproceedings{DBLP:conf/acl/FriedlRSHSHS21,
  author    = {Korbinian Friedl and Georgios Rizos and Lukas Stappen and Madina Hasan and Lucia Specia and Thomas Hain and Björn W. Schuller},
  title     = {Uncertainty Aware Review Hallucination for Science Article Classification},
  editor    = {Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli},
  url       = {https://doi.org/10.18653/v1/2021.findings-acl.443},
  doi       = {10.18653/v1/2021.findings-acl.443},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {Findings of the Association for Computational Linguistics: ACL/IJCNLP
2021, Online Event, August 1-6, 2021},
  volume    = {ACL/IJCNLP 2021},
  pages     = {5004--5009},
  publisher = {Association for Computational Linguistics},
  series    = {Findings of ACL},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Mingjie Chen; Yanpei Shi; Thomas Hain: Towards Low-Resource Stargan Voice Conversion Using Weight Adaptive
Instance Normalization. In: IEEE International Conference on Acoustics, Speech and Signal Processing,
ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021, pp. 5949–5953, IEEE, 2021. @inproceedings{DBLP:conf/icassp/ChenSH21,
  author    = {Mingjie Chen and Yanpei Shi and Thomas Hain},
  title     = {Towards Low-Resource Stargan Voice Conversion Using Weight Adaptive
Instance Normalization},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9415042},
  doi       = {10.1109/ICASSP39728.2021.9415042},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing,
ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages     = {5949--5953},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Qiang Huang; Thomas Hain: Improving Audio Anomalies Recognition Using Temporal Convolutional
Attention Networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing,
ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021, pp. 6473–6477, IEEE, 2021. @inproceedings{DBLP:conf/icassp/0008H21,
  author    = {Qiang Huang and Thomas Hain},
  title     = {Improving Audio Anomalies Recognition Using Temporal Convolutional
Attention Networks},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414611},
  doi       = {10.1109/ICASSP39728.2021.9414611},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing,
ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages     = {6473--6477},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Anna Ollerenshaw; Md Asif Jalal; Thomas Hain: Insights on Neural Representations for End-to-End Speech Recognition. In: Hermansky, Hynek; Cernocký, Honza; Burget, Lukás; Lamel, Lori; Scharenborg, Odette; Motlícek, Petr (Ed.): Interspeech 2021, 22nd Annual Conference of the International Speech
Communication Association, Brno, Czechia, 30 August - 3 September
2021, pp. 4079–4083, ISCA, 2021. @inproceedings{DBLP:conf/interspeech/OllerenshawJH21,
  author    = {Anna Ollerenshaw and Md Asif Jalal and Thomas Hain},
  title     = {Insights on Neural Representations for End-to-End Speech Recognition},
  editor    = {Hynek Hermansky and Honza Cernocký and Lukás Burget and Lori Lamel and Odette Scharenborg and Petr Motlícek},
  url       = {https://doi.org/10.21437/Interspeech.2021-1516},
  doi       = {10.21437/Interspeech.2021-1516},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech
Communication Association, Brno, Czechia, 30 August - 3 September
2021},
  pages     = {4079--4083},
  publisher = {ISCA},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Shengjie Huang; Mingjie Chen; Yanyan Xu; Dengfeng Ke; Thomas Hain: WINVC: One-Shot Voice Conversion with Weight Adaptive Instance Normalization. In: Pham, Duc Nghia; Theeramunkong, Thanaruk; Governatori, Guido; Liu, Fenrong (Ed.): PRICAI 2021: Trends in Artificial Intelligence - 18th Pacific Rim
International Conference on Artificial Intelligence, PRICAI 2021,
Hanoi, Vietnam, November 8-12, 2021, Proceedings, Part II, pp. 559–573, Springer, 2021. @inproceedings{DBLP:conf/pricai/HuangCXKH21,
  author    = {Shengjie Huang and Mingjie Chen and Yanyan Xu and Dengfeng Ke and Thomas Hain},
  title     = {WINVC: One-Shot Voice Conversion with Weight Adaptive Instance Normalization},
  editor    = {Duc Nghia Pham and Thanaruk Theeramunkong and Guido Governatori and Fenrong Liu},
  url       = {https://doi.org/10.1007/978-3-030-89363-7_42},
  doi       = {10.1007/978-3-030-89363-7_42},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {PRICAI 2021: Trends in Artificial Intelligence - 18th Pacific Rim
International Conference on Artificial Intelligence, PRICAI 2021,
Hanoi, Vietnam, November 8-12, 2021, Proceedings, Part II},
  volume    = {13032},
  pages     = {559--573},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Jose Antonio Lopez Saenz; Thomas Hain: Use of Speaker Metadata for Improving Automatic Pronunciation Assessment. In: Espinosa Anke, Luis; Martín-Vide, Carlos; Spasic, Irena (Ed.): Statistical Language and Speech Processing - 9th International Conference,
SLSP 2021, Cardiff, UK, November 23-25, 2021, Proceedings, pp. 61–72, Springer, 2021. @inproceedings{DBLP:conf/slsp/SaenzH21,
  author    = {Jose Antonio Lopez Saenz and Thomas Hain},
  title     = {Use of Speaker Metadata for Improving Automatic Pronunciation Assessment},
  editor    = {Espinosa Anke, Luis and Carlos Martín-Vide and Irena Spasic},
  url       = {https://doi.org/10.1007/978-3-030-89579-2_6},
  doi       = {10.1007/978-3-030-89579-2_6},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {Statistical Language and Speech Processing - 9th International Conference,
SLSP 2021, Cardiff, UK, November 23-25, 2021, Proceedings},
  volume    = {13062},
  pages     = {61--72},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Thomas Hain: Contextual Joint Factor Acoustic Embeddings. In: IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen,
China, January 19-22, 2021, pp. 750–757, IEEE, 2021. @inproceedings{DBLP:conf/slt/ShiH21,
  author    = {Yanpei Shi and Thomas Hain},
  title     = {Contextual Joint Factor Acoustic Embeddings},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383592},
  doi       = {10.1109/SLT48900.2021.9383592},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen,
China, January 19-22, 2021},
  pages     = {750--757},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Thomas Hain: Supervised Speaker Embedding De-Mixing in Two-Speaker Environment. In: IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen,
China, January 19-22, 2021, pp. 758–765, IEEE, 2021. @inproceedings{DBLP:conf/slt/ShiH21a,
  author    = {Yanpei Shi and Thomas Hain},
  title     = {Supervised Speaker Embedding De-Mixing in Two-Speaker Environment},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383580},
  doi       = {10.1109/SLT48900.2021.9383580},
  year      = {2021},
  date      = {2021-01-01},
  booktitle = {IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen,
China, January 19-22, 2021},
  pages     = {758--765},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2020
|
Md Asif Jalal; Rosanna Milner; Thomas Hain; Roger K. Moore: Removing Bias with Residual Mixture of Multi-View Attention for Speech
Emotion Recognition. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 4084–4088, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/JalalMHM20,
  author    = {Md Asif Jalal and Rosanna Milner and Thomas Hain and Roger K. Moore},
  title     = {Removing Bias with Residual Mixture of Multi-View Attention for Speech
Emotion Recognition},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  url       = {https://doi.org/10.21437/Interspeech.2020-3005},
  doi       = {10.21437/Interspeech.2020-3005},
  year      = {2020},
  date      = {2020-01-01},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020},
  pages     = {4084--4088},
  publisher = {ISCA},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Md Asif Jalal; Rosanna Milner; Thomas Hain: Empirical Interpretation of Speech Emotion Perception with Attention Based Model for Speech Emotion Recognition. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 4113–4117, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/JalalMH20,
  author    = {Md Asif Jalal and Rosanna Milner and Thomas Hain},
  title     = {Empirical Interpretation of Speech Emotion Perception with Attention Based Model for Speech Emotion Recognition},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  url       = {https://doi.org/10.21437/Interspeech.2020-3007},
  doi       = {10.21437/Interspeech.2020-3007},
  year      = {2020},
  date      = {2020-01-01},
  urldate   = {2020-01-01},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020},
  pages     = {4113--4117},
  publisher = {ISCA},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: H-Vectors: Utterance-Level Speaker Embedding Using a Hierarchical
Attention Model. In: 2020 IEEE International Conference on Acoustics, Speech and Signal
Processing, ICASSP 2020, Barcelona, Spain, May 4-8, 2020, pp. 7579–7583, IEEE, 2020. @inproceedings{DBLP:conf/icassp/ShiHH20,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {H-Vectors: Utterance-Level Speaker Embedding Using a Hierarchical
Attention Model},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9054448},
  doi       = {10.1109/ICASSP40776.2020.9054448},
  year      = {2020},
  date      = {2020-01-01},
  booktitle = {2020 IEEE International Conference on Acoustics, Speech and Signal
Processing, ICASSP 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7579--7583},
  publisher = {IEEE},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: Speaker Re-Identification with Speaker Dependent Speech Enhancement. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 1530–1534, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/Shi0H20,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {Speaker Re-Identification with Speaker Dependent Speech Enhancement},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {1530--1534},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1772},
  url       = {https://doi.org/10.21437/Interspeech.2020-1772},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Lukas Stappen; Georgios Rizos; Madina Hasan; Thomas Hain; Björn W. Schuller: Uncertainty-Aware Machine Support for Paper Reviewing on the Interspeech
2019 Submission Corpus. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 1808–1812, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/StappenRHHS20,
title = {Uncertainty-Aware Machine Support for Paper Reviewing on the {Interspeech} 2019 Submission Corpus},
author = {Lukas Stappen and Georgios Rizos and Madina Hasan and Thomas Hain and Björn W. Schuller},
editor = {Helen Meng and Bo Xu and Thomas Fang Zheng},
url = {https://doi.org/10.21437/Interspeech.2020-2862},
doi = {10.21437/Interspeech.2020-2862},
year = {2020},
date = {2020-01-01},
booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
pages = {1808--1812},
publisher = {ISCA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: Weakly Supervised Training of Hierarchical Attention Networks for
Speaker Identification. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 2992–2996, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/Shi0H20a,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {Weakly Supervised Training of Hierarchical Attention Networks for Speaker Identification},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {2992--2996},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1774},
  url       = {https://doi.org/10.21437/Interspeech.2020-1774},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Qiang Huang; Thomas Hain: Exploration of Audio Quality Assessment and Anomaly Localisation Using
Attention Models. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 4611–4615, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/0008H20,
  author    = {Qiang Huang and Thomas Hain},
  title     = {Exploration of Audio Quality Assessment and Anomaly Localisation Using Attention Models},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {4611--4615},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1885},
  url       = {https://doi.org/10.21437/Interspeech.2020-1885},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Hardik B. Sailor; Thomas Hain: Multilingual Speech Recognition Using Language-Specific Phoneme Recognition
as Auxiliary Task for Indian Languages. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 4756–4760, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/SailorH20,
title = {Multilingual Speech Recognition Using Language-Specific Phoneme Recognition as Auxiliary Task for {Indian} Languages},
author = {Hardik B. Sailor and Thomas Hain},
editor = {Helen Meng and Bo Xu and Thomas Fang Zheng},
url = {https://doi.org/10.21437/Interspeech.2020-2739},
doi = {10.21437/Interspeech.2020-2739},
year = {2020},
date = {2020-01-01},
booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
pages = {4756--4760},
publisher = {ISCA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Mingjie Chen; Thomas Hain: Unsupervised Acoustic Unit Representation Learning for Voice Conversion
Using WaveNet Auto-Encoders. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech
Communication Association, Virtual Event, Shanghai, China, 25-29 October
2020, pp. 4866–4870, ISCA, 2020. @inproceedings{DBLP:conf/interspeech/ChenH20,
title = {Unsupervised Acoustic Unit Representation Learning for Voice Conversion Using {WaveNet} Auto-Encoders},
author = {Mingjie Chen and Thomas Hain},
editor = {Helen Meng and Bo Xu and Thomas Fang Zheng},
url = {https://doi.org/10.21437/Interspeech.2020-1785},
doi = {10.21437/Interspeech.2020-1785},
year = {2020},
date = {2020-01-01},
booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
pages = {4866--4870},
publisher = {ISCA},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: Robust Speaker Recognition Using Speech Enhancement And Attention
Model. In: Lee, Kong-Aik; Koshinaka, Takafumi; Shinoda, Koichi (Ed.): Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November
2020, Tokyo, Japan, pp. 451–458, ISCA, 2020. @inproceedings{DBLP:conf/odyssey/Shi0H20,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {Robust Speaker Recognition Using Speech Enhancement And Attention Model},
  editor    = {Kong-Aik Lee and Takafumi Koshinaka and Koichi Shinoda},
  booktitle = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November 2020, Tokyo, Japan},
  pages     = {451--458},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Odyssey.2020-65},
  url       = {https://doi.org/10.21437/Odyssey.2020-65},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2019
|
Rosanna Milner; Md Asif Jalal; Raymond W. M. Ng; Thomas Hain: A Cross-Corpus Study on Speech Emotion Recognition. In: IEEE Automatic Speech Recognition and Understanding Workshop, ASRU
2019, Singapore, December 14-18, 2019, pp. 304–311, IEEE, 2019. @inproceedings{DBLP:conf/asru/MilnerJNH19,
  author    = {Rosanna Milner and Md Asif Jalal and Raymond W. M. Ng and Thomas Hain},
  title     = {A Cross-Corpus Study on Speech Emotion Recognition},
  booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019},
  pages     = {304--311},
  publisher = {IEEE},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1109/ASRU46091.2019.9003838},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003838},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Salil Deena; Madina Hasan; Mortaza Doulaty; Oscar Saz; Thomas Hain: Recurrent Neural Network Language Model Adaptation for Multi-Genre
Broadcast Speech Recognition and Alignment. In: IEEE/ACM Transactions on Audio, Speech, and Language Processing, vol. 27, no. 3, pp. 572–582, 2019. @article{DBLP:journals/taslp/DeenaHDSH19,
title = {Recurrent Neural Network Language Model Adaptation for Multi-Genre Broadcast Speech Recognition and Alignment},
author = {Salil Deena and Madina Hasan and Mortaza Doulaty and Oscar Saz and Thomas Hain},
url = {https://doi.org/10.1109/TASLP.2018.2888814},
doi = {10.1109/TASLP.2018.2888814},
year = {2019},
date = {2019-01-01},
journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
volume = {27},
number = {3},
pages = {572--582},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|