2023
|
Muhammad Umar Farooq; Thomas Hain: Learning Cross-lingual Mappings for Data Augmentation to Improve Low-Resource Speech Recognition. In: Interspeech 2023, 2023.
@inproceedings{farooq2023dataAug,
  author    = {Muhammad Umar Farooq and Thomas Hain},
  title     = {Learning Cross-lingual Mappings for Data Augmentation to Improve Low-Resource Speech Recognition},
  booktitle = {Interspeech 2023},
  year      = {2023},
  date      = {2023-08-24},
  doi       = {10.21437/Interspeech.2023-1613},
  url       = {https://arxiv.org/pdf/2306.08577.pdf},
  urldate   = {2023-08-24},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Rehan Ahmad; Md Asif Jalal; Muhammad Umar Farooq; Anna Ollerenshaw; Thomas Hain: Towards Domain Generalisation in ASR with Elitist Sampling and Ensemble Knowledge Distillation. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2023, IEEE, 2023.
@inproceedings{ahmad2023kd,
  author    = {Rehan Ahmad and Md Asif Jalal and Muhammad Umar Farooq and Anna Ollerenshaw and Thomas Hain},
  title     = {Towards Domain Generalisation in ASR with Elitist Sampling and Ensemble Knowledge Distillation},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2023},
  publisher = {IEEE},
  year      = {2023},
  date      = {2023-05-05},
  doi       = {10.1109/ICASSP49357.2023.10095746},
  url       = {https://arxiv.org/pdf/2303.00550.pdf},
  urldate   = {2023-05-05},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2022
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Utterance Weighted Multi-Dilation Temporal Convolutional Networks for Monaural Speech Dereverberation. In: 2022 17th International Workshop on Acoustic Signal Enhancement (IWAENC), 2022.
@inproceedings{ravenscroft2022b,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Utterance Weighted Multi-Dilation Temporal Convolutional Networks for Monaural Speech Dereverberation},
  booktitle = {2022 17th International Workshop on Acoustic Signal Enhancement (IWAENC)},
  year      = {2022},
  date      = {2022-09-01},
  doi       = {10.48550/ARXIV.2205.08455},
  url       = {https://arxiv.org/abs/2205.08455},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Madina Hasan; Nicholas Jefferson; Thomas Hain; Jeremy Dawson: Automatic detection of behavioural codes in team interactions. In: Comput. Speech Lang., vol. 74, pp. 101339, 2022.
@article{DBLP:journals/csl/HasanJHD22,
  author    = {Madina Hasan and Nicholas Jefferson and Thomas Hain and Jeremy Dawson},
  title     = {Automatic detection of behavioural codes in team interactions},
  journal   = {Comput. Speech Lang.},
  volume    = {74},
  pages     = {101339},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1016/j.csl.2021.101339},
  url       = {https://doi.org/10.1016/j.csl.2021.101339},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Chanho Park; Rehan Ahmad; Thomas Hain: Unsupervised Data Selection for Speech Recognition with Contrastive Loss Ratios. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8587–8591, 2022.
@inproceedings{9747390,
  author    = {Chanho Park and Rehan Ahmad and Thomas Hain},
  title     = {Unsupervised Data Selection for Speech Recognition with Contrastive Loss Ratios},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {8587--8591},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1109/ICASSP43922.2022.9747390},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Jose Antonio Lopez Saenz; Thomas Hain: A Model for Assessor Bias in Automatic Pronunciation Assessment. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 7267–7271, 2022.
@inproceedings{9746720,
  author    = {Jose Antonio Lopez Saenz and Thomas Hain},
  title     = {A Model for Assessor Bias in Automatic Pronunciation Assessment},
  booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {7267--7271},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1109/ICASSP43922.2022.9746720},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
George Close; Thomas Hain; Stefan Goetze: MetricGAN+/-: Increasing Robustness of Noise Reduction on Unseen Data. In: 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022, IEEE, 2022.
@inproceedings{Close2022,
  author    = {George Close and Thomas Hain and Stefan Goetze},
  title     = {MetricGAN+/-: Increasing Robustness of Noise Reduction on Unseen Data},
  booktitle = {30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022},
  publisher = {IEEE},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.48550/arXiv.2203.12369},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Muhammad Umar Farooq; Thomas Hain: Investigating the Impact of Cross-lingual Acoustic-Phonetic Similarities on Multilingual Speech Recognition. In: Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022, ISCA, 2022.
@inproceedings{Farooq2022crosslingual,
  author    = {Muhammad Umar Farooq and Thomas Hain},
  title     = {Investigating the Impact of Cross-lingual Acoustic-Phonetic Similarities on Multilingual Speech Recognition},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022},
  publisher = {ISCA},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.48550/arXiv.2207.03390},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Muhammad Umar Farooq; Darshan Adiga Haniya Narayana; Thomas Hain: Non-Linear Pairwise Language Mappings for Low-Resource Multilingual Acoustic Model Fusion. In: Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022, ISCA, 2022.
@inproceedings{Farooq2022lowresource,
  author    = {Muhammad Umar Farooq and Darshan Adiga Haniya Narayana and Thomas Hain},
  title     = {Non-Linear Pairwise Language Mappings for Low-Resource Multilingual Acoustic Model Fusion},
  booktitle = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, South Korea, September 18 - 22, 2022},
  publisher = {ISCA},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.48550/arXiv.2207.03391},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Anna Ollerenshaw; Md Asif Jalal; Thomas Hain: Insights of Neural Representations in Multi-Banded and Multi-Channel Convolutional Transformers for End-to-End ASR. In: IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022, 2022.
@inproceedings{Ollerenshaw2022,
  author    = {Anna Ollerenshaw and Md Asif Jalal and Thomas Hain},
  title     = {Insights of Neural Representations in Multi-Banded and Multi-Channel Convolutional Transformers for End-to-End ASR},
  booktitle = {IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Receptive Field Analysis of Temporal Convolutional Networks for Monaural Speech Dereverberation. In: IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022, 2022.
@inproceedings{Ravenscroft2022eusipco,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Receptive Field Analysis of Temporal Convolutional Networks for Monaural Speech Dereverberation},
  booktitle = {IEEE 30th European Signal Processing Conference, EUSIPCO 2022, Belgrade, Serbia, August 29 - September 2, 2022},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.48550/arXiv.2204.06439},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
William Ravenscroft; Stefan Goetze; Thomas Hain: Att-TasNet: Attending to Encodings in Time-Domain Audio Speech Separation of Noisy, Reverberant Speech Mixtures. In: Frontiers in Signal Processing, 2022.
@article{Ravenscroft2022frontiers,
  author    = {William Ravenscroft and Stefan Goetze and Thomas Hain},
  title     = {Att-TasNet: Attending to Encodings in Time-Domain Audio Speech Separation of Noisy, Reverberant Speech Mixtures},
  journal   = {Frontiers in Signal Processing},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.3389/frsip.2022.856968},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
2021
|
Jose Antonio Lopez Saenz; Md Asif Jalal; Rosanna Milner; Thomas Hain: Attention Based Model for Segmental Pronunciation Error Detection. In: IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2021, Cartagena, Colombia, December 13-17, 2021, pp. 725–732, IEEE, 2021.
@inproceedings{DBLP:conf/asru/SaenzJMH21,
  author    = {Jose Antonio Lopez Saenz and Md Asif Jalal and Rosanna Milner and Thomas Hain},
  title     = {Attention Based Model for Segmental Pronunciation Error Detection},
  booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2021, Cartagena, Colombia, December 13-17, 2021},
  pages     = {725--732},
  publisher = {IEEE},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1109/ASRU51503.2021.9687993},
  url       = {https://doi.org/10.1109/ASRU51503.2021.9687993},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: H-VECTORS: Improving the robustness in utterance-level speaker embeddings using a hierarchical attention model. In: Neural Networks, vol. 142, pp. 329–339, 2021.
@article{DBLP:journals/nn/ShiHH21,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {H-VECTORS: Improving the robustness in utterance-level speaker embeddings using a hierarchical attention model},
  journal   = {Neural Networks},
  volume    = {142},
  pages     = {329--339},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1016/j.neunet.2021.05.024},
  url       = {https://doi.org/10.1016/j.neunet.2021.05.024},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Korbinian Friedl; Georgios Rizos; Lukas Stappen; Madina Hasan; Lucia Specia; Thomas Hain; Björn W. Schuller: Uncertainty Aware Review Hallucination for Science Article Classification. In: Zong, Chengqing; Xia, Fei; Li, Wenjie; Navigli, Roberto (Ed.): Findings of the Association for Computational Linguistics: ACL/IJCNLP 2021, Online Event, August 1-6, 2021, pp. 5004–5009, Association for Computational Linguistics, 2021.
@inproceedings{DBLP:conf/acl/FriedlRSHSHS21,
  author    = {Korbinian Friedl and Georgios Rizos and Lukas Stappen and Madina Hasan and Lucia Specia and Thomas Hain and Björn W. Schuller},
  title     = {Uncertainty Aware Review Hallucination for Science Article Classification},
  editor    = {Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli},
  booktitle = {Findings of the Association for Computational Linguistics: ACL/IJCNLP 2021, Online Event, August 1-6, 2021},
  series    = {Findings of ACL},
  volume    = {ACL/IJCNLP 2021},
  pages     = {5004--5009},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.18653/v1/2021.findings-acl.443},
  url       = {https://doi.org/10.18653/v1/2021.findings-acl.443},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Mingjie Chen; Yanpei Shi; Thomas Hain: Towards Low-Resource Stargan Voice Conversion Using Weight Adaptive Instance Normalization. In: IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021, pp. 5949–5953, IEEE, 2021.
@inproceedings{DBLP:conf/icassp/ChenSH21,
  author    = {Mingjie Chen and Yanpei Shi and Thomas Hain},
  title     = {Towards Low-Resource Stargan Voice Conversion Using Weight Adaptive Instance Normalization},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages     = {5949--5953},
  publisher = {IEEE},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1109/ICASSP39728.2021.9415042},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9415042},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Qiang Huang; Thomas Hain: Improving Audio Anomalies Recognition Using Temporal Convolutional Attention Networks. In: IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021, pp. 6473–6477, IEEE, 2021.
@inproceedings{DBLP:conf/icassp/0008H21,
  author    = {Qiang Huang and Thomas Hain},
  title     = {Improving Audio Anomalies Recognition Using Temporal Convolutional Attention Networks},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages     = {6473--6477},
  publisher = {IEEE},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1109/ICASSP39728.2021.9414611},
  url       = {https://doi.org/10.1109/ICASSP39728.2021.9414611},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Anna Ollerenshaw; Md Asif Jalal; Thomas Hain: Insights on Neural Representations for End-to-End Speech Recognition. In: Hermansky, Hynek; Cernocký, Honza; Burget, Lukás; Lamel, Lori; Scharenborg, Odette; Motlícek, Petr (Ed.): Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021, pp. 4079–4083, ISCA, 2021.
@inproceedings{DBLP:conf/interspeech/OllerenshawJH21,
  author    = {Anna Ollerenshaw and Md Asif Jalal and Thomas Hain},
  title     = {Insights on Neural Representations for End-to-End Speech Recognition},
  editor    = {Hynek Hermansky and Honza Cernocký and Lukás Burget and Lori Lamel and Odette Scharenborg and Petr Motlícek},
  booktitle = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages     = {4079--4083},
  publisher = {ISCA},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.21437/Interspeech.2021-1516},
  url       = {https://doi.org/10.21437/Interspeech.2021-1516},
  urldate   = {2021-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Shengjie Huang; Mingjie Chen; Yanyan Xu; Dengfeng Ke; Thomas Hain: WINVC: One-Shot Voice Conversion with Weight Adaptive Instance Normalization. In: Pham, Duc Nghia; Theeramunkong, Thanaruk; Governatori, Guido; Liu, Fenrong (Ed.): PRICAI 2021: Trends in Artificial Intelligence - 18th Pacific Rim International Conference on Artificial Intelligence, PRICAI 2021, Hanoi, Vietnam, November 8-12, 2021, Proceedings, Part II, pp. 559–573, Springer, 2021.
@inproceedings{DBLP:conf/pricai/HuangCXKH21,
  author    = {Shengjie Huang and Mingjie Chen and Yanyan Xu and Dengfeng Ke and Thomas Hain},
  title     = {WINVC: One-Shot Voice Conversion with Weight Adaptive Instance Normalization},
  editor    = {Duc Nghia Pham and Thanaruk Theeramunkong and Guido Governatori and Fenrong Liu},
  booktitle = {PRICAI 2021: Trends in Artificial Intelligence - 18th Pacific Rim International Conference on Artificial Intelligence, PRICAI 2021, Hanoi, Vietnam, November 8-12, 2021, Proceedings, Part II},
  series    = {Lecture Notes in Computer Science},
  volume    = {13032},
  pages     = {559--573},
  publisher = {Springer},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1007/978-3-030-89363-7_42},
  url       = {https://doi.org/10.1007/978-3-030-89363-7_42},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Jose Antonio Lopez Saenz; Thomas Hain: Use of Speaker Metadata for Improving Automatic Pronunciation Assessment. In: Anke, Luis Espinosa; Martín-Vide, Carlos; Spasic, Irena (Ed.): Statistical Language and Speech Processing - 9th International Conference, SLSP 2021, Cardiff, UK, November 23-25, 2021, Proceedings, pp. 61–72, Springer, 2021.
@inproceedings{DBLP:conf/slsp/SaenzH21,
  author    = {Jose Antonio Lopez Saenz and Thomas Hain},
  title     = {Use of Speaker Metadata for Improving Automatic Pronunciation Assessment},
  editor    = {Luis Espinosa Anke and Carlos Martín-Vide and Irena Spasic},
  booktitle = {Statistical Language and Speech Processing - 9th International Conference, SLSP 2021, Cardiff, UK, November 23-25, 2021, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {13062},
  pages     = {61--72},
  publisher = {Springer},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1007/978-3-030-89579-2_6},
  url       = {https://doi.org/10.1007/978-3-030-89579-2_6},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Thomas Hain: Contextual Joint Factor Acoustic Embeddings. In: IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen, China, January 19-22, 2021, pp. 750–757, IEEE, 2021.
@inproceedings{DBLP:conf/slt/ShiH21,
  author    = {Yanpei Shi and Thomas Hain},
  title     = {Contextual Joint Factor Acoustic Embeddings},
  booktitle = {IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {750--757},
  publisher = {IEEE},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1109/SLT48900.2021.9383592},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383592},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Thomas Hain: Supervised Speaker Embedding De-Mixing in Two-Speaker Environment. In: IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen, China, January 19-22, 2021, pp. 758–765, IEEE, 2021.
@inproceedings{DBLP:conf/slt/ShiH21a,
  author    = {Yanpei Shi and Thomas Hain},
  title     = {Supervised Speaker Embedding De-Mixing in Two-Speaker Environment},
  booktitle = {IEEE Spoken Language Technology Workshop, SLT 2021, Shenzhen, China, January 19-22, 2021},
  pages     = {758--765},
  publisher = {IEEE},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1109/SLT48900.2021.9383580},
  url       = {https://doi.org/10.1109/SLT48900.2021.9383580},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2020
|
Md Asif Jalal; Rosanna Milner; Thomas Hain; Roger K. Moore: Removing Bias with Residual Mixture of Multi-View Attention for Speech Emotion Recognition. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 4084–4088, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/JalalMHM20,
  author    = {Md Asif Jalal and Rosanna Milner and Thomas Hain and Roger K. Moore},
  title     = {Removing Bias with Residual Mixture of Multi-View Attention for Speech Emotion Recognition},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {4084--4088},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-3005},
  url       = {https://doi.org/10.21437/Interspeech.2020-3005},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Md Asif Jalal; Rosanna Milner; Thomas Hain: Empirical Interpretation of Speech Emotion Perception with Attention Based Model for Speech Emotion Recognition. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 4113–4117, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/JalalMH20,
  author    = {Md Asif Jalal and Rosanna Milner and Thomas Hain},
  title     = {Empirical Interpretation of Speech Emotion Perception with Attention Based Model for Speech Emotion Recognition},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {4113--4117},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-3007},
  url       = {https://doi.org/10.21437/Interspeech.2020-3007},
  urldate   = {2020-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: H-Vectors: Utterance-Level Speaker Embedding Using a Hierarchical Attention Model. In: 2020 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2020, Barcelona, Spain, May 4-8, 2020, pp. 7579–7583, IEEE, 2020.
@inproceedings{DBLP:conf/icassp/ShiHH20,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {H-Vectors: Utterance-Level Speaker Embedding Using a Hierarchical Attention Model},
  booktitle = {2020 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2020, Barcelona, Spain, May 4-8, 2020},
  pages     = {7579--7583},
  publisher = {IEEE},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.1109/ICASSP40776.2020.9054448},
  url       = {https://doi.org/10.1109/ICASSP40776.2020.9054448},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: Speaker Re-Identification with Speaker Dependent Speech Enhancement. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 1530–1534, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/Shi0H20,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {Speaker Re-Identification with Speaker Dependent Speech Enhancement},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {1530--1534},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1772},
  url       = {https://doi.org/10.21437/Interspeech.2020-1772},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Lukas Stappen; Georgios Rizos; Madina Hasan; Thomas Hain; Björn W. Schuller: Uncertainty-Aware Machine Support for Paper Reviewing on the Interspeech 2019 Submission Corpus. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 1808–1812, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/StappenRHHS20,
  author    = {Lukas Stappen and Georgios Rizos and Madina Hasan and Thomas Hain and Björn W. Schuller},
  title     = {Uncertainty-Aware Machine Support for Paper Reviewing on the Interspeech 2019 Submission Corpus},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {1808--1812},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-2862},
  url       = {https://doi.org/10.21437/Interspeech.2020-2862},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: Weakly Supervised Training of Hierarchical Attention Networks for Speaker Identification. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 2992–2996, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/Shi0H20a,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {Weakly Supervised Training of Hierarchical Attention Networks for Speaker Identification},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {2992--2996},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1774},
  url       = {https://doi.org/10.21437/Interspeech.2020-1774},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Qiang Huang; Thomas Hain: Exploration of Audio Quality Assessment and Anomaly Localisation Using Attention Models. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 4611–4615, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/0008H20,
  author    = {Qiang Huang and Thomas Hain},
  title     = {Exploration of Audio Quality Assessment and Anomaly Localisation Using Attention Models},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {4611--4615},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1885},
  url       = {https://doi.org/10.21437/Interspeech.2020-1885},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Hardik B. Sailor; Thomas Hain: Multilingual Speech Recognition Using Language-Specific Phoneme Recognition as Auxiliary Task for Indian Languages. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 4756–4760, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/SailorH20,
  author    = {Hardik B. Sailor and Thomas Hain},
  title     = {Multilingual Speech Recognition Using Language-Specific Phoneme Recognition as Auxiliary Task for Indian Languages},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {4756--4760},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-2739},
  url       = {https://doi.org/10.21437/Interspeech.2020-2739},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Mingjie Chen; Thomas Hain: Unsupervised Acoustic Unit Representation Learning for Voice Conversion Using WaveNet Auto-Encoders. In: Meng, Helen; Xu, Bo; Zheng, Thomas Fang (Ed.): Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020, pp. 4866–4870, ISCA, 2020.
@inproceedings{DBLP:conf/interspeech/ChenH20,
  author    = {Mingjie Chen and Thomas Hain},
  title     = {Unsupervised Acoustic Unit Representation Learning for Voice Conversion Using WaveNet Auto-Encoders},
  editor    = {Helen Meng and Bo Xu and Thomas Fang Zheng},
  booktitle = {Interspeech 2020, 21st Annual Conference of the International Speech Communication Association, Virtual Event, Shanghai, China, 25-29 October 2020},
  pages     = {4866--4870},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Interspeech.2020-1785},
  url       = {https://doi.org/10.21437/Interspeech.2020-1785},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Yanpei Shi; Qiang Huang; Thomas Hain: Robust Speaker Recognition Using Speech Enhancement And Attention Model. In: Lee, Kong-Aik; Koshinaka, Takafumi; Shinoda, Koichi (Ed.): Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November 2020, Tokyo, Japan, pp. 451–458, ISCA, 2020.
@inproceedings{DBLP:conf/odyssey/Shi0H20,
  author    = {Yanpei Shi and Qiang Huang and Thomas Hain},
  title     = {Robust Speaker Recognition Using Speech Enhancement And Attention Model},
  editor    = {Kong-Aik Lee and Takafumi Koshinaka and Koichi Shinoda},
  booktitle = {Odyssey 2020: The Speaker and Language Recognition Workshop, 1-5 November 2020, Tokyo, Japan},
  pages     = {451--458},
  publisher = {ISCA},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.21437/Odyssey.2020-65},
  url       = {https://doi.org/10.21437/Odyssey.2020-65},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2019
|
Rosanna Milner; Md Asif Jalal; Raymond W. M. Ng; Thomas Hain: A Cross-Corpus Study on Speech Emotion Recognition. In: IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019, pp. 304–311, IEEE, 2019.
@inproceedings{DBLP:conf/asru/MilnerJNH19,
  author    = {Rosanna Milner and Md Asif Jalal and Raymond W. M. Ng and Thomas Hain},
  title     = {A Cross-Corpus Study on Speech Emotion Recognition},
  booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019},
  pages     = {304--311},
  publisher = {IEEE},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1109/ASRU46091.2019.9003838},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003838},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Salil Deena; Madina Hasan; Mortaza Doulaty; Oscar Saz; Thomas Hain: Recurrent Neural Network Language Model Adaptation for Multi-Genre Broadcast Speech Recognition and Alignment. In: IEEE ACM Trans. Audio Speech Lang. Process., vol. 27, no. 3, pp. 572–582, 2019.
@article{DBLP:journals/taslp/DeenaHDSH19,
  author    = {Salil Deena and Madina Hasan and Mortaza Doulaty and Oscar Saz and Thomas Hain},
  title     = {Recurrent Neural Network Language Model Adaptation for Multi-Genre Broadcast Speech Recognition and Alignment},
  journal   = {IEEE ACM Trans. Audio Speech Lang. Process.},
  volume    = {27},
  number    = {3},
  pages     = {572--582},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1109/TASLP.2018.2888814},
  url       = {https://doi.org/10.1109/TASLP.2018.2888814},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Md Asif Jalal; Roger K. Moore; Thomas Hain: Spatio-Temporal Context Modelling for Speech Emotion Classification. In: IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019, pp. 853–859, IEEE, 2019.
@inproceedings{DBLP:conf/asru/JalalMH19,
  author    = {Md Asif Jalal and Roger K. Moore and Thomas Hain},
  title     = {Spatio-Temporal Context Modelling for Speech Emotion Classification},
  booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019},
  pages     = {853--859},
  publisher = {IEEE},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1109/ASRU46091.2019.9004037},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9004037},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Hardik B. Sailor; Salil Deena; Md Asif Jalal; Rasa Lileikyte; Thomas Hain: Unsupervised Adaptation of Acoustic Models for ASR Using Utterance-Level Embeddings from Squeeze and Excitation Networks. In: IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019, pp. 980–987, IEEE, 2019.
@inproceedings{DBLP:conf/asru/SailorDJLH19,
  author    = {Hardik B. Sailor and Salil Deena and Md Asif Jalal and Rasa Lileikyte and Thomas Hain},
  title     = {Unsupervised Adaptation of Acoustic Models for ASR Using Utterance-Level Embeddings from Squeeze and Excitation Networks},
  booktitle = {IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2019, Singapore, December 14-18, 2019},
  pages     = {980--987},
  publisher = {IEEE},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1109/ASRU46091.2019.9003755},
  url       = {https://doi.org/10.1109/ASRU46091.2019.9003755},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Qiang Huang; Thomas Hain: Detecting Mismatch Between Speech and Transcription Using Cross-Modal Attention. In: Kubin, Gernot; Kacic, Zdravko (Ed.): Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019, pp. 584–588, ISCA, 2019.
@inproceedings{DBLP:conf/interspeech/HuangH19,
  author    = {Qiang Huang and Thomas Hain},
  title     = {Detecting Mismatch Between Speech and Transcription Using Cross-Modal Attention},
  editor    = {Gernot Kubin and Zdravko Kacic},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {584--588},
  publisher = {ISCA},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.21437/Interspeech.2019-2125},
  url       = {https://doi.org/10.21437/Interspeech.2019-2125},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Md Asif Jalal; Erfan Loweimi; Roger K. Moore; Thomas Hain: Learning Temporal Clusters Using Capsule Routing for Speech Emotion
Recognition. In: Kubin, Gernot; Kacic, Zdravko (Ed.): Interspeech 2019, 20th Annual Conference of the International Speech
Communication Association, Graz, Austria, 15-19 September 2019, pp. 1701–1705, ISCA, 2019. @inproceedings{DBLP:conf/interspeech/JalalLMH19,
  author    = {Md Asif Jalal and Erfan Loweimi and Roger K. Moore and Thomas Hain},
  title     = {Learning Temporal Clusters Using Capsule Routing for Speech Emotion Recognition},
  editor    = {Gernot Kubin and Zdravko Kacic},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {1701--1705},
  publisher = {ISCA},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.21437/Interspeech.2019-3068},
  url       = {https://doi.org/10.21437/Interspeech.2019-3068},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Mortaza Doulaty; Thomas Hain: Latent Dirichlet Allocation Based Acoustic Data Selection for Automatic
Speech Recognition. In: Kubin, Gernot; Kacic, Zdravko (Ed.): Interspeech 2019, 20th Annual Conference of the International Speech
Communication Association, Graz, Austria, 15-19 September 2019, pp. 3228–3232, ISCA, 2019. @inproceedings{DBLP:conf/interspeech/DoulatyH19,
  author    = {Mortaza Doulaty and Thomas Hain},
  title     = {Latent {Dirichlet} Allocation Based Acoustic Data Selection for Automatic Speech Recognition},
  editor    = {Gernot Kubin and Zdravko Kacic},
  booktitle = {Interspeech 2019, 20th Annual Conference of the International Speech Communication Association, Graz, Austria, 15-19 September 2019},
  pages     = {3228--3232},
  publisher = {ISCA},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.21437/Interspeech.2019-1797},
  url       = {https://doi.org/10.21437/Interspeech.2019-1797},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2018
|
Oscar Saz; Salil Deena; Mortaza Doulaty; Madina Hasan; Bilal Khaliq; Rosanna Milner; Raymond W. M. Ng; Julia Olcoz; Thomas Hain: Lightly supervised alignment of subtitles on multi-genre broadcasts. In: Multimedia Tools and Applications, vol. 77, no. 23, pp. 30533–30550, 2018. @article{DBLP:journals/mta/SazDDHKMNOH18,
  author    = {Oscar Saz and Salil Deena and Mortaza Doulaty and Madina Hasan and Bilal Khaliq and Rosanna Milner and Raymond W. M. Ng and Julia Olcoz and Thomas Hain},
  title     = {Lightly supervised alignment of subtitles on multi-genre broadcasts},
  journal   = {Multimedia Tools and Applications},
  volume    = {77},
  number    = {23},
  pages     = {30533--30550},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1007/s11042-018-6050-1},
  url       = {https://doi.org/10.1007/s11042-018-6050-1},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Erfan Loweimi; Jon Barker; Thomas Hain: Exploring the Use of Group Delay for Generalised VTS Based Noise
Compensation. In: 2018 IEEE International Conference on Acoustics, Speech and Signal
Processing, ICASSP 2018, Calgary, AB, Canada, April 15-20, 2018, pp. 4824–4828, IEEE, 2018. @inproceedings{DBLP:conf/icassp/LoweimiBH18,
  author    = {Erfan Loweimi and Jon Barker and Thomas Hain},
  title     = {Exploring the Use of Group Delay for Generalised {VTS} Based Noise Compensation},
  booktitle = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages     = {4824--4828},
  publisher = {IEEE},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1109/ICASSP.2018.8462595},
  url       = {https://doi.org/10.1109/ICASSP.2018.8462595},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Erfan Loweimi; Jon Barker; Thomas Hain: On the Usefulness of the Speech Phase Spectrum for Pitch Extraction. In: Yegnanarayana, B. (Ed.): Interspeech 2018, 19th Annual Conference of the International Speech
Communication Association, Hyderabad, India, 2-6 September 2018, pp. 696–700, ISCA, 2018. @inproceedings{DBLP:conf/interspeech/LoweimiBH18,
  author    = {Erfan Loweimi and Jon Barker and Thomas Hain},
  title     = {On the Usefulness of the Speech Phase Spectrum for Pitch Extraction},
  editor    = {B. Yegnanarayana},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages     = {696--700},
  publisher = {ISCA},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.21437/Interspeech.2018-1062},
  url       = {https://doi.org/10.21437/Interspeech.2018-1062},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Mauro Nicolao; Michiel Sanders; Thomas Hain: Improved Acoustic Modelling for Automatic Literacy Assessment of Children. In: Yegnanarayana, B. (Ed.): Interspeech 2018, 19th Annual Conference of the International Speech
Communication Association, Hyderabad, India, 2-6 September 2018, pp. 1666–1670, ISCA, 2018. @inproceedings{DBLP:conf/interspeech/NicolaoSH18,
  author    = {Mauro Nicolao and Michiel Sanders and Thomas Hain},
  title     = {Improved Acoustic Modelling for Automatic Literacy Assessment of Children},
  editor    = {B. Yegnanarayana},
  booktitle = {Interspeech 2018, 19th Annual Conference of the International Speech Communication Association, Hyderabad, India, 2-6 September 2018},
  pages     = {1666--1670},
  publisher = {ISCA},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.21437/Interspeech.2018-2118},
  url       = {https://doi.org/10.21437/Interspeech.2018-2118},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Rahhal Errattahi; Salil Deena; Asmaa El Hannani; Hassan Ouahmane; Thomas Hain: Improving ASR Error Detection with RNNLM Adaptation. In: 2018 IEEE Spoken Language Technology Workshop, SLT 2018, Athens,
Greece, December 18-21, 2018, pp. 190–196, IEEE, 2018. @inproceedings{DBLP:conf/slt/ErrattahiDHOH18,
  author    = {Rahhal Errattahi and Salil Deena and El Hannani, Asmaa and Hassan Ouahmane and Thomas Hain},
  title     = {Improving {ASR} Error Detection with {RNNLM} Adaptation},
  booktitle = {2018 IEEE Spoken Language Technology Workshop, SLT 2018, Athens, Greece, December 18-21, 2018},
  pages     = {190--196},
  publisher = {IEEE},
  year      = {2018},
  date      = {2018-01-01},
  doi       = {10.1109/SLT.2018.8639602},
  url       = {https://doi.org/10.1109/SLT.2018.8639602},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
2017
|
Rosanna Milner; Thomas Hain: DNN approach to speaker diarisation using speaker channels. In: 2017 IEEE International Conference on Acoustics, Speech and Signal
Processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017, pp. 4925–4929, IEEE, 2017. @inproceedings{DBLP:conf/icassp/MilnerH17,
  author    = {Rosanna Milner and Thomas Hain},
  title     = {{DNN} approach to speaker diarisation using speaker channels},
  booktitle = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages     = {4925--4929},
  publisher = {IEEE},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1109/ICASSP.2017.7953093},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953093},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Oscar Saz; Thomas Hain: Acoustic adaptation to dynamic background conditions with asynchronous
transformations. In: Computer Speech & Language, vol. 41, pp. 180–194, 2017. @article{DBLP:journals/csl/SazH17,
  author    = {Oscar Saz and Thomas Hain},
  title     = {Acoustic adaptation to dynamic background conditions with asynchronous transformations},
  journal   = {Computer Speech \& Language},
  volume    = {41},
  pages     = {180--194},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1016/j.csl.2016.06.008},
  url       = {https://doi.org/10.1016/j.csl.2016.06.008},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Raymond W. M. Ng; Mauro Nicolao; Thomas Hain: Unsupervised crosslingual adaptation of tokenisers for spoken language
recognition. In: Computer Speech & Language, vol. 46, pp. 327–342, 2017. @article{DBLP:journals/csl/NgNH17,
  author    = {Raymond W. M. Ng and Mauro Nicolao and Thomas Hain},
  title     = {Unsupervised crosslingual adaptation of tokenisers for spoken language recognition},
  journal   = {Computer Speech \& Language},
  volume    = {46},
  pages     = {327--342},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1016/j.csl.2017.05.002},
  url       = {https://doi.org/10.1016/j.csl.2017.05.002},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
|
Salil Deena; Raymond W. M. Ng; Pranava Swaroop Madhyastha; Lucia Specia; Thomas Hain: Exploring the use of acoustic embeddings in neural machine translation. In: 2017 IEEE Automatic Speech Recognition and Understanding Workshop,
ASRU 2017, Okinawa, Japan, December 16-20, 2017, pp. 450–457, IEEE, 2017. @inproceedings{DBLP:conf/asru/DeenaNMSH17,
  author    = {Salil Deena and Raymond W. M. Ng and Pranava Swaroop Madhyastha and Lucia Specia and Thomas Hain},
  title     = {Exploring the use of acoustic embeddings in neural machine translation},
  booktitle = {2017 IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2017, Okinawa, Japan, December 16-20, 2017},
  pages     = {450--457},
  publisher = {IEEE},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1109/ASRU.2017.8268971},
  url       = {https://doi.org/10.1109/ASRU.2017.8268971},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Erfan Loweimi; Jon Barker; Thomas Hain: Statistical normalisation of phase-based feature representation for
robust speech recognition. In: 2017 IEEE International Conference on Acoustics, Speech and Signal
Processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017, pp. 5310–5314, IEEE, 2017. @inproceedings{DBLP:conf/icassp/LoweimiBH17,
  author    = {Erfan Loweimi and Jon Barker and Thomas Hain},
  title     = {Statistical normalisation of phase-based feature representation for robust speech recognition},
  booktitle = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages     = {5310--5314},
  publisher = {IEEE},
  year      = {2017},
  date      = {2017-01-01},
  doi       = {10.1109/ICASSP.2017.7953170},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953170},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|
Raymond W. M. Ng; Alvin C. M. Kwan; Tan Lee; Thomas Hain: Shefce: A Cantonese-English bilingual speech corpus for pronunciation assessment. In: 2017 IEEE International Conference on Acoustics, Speech and Signal
Processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017, pp. 5825–5829, IEEE, 2017. @inproceedings{DBLP:conf/icassp/NgKLH17,
  author    = {Raymond W. M. Ng and Alvin C. M. Kwan and Tan Lee and Thomas Hain},
  title     = {Shefce: A {Cantonese-English} bilingual speech corpus for pronunciation assessment},
  booktitle = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages     = {5825--5829},
  publisher = {IEEE},
  year      = {2017},
  date      = {2017-01-01},
  urldate   = {2017-01-01},
  doi       = {10.1109/ICASSP.2017.7953273},
  url       = {https://doi.org/10.1109/ICASSP.2017.7953273},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
|