
The homeService project is one of the main applications of NST.
HomeService aims to take the state-of-the-art speech recognition developed by the NST research team and put it to use in people’s homes. For elderly people or people with disabilities who can’t or choose not to use conventional means of interacting with technology (such as a keyboard or a computer mouse), speech can be an excellent alternative.
What would it be like to be able to talk to your television, command your lights and browse the web using only your voice? A homeService system can help its users achieve a higher degree of independence and thereby increase their quality of life.
M. Doulaty, O. Saz, and T. Hain, “Data-selective Transfer Learning for Multi-Domain Speech Recognition,” in Proceedings of the 16th Annual Conference of the International Speech Communication Association (Interspeech), Dresden, Germany, 2015.
[Bibtex]@inproceedings{doulaty15, address = {Dresden, Germany}, author = {Mortaza Doulaty and Oscar Saz and Thomas Hain}, booktitle = {{Proceedings of the 16th Annual Conference of the International Speech Communication Association (Interspeech)}}, project = {nst}, title = {{Data-selective Transfer Learning for Multi-Domain Speech Recognition}}, year = {2015} }
M. Doulaty, O. Saz, and T. Hain, “Unsupervised Domain Discovery using Latent Dirichlet Allocation for Acoustic Modelling in Speech Recognition,” in Proceedings of the 16th Annual Conference of the International Speech Communication Association (Interspeech), Dresden, Germany, 2015.
[Bibtex]@inproceedings{doulaty15b, address = {Dresden, Germany}, author = {Mortaza Doulaty and Oscar Saz and Thomas Hain}, booktitle = {{Proceedings of the 16th Annual Conference of the International Speech Communication Association (Interspeech)}}, project = {nst}, title = {{Unsupervised Domain Discovery using Latent Dirichlet Allocation for Acoustic Modelling in Speech Recognition}}, year = {2015} }
M. Doulaty, O. Saz, R. W. M. Ng, and T. Hain, “Latent Dirichlet Allocation Based Organisation of Broadcast Media Archives for Deep Neural Network Adaptation,” in Proceedings of the 2015 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU 2015), Scottsdale, Arizona, USA, 2015.
[Bibtex]@inproceedings{doulaty15c, address = {Scottsdale, Arizona, USA}, author = {Mortaza Doulaty and Oscar Saz and Raymond W. M. Ng and Thomas Hain}, booktitle = {{Proceedings of the 2015 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU 2015)}}, project = {nst}, title = {{Latent Dirichlet Allocation Based Organisation of Broadcast Media Archives for Deep Neural Network Adaptation}}, year = {2015} }
- M. Doulaty, O. Saz, R. W. M. Ng, and T. Hain, “Automatic Genre and Show Identification of Broadcast Media,” in Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech), San Francisco, California, USA, 2016.
[Bibtex]@inproceedings{doulaty16a, address = {San Francisco, California, USA}, author = {Mortaza Doulaty and Oscar Saz and Raymond W. M. Ng and Thomas Hain}, booktitle = {{Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech)}}, project = {nst}, title = {{Automatic Genre and Show Identification of Broadcast Media}}, year = {2016} }
- R. Milner, O. Saz, S. Deena, M. Doulaty, R. Ng, and T. Hain, “The 2015 Sheffield System for Longitudinal Diarisation of Broadcast Media,” in Proceedings of the 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), Scottsdale, AZ, 2015.
[Bibtex]@inproceedings{milner_ASRU2015, address = {Scottsdale, AZ}, author = {Rosanna Milner and Oscar Saz and Salil Deena and Mortaza Doulaty and Raymond Ng and Thomas Hain}, booktitle = {{Proceedings of the 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)}}, project = {nst}, title = {{The 2015 Sheffield System for Longitudinal Diarisation of Broadcast Media}}, year = {2015} }
- R. Milner and T. Hain, “Segment-oriented evaluation of speaker diarisation performance,” in Proceedings of the 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Shanghai, China, 2016.
[Bibtex]@inproceedings{milner_ICASSP2016, address = {Shanghai, China}, author = {Rosanna Milner and Thomas Hain}, booktitle = {{Proceedings of the 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}}, project = {thphd-milner}, title = {{Segment-oriented evaluation of speaker diarisation performance}}, year = {2016} }
- R. Milner and T. Hain, “DNN-based speaker clustering for speaker diarisation,” in Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech), San Francisco, USA, 2016.
[Bibtex]@inproceedings{milner_INTERSPEECH2016, address = {San Francisco, USA}, author = {Rosanna Milner and Thomas Hain}, booktitle = {{Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech)}}, project = {thphd-milner}, title = {{DNN-based speaker clustering for speaker diarisation}}, year = {2016} }
E. Loweimi, S. M. Ahadi, and T. Drugman, “A new phase-based feature representation for robust speech recognition,” in IEEE International conference on Acoustics, Speech and Signal Processing, Vancouver, Canada, 2013.
[Bibtex]@inproceedings{loweimi_icassp13, address = {Vancouver, Canada}, author = {Erfan Loweimi and Seyed Mohammad Ahadi and Thomas Drugman}, booktitle = {{IEEE International conference on Acoustics, Speech and Signal Processing}}, title = {{A new phase-based feature representation for robust speech recognition}}, year = {2013} }
E. Loweimi, J. Barker, and T. Hain, “Statistical Normalisation of Phase-based Feature Representation for Robust Speech Recognition,” in ICASSP 2017, New Orleans, USA, 2017.
[Bibtex]@inproceedings{loweimi_icassp17, address = {New Orleans, USA}, author = {Erfan Loweimi and Jon Barker and Thomas Hain}, booktitle = {{ICASSP 2017}}, title = {{Statistical Normalisation of Phase-based Feature Representation for Robust Speech Recognition}}, year = {2017} }
E. Loweimi, S. M. Ahadi, and S. Loveymi, “On the importance of phase and magnitude spectra in speech enhancement,” in Iranian conference on Electrical Engineering (ICEE), Tehran, Iran, 2011.
[Bibtex]@inproceedings{loweimi_icee11, address = {Tehran, Iran}, author = {Erfan Loweimi and Seyed Mohammad Ahadi and Samira Loveymi}, booktitle = {{Iranian conference on Electrical Engineering (ICEE)}}, title = {{On the importance of phase and magnitude spectra in speech enhancement}}, year = {2011} }
E. Loweimi and S. M. Ahadi, “A new group delay-based feature for robust speech recognition,” in IEEE International conference on Multimedia and Expo (ICME), Barcelona, Spain, 2011.
[Bibtex]@inproceedings{loweimi_icme11, address = {Barcelona, Spain}, author = {Erfan Loweimi and Seyed Mohammad Ahadi}, booktitle = {{IEEE International conference on Multimedia and Expo (ICME)}}, title = {{A new group delay-based feature for robust speech recognition}}, year = {2011} }
E. Loweimi, S. M. Ahadi, and H. Sheikhzadeh, “Phase-only speech reconstruction using very short frames,” in Proceedings of the 12th Annual Conference of the International Speech Communication Association (Interspeech), Florence, Italy, 2011.
[Bibtex]@inproceedings{loweimi_is11, address = {Florence, Italy}, author = {Erfan Loweimi and Seyed Mohammad Ahadi and Hamid Sheikhzadeh}, booktitle = {{Proceedings of the 12th Annual Conference of the International Speech Communication Association (Interspeech)}}, title = {{Phase-only speech reconstruction using very short frames}}, year = {2011} }
E. Loweimi, J. Barker, and T. Hain, “Source-filter Separation of Speech Signal in the Phase Domain,” in Proceedings of the 16th Annual Conference of the International Speech Communication Association (Interspeech), Dresden, Germany, 2015.
[Bibtex]@inproceedings{loweimi_is15, address = {Dresden, Germany}, author = {Erfan Loweimi and Jon Barker and Thomas Hain}, booktitle = {{Proceedings of the 16th Annual Conference of the International Speech Communication Association (Interspeech)}}, title = {{Source-filter Separation of Speech Signal in the Phase Domain}}, year = {2015} }
E. Loweimi, J. Barker, and T. Hain, “Use of Generalised Nonlinearity in Vector Taylor Series Noise Compensation for Robust Speech Recognition,” in Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech), San Francisco, USA, 2016.
[Bibtex]@inproceedings{loweimi_is16, address = {San Francisco, USA}, author = {Erfan Loweimi and Jon Barker and Thomas Hain}, booktitle = {{Proceedings of the 17th Annual Conference of the International Speech Communication Association (Interspeech)}}, title = {{Use of Generalised Nonlinearity in Vector Taylor Series Noise Compensation for Robust Speech Recognition}}, year = {2016} }
E. Loweimi, J. Barker, and T. Hain, “Channel Compensation in the Generalised Vector Taylor Series Approach to Robust ASR,” in Interspeech 2017, Stockholm, Sweden, 2017.
[Bibtex]@inproceedings{loweimi_is17-gvts, address = {Stockholm, Sweden}, author = {Erfan Loweimi and Jon Barker and Thomas Hain}, booktitle = {{Interspeech 2017}}, title = {{Channel Compensation in the Generalised Vector Taylor Series Approach to Robust ASR}}, year = {2017} }
E. Loweimi, J. Barker, O. S. Torralba, and T. Hain, “Robust Source-filter Separation of Speech Signal in the Phase Domain,” in Interspeech 2017, Stockholm, Sweden, 2017.
[Bibtex]@inproceedings{loweimi_is17-ph, address = {Stockholm, Sweden}, author = {Erfan Loweimi and Jon Barker and Oscar Saz Torralba and Thomas Hain}, booktitle = {{Interspeech 2017}}, title = {{Robust Source-filter Separation of Speech Signal in the Phase Domain}}, year = {2017} }
E. Loweimi and S. M. Ahadi, “Objective evaluation of magnitude and phase only spectrum-based reconstruction of the speech signal,” in International Symposium on Communications, Control and Signal Processing, Limassol, Cyprus, 2010.
[Bibtex]@inproceedings{loweimi_isccsp10, address = {Limassol, Cyprus}, author = {Erfan Loweimi and Seyed Mohammad Ahadi}, booktitle = {{International Symposium on Communications, Control and Signal Processing}}, title = {{Objective evaluation of magnitude and phase only spectrum-based reconstruction of the speech signal}}, year = {2010} }
E. Loweimi and S. M. Ahadi, “Objective evaluation of phase and magnitude only reconstructed speech: new considerations,” in Information Sciences Signal Processing and their Applications (ISSPA), Kuala Lumpur, Malaysia, 2010.
[Bibtex]@inproceedings{loweimi_isspa10, address = {Kuala Lumpur, Malaysia}, author = {Erfan Loweimi and Seyed Mohammad Ahadi}, booktitle = {{Information Sciences Signal Processing and their Applications (ISSPA)}}, title = {{Objective evaluation of phase and magnitude only reconstructed speech: new considerations}}, year = {2010} }
E. Loweimi, S. M. Ahadi, T. Drugman, and S. Loveymi, “On the importance of pre-emphasis and window shape in phase-based speech recognition,” in Lecture Notes in Computer Science, Advances in Non-Linear Speech Processing (NOLISP), Mons, Belgium, 2013.
[Bibtex]@inproceedings{loweimi_nolisp13, address = {Mons, Belgium}, author = {Erfan Loweimi and Seyed Mohammad Ahadi and Thomas Drugman and Samira Loveymi}, booktitle = {{Lecture Notes in Computer Science, Advances in Non-Linear Speech Processing (NOLISP)}}, title = {{On the importance of pre-emphasis and window shape in phase-based speech recognition}}, year = {2013} }
E. Loweimi, M. Doulaty, J. Barker, and T. Hain, “Long-term statistical Feature Extraction from Speech Signal and its Application in Emotion Recognition,” in Statistical Language and Speech Processing (SLSP), Budapest, Hungary, 2015.
[Bibtex]@inproceedings{loweimi_slsp15, address = {Budapest, Hungary}, author = {Erfan Loweimi and Mortaza Doulaty and Jon Barker and Thomas Hain}, booktitle = {{Statistical Language and Speech Processing (SLSP)}}, title = {{Long-term statistical Feature Extraction from Speech Signal and its Application in Emotion Recognition}}, year = {2015} }
E. Loweimi, J. Barker, and T. Hain, “Compression of model-based group delay function for robust speech recognition,” in University of Sheffield Engineering Symposium, Sheffield, UK, 2014.
[Bibtex]@inproceedings{loweimi_uses14, address = {Sheffield, UK}, author = {Erfan Loweimi and Jon Barker and Thomas Hain}, booktitle = {{University of Sheffield Engineering Symposium}}, title = {{Compression of model-based group delay function for robust speech recognition}}, year = {2014} }
E. Loweimi, J. Barker, and T. Hain, “Emotion Recognition from Speech Signal by Effective Combination of the Generative and Discriminative Models,” in University of Sheffield Engineering Symposium, Sheffield, UK, 2015.
[Bibtex]@inproceedings{loweimi_uses15, address = {Sheffield, UK}, author = {Erfan Loweimi and Jon Barker and Thomas Hain}, booktitle = {{University of Sheffield Engineering Symposium}}, title = {{Emotion Recognition from Speech Signal by Effective Combination of the Generative and Discriminative Models}}, year = {2015} }
- M. Hasan, R. Doddipatla, and T. Hain, “Multi-pass sentence-end detection of lecture speech,” in Interspeech 2014, 2014.
[Bibtex]@inproceedings{madinainterspeech2014, author = {Madina Hasan and Rama Doddipatla and Thomas Hain}, booktitle = {{Interspeech 2014}}, title = {{Multi-pass sentence-end detection of lecture speech}}, year = {2014} }
- M. Hasan, R. Doddipatla, and T. Hain, “Noise-matched training of CRF based sentence end detection models,” in Interspeech 2015, 2015.
[Bibtex]@inproceedings{madinainterspeech2015, author = {Madina Hasan and Rama Doddipatla and Thomas Hain}, booktitle = {{Interspeech 2015}}, project = {nst}, title = {{Noise-matched training of CRF based sentence end detection models}}, year = {2015} }
- R. Doddipatla, M. Hasan, and T. Hain, “Speaker Dependent Bottleneck Layer Training for Speaker Adaptation in Automatic Speech Recognition,” in Interspeech 2014, 2014.
[Bibtex]@inproceedings{ramainterspeech2014, author = {Rama Doddipatla and Madina Hasan and Thomas Hain}, booktitle = {{Interspeech 2014}}, title = {{Speaker Dependent Bottleneck Layer Training for Speaker Adaptation in Automatic Speech Recognition}}, year = {2014} }
J. Carletta, S. Ashby, S. Bourban, M. Flynn, M. Guillemot, T. Hain, J. Kadlec, V. Karaiskos, W. Kraaij, M. Kronenthal, and others, “The AMI meeting corpus: A pre-announcement,” in Machine learning for multimodal interaction, Springer, 2006, p. 28–39.
[Bibtex]@incollection{carletta2006amitest, author = {Carletta, Jean and Ashby, Simone and Bourban, Sebastien and Flynn, Mike and Guillemot, Mael and Hain, Thomas and Kadlec, Jaroslav and Karaiskos, Vasilis and Kraaij, Wessel and Kronenthal, Melissa and others}, booktitle = {{Machine learning for multimodal interaction}}, pages = {28--39}, pdf = {}, publisher = {Springer}, resource = {SWC}, title = {{The AMI meeting corpus: A pre-announcement}}, year = {2006} }
- I. McCowan, J. Carletta, W. Kraaij, S. Ashby, S. Bourban, M. Flynn, M. Guillemot, T. Hain, J. Kadlec, V. Karaiskos, and others, “The AMI meeting corpus,” Proceedings of the 5th international conference on methods and techniques in behavioral research, vol. 88, 2005.
[Bibtex]@article{mccowan2005ami, author = {McCowan, Iain and Carletta, Jean and Kraaij, W and Ashby, S and Bourban, S and Flynn, M and Guillemot, M and Hain, Thomas and Kadlec, J and Karaiskos, V and others}, journal = {Proceedings of the 5th International Conference on Methods and Techniques in Behavioral Research}, project = {AMI, NST}, title = {{The AMI meeting corpus}}, volume = {88}, year = {2005} }
S. Young, G. Evermann, M. Gales, T. Hain, D. Kershaw, X. Liu, G. Moore, J. Odell, D. Ollason, D. Povey, and others, “The HTK book (Version 3.3, Version 3.4),” Cambridge University Engineering Department, CUED, 2006.
[Bibtex]@book{young2006htk, author = {Young, Steve and Evermann, Gunnar and Gales, Mark and Hain, Thomas and Kershaw, Dan and Liu, XA and Moore, Gareth and Odell, Julian and Ollason, Dave and Povey, Dan and others}, key = {young2006htka}, project = {NST}, publisher = {Cambridge University Engineering Department, CUED}, resource = {HTK}, title = {{The HTK book (Version 3.3, Version 3.4)}}, year = {2006} }
- H. Christensen, M. B. Aniol, P. Bell, P. Green, T. Hain, S. King, and P. Swietojanski, “Combining in-domain and out-of-domain speech data for automatic recognition of disordered speech,” in Interspeech’13, 2013.
[Bibtex]@conference{Christensen2013, author = {H. Christensen and M. B. Aniol and Bell, P. and P. Green and T. Hain and S. King and P Swietojanski}, booktitle = {{Interspeech{\textquoteright}13}}, link = {}, project = {nst,nst-homeservice}, title = {{Combining in-domain and out-of-domain speech data for automatic recognition of disordered speech}}, year = {2013} }
- H. Christensen, I. Casanueva, S. Cunningham, P. Green, and T. Hain, “Automatic Selection of Speakers for Improved Acoustic Modelling : Recognition of Disordered Speech with Sparse Data,” in Spoken Language Technology Workshop, SLT’14, Lake Tahoe, 2014.
[Bibtex]@conference{Christensen2014_spoken, address = {Lake Tahoe}, author = {H. Christensen and I. Casanueva and S. Cunningham and P. Green and T. Hain}, booktitle = {{Spoken Language Technology Workshop, SLT{\textquoteright}14}}, project = {nst,nst-homeservice}, title = {{Automatic Selection of Speakers for Improved Acoustic Modelling : Recognition of Disordered Speech with Sparse Data}}, year = {2014} }
- H. Christensen, S. Siddharth, P. O’Neill, Z. Clarke, S. Judge, S. Cunningham, and M. Hawley, “SPECS – an embedded platform, speech-driven environmental control system evaluated in a virtuous circle framework,” in Proc. Workshop on Innovation and Applications in Speech Technology, 2012.
[Bibtex]@conference{Christensen_iast2012, author = {H. Christensen and S. Siddharth and P. O{\textquoteright}Neill and Z. Clarke and S. Judge and S. Cunningham and M. Hawley}, booktitle = {{Proc. Workshop on Innovation and Applications in Speech Technology}}, link = {}, project = {nst,nst-homeservice}, title = {{SPECS - an embedded platform, speech-driven environmental control system evaluated in a virtuous circle framework}}, year = {2012} }
- H. Christensen, S. Cunningham, C. Fox, P. Green, and T. Hain, “A comparative study of adaptive, automatic recognition of disordered speech,” in Proc Interspeech 2012, Portland, Oregon, US, 2012.
[Bibtex]@conference{christensen_is12, address = {Portland, Oregon, US}, author = {H. Christensen and S. Cunningham and Charles Fox and P. Green and T. Hain}, booktitle = {{Proc Interspeech 2012}}, link = {}, month = {Sep}, project = {nst,nst-homeservice}, title = {{A comparative study of adaptive, automatic recognition of disordered speech}}, year = {2012} }
- H. Christensen, P. Green, and T. Hain, “Learning speaker-specific pronunciations of disordered speech,” in Interspeech’13, 2013.
[Bibtex]@conference{christensen_pron_is13, author = {H. Christensen and P. Green and T. Hain}, booktitle = {{Interspeech{\textquoteright}13}}, link = {}, project = {nst,nst-homeservice}, title = {{Learning speaker-specific pronunciations of disordered speech}}, year = {2013} }
- H. Christensen, S. Cunningham, P. Green, and T. Hain, “homeService: Voice-enabled assistive technology in the home using cloud-based automatic speech recognition,” in 4th Workshop on Speech and Language Processing (SLPAT), 2013.
[Bibtex]@conference{christensen_slpat13, author = {H. Christensen and S. Cunningham and P. Green and T. Hain}, booktitle = {{4th Workshop on Speech and Language Processing (SLPAT)}}, link = {}, project = {nst,nst-homeservice}, title = {{homeService: Voice-enabled assistive technology in the home using cloud-based automatic speech recognition}}, year = {2013} }
H. Christensen, M. Nicolao, S. Cunningham, S. Deena, P. Green, and T. Hain, “Speech-Enabled Environmental Control in an AAL setting for people with Speech Disorders: a Case Study,” in IET International Conference on Technologies for Active and Assisted Living, TechAAL 2015, London, UK, 2015.
[Bibtex]@inproceedings{christensen_techaal15, address = {London, UK}, author = {Christensen, Heidi and Nicolao, Mauro and Cunningham, Stuart and Deena, Salil and Green, Phil and Hain, Thomas}, booktitle = {{IET International Conference on Technologies for Active and Assisted Living, TechAAL 2015}}, project = {nst,nst-homeservice}, title = {{Speech-Enabled Environmental Control in an AAL setting for people with Speech Disorders: a Case Study}}, year = {2015} }
- C. Fox, H. Christensen, and T. Hain, “Studio report: Linux audio for multi-speaker natural speech technology.,” in Proc. Linux Audio Conference, 2012.
[Bibtex]@conference{FOX-LAC2012, author = {Charles Fox and H. Christensen and T. Hain}, booktitle = {{Proc. Linux Audio Conference}}, link = {}, project = {nst}, title = {{Studio report: Linux audio for multi-speaker natural speech technology.}}, year = {2012} }
- D. M. González, P. Green, and H. Christensen, “Dysarthria Intelligibility Assessment in a Factor Analysis Total Variability Space,” in Interspeech’13, 2013.
[Bibtex]@conference{Gonzalez2013, author = {D. M Gonz{\'a}lez and P. Green and H. Christensen}, booktitle = {{Interspeech{\textquoteright}13}}, link = {}, project = {nst,nst-homeservice}, title = {{Dysarthria Intelligibility Assessment in a Factor Analysis Total Variability Space}}, year = {2013} }
D. Martínez, E. Lleida, P. Green, H. Christensen, A. Ortega, and A. Miguel, “Intelligibility Assessment and Speech Recognizer Word Accuracy Rate Prediction for Dysarthric Speakers in a Factor Analysis Subspace,” ACM Transactions on Accessible Computing (TACCESS), vol. 6, iss. 3, p. 10, 2015.
[Bibtex]@article{martinez2015intelligibility, author = {Mart{\'\i}nez, David and Lleida, Eduardo and Green, Phil and Christensen, Heidi and Ortega, Alfonso and Miguel, Antonio}, journal = {ACM Transactions on Accessible Computing (TACCESS)}, number = {3}, pages = {10}, project = {nst,nst-homeservice}, publisher = {ACM}, title = {{Intelligibility Assessment and Speech Recognizer Word Accuracy Rate Prediction for Dysarthric Speakers in a Factor Analysis Subspace}}, volume = {6}, year = {2015} }
I. Casanueva, H. Christensen, T. Hain, and P. Green, “Adaptive speech recognition and dialogue management for users with speech disorders,” in Proceedings of Interspeech, Singapore, Singapore, 2014.
[Bibtex]@inproceedings{casanueva:14, address = {Singapore, Singapore}, author = {Inigo Casanueva and Heidi Christensen and Thomas Hain and Phil Green}, booktitle = {{Proceedings of Interspeech}}, pdf = {}, project = {nst-homeService}, title = {{Adaptive speech recognition and dialogue management for users with speech disorders}}, year = {2014} }
I. Casanueva, T. Hain, H. Christensen, R. Marxer, and P. Green, “Knowledge transfer between speakers for personalised dialogue management,” in Proceedings of SIGDial, Prague, Czech Republic, 2015.
[Bibtex]@inproceedings{casanueva:15, address = {Prague, Czech Republic}, author = {Inigo Casanueva and Thomas Hain and Heidi Christensen and Ricard Marxer and Phil Green}, booktitle = {{Proceedings of SIGDial}}, pdf = {}, project = {nst-homeService}, title = {{Knowledge transfer between speakers for personalised dialogue management}}, year = {2015} }
H. Christensen, I. Casanueva, S. Cunningham, P. Green, and T. Hain, “HomeService: Voice-enabled assistive technology in the home using cloud-based automatic speech recognition,” in Proceedings of SLPAT, Grenoble, France, 2013.
[Bibtex]@inproceedings{christensen:13, address = {Grenoble, France}, author = {Heidi Christensen and Inigo Casanueva and Stuart Cunningham and Phil Green and Thomas Hain}, booktitle = {{Proceedings of SLPAT}}, pdf = {}, project = {nst-homeService}, title = {{HomeService: Voice-enabled assistive technology in the home using cloud-based automatic speech recognition}}, year = {2013} }
H. Christensen, I. Casanueva, S. Cunningham, P. Green, and T. Hain, “Automatic selection of speakers for improved acoustic modelling: recognition of disordered speech with sparse data,” in Proceedings of SLT, Nevada, USA, 2014.
[Bibtex]@inproceedings{christensen:14, address = {Nevada, USA}, author = {Heidi Christensen and Inigo Casanueva and Stuart Cunningham and Phil Green and Thomas Hain}, booktitle = {{Proceedings of SLT}}, pdf = {}, project = {nst-homeService}, title = {{Automatic selection of speakers for improved acoustic modelling: recognition of disordered speech with sparse data}}, year = {2014} }
S. Al-Shareef and T. Hain, “An Investigation in Speech Recognition for Colloquial Arabic,” in INTERSPEECH, 2011.
[Bibtex]@inproceedings{Alshareef11IS, author = {S. Al-Shareef and T. Hain}, booktitle = {{INTERSPEECH}}, pdf = {}, title = {{An Investigation in Speech Recognition for Colloquial Arabic}}, year = {2011} }
S. Al-Shareef and T. Hain, “CRF-based Diacritisation of Colloquial Arabic for Automatic Speech Recognition,” in INTERSPEECH, 2012.
[Bibtex]@inproceedings{Alshareef12IS, author = {S. Al-Shareef and T. Hain}, booktitle = {{INTERSPEECH}}, pdf = {}, title = {{CRF-based Diacritisation of Colloquial Arabic for Automatic Speech Recognition}}, year = {2012} }
S. Al-Shareef and T. Hain, “Conditional Random Fields Based Diacritisation of Colloquial Arabic,” in Saudi International Conference, 2012.
[Bibtex]@inproceedings{Alshareef12SIC, author = {S. Al-Shareef and T. Hain}, booktitle = {{Saudi International Conference}}, pdf = {}, title = {{Conditional Random Fields Based Diacritisation of Colloquial Arabic}}, year = {2012} }
- S. Al-Shareef, “Conversational Arabic Automatic Speech Recognition: Literature Review,” The University of Sheffield 2013.
[Bibtex]@techreport{Alshareef13TR, author = {S. Al-Shareef}, institution = {The University of Sheffield}, title = {{Conversational Arabic Automatic Speech Recognition: Literature Review}}, year = {2013} }
C. W. Fox, Y. Liu, E. Zwyssig, and T. Hain, “The Sheffield Wargames Corpus,” in Proc. Interspeech 2013, ISCA, 2013.
[Bibtex]@inproceedings{fox2013, author = {Charles W. Fox and Yulan Liu and Erich Zwyssig and Thomas Hain}, booktitle = {{Proc. Interspeech 2013}}, pdf = {}, project = {NST}, publisher = {ISCA}, title = {{The Sheffield Wargames Corpus}}, year = {2013} }
Y. Liu, P. Zhang, and T. Hain, “Using neural network front-ends on far field multiple microphones based speech recognition,” in Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE International Conference on, 2014, pp. 5542–5546.
[Bibtex]@inproceedings{liu2014, author = {Yulan Liu and Pengyuan Zhang and Thomas Hain}, booktitle = {{Acoustics, Speech and Signal Processing (ICASSP), 2014 IEEE International Conference on}}, doi = {10.1109/ICASSP.2014.6854663}, month = {May}, pages = {5542--5546}, pdf = {}, project = {NST}, title = {{Using neural network front-ends on far field multiple microphones based speech recognition}}, year = {2014} }
Y. Liu, P. Karanasou, and T. Hain, “An Investigation Into Speaker Informed DNN Front-end for LVCSR,” in Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on, 2015.
[Bibtex]@inproceedings{liu2015, author = {Yulan Liu and Penny Karanasou and Thomas Hain}, booktitle = {{Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on}}, keyword = {speech recognition; deep neural network; speaker adaptation; speaker informed training, bias adaptation}, month = {April}, pdf = {}, project = {NST}, title = {{An Investigation Into Speaker Informed DNN Front-end for {LVCSR}}}, year = {2015} }
P. Zhang, Y. Liu, and T. Hain, “Semi-Supervised DNN Training in Meeting Recognition,” in 2014 IEEE Spoken Language Technology Workshop (SLT 2014), South Lake Tahoe, USA, 2014.
[Bibtex]@inproceedings{zhang2014, address = {South Lake Tahoe, USA}, author = {Pengyuan Zhang and Yulan Liu and Thomas Hain}, booktitle = {{2014 IEEE Spoken Language Technology Workshop (SLT 2014)}}, doi = {10.1109/SLT.2014.7078564}, month = {December}, pages = {}, pdf = {}, project = {NST}, title = {{Semi-Supervised DNN Training in Meeting Recognition}}, year = {2014} }
C. Wu, R. W. M. Ng, O. S. Torralba, and T. Hain, “Analysing Acoustic Model Changes for Active Learning in Automatic Speech Recognition,” in 24th International Conference on Systems, Signals and Image Processing (IWSSIP), Poznań, Poland, 2017.
[Bibtex]@inproceedings{wu_iwssip17, address = {Pozna{\'n}, Poland}, author = {Chenhao Wu and Raymond W. M. Ng and Oscar Saz Torralba and Thomas Hain}, booktitle = {{24th International Conference on Systems, Signals and Image Processing (IWSSIP)}}, title = {{Analysing Acoustic Model Changes for Active Learning in Automatic Speech Recognition}}, year = {2017} }
- C. Fox and T. Hain, “Extending Limabeam with discrimination and coarse gradients,” in INTERSPEECH 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14-18, 2014, 2014, p. 2440–2444.
[Bibtex]@inproceedings{DBLP:conf/interspeech/FoxH14, author = {Charles Fox and Thomas Hain}, bibsource = {dblp computer science bibliography,}, biburl = {}, booktitle = {{{INTERSPEECH} 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14-18, 2014}}, crossref = {DBLP:conf/interspeech/2014}, link = {}, pages = {2440--2444}, project = {nst}, timestamp = {Wed, 18 Feb 2015 08:38:47 +0100}, title = {{Extending Limabeam with discrimination and coarse gradients}}, year = {2014} }
- C. Fox, H. Christensen, and T. Hain, “Studio report: Linux audio for multi-speaker natural speech technology.,” in Proc. Linux Audio Conference, 2012.
[Bibtex]@conference{FOX-LAC2012, author = {Charles Fox and H. Christensen and T. Hain}, booktitle = {{Proc. Linux Audio Conference}}, link = {}, project = {nst}, title = {{Studio report: Linux audio for multi-speaker natural speech technology.}}, year = {2012} }
- C. Fox and T. Hain, “Lightly supervised learning from a damaged natural speech corpus,” in Proc. IEEE ICASSP 2013, 2013.
[Bibtex]@conference{WILDCAT, author = {Charles Fox and T. Hain}, booktitle = {{Proc. IEEE ICASSP 2013}}, link = {}, project = {nst}, title = {{Lightly supervised learning from a damaged natural speech corpus}}, year = {2013} }
R. K. Moore and M. Nicolao, “Reactive Speech Synthesis: Actively Managing Phonetic Contrast Along an H&H Continuum,” in Proceedings of the 17th International Congress of Phonetic Sciences, ICPhS 2011, Hong Kong, China, 2011, p. 1422–1425.
[Bibtex]@inproceedings{moore_icphs11, address = {Hong Kong, China}, author = {Moore, Roger K and Nicolao, Mauro}, booktitle = {{Proceedings of the 17th International Congress of Phonetic Sciences, ICPhS 2011}}, keyword = {feedback control, hypo/hyper-articulation, reactive speech synthesis}, month = {aug}, pages = {1422--1425}, project = {SCALE}, title = {{Reactive Speech Synthesis: Actively Managing Phonetic Contrast Along an H{\&}H Continuum}}, year = {2011} }
M. Nicolao, A. V. Beeston, and T. Hain, “Automatic Assessment of English Learner Pronunciation Using Discriminative Classifiers,” in IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2015, Brisbane, Australia, 2015, p. 5351–5355.
[Bibtex]@inproceedings{nicolao_icassp2015, address = {Brisbane, Australia}, author = {Nicolao, Mauro and Beeston, Amy V and Hain, Thomas}, booktitle = {{IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2015}}, doi = {10.1109/ICASSP.2015.7178993}, month = {apr}, pages = {5351--5355}, project = {ITSLanguage}, title = {{Automatic Assessment of English Learner Pronunciation Using Discriminative Classifiers}}, year = {2015} }
- M. Nicolao, J. Latorre, and R. K. Moore, “C2H: A Computational Model of H&H-based Phonetic Contrast in Synthetic Speech,” in Proceedings of 13th Annual Conference of the International Speech Communication Association, INTERSPEECH 2012, Portland, OR, 2012.
[Bibtex]@inproceedings{nicolao_is12, address = {Portland, OR}, author = {Nicolao, Mauro and Latorre, Javier and Moore, Roger K}, booktitle = {{Proceedings of 13th Annual Conference of the International Speech Communication Association, INTERSPEECH 2012}}, month = {sep}, project = {SCALE}, title = {{C2H: A Computational Model of H{\&}H-based Phonetic Contrast in Synthetic Speech}}, year = {2012} }
M. Nicolao, H. Christensen, S. Cunningham, P. Green, and T. Hain, “A framework for collecting realistic recordings of dysarthric speech – the homeService corpus,” in The International Conference on Language Resources and Evaluation – LREC 2016, Portorož, SLO, 2016.
[Bibtex]@inproceedings{nicolao_lrec2016, address = {Portorož, SLO}, author = {Nicolao, Mauro and Christensen, Heidi and Cunningham, Stuart and Green, Phil and Hain, Thomas}, booktitle = {{The International Conference on Language Resources and Evaluation - LREC 2016}}, project = {nst,nst-homeservice}, title = {{A framework for collecting realistic recordings of dysarthric speech - the homeService corpus}}, year = {2016} }
- M. Nicolao and R. K. Moore, “Establishing some principles of human speech production through two-dimensional computational models,” in SAPA-SCALE workshop 2012, Portland, OR, 2012.
[Bibtex]@inproceedings{nicolao_sapa12, address = {Portland, OR}, author = {Nicolao, Mauro and Moore, Roger K}, booktitle = {{SAPA-SCALE workshop 2012}}, month = {aug}, project = {SCALE}, title = {{Establishing some principles of human speech production through two-dimensional computational models}}, year = {2012} }
M. Nicolao, F. Tesser, and R. K. Moore, “A phonetic-contrast motivated adaptation to control the degree-of-articulation on Italian HMM-based synthetic voices,” in 8th ISCA Workshop on Speech Synthesis, Barcelona, Spain, 2013, p. 127–132.
[Bibtex]@inproceedings{nicolao_ssw8, address = {Barcelona, Spain}, author = {Nicolao, Mauro and Tesser, Fabio and Moore, Roger K}, booktitle = {{8th ISCA Workshop on Speech Synthesis}}, month = {August}, pages = {127--132}, project = {SCALE}, title = {{A phonetic-contrast motivated adaptation to control the degree-of-articulation on Italian HMM-based synthetic voices}}, year = {2013} }
- P. Bell, M. Gales, T. Hain, J. Kilgour, P. Lanchantin, A. Liu, A. McParland, S. Renals, O. Saz, M. Wester, and P. Woodland, “The MGB Challenge: Evaluating Multi-genre Broadcast Media Recognition,” in Proceedings of the 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), Scottsdale, AZ, 2015.
[Bibtex]@inproceedings{Bell_ASRU, address = {Scottsdale, AZ}, author = {Peter Bell and Mark Gales and Thomas Hain and Jonathan Kilgour and Pierre Lanchantin and Andrew Liu and Andrew McParland and Steve Renals and Oscar Saz and Mirjam Wester and Phil Woodland}, booktitle = {{Proceedings of the 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)}}, project = {nst}, title = {{The MGB Challenge: Evaluating Multi-genre Broadcast Media Recognition}}, year = {2015} }
O. Saz and T. Hain, “Asynchronous Factorisation of Speaker and Background with Feature Transforms in Speech Recognition,” in Proceedings of the 14th Annual Conference of the International Speech Communication Association (Interspeech), Lyon, France, 2013, p. 1238–1242.
[Bibtex]@inproceedings{Saz13, address = {Lyon, France}, author = {Oscar Saz and Thomas Hain}, booktitle = {{Proceedings of the 14th Annual Conference of the International Speech Communication Association (Interspeech)}}, pages = {1238--1242}, project = {nst}, title = {{Asynchronous Factorisation of Speaker and Background with Feature Transforms in Speech Recognition}}, year = {2013} }
- P. Lanchantin, P. J. Bell, M. J. F. Gales, T. Hain, X. Liu, Y. Long, J. Quinnell, S. Renals, O. Saz, M. S. Seigel, P. Swietojanski, and P. C. Woodland, “Automatic Transcription of Multi-Genre Media Archives,” in Proceedings of the First Workshop on Speech, Language and Audio in Multimedia, Marseille, France, 2013, p. 26–31.
[Bibtex]@inproceedings{Saz13b, address = {Marseille, France}, author = {P. Lanchantin and P. J. Bell and M. J. F. Gales and Thomas Hain and X. Liu and Y. Long and J. Quinnell and S. Renals and Oscar Saz and M. S. Seigel and P. Swietojanski and P. C. Woodland}, booktitle = {{Proceedings of the First Workshop on Speech, Language and Audio in Multimedia}}, pages = {26--31}, project = {nst}, title = {{Automatic Transcription of Multi-Genre Media Archives}}, year = {2013} }
- O. Saz and T. Hain, “Using Contextual Information in Joint Factor Eigenspace MLLR for Speech Recognition in Diverse Scenarios,” in Proceedings of the 2014 International Conference on Acoustic, Speech and Signal Processing (ICASSP), Florence, Italy, 2014, p. 6314–6318.
[Bibtex]@inproceedings{Saz14, address = {Florence, Italy}, author = {Oscar Saz and Thomas Hain}, booktitle = {{Proceedings of the 2014 International Conference on Acoustic, Speech and Signal Processing (ICASSP)}}, pages = {6314--6318}, project = {nst}, title = {{Using Contextual Information in Joint Factor Eigenspace MLLR for Speech Recognition in Diverse Scenarios}}, year = {2014} }
O. Saz, M. Doulaty, and T. Hain, “Background-Tracking Acoustic Features for Genre Identification of Broadcast Shows,” in Proceedings of the 2014 Spoken Language Technology (SLT) Workshop, South Lake Tahoe NV, USA, 2014, p. 118–123.
[Bibtex]@inproceedings{Saz14b, address = {South Lake Tahoe NV, USA}, author = {Oscar Saz and Mortaza Doulaty and Thomas Hain}, booktitle = {{Proceedings of the 2014 Spoken Language Technology (SLT) Workshop}}, pages = {118--123}, project = {nst}, title = {{Background-Tracking Acoustic Features for Genre Identification of Broadcast Shows}}, year = {2014} }
- O. Saz, M. Doulaty, S. Deena, R. Milner, R. Ng, M. Hasan, Y. Liu, and T. Hain, “The 2015 Sheffield System for Transcription of Multi-Genre Broadcast Media,” in Proceedings of the 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), Scottsdale, AZ, 2015.
[Bibtex]@inproceedings{Saz_ASRU, address = {Scottsdale, AZ}, author = {Oscar Saz and Mortaza Doulaty and Salil Deena and Rosanna Milner and Raymond Ng and Madina Hasan and Yulan Liu and Thomas Hain}, booktitle = {{Proceedings of the 2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)}}, project = {nst}, title = {{The 2015 Sheffield System for Transcription of Multi-Genre Broadcast Media}}, year = {2015} }
- R. W. M. Ng, K. Shah, W. Aziz, L. Specia, and T. Hain, “Quality estimation for ASR k-best list rescoring in spoken language translation,” in 2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2015.
[Bibtex]@inproceedings{ng_icassp15, author = {Raymond W. M. Ng and Kashif Shah and Wilker Aziz and Lucia Specia and Thomas Hain}, booktitle = {{2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}}, keyword = {spoken language translation, quality estimation, system integration}, month = {April}, project = {WFST,NST}, title = {{Quality estimation for {ASR} k-best list rescoring in spoken language translation}}, year = {2015} }
R. W. M. Ng, K. Shah, L. Specia, and T. Hain, “Groupwise learning for ASR k-best list reranking in spoken language translation,” in 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2016.
[Bibtex]@inproceedings{ng_icassp16, author = {Raymond W. M. Ng and Kashif Shah and Lucia Specia and Thomas Hain}, booktitle = {{2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}}, keyword = {groupwise learning, spoken language translation}, month = {March}, project = {WFST,NST}, title = {{Groupwise learning for ASR k-best list reranking in spoken language translation}}, year = {2016} }
R. W. M. Ng, A. C. M. Kwan, T. Lee, and T. Hain, “ShefCE: A Cantonese-English bilingual speech corpus for pronunciation assessment,” in 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2017.
[Bibtex]@inproceedings{ng_icassp17, author = {Raymond W. M. Ng and Alvin C. M. Kwan and Tan Lee and Thomas Hain}, booktitle = {{2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}}, keyword = {corpus, pronunciation assessment, Cantonese}, month = {March}, project = {WFST,PRONOUNCE}, title = {Shef{CE}: A {C}antonese-{E}nglish bilingual speech corpus for pronunciation assessment}, year = {2017} }
R. W. M. Ng, K. Shah, L. Specia, and T. Hain, “A study on the stability and effectiveness of features in quality estimation for spoken language translation,” in the 16th Annual Conference of the International Speech Communication Association (Interspeech), 2015.
[Bibtex]@inproceedings{ng_is15, author = {Raymond W. M. Ng and Kashif Shah and Lucia Specia and Thomas Hain}, booktitle = {{the 16th Annual Conference of the International Speech Communication Association (Interspeech)}}, keyword = {spoken language translation, quality estimation, system robustness}, month = {September}, project = {WFST,NST}, title = {{A study on the stability and effectiveness of features in quality estimation for spoken language translation}}, year = {2015} }
R. W. M. Ng, B. Chettri, and T. Hain, “Combining weak tokenisers for phonotactic language recognition in a resource-constrained setting,” in the 17th Annual Conference of the International Speech Communication Association (Interspeech), 2016.
[Bibtex]@inproceedings{ng_is16, author = {Raymond W. M. Ng and Bhusan Chettri and Thomas Hain}, booktitle = {{the 17th Annual Conference of the International Speech Communication Association (Interspeech)}}, keyword = {language recognition}, month = {September}, project = {NST}, title = {{Combining weak tokenisers for phonotactic language recognition in a resource-constrained setting}}, year = {2016} }
R. W. M. Ng, M. Doulaty, R. Doddipatla, O. Saz, M. Hasan, T. Hain, W. Aziz, K. Shah, and L. Specia, “The USFD spoken language translation system for IWSLT 2014,” in 2014 International Workshop on Spoken Language Translation (IWSLT), Lake Tahoe, USA, 2014.
[Bibtex]@inproceedings{ng_iwslt14, address = {Lake Tahoe, USA}, author = {Raymond W. M. Ng and Mortaza Doulaty and Rama Doddipatla and Oscar Saz and Madina Hasan and Thomas Hain and Wilker Aziz and Kashif Shah and Lucia Specia}, booktitle = {{2014 International Workshop on Spoken Language Translation (IWSLT)}}, title = {{The {USFD} spoken language translation system for {IWSLT} 2014}}, year = {2014} }
R. W. M. Ng, M. Nicolao, O. Saz, M. Hasan, B. Chettri, M. Doulaty, T. Lee, and T. Hain, “Sheffield LRE 2015 System Description,” in Odyssey: The Speaker and Language Recognition Workshop, 2016.
[Bibtex]@inproceedings{ng_odyssey16, author = {Raymond W. M. Ng and Mauro Nicolao and Oscar Saz and Madina Hasan and Bhusan Chettri and Mortaza Doulaty and Tan Lee and Thomas Hain}, booktitle = {{Odyssey: The Speaker and Language Recognition Workshop}}, month = {June}, project = {NST}, title = {{Sheffield {LRE} 2015 System Description}}, year = {2016} }
- K. Shah, R. W. M. Ng, F. Bougares, and L. Specia, “Investigating continuous space language models for machine translation quality estimation,” in 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP), 2015.
[Bibtex]@inproceedings{shah_emnlp15, author = {Kashif Shah and Raymond W. M. Ng and Fethi Bougares and Lucia Specia}, booktitle = {{2015 Conference on Empirical Methods in Natural Language Processing (EMNLP)}}, keyword = {machine translation, quality estimation}, month = {September}, project = {WFST}, title = {{Investigating continuous space language models for machine translation quality estimation}}, year = {2015} }