% Bibliography database: speech synthesis (HMM / DNN based) and neural-network references.
% Conventions used for the entries below:
%   - one field per line, lowercase entry types and field names;
%   - names in "Last, First" form, separated by " and ";
%   - page ranges with "--", months as unquoted three-letter macros;
%   - DOIs stored bare (no resolver prefix);
%   - acronyms and camel-case names in titles protected with braces.
% Fields unknown to standard styles (keywords, timestamp, biburl, acmid, ...) are
% ignored by BibTeX and are retained as metadata.

@article{Tokuda-2013,
  author    = {Tokuda, Keiichi and Nankaku, Yoshihiko and Toda, Tomoki and Zen, Heiga and Yamagishi, Junichi and Oura, Keiichiro},
  title     = {Speech Synthesis Based on Hidden {Markov} Models},
  journal   = {Proceedings of the IEEE},
  volume    = {101},
  number    = {5},
  pages     = {1234--1252},
  month     = may,
  year      = {2013},
  publisher = {IEEE},
  issn      = {0018-9219},
  issne     = {1558-2256},
  doi       = {10.1109/JPROC.2013.2251852},
}

@misc{Lu_combininga,
  author        = {Lu, Heng and King, Simon and Watts, Oliver},
  title         = {Combining a Vector Space Representation of Linguistic Context with a Deep Neural Network for Text-To-Speech Synthesis},
  internal-note = {The exported record had an empty year and no venue. TODO(review): confirm publication year and venue, then switch to a more specific entry type.},
}

@inproceedings{Hashimoto-2015,
  author    = {Hashimoto, K. and Oura, K. and Nankaku, Y. and Tokuda, K.},
  title     = {The effect of neural networks in statistical parametric speech synthesis},
  booktitle = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {4455--4459},
  month     = apr,
  year      = {2015},
  issn      = {1520-6149},
  doi       = {10.1109/ICASSP.2015.7178813},
  keywords  = {neural nets;speech synthesis;statistical analysis;statistical parametric speech synthesis;deep neural networks;generative models;acoustic models;parameter generation;Hidden Markov models;Artificial neural networks;Speech;Statistical parametric speech synthesis;deep neural network;hidden Markov model},
}

@inproceedings{Yin2014ModelingDP,
  author    = {Yin, Xiang and Lei, Ming and Hong, Zhiliang and Soong, Frank K. and He, Lei and Ling, Zhen-Hua and Dai, Li-Rong},
  title     = {Modeling {DCT} parameterized {F0} trajectory at intonation phrase level with {DNN} or decision tree},
  booktitle = {INTERSPEECH},
  year      = {2014},
}

@inproceedings{OnTheTrainingAspects,
  author    = {Qian, Y. and Fan, Y. and Hu, W. and Soong, F. K.},
  title     = {On the training aspects of Deep Neural Network ({DNN}) for parametric {TTS} synthesis},
  booktitle = {2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages     = {3829--3833},
  month     = may,
  year      = {2014},
  issn      = {1520-6149},
  doi       = {10.1109/ICASSP.2014.6854318},
  keywords  = {backpropagation;feature extraction;neural nets;speech synthesis;DNN training;deep neural network;parametric TTS synthesis;text-to-speech synthesis;text features;acoustic features;objective measure;subjective measure;HMM;hidden Markov model;diagonal Gaussian probability family;layer-wise BP pretraining;backpropagation;hyperbolic tangent activation function;sigmoidal function;Decision support systems;Conferences;Acoustics;Speech;Speech processing;Speech Synthesis;HMM;DNN;TTS},
}

@inproceedings{HMMSpeakerInterpolation,
  author    = {Yoshimura, Takayoshi and Masuko, Takashi and Tokuda, Keiichi and Kobayashi, Takao and Kitamura, Tadashi},
  title     = {Speaker interpolation in {HMM}-based speech synthesis system},
  booktitle = {EUROSPEECH},
  year      = {1997},
}

@article{STRAIGHT,
  author   = {Kawahara, Hideki and Masuda-Katsuse, Ikuyo and de Cheveign{\'e}, Alain},
  title    = {Restructuring speech representations using a pitch-adaptive time--frequency smoothing and an instantaneous-frequency-based {F0} extraction: Possible role of a repetitive structure in sounds},
  journal  = {Speech Communication},
  volume   = {27},
  number   = {3},
  pages    = {187--207},
  year     = {1999},
  issn     = {0167-6393},
  doi      = {10.1016/S0167-6393(98)00085-5},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167639398000855},
  keywords = {Speech analysis, Pitch-synchronous, Spline smoothing, Instantaneous frequency, F0 extraction, Speech synthesis, Speech modification},
}

@incollection{TTSSOTA,
  author    = {Georgila, Kallirroi},
  title     = {Speech Synthesis: State of the Art and Challenges for the Future},
  booktitle = {Social Signal Processing},
  editor    = {Burgoon, Judee K. and Magnenat-Thalmann, Nadia and Pantic, Maja and Vinciarelli, Alessandro},
  pages     = {257--272},
  year      = {2017},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  doi       = {10.1017/9781316676202.019},
}

@inproceedings{SPSSDNN,
  author    = {Zen, H. and Senior, A. and Schuster, M.},
  title     = {Statistical parametric speech synthesis using deep neural networks},
  booktitle = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {7962--7966},
  month     = may,
  year      = {2013},
  issn      = {1520-6149},
  doi       = {10.1109/ICASSP.2013.6639215},
  keywords  = {hidden Markov models;neural nets;speech synthesis;statistical parametric speech synthesis;deep neural networks;decision tree clustered context dependent hidden Markov models;HMM;probability densities;speech parameters;speech waveform;decision trees;acoustic realizations;Hidden Markov models;Speech;Speech synthesis;Decision trees;Context;Training data;Neural networks;Statistical parametric speech synthesis;Hidden Markov model;Deep neural network},
}

@inproceedings{MLSA,
  author    = {Imai, S.},
  title     = {Cepstral analysis synthesis on the mel frequency scale},
  booktitle = {ICASSP '83. IEEE International Conference on Acoustics, Speech, and Signal Processing},
  volume    = {8},
  pages     = {93--96},
  month     = apr,
  year      = {1983},
  doi       = {10.1109/ICASSP.1983.1172250},
  keywords  = {Cepstral analysis;Frequency synthesizers;Speech synthesis;Mel frequency cepstral coefficient;Vocoders;Speech analysis;Nonlinear filters;Fourier transforms;Cepstrum;Quantization},
}

@inproceedings{HMMTTS,
  author    = {Yoshimura, Takayoshi and Tokuda, Keiichi and Masuko, Takashi and Kobayashi, Takao and Kitamura, Tadashi},
  title     = {Simultaneous modeling of spectrum, pitch and duration in {HMM}-based speech synthesis},
  booktitle = {EUROSPEECH},
  year      = {1999},
}

@article{dilated,
  author        = {Yu, Fisher and Koltun, Vladlen},
  title         = {Multi-Scale Context Aggregation by Dilated Convolutions},
  journal       = {CoRR},
  volume        = {abs/1511.07122},
  year          = {2015},
  url           = {http://arxiv.org/abs/1511.07122},
  archivePrefix = {arXiv},
  eprint        = {1511.07122},
  timestamp     = {Wed, 07 Jun 2017 14:40:43 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/YuK15},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
}

@article{scalingPolicy,
  author    = {Lowe, Will and Benoit, Kenneth and Mikhaylov, Slava and Laver, Michael},
  title     = {Scaling Policy Preferences from Coded Political Texts},
  journal   = {Legislative Studies Quarterly},
  volume    = {36},
  number    = {1},
  pages     = {123--155},
  year      = {2011},
  publisher = {Blackwell Publishing Inc},
  issn      = {1939-9162},
  doi       = {10.1111/j.1939-9162.2010.00006.x},
  url       = {http://dx.doi.org/10.1111/j.1939-9162.2010.00006.x},
}

@article{depecheMood,
  author        = {Staiano, Jacopo and Guerini, Marco},
  title         = {{DepecheMood}: a Lexicon for Emotion Analysis from Crowd-Annotated News},
  journal       = {CoRR},
  volume        = {abs/1405.1605},
  year          = {2014},
  url           = {http://arxiv.org/abs/1405.1605},
  archivePrefix = {arXiv},
  eprint        = {1405.1605},
  timestamp     = {Wed, 07 Jun 2017 14:41:41 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/StaianoG14},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
}

@article{GRULSTMcomp,
  author        = {Chung, Junyoung and G{\"{u}}l{\c{c}}ehre, {\c{C}}aglar and Cho, KyungHyun and Bengio, Yoshua},
  title         = {Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling},
  journal       = {CoRR},
  volume        = {abs/1412.3555},
  year          = {2014},
  url           = {http://arxiv.org/abs/1412.3555},
  archivePrefix = {arXiv},
  eprint        = {1412.3555},
  timestamp     = {Wed, 07 Jun 2017 14:40:04 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/ChungGCB14},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
}

@article{GRU,
  author        = {Cho, KyungHyun and van Merrienboer, Bart and Bahdanau, Dzmitry and Bengio, Yoshua},
  title         = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
  journal       = {CoRR},
  volume        = {abs/1409.1259},
  year          = {2014},
  url           = {http://arxiv.org/abs/1409.1259},
  archivePrefix = {arXiv},
  eprint        = {1409.1259},
  timestamp     = {Wed, 07 Jun 2017 14:42:33 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/ChoMBB14},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
}

@article{sentimentNeuron,
  author        = {Radford, Alec and J{\'{o}}zefowicz, Rafal and Sutskever, Ilya},
  title         = {Learning to Generate Reviews and Discovering Sentiment},
  journal       = {CoRR},
  volume        = {abs/1704.01444},
  year          = {2017},
  url           = {http://arxiv.org/abs/1704.01444},
  archivePrefix = {arXiv},
  eprint        = {1704.01444},
  timestamp     = {Wed, 07 Jun 2017 14:43:05 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/RadfordJS17},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
}

@article{LSTM,
  author     = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title      = {Long Short-Term Memory},
  journal    = {Neural Computation},
  volume     = {9},
  number     = {8},
  pages      = {1735--1780},
  month      = nov,
  year       = {1997},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
  issn       = {0899-7667},
  doi        = {10.1162/neco.1997.9.8.1735},
  url        = {http://dx.doi.org/10.1162/neco.1997.9.8.1735},
  issue_date = {November 15, 1997},
  numpages   = {46},
  acmid      = {1246450},
}

@article{wavenet,
  author        = {van den Oord, A{\"{a}}ron and Dieleman, Sander and Zen, Heiga and Simonyan, Karen and Vinyals, Oriol and Graves, Alex and Kalchbrenner, Nal and Senior, Andrew W. and Kavukcuoglu, Koray},
  title         = {{WaveNet}: {A} Generative Model for Raw Audio},
  journal       = {CoRR},
  volume        = {abs/1609.03499},
  year          = {2016},
  url           = {http://arxiv.org/abs/1609.03499},
  archivePrefix = {arXiv},
  eprint        = {1609.03499},
  timestamp     = {Wed, 07 Jun 2017 14:42:54 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/OordDZSVGKSK16},
  bibsource     = {dblp computer science bibliography, http://dblp.org},
}

@inproceedings{CLDNNs,
  author    = {Sainath, Tara N. and Weiss, Ron J. and Senior, Andrew W. and Wilson, Kevin W. and Vinyals, Oriol},
  title     = {Learning the speech front-end with raw waveform {CLDNNs}},
  booktitle = {INTERSPEECH},
  year      = {2015},
}

@article{lenet,
  author              = {LeCun, Y. and Bottou, L. and Bengio, Y. and Haffner, P.},
  title               = {Gradient-based learning applied to document recognition},
  journal             = {Proceedings of the IEEE},
  volume              = {86},
  number              = {11},
  pages               = {2278--2324},
  month               = nov,
  year                = {1998},
  publisher           = {IEEE},
  institution         = {Speech \& Image Process. Services Lab., AT\&T Bell Labs., Red Bank, NJ, USA},
  issn                = {0018-9219},
  doi                 = {10.1109/5.726791},
  url                 = {http://dx.doi.org/10.1109/5.726791},
  keywords            = {cnn, lenet-5},
  abstract            = {Multilayer neural networks trained with the back-propagation algorithm constitute the best example of a successful gradient based learning technique. Given an appropriate network architecture, gradient-based learning algorithms can be used to synthesize a complex decision surface that can classify high-dimensional patterns, such as handwritten characters, with minimal preprocessing. This paper reviews various methods applied to handwritten character recognition and compares them on a standard handwritten digit recognition task. Convolutional neural networks, which are specifically designed to deal with the variability of 2D shapes, are shown to outperform all other techniques. Real-life document recognition systems are composed of multiple modules including field extraction, segmentation recognition, and language modeling. A new learning paradigm, called graph transformer networks (GTN), allows such multimodule systems to be trained globally using gradient-based methods so as to minimize an overall performance measure. Two systems for online handwriting recognition are described. Experiments demonstrate the advantage of global training, and the flexibility of graph transformer networks. A graph transformer network for reading a bank cheque is also described. It uses convolutional neural network character recognizers combined with global training techniques to provide record accuracy on business and personal cheques. It is deployed commercially and reads several million cheques per day},
  day                 = {06},
  citeulike-article-id = {4196818},
  citeulike-linkout-0 = {http://dx.doi.org/10.1109/5.726791},
  citeulike-linkout-1 = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=726791},
  posted-at           = {2016-06-08 06:38:36},
  priority            = {0},
}

@incollection{alexnet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 25},
  editor    = {Pereira, F. and Burges, C. J. C. and Bottou, L. and Weinberger, K. Q.},
  pages     = {1097--1105},
  year      = {2012},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf},
}

@article{dropout,
  author     = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title      = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal    = {Journal of Machine Learning Research},
  volume     = {15},
  number     = {1},
  pages      = {1929--1958},
  month      = jan,
  year       = {2014},
  publisher  = {JMLR.org},
  issn       = {1532-4435},
  url        = {http://dl.acm.org/citation.cfm?id=2627435.2670313},
  keywords   = {deep learning, model combination, neural networks, regularization},
  issue_date = {January 2014},
  numpages   = {30},
  acmid      = {2670313},
}