references.bib 14.3 KB
Newer Older
1 2 3



4 5


6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
% Lu, King and Watts: vector-space linguistic context combined with a DNN for TTS.
% NOTE(review): the original entry had an empty year and no venue; the workshop
% and year below are believed to be correct (SSW8) but should be confirmed.
@inproceedings{Lu_combininga,
  author    = {Lu, Heng and King, Simon and Watts, Oliver},
  title     = {Combining a Vector Space Representation of Linguistic Context with a Deep Neural Network for Text-To-Speech Synthesis},
  booktitle = {8th {ISCA} Speech Synthesis Workshop ({SSW8})},
  year      = {2013}
}
% Hashimoto et al., ICASSP 2015 (IEEE Xplore export, empty fields removed).
@inproceedings{Hashimoto-2015,
  author    = {Hashimoto, K. and Oura, K. and Nankaku, Y. and Tokuda, K.},
  title     = {The effect of neural networks in statistical parametric speech synthesis},
  booktitle = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2015},
  month     = apr,
  pages     = {4455--4459},
  keywords  = {neural nets;speech synthesis;statistical analysis;statistical parametric speech synthesis;deep neural networks;generative models;acoustic models;parameter generation;Hidden Markov models;Artificial neural networks;Speech;Statistical parametric speech synthesis;deep neural network;hidden Markov model},
  doi       = {10.1109/ICASSP.2015.7178813},
  issn      = {1520-6149}
}
C
Cleanup  
Corentin Jemine 已提交
24 25 26 27 28 29
% Yin et al., INTERSPEECH 2014. Acronyms in the title are braced so that
% sentence-casing styles do not lowercase them.
@inproceedings{Yin2014ModelingDP,
  author    = {Yin, Xiang and Lei, Ming and Hong, Zhiliang and Soong, Frank K. and He, Lei and Ling, Zhen-Hua and Dai, Li-Rong},
  title     = {Modeling {DCT} parameterized {F0} trajectory at intonation phrase level with {DNN} or decision tree},
  booktitle = {INTERSPEECH},
  year      = {2014}
}
30
% Qian et al., ICASSP 2014 (IEEE Xplore export, empty fields removed).
@inproceedings{OnTheTrainingAspects,
  author    = {Qian, Y. and Fan, Y. and Hu, W. and Soong, F. K.},
  title     = {On the training aspects of Deep Neural Network ({DNN}) for parametric {TTS} synthesis},
  booktitle = {2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2014},
  month     = may,
  pages     = {3829--3833},
  keywords  = {backpropagation;feature extraction;neural nets;speech synthesis;DNN training;deep neural network;parametric TTS synthesis;text-to-speech synthesis;text features;acoustic features;objective measure;subjective measure;HMM;hidden Markov model;diagonal Gaussian probability family;layer-wise BP pretraining;backpropagation;hyperbolic tangent activation function;sigmoidal function;Decision support systems;Conferences;Acoustics;Speech;Speech processing;Speech Synthesis;HMM;DNN;TTS},
  doi       = {10.1109/ICASSP.2014.6854318},
  issn      = {1520-6149}
}
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
% Yoshimura et al., EUROSPEECH 1997. The acronym in the title is braced to
% survive sentence-casing.
@inproceedings{HMMSpeakerInterpolation,
  author    = {Yoshimura, Takayoshi and Masuko, Takashi and Tokuda, Keiichi and Kobayashi, Takao and Kitamura, Tadashi},
  title     = {Speaker interpolation in {HMM}-based speech synthesis system},
  booktitle = {EUROSPEECH},
  year      = {1997}
}
% Kawahara et al., Speech Communication 27(3), 1999.
% The publisher's footnote text that had been fused onto the end of the title
% is removed; the DOI is stored bare (no resolver prefix).
@article{STRAIGHT,
  author   = {Kawahara, Hideki and Masuda-Katsuse, Ikuyo and de Cheveign{\'e}, Alain},
  title    = {Restructuring speech representations using a pitch-adaptive time--frequency smoothing and an instantaneous-frequency-based {F0} extraction: Possible role of a repetitive structure in sounds},
  journal  = {Speech Communication},
  volume   = {27},
  number   = {3},
  pages    = {187--207},
  year     = {1999},
  issn     = {0167-6393},
  doi      = {10.1016/S0167-6393(98)00085-5},
  url      = {http://www.sciencedirect.com/science/article/pii/S0167639398000855},
  keywords = {Speech analysis, Pitch-synchronous, Spline smoothing, Instantaneous frequency, F0 extraction, Speech synthesis, Speech modification}
}
% Georgila, chapter in the edited volume "Social Signal Processing" (CUP, 2017).
% Chapter with its own author inside an edited book, hence incollection.
@incollection{TTSSOTA,
  author    = {Georgila, Kallirroi},
  title     = {Speech Synthesis: State of the Art and Challenges for the Future},
  booktitle = {Social Signal Processing},
  editor    = {Burgoon, Judee K. and Magnenat-Thalmann, Nadia and Pantic, Maja and Vinciarelli, Alessandro},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {2017},
  pages     = {257--272},
  doi       = {10.1017/9781316676202.019}
}
% Zen, Senior and Schuster, ICASSP 2013 (IEEE Xplore export, empty fields removed).
@inproceedings{SPSSDNN,
  author    = {Zen, H. and Senior, A. and Schuster, M.},
  title     = {Statistical parametric speech synthesis using deep neural networks},
  booktitle = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2013},
  month     = may,
  pages     = {7962--7966},
  keywords  = {hidden Markov models;neural nets;speech synthesis;statistical parametric speech synthesis;deep neural networks;decision tree clustered context dependent hidden Markov models;HMM;probability densities;speech parameters;speech waveform;decision trees;acoustic realizations;Hidden Markov models;Speech;Speech synthesis;Decision trees;Context;Training data;Neural networks;Statistical parametric speech synthesis;Hidden Markov model;Deep neural network},
  doi       = {10.1109/ICASSP.2013.6639215},
  issn      = {1520-6149}
}
% Imai, ICASSP 1983 (IEEE Xplore export, empty fields removed).
@inproceedings{MLSA,
  author    = {Imai, S.},
  title     = {Cepstral analysis synthesis on the mel frequency scale},
  booktitle = {ICASSP '83. IEEE International Conference on Acoustics, Speech, and Signal Processing},
  year      = {1983},
  month     = apr,
  volume    = {8},
  pages     = {93--96},
  keywords  = {Cepstral analysis;Frequency synthesizers;Speech synthesis;Mel frequency cepstral coefficient;Vocoders;Speech analysis;Nonlinear filters;Fourier transforms;Cepstrum;Quantization},
  doi       = {10.1109/ICASSP.1983.1172250}
}
% Yoshimura et al., EUROSPEECH 1999. The acronym in the title is braced to
% survive sentence-casing.
@inproceedings{HMMTTS,
  author    = {Yoshimura, Takayoshi and Tokuda, Keiichi and Masuko, Takashi and Kobayashi, Takao and Kitamura, Tadashi},
  title     = {Simultaneous modeling of spectrum, pitch and duration in {HMM}-based speech synthesis},
  booktitle = {EUROSPEECH},
  year      = {1999}
}
% Yu and Koltun, arXiv 1511.07122 (record exported from dblp).
@article{dilated,
  title = {Multi-Scale Context Aggregation by Dilated Convolutions},
  author = {Yu, Fisher and Koltun, Vladlen},
  year = {2015},
  journal = {CoRR},
  volume = {abs/1511.07122},
  archivePrefix = {arXiv},
  eprint = {1511.07122},
  url = {http://arxiv.org/abs/1511.07122},
  timestamp = {Wed, 07 Jun 2017 14:40:43 +0200},
  biburl = {http://dblp.org/rec/bib/journals/corr/YuK15},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Lowe et al., Legislative Studies Quarterly 36(1), 2011.
% Author names are stored in normal case; capitalisation is left to the style.
@article{scalingPolicy,
  author    = {Lowe, Will and Benoit, Kenneth and Mikhaylov, Slava and Laver, Michael},
  title     = {Scaling Policy Preferences from Coded Political Texts},
  journal   = {Legislative Studies Quarterly},
  volume    = {36},
  number    = {1},
  pages     = {123--155},
  year      = {2011},
  publisher = {Blackwell Publishing Inc},
  issn      = {1939-9162},
  doi       = {10.1111/j.1939-9162.2010.00006.x},
  url       = {http://dx.doi.org/10.1111/j.1939-9162.2010.00006.x}
}


% Staiano and Guerini, arXiv 1405.1605 (record exported from dblp).
% The camelCase resource name is braced so styles cannot recase it.
@article{depecheMood,
  author        = {Staiano, Jacopo and Guerini, Marco},
  title         = {{DepecheMood}: a Lexicon for Emotion Analysis from Crowd-Annotated News},
  journal       = {CoRR},
  volume        = {abs/1405.1605},
  year          = {2014},
  url           = {http://arxiv.org/abs/1405.1605},
  archivePrefix = {arXiv},
  eprint        = {1405.1605},
  timestamp     = {Wed, 07 Jun 2017 14:41:41 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/StaianoG14},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% Chung et al., arXiv 1412.3555 (record exported from dblp).
@article{GRULSTMcomp,
  title = {Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling},
  author = {Chung, Junyoung and G{\"{u}}l{\c{c}}ehre, {\c{C}}aglar and Cho, KyungHyun and Bengio, Yoshua},
  year = {2014},
  journal = {CoRR},
  volume = {abs/1412.3555},
  archivePrefix = {arXiv},
  eprint = {1412.3555},
  url = {http://arxiv.org/abs/1412.3555},
  timestamp = {Wed, 07 Jun 2017 14:40:04 +0200},
  biburl = {http://dblp.org/rec/bib/journals/corr/ChungGCB14},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Cho et al., arXiv 1409.1259 (record exported from dblp).
% The diaeresis in the second author's surname is restored as a BibTeX
% special character.
@article{GRU,
  author        = {Cho, KyungHyun and van Merri{\"e}nboer, Bart and Bahdanau, Dzmitry and Bengio, Yoshua},
  title         = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
  journal       = {CoRR},
  volume        = {abs/1409.1259},
  year          = {2014},
  url           = {http://arxiv.org/abs/1409.1259},
  archivePrefix = {arXiv},
  eprint        = {1409.1259},
  timestamp     = {Wed, 07 Jun 2017 14:42:33 +0200},
  biburl        = {http://dblp.org/rec/bib/journals/corr/ChoMBB14},
  bibsource     = {dblp computer science bibliography, http://dblp.org}
}

% Radford et al., arXiv 1704.01444 (record exported from dblp).
@article{sentimentNeuron,
  title = {Learning to Generate Reviews and Discovering Sentiment},
  author = {Radford, Alec and J{\'{o}}zefowicz, Rafal and Sutskever, Ilya},
  year = {2017},
  journal = {CoRR},
  volume = {abs/1704.01444},
  archivePrefix = {arXiv},
  eprint = {1704.01444},
  url = {http://arxiv.org/abs/1704.01444},
  timestamp = {Wed, 07 Jun 2017 14:43:05 +0200},
  biburl = {http://dblp.org/rec/bib/journals/corr/RadfordJS17},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Hochreiter and Schmidhuber, Neural Computation 9(8), 1997 (ACM DL export).
% The umlaut is written as a BibTeX special character so sorting and labels
% treat it as a single letter; the journal name is stored unabbreviated.
@article{LSTM,
  author     = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title      = {Long Short-Term Memory},
  journal    = {Neural Computation},
  issue_date = {November 15, 1997},
  volume     = {9},
  number     = {8},
  month      = nov,
  year       = {1997},
  issn       = {0899-7667},
  pages      = {1735--1780},
  numpages   = {46},
  url        = {http://dx.doi.org/10.1162/neco.1997.9.8.1735},
  doi        = {10.1162/neco.1997.9.8.1735},
  acmid      = {1246450},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA}
}

% van den Oord et al., arXiv 1609.03499 (record exported from dblp).
@article{wavenet,
  title = {WaveNet: {A} Generative Model for Raw Audio},
  author = {van den Oord, A{\"{a}}ron and Dieleman, Sander and Zen, Heiga and Simonyan, Karen and Vinyals, Oriol and Graves, Alex and Kalchbrenner, Nal and Senior, Andrew W. and Kavukcuoglu, Koray},
  year = {2016},
  journal = {CoRR},
  volume = {abs/1609.03499},
  archivePrefix = {arXiv},
  eprint = {1609.03499},
  url = {http://arxiv.org/abs/1609.03499},
  timestamp = {Wed, 07 Jun 2017 14:42:54 +0200},
  biburl = {http://dblp.org/rec/bib/journals/corr/OordDZSVGKSK16},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% Sainath et al., INTERSPEECH 2015. The acronym in the title is braced to
% survive sentence-casing.
@inproceedings{CLDNNs,
  author    = {Sainath, Tara N. and Weiss, Ron J. and Senior, Andrew W. and Wilson, Kevin W. and Vinyals, Oriol},
  title     = {Learning the speech front-end with raw waveform {CLDNNs}},
  booktitle = {INTERSPEECH},
  year      = {2015}
}

% Arik et al., arXiv 1703.05390 (record exported from dblp).
@article{keywordSpotting,
  title = {Convolutional Recurrent Neural Networks for Small-Footprint Keyword Spotting},
  author = {Arik, Sercan {\"{O}}mer and Kliegl, Markus and Child, Rewon and Hestness, Joel and Gibiansky, Andrew and Fougner, Christopher and Prenger, Ryan and Coates, Adam},
  year = {2017},
  journal = {CoRR},
  volume = {abs/1703.05390},
  archivePrefix = {arXiv},
  eprint = {1703.05390},
  url = {http://arxiv.org/abs/1703.05390},
  timestamp = {Thu, 20 Jul 2017 09:10:44 +0200},
  biburl = {http://dblp.org/rec/bib/journals/corr/ArikKCHGFPC17},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}

% LeCun et al., Proceedings of the IEEE 86(11), 1998 (CiteULike export).
% Double braces around the title and abstract are removed so styles can apply
% their own casing; the redundant booktitle (same value as journal) is dropped.
@article{lenet,
    author = {LeCun, Y. and Bottou, L. and Bengio, Y. and Haffner, P.},
    title = {Gradient-Based Learning Applied to Document Recognition},
    journal = {Proceedings of the IEEE},
    volume = {86},
    number = {11},
    pages = {2278--2324},
    year = {1998},
    month = nov,
    day = {06},
    publisher = {IEEE},
    institution = {Speech \& Image Process. Services Lab., AT\&T Bell Labs., Red Bank, NJ, USA},
    issn = {0018-9219},
    doi = {10.1109/5.726791},
    url = {http://dx.doi.org/10.1109/5.726791},
    keywords = {cnn, lenet-5},
    abstract = {Multilayer neural networks trained with the back-propagation algorithm constitute the best example of a successful gradient based learning technique. Given an appropriate network architecture, gradient-based learning algorithms can be used to synthesize a complex decision surface that can classify high-dimensional patterns, such as handwritten characters, with minimal preprocessing. This paper reviews various methods applied to handwritten character recognition and compares them on a standard handwritten digit recognition task. Convolutional neural networks, which are specifically designed to deal with the variability of 2D shapes, are shown to outperform all other techniques. Real-life document recognition systems are composed of multiple modules including field extraction, segmentation recognition, and language modeling. A new learning paradigm, called graph transformer networks (GTN), allows such multimodule systems to be trained globally using gradient-based methods so as to minimize an overall performance measure. Two systems for online handwriting recognition are described. Experiments demonstrate the advantage of global training, and the flexibility of graph transformer networks. A graph transformer network for reading a bank cheque is also described. It uses convolutional neural network character recognizers combined with global training techniques to provide record accuracy on business and personal cheques. It is deployed commercially and reads several million cheques per day},
    citeulike-article-id = {4196818},
    citeulike-linkout-0 = {http://dx.doi.org/10.1109/5.726791},
    citeulike-linkout-1 = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=726791},
    posted-at = {2016-06-08 06:38:36},
    priority = {0}
}

% Krizhevsky, Sutskever and Hinton, NIPS 25 (2012).
% Author names use one consistent "Last, First" form; the camelCase dataset
% name in the title is braced against recasing.
@incollection{alexnet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
  title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems 25},
  editor    = {F. Pereira and C. J. C. Burges and L. Bottou and K. Q. Weinberger},
  pages     = {1097--1105},
  year      = {2012},
  publisher = {Curran Associates, Inc.},
  url       = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf}
}

% Srivastava et al., JMLR 15(1), 2014 (ACM DL export).
% The journal name is stored in full; abbreviation is left to the style.
@article{dropout,
  author     = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title      = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
  journal    = {Journal of Machine Learning Research},
  issue_date = {January 2014},
  volume     = {15},
  number     = {1},
  month      = jan,
  year       = {2014},
  issn       = {1532-4435},
  pages      = {1929--1958},
  numpages   = {30},
  url        = {http://dl.acm.org/citation.cfm?id=2627435.2670313},
  acmid      = {2670313},
  publisher  = {JMLR.org},
  keywords   = {deep learning, model combination, neural networks, regularization}
}

% Lee and Kang, 2015. Authors are separated with "and" (a comma-separated list
% is a name-parse error), and the journal name that had been appended to the
% title is moved into its own field. The DOI is taken from the existing URL.
@article{hydromodeling,
  author  = {Lee, Hyojin and Kang, Kwangmin},
  title   = {Interpolation of Missing Precipitation Data Using Kernel Estimations for Hydrologic Modeling},
  journal = {Advances in Meteorology},
  year    = {2015},
  doi     = {10.1155/2015/935868},
  url     = {http://dx.doi.org/10.1155/2015/935868}
}