提交 2b594b4e 编写于 作者: Y Yibing Liu

resolve conflicts in requirements.txt

......@@ -17,13 +17,11 @@ PaddlePaddle提供了丰富的运算单元,帮助大家以模块化的方式
- 1.2 [噪声对比估计加速词向量训练](https://github.com/PaddlePaddle/models/tree/develop/nce_cost)
## 2. 语言模型
## 2. 使用循环神经网络语言模型生成文本
语言模型是自然语言处理领域里一个重要的基础模型,它是一个概率分布模型,利用它可以确定哪个词序列的可能性更大,或者给定若干个词,可以预测下一个最可能出现的词。语言模型被应用在很多领域,如:自动写作、QA、机器翻译、拼写检查、语音识别、词性标注等。
语言模型是自然语言处理领域里一个重要的基础模型,除了得到词向量(语言模型训练的副产物),还可以帮助我们生成文本。给定若干个词,语言模型可以帮助我们预测下一个最可能出现的词。在利用语言模型生成文本的例子中,我们重点介绍循环神经网络语言模型,大家可以通过文档中的使用说明快速适配到自己的训练语料,完成自动写诗、自动写散文等有趣的模型。
在语言模型的例子中,我们以文本生成为例,提供了RNN LM(包括LSTM、GRU)和N-Gram LM,供大家学习和使用。用户可以通过文档中的 “使用说明” 快速上手:适配训练语料,以训练 “自动写诗”、“自动写散文” 等有趣的模型。
- 2.1 [基于LSTM、GRU、N-Gram的文本生成模型](https://github.com/PaddlePaddle/models/tree/develop/language_model)
- 2.1 [使用循环神经网络语言模型生成文本](https://github.com/PaddlePaddle/models/tree/develop/generate_sequence_by_rnn_lm)
## 3. 点击率预估
......@@ -65,6 +63,14 @@ PaddlePaddle提供了丰富的运算单元,帮助大家以模块化的方式
- 7.1 [无注意力机制的编码器解码器模型](https://github.com/PaddlePaddle/models/tree/develop/nmt_without_attention)
## 8. 图像分类
图像相比文字能够提供更加生动、容易理解及更具艺术感的信息,是人们传递与交换信息的重要来源。在图像分类的例子中,我们向大家介绍如何在PaddlePaddle中训练AlexNet、VGG、GoogLeNet和ResNet模型。同时还提供了一个模型转换工具,能够将Caffe训练好的模型文件,转换为PaddlePaddle的模型文件。
- 8.1 [将Caffe模型文件转换为PaddlePaddle模型文件](https://github.com/PaddlePaddle/models/tree/develop/image_classification/caffe2paddle)
- 8.2 [AlexNet](https://github.com/PaddlePaddle/models/tree/develop/image_classification)
- 8.3 [VGG](https://github.com/PaddlePaddle/models/tree/develop/image_classification)
- 8.4 [Residual Network](https://github.com/PaddlePaddle/models/tree/develop/image_classification)
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
......@@ -6,7 +6,7 @@ from __future__ import print_function
import numpy as np
import io
import soundfile
import scikits.samplerate
import resampy
from scipy import signal
import random
import copy
......@@ -308,7 +308,7 @@ class AudioSegment(object):
prior_mean_squared = 10.**(prior_db / 10.)
prior_sum_of_squares = prior_mean_squared * prior_samples
cumsum_of_squares = np.cumsum(self.samples**2)
sample_count = np.arange(len(self.num_samples)) + 1
sample_count = np.arange(self.num_samples) + 1
if startup_sample_idx > 0:
cumsum_of_squares[:startup_sample_idx] = \
cumsum_of_squares[startup_sample_idx]
......@@ -321,21 +321,19 @@ class AudioSegment(object):
gain_db = target_db - rms_estimate_db
self.gain_db(gain_db)
def resample(self, target_sample_rate, quality='sinc_medium'):
def resample(self, target_sample_rate, filter='kaiser_best'):
"""Resample the audio to a target sample rate.
Note that this is an in-place transformation.
:param target_sample_rate: Target sample rate.
:type target_sample_rate: int
:param quality: One of {'sinc_fastest', 'sinc_medium', 'sinc_best'}.
Sets resampling speed/quality tradeoff.
See http://www.mega-nerd.com/SRC/api_misc.html#Converters
:type quality: str
:param filter: The resampling filter to use one of {'kaiser_best',
'kaiser_fast'}.
:type filter: str
"""
resample_ratio = target_sample_rate / self._sample_rate
self._samples = scikits.samplerate.resample(
self._samples, r=resample_ratio, type=quality)
self._samples = resampy.resample(
self.samples, self.sample_rate, target_sample_rate, filter=filter)
self._sample_rate = target_sample_rate
def pad_silence(self, duration, sides='both'):
......
......@@ -7,6 +7,10 @@ import json
import random
from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor
from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
from data_utils.augmentor.resample import ResampleAugmentor
from data_utils.augmentor.online_bayesian_normalization import \
OnlineBayesianNormalizationAugmentor
class AugmentationPipeline(object):
......@@ -79,5 +83,11 @@ class AugmentationPipeline(object):
return VolumePerturbAugmentor(self._rng, **params)
elif augmentor_type == "shift":
return ShiftPerturbAugmentor(self._rng, **params)
elif augmentor_type == "speed":
return SpeedPerturbAugmentor(self._rng, **params)
elif augmentor_type == "resample":
return ResampleAugmentor(self._rng, **params)
elif augmentor_type == "bayesian_normal":
return OnlineBayesianNormalizationAugmentor(self._rng, **params)
else:
raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
"""Contain the online bayesian normalization augmentation model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_utils.augmentor.base import AugmentorBase
class OnlineBayesianNormalizationAugmentor(AugmentorBase):
    """Augmentor that applies online Bayesian normalization to audio.

    The constructor only records its arguments; the actual work is delegated
    to ``normalize_online_bayesian`` on the audio segment being transformed.

    :param rng: Random generator object. It is stored on the instance but is
                not read by ``transform_audio``.
    :type rng: random.Random
    :param target_db: Target RMS value in decibels.
    :type target_db: float
    :param prior_db: Prior RMS estimate in decibels.
    :type prior_db: float
    :param prior_samples: Prior strength in number of samples.
    :type prior_samples: int
    :param startup_delay: Default 0.0s. If provided, statistics are accrued
                          for the first startup_delay seconds before online
                          normalization is applied.
    :type startup_delay: float
    """

    def __init__(self,
                 rng,
                 target_db,
                 prior_db,
                 prior_samples,
                 startup_delay=0.0):
        self._rng = rng
        self._startup_delay = startup_delay
        self._prior_samples = prior_samples
        self._prior_db = prior_db
        self._target_db = target_db

    def transform_audio(self, audio_segment):
        """Normalize the given audio with the online Bayesian approach.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        # Arguments are passed positionally, in the order: target level,
        # prior level, prior strength, startup delay.
        normalize = audio_segment.normalize_online_bayesian
        normalize(self._target_db, self._prior_db, self._prior_samples,
                  self._startup_delay)
"""Contain the resample augmentation model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_utils.augmentor.base import AugmentorBase
class ResampleAugmentor(AugmentorBase):
    """Augmentor that resamples audio to a fixed sample rate.

    See more info here:
    https://ccrma.stanford.edu/~jos/resample/index.html

    :param rng: Random generator object. It is stored on the instance but is
                not read by ``transform_audio``.
    :type rng: random.Random
    :param new_sample_rate: New sample rate in Hz.
    :type new_sample_rate: int
    """

    def __init__(self, rng, new_sample_rate):
        self._rng = rng
        self._new_sample_rate = new_sample_rate

    def transform_audio(self, audio_segment):
        """Resample the given audio to the configured sample rate.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        resample = audio_segment.resample
        resample(self._new_sample_rate)
"""Contain the speech perturbation augmentation model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from data_utils.augmentor.base import AugmentorBase
class SpeedPerturbAugmentor(AugmentorBase):
    """Augmentor that perturbs the playback speed of audio.

    See reference paper here:
    http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf

    :param rng: Random generator object used to draw the speed rate.
    :type rng: random.Random
    :param min_speed_rate: Lower bound of the speed rate to sample; must not
                           be smaller than 0.9.
    :type min_speed_rate: float
    :param max_speed_rate: Upper bound of the speed rate to sample; must not
                           be larger than 1.1.
    :type max_speed_rate: float
    :raises ValueError: If min_speed_rate is below 0.9 or max_speed_rate is
                        above 1.1.
    """

    def __init__(self, rng, min_speed_rate, max_speed_rate):
        # The lower bound is validated first, so it wins when both bounds
        # are out of range.
        if min_speed_rate < 0.9:
            raise ValueError(
                "Sampling speed below 0.9 can cause unnatural effects")
        if max_speed_rate > 1.1:
            raise ValueError(
                "Sampling speed above 1.1 can cause unnatural effects")
        self._rng = rng
        self._min_speed_rate, self._max_speed_rate = (min_speed_rate,
                                                      max_speed_rate)

    def transform_audio(self, audio_segment):
        """Draw a speed rate from the configured range and apply it.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        # Draw the rate before touching the segment.
        rate = self._rng.uniform(self._min_speed_rate, self._max_speed_rate)
        audio_segment.change_speed(rate)
......@@ -37,4 +37,4 @@ class VolumePerturbAugmentor(AugmentorBase):
:type audio_segment: AudioSegment|SpeechSegment
"""
gain = self._rng.uniform(self._min_gain_dBFS, self._max_gain_dBFS)
audio_segment.apply_gain(gain)
audio_segment.gain_db(gain)
SoundFile==0.9.0.post1
wget==3.2
scipy==0.13.1
resampy==0.1.5
https://github.com/kpu/kenlm/archive/master.zip
#!/bin/bash
# install python dependencies
if [ -f 'requirements.txt' ]; then
if [ -f "requirements.txt" ]; then
pip install -r requirements.txt
fi
if [ $? != 0 ]; then
......@@ -9,21 +9,21 @@ if [ $? != 0 ]; then
exit 1
fi
# install scikits.samplerate
curl -O "http://www.mega-nerd.com/SRC/libsamplerate-0.1.9.tar.gz"
# install package Soundfile
curl -O "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz"
if [ $? != 0 ]; then
echo "Download libsamplerate-0.1.9.tar.gz failed !!!"
echo "Download libsndfile-1.0.28.tar.gz failed !!!"
exit 1
fi
tar -xvf libsamplerate-0.1.9.tar.gz
cd libsamplerate-0.1.9
tar -zxvf libsndfile-1.0.28.tar.gz
cd libsndfile-1.0.28
./configure && make && make install
cd -
rm -rf libsamplerate-0.1.9
rm libsamplerate-0.1.9.tar.gz
pip install scikits.samplerate==0.3.3
rm -rf libsndfile-1.0.28
rm libsndfile-1.0.28.tar.gz
pip install SoundFile==0.9.0.post1
if [ $? != 0 ]; then
echo "Install scikits.samplerate failed !!!"
echo "Install SoundFile failed !!!"
exit 1
fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册