diff --git a/hub_module/modules/audio/tts/deepvoice3_ljspeech/README.md b/hub_module/modules/audio/tts/deepvoice3_ljspeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fc7128b9bbca160993cb5dc3e17aa95f08331357 --- /dev/null +++ b/hub_module/modules/audio/tts/deepvoice3_ljspeech/README.md @@ -0,0 +1,119 @@ +## 概述 + +Deep Voice 3是百度研究院2017年发布的端到端的TTS模型(论文录用于ICLR 2018)。它是一个基于卷积神经网络和注意力机制的seq2seq模型,由于不包含循环神经网络,它可以并行训练,远快于基于循环神经网络的模型。Deep Voice 3可以学习到多个说话人的特征,也支持搭配多种声码器使用。deepvoice3_ljspeech是基于ljspeech英文语音数据集预训练得到的英文TTS模型,仅支持预测。 + +
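作为对"声码器"一词的补充说明,下面给出一个 Griffin-Lim 声码器的极简示意(参数取自本模块代码中的默认值;仅为原理演示,实际使用时直接调用 `synthesize` 接口即可,无需手写这段代码):

```python
import numpy as np
import librosa

def griffin_lim_vocoder(log_mel, sample_rate=22050, n_fft=1024,
                        win_length=1024, hop_length=256, sharpening_factor=1.4):
    # 由 log-mel 频谱近似恢复线性幅度谱
    spec = librosa.feature.inverse.mel_to_stft(
        np.exp(log_mel), sr=sample_rate, n_fft=n_fft, fmin=0, fmax=8000.0, power=1.0)
    # Griffin-Lim 迭代估计相位并重建波形
    return librosa.core.griffinlim(
        spec ** sharpening_factor, win_length=win_length, hop_length=hop_length)
```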
+ +更多详情参考论文[Deep Voice 3: Scaling Text-to-Speech with Convolutional Sequence Learning](https://arxiv.org/abs/1710.07654) + +## 命令行预测 + +```shell +$ hub run deepvoice3_ljspeech --input_text='Simple as this proposition is, it is necessary to be stated' +``` + +## API + +```python +def synthesize(texts, use_gpu=False, vocoder="griffin-lim"): +``` + +预测API,由输入文本合成对应音频波形。 + +**参数** + +* texts (list\[str\]): 待预测文本; +* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; +* vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" + +**返回** + +* wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 +* sample\_rate (int): 合成音频的采样率。 + +**代码示例** + +```python +import paddlehub as hub +import soundfile as sf + +# Load deepvoice3_ljspeech module. +module = hub.Module(name="deepvoice3_ljspeech") + +# Predict sentiment label +test_texts = ['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'] +wavs, sample_rate = module.synthesize(texts=test_texts) +for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) +``` + +## 服务部署 + +PaddleHub Serving 可以部署在线服务。 + +### 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m deepvoice3_ljspeech +``` + +这样就完成了一个服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +### 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json + +import soundfile as sf + +# 发送HTTP请求 + +data = {'texts':['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], + 'use_gpu':False} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/deepvoice3_ljspeech" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存结果 +result = r.json()["results"] +wavs = result["wavs"] +sample_rate = result["sample_rate"] +for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) +``` + +## 查看代码 + +https://github.com/PaddlePaddle/Parakeet + +### 依赖 + +paddlepaddle >= 1.8.2 + +paddlehub >= 1.7.0 + +**NOTE:** 除了python依赖外还必须安装libsndfile库 +对于Ubuntu用户,请执行: +``` +sudo apt-get install libsndfile1 +``` +对于Centos用户,请执行: +``` +sudo yum install libsndfile +``` + +## 更新历史 + +* 1.0.0 + + 初始发布 diff --git a/hub_module/modules/audio/tts/deepvoice3_ljspeech/__init__.py b/hub_module/modules/audio/tts/deepvoice3_ljspeech/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/audio/tts/deepvoice3_ljspeech/module.py b/hub_module/modules/audio/tts/deepvoice3_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..0847cc3cec2a7df8874354e1898eb151f8c55593 --- /dev/null +++ b/hub_module/modules/audio/tts/deepvoice3_ljspeech/module.py @@ -0,0 +1,359 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import os +import argparse +import ast +import importlib.util + +import nltk +import numpy as np +import paddle.fluid as fluid +import paddle.fluid.dygraph as dg +import paddlehub as hub +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +from paddlehub.common.dir import THIRD_PARTY_HOME +from paddlehub.common.utils import mkdir +from paddlehub.common.downloader import default_downloader +from paddlehub.module.module import runnable +from paddlehub.module.nlp_module import DataFormatError + +lack_dependency = [] +for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]: + if not importlib.util.find_spec(dependency): + lack_dependency.append(dependency) + +# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. +_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" +_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" +nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") +tokenizers_path = os.path.join(nltk_path, "tokenizers") +corpora_path = os.path.join(nltk_path, "corpora") +punkt_path = os.path.join(tokenizers_path, "punkt") +cmudict_path = os.path.join(corpora_path, "cmudict") + +if not os.path.exists(punkt_path): + default_downloader.download_file_and_uncompress( + url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) +if not os.path.exists(cmudict_path): + default_downloader.download_file_and_uncompress( + url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) +nltk.data.path.append(nltk_path) + +if not lack_dependency: + import soundfile as sf + import librosa + import ruamel.yaml + from parakeet.utils import io + from parakeet.g2p import en + from parakeet.models.deepvoice3 import Encoder, Decoder, PostNet, SpectraNet + from parakeet.models.waveflow import WaveFlowModule +else: + raise ImportError( + "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' 
and others via pip install" + % ", ".join(lack_dependency)) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +class WaveflowVocoder(object): + def __init__(self, config_path, checkpoint_path): + with open(config_path, 'rt') as f: + config = ruamel.yaml.safe_load(f) + ns = argparse.Namespace() + for k, v in config.items(): + setattr(ns, k, v) + ns.use_fp16 = False + + self.model = WaveFlowModule(ns) + io.load_parameters(self.model, checkpoint_path=checkpoint_path) + + def __call__(self, mel): + with dg.no_grad(): + self.model.eval() + audio = self.model.synthesize(mel) + self.model.train() + return audio + + +class GriffinLimVocoder(object): + def __init__(self, + sharpening_factor=1.4, + sample_rate=22050, + n_fft=1024, + win_length=1024, + hop_length=256): + self.sample_rate = sample_rate + self.n_fft = n_fft + self.sharpening_factor = sharpening_factor + self.win_length = win_length + self.hop_length = hop_length + + def __call__(self, mel): + spec = librosa.feature.inverse.mel_to_stft( + np.exp(mel), + sr=self.sample_rate, + n_fft=self.n_fft, + fmin=0, + fmax=8000.0, + power=1.0) + audio = librosa.core.griffinlim( + spec**self.sharpening_factor, + win_length=self.win_length, + hop_length=self.hop_length) + return audio + + +@moduleinfo( + name="deepvoice3_ljspeech", + version="1.0.0", + summary= + "Deep Voice 3, a fully-convolutional attention-based neural text-to-speech (TTS) system.", + author="paddlepaddle", + author_email="", + type="nlp/tts", +) +class DeepVoice3(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", + "step-1780000") + self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", + "vocoder", "step-2000000") + self.waveflow_config_path = os.path.join( + self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") + tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", + "ljspeech.yaml") + with open(tts_checkpoint_path) as f: + self.tts_config = ruamel.yaml.safe_load(f) + + with fluid.dygraph.guard(fluid.CPUPlace()): + char_embedding = dg.Embedding((en.n_vocab, + self.tts_config["char_dim"])) + multi_speaker = self.tts_config["n_speakers"] > 1 + speaker_embedding = dg.Embedding((self.tts_config["n_speakers"], self.tts_config["speaker_dim"])) \ + if multi_speaker else None + encoder = Encoder( + self.tts_config["encoder_layers"], + self.tts_config["char_dim"], + self.tts_config["encoder_dim"], + self.tts_config["kernel_size"], + has_bias=multi_speaker, + bias_dim=self.tts_config["speaker_dim"], + keep_prob=1.0 - self.tts_config["dropout"]) + decoder = Decoder( + self.tts_config["n_mels"], + self.tts_config["reduction_factor"], + list(self.tts_config["prenet_sizes"]) + + [self.tts_config["char_dim"]], + self.tts_config["decoder_layers"], + self.tts_config["kernel_size"], + self.tts_config["attention_dim"], + position_encoding_weight=self.tts_config["position_weight"], + omega=self.tts_config["position_rate"], + has_bias=multi_speaker, + bias_dim=self.tts_config["speaker_dim"], + keep_prob=1.0 - self.tts_config["dropout"]) + postnet = PostNet( + self.tts_config["postnet_layers"], + self.tts_config["char_dim"], + self.tts_config["postnet_dim"], + self.tts_config["kernel_size"], + self.tts_config["n_mels"], + self.tts_config["reduction_factor"], + has_bias=multi_speaker, + bias_dim=self.tts_config["speaker_dim"], + keep_prob=1.0 - 
self.tts_config["dropout"]) + self.tts_model = SpectraNet(char_embedding, speaker_embedding, + encoder, decoder, postnet) + io.load_parameters( + model=self.tts_model, checkpoint_path=self.tts_checkpoint_path) + + self.waveflow = WaveflowVocoder( + config_path=self.waveflow_config_path, + checkpoint_path=self.waveflow_checkpoint_path) + self.griffin = GriffinLimVocoder( + sharpening_factor=self.tts_config["sharpening_factor"], + sample_rate=self.tts_config["sample_rate"], + n_fft=self.tts_config["n_fft"], + win_length=self.tts_config["win_length"], + hop_length=self.tts_config["hop_length"]) + + def synthesize(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Get the synthetic wavs from the texts. + + Args: + texts(list): the input texts to be predicted. + use_gpu(bool): whether use gpu to predict or not + vocoder(str): the vocoder name, "griffin-lim" or "waveflow" + + Returns: + wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. + sample_rate(int): the audio sample rate. + """ + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + + if texts and isinstance(texts, list): + predicted_data = texts + else: + raise ValueError( + "The input data is inconsistent with expectations.") + + wavs = [] + with fluid.dygraph.guard(place): + self.tts_model.eval() + self.waveflow.model.eval() + monotonic_layers = [4] + for text in predicted_data: + # init input + logger.info("Processing sentence: %s" % text) + text = en.text_to_sequence(text, p=1.0) + text = np.expand_dims(np.array(text, dtype="int64"), 0) + lengths = np.array([text.size], dtype=np.int64) + text_seqs = dg.to_variable(text) + text_lengths = dg.to_variable(lengths) + + decoder_layers = self.tts_config["decoder_layers"] + force_monotonic_attention = [False] * decoder_layers + for i in monotonic_layers: + force_monotonic_attention[i] = True + + outputs = self.tts_model( + text_seqs, + text_lengths, + speakers=None, + force_monotonic_attention=force_monotonic_attention, + window=(self.tts_config["backward_step"], + self.tts_config["forward_step"])) + decoded, refined, attentions = outputs + if vocoder == 'griffin-lim': + # synthesis use griffin-lim + wav = self.griffin(refined.numpy()[0].T) + elif vocoder == 'waveflow': + # synthesis use waveflow + wav = self.waveflow( + fluid.layers.transpose(refined, [0, 2, 1])).numpy()[0] + else: + raise ValueError( + 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' + % vocoder) + wavs.append(wav) + return wavs, self.tts_config["sample_rate"] + + @serving + def serving_method(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Run as a service. 
+ """ + wavs, sample_rate = self.synthesize(texts, use_gpu, vocoder) + wavs = [wav.tolist() for wav in wavs] + result = {"wavs": wavs, "sample_rate": sample_rate} + return result + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--vocoder', + type=str, + default="griffin-lim", + choices=['griffin-lim', 'waveflow'], + help="the vocoder name") + + def add_module_output_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--output_path', + type=str, + default=os.path.abspath( + os.path.join(os.path.curdir, f"{self.name}_prediction")), + help="path to save experiment results") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Ouput options", description="Ouput path. Optional.") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + self.add_module_output_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + mkdir(args.output_path) + wavs, sample_rate = self.synthesize( + texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) + + for index, wav in enumerate(wavs): + sf.write( + os.path.join(args.output_path, f"{index}.wav"), wav, + sample_rate) + + ret = f"The synthesized wav files have been saved in {args.output_path}" + return ret + + +if __name__ == "__main__": + module = DeepVoice3() + test_text = [ + "Simple as this proposition is, it is necessary to be stated", + "Parakeet stands for Paddle PARAllel text-to-speech toolkit.", + ] + wavs, sample_rate = module.synthesize(texts=test_text, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/audio/tts/fastspeech_ljspeech/README.md b/hub_module/modules/audio/tts/fastspeech_ljspeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5709ee1795fbb01166cd45c541e0ce5d88339fd6 --- /dev/null +++ b/hub_module/modules/audio/tts/fastspeech_ljspeech/README.md @@ -0,0 +1,120 @@ +## 概述 + +FastSpeech是基于Transformer的前馈神经网络,作者从encoder-decoder结构的teacher model中提取attention对角线来做发音持续时间预测,即使用长度调节器对文本序列进行扩展来匹配目标梅尔频谱的长度,以便并行生成梅尔频谱。该模型基本上消除了复杂情况下的跳词和重复的问题,并且可以平滑地调整语音速度,更重要的是,该模型大幅度提升了梅尔频谱的生成速度。fastspeech_ljspeech是基于ljspeech英文语音数据集预训练得到的英文TTS模型,仅支持预测。 + +
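长度调节器(length regulator)的核心思想可以用下面的简化示意理解:按预测的时长把每个音素的隐状态重复若干帧,从而一次性得到与梅尔频谱等长的序列。这只是概念演示,并非 Parakeet 中的真实实现,真实的时长由模型预测得到:

```python
import numpy as np

def length_regulator(phoneme_hidden, durations, alpha=1.0):
    # phoneme_hidden: [音素数, 隐层维度];durations: 每个音素占用的帧数
    # alpha 对应 synthesize 接口中 speed 的倒数:alpha < 1 时帧数变少、语速变快
    scaled = np.maximum(1, np.round(np.asarray(durations) * alpha)).astype(int)
    # 将第 i 个音素的隐状态重复 scaled[i] 次,得到帧级别的序列
    return np.repeat(phoneme_hidden, scaled, axis=0)

hidden = np.random.randn(3, 8).astype("float32")            # 3 个音素
print(length_regulator(hidden, durations=[2, 3, 1]).shape)  # (6, 8)
```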
+ +更多详情参考论文[FastSpeech: Fast, Robust and Controllable Text to Speech](https://arxiv.org/abs/1905.09263) + +## 命令行预测 + +```shell +$ hub run fastspeech_ljspeech --input_text='Simple as this proposition is, it is necessary to be stated' +``` + +## API + +```python +def synthesize(texts, use_gpu=False, speed=1.0, vocoder="griffin-lim"): +``` + +预测API,由输入文本合成对应音频波形。 + +**参数** + +* texts (list\[str\]): 待预测文本; +* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; +* speed(float): 音频速度,1.0表示以原速输出。 +* vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" + +**返回** + +* wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 +* sample\_rate (int): 合成音频的采样率。 + +**代码示例** + +```python +import paddlehub as hub +import soundfile as sf + +# Load fastspeech_ljspeech module. +module = hub.Module(name="fastspeech_ljspeech") + +# Predict sentiment label +test_texts = ['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'] +wavs, sample_rate = module.synthesize(texts=test_texts) +for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) +``` + +## 服务部署 + +PaddleHub Serving 可以部署在线服务。 + +### 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m fastspeech_ljspeech +``` + +这样就完成了一个服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +### 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json + +import soundfile as sf + +# 发送HTTP请求 + +data = {'texts':['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], + 'use_gpu':False} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/fastspeech_ljspeech" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存结果 +result = r.json()["results"] +wavs = result["wavs"] +sample_rate = result["sample_rate"] +for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) +``` + +## 查看代码 + +https://github.com/PaddlePaddle/Parakeet + +### 依赖 + +paddlepaddle >= 1.8.2 + +paddlehub >= 1.7.0 + +**NOTE:** 除了python依赖外还必须安装libsndfile库 +对于Ubuntu用户,请执行: +``` +sudo apt-get install libsndfile1 +``` +对于Centos用户,请执行: +``` +sudo yum install libsndfile +``` + +## 更新历史 + +* 1.0.0 + + 初始发布 diff --git a/hub_module/modules/audio/tts/fastspeech_ljspeech/__init__.py b/hub_module/modules/audio/tts/fastspeech_ljspeech/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/audio/tts/fastspeech_ljspeech/module.py b/hub_module/modules/audio/tts/fastspeech_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e308d53a84f6c6307578ba58ec8ca0910c3a1aa8 --- /dev/null +++ b/hub_module/modules/audio/tts/fastspeech_ljspeech/module.py @@ -0,0 +1,311 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import os +import ast +import argparse +import importlib.util + +import nltk +import paddle.fluid as fluid +import paddle.fluid.dygraph as dg +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.common.utils import mkdir +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +from paddlehub.common.dir import THIRD_PARTY_HOME +from paddlehub.common.downloader import default_downloader + +lack_dependency = [] +for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]: + if not importlib.util.find_spec(dependency): + lack_dependency.append(dependency) + +# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. +_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" +_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" +nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") +tokenizers_path = os.path.join(nltk_path, "tokenizers") +corpora_path = os.path.join(nltk_path, "corpora") +punkt_path = os.path.join(tokenizers_path, "punkt") +cmudict_path = os.path.join(corpora_path, "cmudict") + +if not os.path.exists(punkt_path): + default_downloader.download_file_and_uncompress( + url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) +if not os.path.exists(cmudict_path): + default_downloader.download_file_and_uncompress( + url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) +nltk.data.path.append(nltk_path) + +if not lack_dependency: + import soundfile as sf + import librosa + from ruamel import yaml + from parakeet.models.fastspeech.fastspeech import FastSpeech as FastSpeechModel + from parakeet.g2p.en import text_to_sequence + from parakeet.models.transformer_tts.utils import * + from parakeet.utils import io + from parakeet.modules.weight_norm import WeightNormWrapper + from parakeet.models.waveflow import WaveFlowModule +else: + raise ImportError( + "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" + % ", ".join(lack_dependency)) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +@moduleinfo( + name="fastspeech_ljspeech", + version="1.0.0", + summary= + "FastSpeech proposes a novel feed-forward network based on Transformer to generate mel-spectrogram in parallel for TTS. 
See https://arxiv.org/abs/1905.09263 for details.", + author="baidu-nlp", + author_email="", + type="nlp/tts", +) +class FastSpeech(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", + "step-162000") + self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", + "vocoder", "step-2000000") + self.waveflow_config_path = os.path.join( + self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") + + tts_config_path = os.path.join(self.directory, "assets", "tts", + "ljspeech.yaml") + with open(tts_config_path) as f: + self.tts_config = yaml.load(f, Loader=yaml.Loader) + with fluid.dygraph.guard(fluid.CPUPlace()): + self.tts_model = FastSpeechModel( + self.tts_config['network'], + num_mels=self.tts_config['audio']['num_mels']) + io.load_parameters( + model=self.tts_model, checkpoint_path=self.tts_checkpoint_path) + + # Build vocoder. + args = AttrDict() + args.config = self.waveflow_config_path + args.use_fp16 = False + self.waveflow_config = io.add_yaml_config_to_args(args) + self.waveflow = WaveFlowModule(self.waveflow_config) + io.load_parameters( + model=self.waveflow, + checkpoint_path=self.waveflow_checkpoint_path) + + def synthesize(self, texts, use_gpu=False, speed=1.0, + vocoder="griffin-lim"): + """ + Get the synthetic wavs from the texts. + + Args: + texts(list): the input texts to be predicted. + use_gpu(bool): whether use gpu to predict or not. Default False. + speed(float): Controlling the voice speed. Default 1.0. + vocoder(str): the vocoder name, "griffin-lim" or "waveflow". + + Returns: + wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. + sample_rate(int): the audio sample rate. + """ + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + if texts and isinstance(texts, list): + predicted_data = texts + else: + raise ValueError( + "The input data is inconsistent with expectations.") + + wavs = [] + with fluid.dygraph.guard(place): + self.tts_model.eval() + self.waveflow.eval() + for text in predicted_data: + # init input + logger.info("Processing sentence: %s" % text) + text = np.asarray(text_to_sequence(text)) + text = np.expand_dims(text, axis=0) + pos_text = np.arange(1, text.shape[1] + 1) + pos_text = np.expand_dims(pos_text, axis=0) + + text = dg.to_variable(text).astype(np.int64) + pos_text = dg.to_variable(pos_text).astype(np.int64) + + _, mel_output_postnet = self.tts_model( + text, pos_text, alpha=1 / speed) + + if vocoder == 'griffin-lim': + # synthesis use griffin-lim + wav = self.synthesis_with_griffinlim( + mel_output_postnet, self.tts_config['audio']) + elif vocoder == 'waveflow': + wav = self.synthesis_with_waveflow( + mel_output_postnet, self.waveflow_config.sigma) + else: + raise ValueError( + 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' 
+ % vocoder) + wavs.append(wav) + return wavs, self.tts_config['audio']['sr'] + + def synthesis_with_griffinlim(self, mel_output, cfg): + # synthesis with griffin-lim + mel_output = fluid.layers.transpose( + fluid.layers.squeeze(mel_output, [0]), [1, 0]) + mel_output = np.exp(mel_output.numpy()) + basis = librosa.filters.mel( + cfg['sr'], + cfg['n_fft'], + cfg['num_mels'], + fmin=cfg['fmin'], + fmax=cfg['fmax']) + inv_basis = np.linalg.pinv(basis) + spec = np.maximum(1e-10, np.dot(inv_basis, mel_output)) + + wav = librosa.core.griffinlim( + spec**cfg['power'], + hop_length=cfg['hop_length'], + win_length=cfg['win_length']) + + return wav + + def synthesis_with_waveflow(self, mel_output, sigma): + mel_spectrogram = fluid.layers.transpose( + fluid.layers.squeeze(mel_output, [0]), [1, 0]) + mel_spectrogram = fluid.layers.unsqueeze(mel_spectrogram, [0]) + + for layer in self.waveflow.sublayers(): + if isinstance(layer, WeightNormWrapper): + layer.remove_weight_norm() + + # Run model inference. + wav = self.waveflow.synthesize(mel_spectrogram, sigma=sigma) + return wav.numpy()[0] + + @serving + def serving_method(self, + texts, + use_gpu=False, + speed=1.0, + vocoder="griffin-lim"): + """ + Run as a service. + """ + wavs, sample_rate = self.synthesize(texts, use_gpu, speed, vocoder) + wavs = [wav.tolist() for wav in wavs] + result = {"wavs": wavs, "sample_rate": sample_rate} + return result + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--vocoder', + type=str, + default="griffin-lim", + choices=['griffin-lim', 'waveflow'], + help="the vocoder name") + + def add_module_output_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--output_path', + type=str, + default=os.path.abspath( + os.path.join(os.path.curdir, f"{self.name}_prediction")), + help="path to save experiment results") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Ouput options", description="Ouput path. 
Optional.") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + self.add_module_output_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + mkdir(args.output_path) + wavs, sample_rate = self.synthesize( + texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) + + for index, wav in enumerate(wavs): + sf.write( + os.path.join(args.output_path, f"{index}.wav"), wav, + sample_rate) + + ret = f"The synthesized wav files have been saved in {args.output_path}" + return ret + + +if __name__ == "__main__": + + module = FastSpeech() + test_text = [ + "Simple as this proposition is, it is necessary to be stated", + ] + wavs, sample_rate = module.synthesize( + texts=test_text, speed=1, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/modules/audio/tts/transformer_tts_ljspeech/README.md b/hub_module/modules/audio/tts/transformer_tts_ljspeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..933e02748e2a6cd4cd23813a5a4a5ffbe3a98ca7 --- /dev/null +++ b/hub_module/modules/audio/tts/transformer_tts_ljspeech/README.md @@ -0,0 +1,118 @@ +## 概述 + +TansformerTTS 是使用了 Transformer 结构的端到端语音合成模型,对 Transformer 和 Tacotron2 进行了融合,取得了令人满意的效果。因为删除了 RNN 的循环连接,可并行的提供 decoder 的输入,进行并行训练,大大提升了模型的训练速度。transformer_tts_ljspeech是基于ljspeech英文语音数据集预训练得到的英文TTS模型,仅支持预测。 + +
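需要注意的是,训练阶段可以并行输入整个目标序列,但推理阶段解码器仍是逐帧自回归的:每一步预测新的梅尔频谱帧和停止符概率,概率超过阈值即停止。下面的伪代码式示意概括了这一过程(其中模型的调用方式和返回值顺序为假设写法,实际逻辑请参考本模块 `synthesize` 的实现):

```python
import numpy as np

def autoregressive_decode(tts_model, text_ids, max_len=1000, stop_threshold=0.5):
    # 以一帧全零的梅尔频谱作为起始输入
    mel_input = np.zeros([1, 1, 80], dtype="float32")
    postnet_pred = mel_input
    for _ in range(max_len):
        # 假设模型返回 (解码器输出, postnet 输出, 停止符概率)
        mel_pred, postnet_pred, stop_preds = tts_model(text_ids, mel_input)
        # 停止符概率超过阈值则结束生成
        if stop_preds[0, -1] > stop_threshold:
            break
        # 把最新预测的一帧拼接回解码器输入,继续生成下一帧
        mel_input = np.concatenate([mel_input, postnet_pred[:, -1:, :]], axis=1)
    return postnet_pred  # 交给声码器(griffin-lim 或 waveflow)还原为波形
```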
+ +更多详情参考论文[Neural Speech Synthesis with Transformer Network](https://arxiv.org/abs/1809.08895) + +## 命令行预测 + +```shell +$ hub run transformer_tts_ljspeech --input_text="Life was like a box of chocolates, you never know what you're gonna get." --use_gpu True --vocoder griffin-lim +``` + +## API + +```python +def synthesize(texts, use_gpu=False, vocoder="griffin-lim"): +``` + +预测API,由输入文本合成对应音频波形。 + +**参数** + +* texts (list\[str\]): 待预测文本; +* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; +* vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" + +**返回** + +* wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 +* sample\_rate (int): 合成音频的采样率。 + +**代码示例** + +```python +import paddlehub as hub +import soundfile as sf + +# Load transformer_tts_ljspeech module. +module = hub.Module(name="transformer_tts_ljspeech") + +# Predict sentiment label +test_texts = ["Life was like a box of chocolates, you never know what you're gonna get."] +wavs, sample_rate = module.synthesize(texts=test_texts, use_gpu=True, vocoder="waveflow") +for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) +``` + +## 服务部署 + +PaddleHub Serving 可以部署在线服务。 + +### 第一步:启动PaddleHub Serving + +运行启动命令: +```shell +$ hub serving start -m transformer_tts_ljspeech +``` + +这样就完成了一个服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +### 第二步:发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json + +import soundfile as sf + +# 发送HTTP请求 + +data = {'texts':['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], + 'use_gpu':False} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/transformer_tts_ljspeech" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) + +# 保存结果 +result = r.json()["results"] +wavs = result["wavs"] +sample_rate = result["sample_rate"] +for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) +``` + +## 查看代码 + +https://github.com/PaddlePaddle/Parakeet + +### 依赖 + +paddlepaddle >= 1.8.2 + +paddlehub >= 1.7.0 + +**NOTE:** 除了python依赖外还必须安装libsndfile库 +对于Ubuntu用户,请执行: +``` +sudo apt-get install libsndfile1 +``` +对于Centos用户,请执行: +``` +sudo yum install libsndfile +``` + +## 更新历史 + +* 1.0.0 + + 初始发布 diff --git a/hub_module/modules/audio/tts/transformer_tts_ljspeech/__init__.py b/hub_module/modules/audio/tts/transformer_tts_ljspeech/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hub_module/modules/audio/tts/transformer_tts_ljspeech/module.py b/hub_module/modules/audio/tts/transformer_tts_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2f58c083d6ba9b20cb434ff58110fbd0fb6e23b3 --- /dev/null +++ b/hub_module/modules/audio/tts/transformer_tts_ljspeech/module.py @@ -0,0 +1,329 @@ +# coding:utf-8 +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import os +import ast +import argparse +import importlib.util + +import nltk +import paddle.fluid as fluid +import paddle.fluid.dygraph as dg +import paddlehub as hub +from paddlehub.module.module import runnable +from paddlehub.common.utils import mkdir +from paddlehub.module.nlp_module import DataFormatError +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +from paddlehub.common.dir import THIRD_PARTY_HOME +from paddlehub.common.downloader import default_downloader + +lack_dependency = [] +for dependency in ["ruamel", "parakeet", "scipy", "soundfile", "librosa"]: + if not importlib.util.find_spec(dependency): + lack_dependency.append(dependency) + +# Accelerate NLTK package download via paddlehub. 'import parakeet' will use the package. +_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz" +_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz" +nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data") +tokenizers_path = os.path.join(nltk_path, "tokenizers") +corpora_path = os.path.join(nltk_path, "corpora") +punkt_path = os.path.join(tokenizers_path, "punkt") +cmudict_path = os.path.join(corpora_path, "cmudict") + +if not os.path.exists(punkt_path): + default_downloader.download_file_and_uncompress( + url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True) +if not os.path.exists(cmudict_path): + default_downloader.download_file_and_uncompress( + url=_CMUDICT_URL, save_path=corpora_path, print_progress=True) +nltk.data.path.append(nltk_path) + +if not lack_dependency: + import soundfile as sf + import librosa + from ruamel import yaml + from scipy.io.wavfile import write + from parakeet.g2p.en import text_to_sequence + from parakeet.models.transformer_tts.utils import * + from parakeet.models.transformer_tts import TransformerTTS as TransformerTTSModel + from parakeet.models.waveflow import WaveFlowModule + from parakeet.utils import io + from parakeet.modules.weight_norm import WeightNormWrapper +else: + raise ImportError( + "The module requires additional dependencies: %s. You can install parakeet via 'git clone https://github.com/PaddlePaddle/Parakeet && cd Parakeet && pip install -e .' and others via pip install" + % ", ".join(lack_dependency)) + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +@moduleinfo( + name="transformer_tts_ljspeech", + version="1.0.0", + summary= + "Transformer TTS introduces and adapts the multi-head attention mechanism to replace the RNN structures and also the original attention mechanism in Tacotron2. 
See https://arxiv.org/abs/1809.08895 for details", + author="baidu-nlp", + author_email="", + type="nlp/tts", +) +class TransformerTTS(hub.NLPPredictionModule): + def _initialize(self): + """ + initialize with the necessary elements + """ + self.tts_checkpoint_path = os.path.join(self.directory, "assets", "tts", + "step-120000") + self.waveflow_checkpoint_path = os.path.join(self.directory, "assets", + "vocoder", "step-2000000") + self.waveflow_config_path = os.path.join( + self.directory, "assets", "vocoder", "waveflow_ljspeech.yaml") + + tts_config_path = os.path.join(self.directory, "assets", "tts", + "ljspeech.yaml") + with open(tts_config_path) as f: + self.tts_config = yaml.load(f, Loader=yaml.Loader) + + # The max length of audio when synthsis. + self.max_len = 1000 + # The threshold of stop token which indicates the time step should stop generate spectrum or not. + self.stop_threshold = 0.5 + + with fluid.dygraph.guard(fluid.CPUPlace()): + # Build TTS. + with fluid.unique_name.guard(): + network_cfg = self.tts_config['network'] + self.tts_model = TransformerTTSModel( + network_cfg['embedding_size'], network_cfg['hidden_size'], + network_cfg['encoder_num_head'], + network_cfg['encoder_n_layers'], + self.tts_config['audio']['num_mels'], + network_cfg['outputs_per_step'], + network_cfg['decoder_num_head'], + network_cfg['decoder_n_layers']) + io.load_parameters( + model=self.tts_model, + checkpoint_path=self.tts_checkpoint_path) + + # Build vocoder. + args = AttrDict() + args.config = self.waveflow_config_path + args.use_fp16 = False + self.waveflow_config = io.add_yaml_config_to_args(args) + self.waveflow = WaveFlowModule(self.waveflow_config) + io.load_parameters( + model=self.waveflow, + checkpoint_path=self.waveflow_checkpoint_path) + + def synthesize(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Get the synthetic wavs from the texts. + + Args: + texts(list): the input texts to be predicted. + use_gpu(bool): whether use gpu to predict or not + vocoder(str): the vocoder name, "griffin-lim" or "waveflow" + + Returns: + wavs(str): the audio wav with sample rate . You can use soundfile.write to save it. + sample_rate(int): the audio sample rate. 
+ """ + if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: + use_gpu = False + logger.warning( + "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" + ) + if use_gpu: + place = fluid.CUDAPlace(0) + else: + place = fluid.CPUPlace() + + if texts and isinstance(texts, list): + predicted_data = texts + else: + raise ValueError( + "The input data is inconsistent with expectations.") + + wavs = [] + with fluid.dygraph.guard(place): + self.tts_model.eval() + self.waveflow.eval() + for text in predicted_data: + # init input + logger.info("Processing sentence: %s" % text) + text = np.asarray(text_to_sequence(text)) + text = fluid.layers.unsqueeze( + dg.to_variable(text).astype(np.int64), [0]) + mel_input = dg.to_variable(np.zeros([1, 1, + 80])).astype(np.float32) + pos_text = np.arange(1, text.shape[1] + 1) + pos_text = fluid.layers.unsqueeze( + dg.to_variable(pos_text).astype(np.int64), [0]) + + for i in range(self.max_len): + pos_mel = np.arange(1, mel_input.shape[1] + 1) + pos_mel = fluid.layers.unsqueeze( + dg.to_variable(pos_mel).astype(np.int64), [0]) + mel_pred, postnet_pred, attn_probs, stop_preds, attn_enc, attn_dec = self.tts_model( + text, mel_input, pos_text, pos_mel) + if stop_preds.numpy()[0, -1] > self.stop_threshold: + break + mel_input = fluid.layers.concat( + [mel_input, postnet_pred[:, -1:, :]], axis=1) + if vocoder == 'griffin-lim': + # synthesis use griffin-lim + wav = self.synthesis_with_griffinlim( + postnet_pred, self.tts_config['audio']) + elif vocoder == 'waveflow': + # synthesis use waveflow + wav = self.synthesis_with_waveflow( + postnet_pred, self.waveflow_config.sigma) + else: + raise ValueError( + 'vocoder error, we only support griffinlim and waveflow, but recevied %s.' + % vocoder) + wavs.append(wav) + return wavs, self.tts_config['audio']['sr'] + + def synthesis_with_griffinlim(self, mel_output, cfg): + # synthesis with griffin-lim + mel_output = fluid.layers.transpose( + fluid.layers.squeeze(mel_output, [0]), [1, 0]) + mel_output = np.exp(mel_output.numpy()) + basis = librosa.filters.mel( + cfg['sr'], + cfg['n_fft'], + cfg['num_mels'], + fmin=cfg['fmin'], + fmax=cfg['fmax']) + inv_basis = np.linalg.pinv(basis) + spec = np.maximum(1e-10, np.dot(inv_basis, mel_output)) + + wav = librosa.core.griffinlim( + spec**cfg['power'], + hop_length=cfg['hop_length'], + win_length=cfg['win_length']) + + return wav + + def synthesis_with_waveflow(self, mel_output, sigma): + mel_spectrogram = fluid.layers.transpose( + fluid.layers.squeeze(mel_output, [0]), [1, 0]) + mel_spectrogram = fluid.layers.unsqueeze(mel_spectrogram, [0]) + + for layer in self.waveflow.sublayers(): + if isinstance(layer, WeightNormWrapper): + layer.remove_weight_norm() + + # Run model inference. + wav = self.waveflow.synthesize(mel_spectrogram, sigma=sigma) + return wav.numpy()[0] + + @serving + def serving_method(self, texts, use_gpu=False, vocoder="griffin-lim"): + """ + Run as a service. 
+ """ + wavs, sample_rate = self.synthesize(texts, use_gpu, vocoder) + wavs = [wav.tolist() for wav in wavs] + result = {"wavs": wavs, "sample_rate": sample_rate} + return result + + def add_module_config_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU for prediction") + + self.arg_config_group.add_argument( + '--vocoder', + type=str, + default="griffin-lim", + choices=['griffin-lim', 'waveflow'], + help="the vocoder name") + + def add_module_output_arg(self): + """ + Add the command config options + """ + self.arg_config_group.add_argument( + '--output_path', + type=str, + default=os.path.abspath( + os.path.join(os.path.curdir, f"{self.name}_prediction")), + help="path to save experiment results") + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group( + title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group( + title="Ouput options", description="Ouput path. Optional.") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", + description= + "Run configuration for controlling module behavior, optional.") + + self.add_module_config_arg() + self.add_module_input_arg() + self.add_module_output_arg() + + args = self.parser.parse_args(argvs) + + try: + input_data = self.check_input_data(args) + except DataFormatError and RuntimeError: + self.parser.print_help() + return None + + mkdir(args.output_path) + wavs, sample_rate = self.synthesize( + texts=input_data, use_gpu=args.use_gpu, vocoder=args.vocoder) + + for index, wav in enumerate(wavs): + sf.write( + os.path.join(args.output_path, f"{index}.wav"), wav, + sample_rate) + + ret = f"The synthesized wav files have been saved in {args.output_path}" + return ret + + +if __name__ == "__main__": + + module = TransformerTTS() + test_text = [ + "Life was like a box of chocolates, you never know what you're gonna get.", + ] + wavs, sample_rate = module.synthesize(texts=test_text, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) diff --git a/hub_module/scripts/configs/deep_voice3.yml b/hub_module/scripts/configs/deep_voice3.yml new file mode 100644 index 0000000000000000000000000000000000000000..4acdb7133bf022031e2bafe2cfd10d8bb0a47a08 --- /dev/null +++ b/hub_module/scripts/configs/deep_voice3.yml @@ -0,0 +1,9 @@ +name: ace2p +dir: "modules/audio/tts/deep_voice3" +exclude: + - README.md +resources: + - + url: https://paddlespeech.bj.bcebos.com/Parakeet/deepvoice3_ljspeech_griffin-lim_ckpt_1.0.zip + dest: assets + uncompress: True diff --git a/paddlehub/module/nlp_module.py b/paddlehub/module/nlp_module.py index 435f1a9294e02bda4ea5dd959cade5a9706cf901..b696d52e90c3335b87dfa1023efdcac62187d30d 100644 --- a/paddlehub/module/nlp_module.py +++ b/paddlehub/module/nlp_module.py @@ -19,7 +19,6 @@ from __future__ import print_function import argparse import ast -import json import os import re import six @@ -31,7 +30,6 @@ import paddle.fluid as fluid import paddlehub as hub from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor from paddlehub.common import paddle_helper, tmp_dir -from paddlehub.common.logger import logger from 
paddlehub.common.utils import sys_stdin_encoding, version_compare from paddlehub.io.parser import txt_parser from paddlehub.module.module import runnable diff --git a/requirements.txt b/requirements.txt index 37372665c93089f01b9331dd9e1bd9ff6b9f116b..d430c04e9c85fd24becadc468d81601506b5588e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ cma >= 2.7.0 sentencepiece colorlog tqdm +nltk # pandas no longer support python2 in version 0.25 and above pandas ; python_version >= "3"