diff --git a/demos/speech_web/README.md b/demos/speech_web/README.md index 87a9258396161b9c39f08362385b0b796014c164..4e598028dec064f1a25aa7918664644d4e8db887 100644 --- a/demos/speech_web/README.md +++ b/demos/speech_web/README.md @@ -25,7 +25,7 @@ PaddleSpeechDemo 是一个以 PaddleSpeech 的语音交互功能为主体开发 运行效果: - ![效果](docs/效果展示.png) + ![效果](https://user-images.githubusercontent.com/30135920/191188766-12e7ca15-f7b4-45f8-9da5-0c0b0bbe5fcb.png) ## 安装 @@ -192,7 +192,7 @@ cd speech_server python vc.py --port 8010 ``` -> 如果你是其它的系统,可以使用 conda 安装 mfa v2 进行体验,安装请参考 [Montreal Forced Aligner](https://montreal-forced-aligner.readthedocs.io/en/latest/getting_started.html),使用 MFA v2 需要自行配置环境,并修改调用 MFA 相关的代码, mfa v1 与 mfa v2 使用上有差异 +如果你是其它的系统,可以使用 conda 安装 mfa v2 进行体验,安装请参考 [Montreal Forced Aligner](https://montreal-forced-aligner.readthedocs.io/en/latest/getting_started.html),使用 MFA v2 需要自行配置环境,并修改调用 MFA 相关的代码, mfa v1 与 mfa v2 使用上有差异 ### 开启前端服务 diff --git "a/demos/speech_web/docs/\346\225\210\346\236\234\345\261\225\347\244\272.png" "b/demos/speech_web/docs/\346\225\210\346\236\234\345\261\225\347\244\272.png" deleted file mode 100644 index d166f0dee498834e4d9e96b67886c36e46ef75d7..0000000000000000000000000000000000000000 Binary files "a/demos/speech_web/docs/\346\225\210\346\236\234\345\261\225\347\244\272.png" and /dev/null differ diff --git a/demos/speech_web/speech_server/src/ernie_sat.py b/demos/speech_web/speech_server/src/ernie_sat.py index 59537a5f0485f7bebb1c14d8049f9fa10c5ac55f..91a8915a7e3e4f7bcbb63f1ae25abdc195444abf 100644 --- a/demos/speech_web/speech_server/src/ernie_sat.py +++ b/demos/speech_web/speech_server/src/ernie_sat.py @@ -1,5 +1,6 @@ import os +from .util import MAIN_ROOT from .util import run_cmd @@ -20,10 +21,8 @@ class SAT: self.cross_voc_model_path = os.path.realpath( "source/model/hifigan_aishell3_ckpt_0.2.0") - self.now_file_path = os.path.dirname(__file__) - self.BIN_DIR = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../paddlespeech/t2s/exps/ernie_sat")) + self.BIN_DIR = os.path.join(MAIN_ROOT, + "paddlespeech/t2s/exps/ernie_sat") def zh_synthesize_edit(self, old_str: str, @@ -37,11 +36,6 @@ class SAT: print("task name only in ['edit', 'synthesize']") return None - # 运行时的 PYTHONPATH - PYTHONPATH = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../examples/aishell3/ernie_sat")) - # 推理文件配置 config_path = os.path.join(self.zh_pretrain_model_path, "default.yaml") phones_dict = os.path.join(self.zh_pretrain_model_path, @@ -84,9 +78,6 @@ class SAT: source_lang: str, target_lang: str, erniesat_ckpt_name: str="snapshot_iter_489000.pdz"): - PYTHONPATH = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../examples/aishell3_vctk/ernie_sat")) # 推理文件配置 config_path = os.path.join(self.cross_pretrain_model_path, "default.yaml") @@ -117,9 +108,6 @@ class SAT: output_name: os.PathLike, task_name: str="synthesize", erniesat_ckpt_name: str="snapshot_iter_199500.pdz"): - PYTHONPATH = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../examples/vctk/ernie_sat")) # 推理文件配置 config_path = os.path.join(self.en_pretrain_model_path, "default.yaml") diff --git a/demos/speech_web/speech_server/src/finetune.py b/demos/speech_web/speech_server/src/finetune.py index 17269f0bbf6a05cef3206864ce9b7e8d5a7aeb4a..8af96fded31f680b1f91b8f6d81078486df8aa87 100644 --- a/demos/speech_web/speech_server/src/finetune.py +++ b/demos/speech_web/speech_server/src/finetune.py @@ -1,5 +1,6 @@ import os +from .util import MAIN_ROOT from .util import run_cmd @@ -17,12 +18,10 @@ def find_max_ckpt(model_path): class FineTune: def __init__(self): self.now_file_path = os.path.dirname(__file__) - self.PYTHONPATH = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../examples/other/tts_finetune/tts3")) - self.BIN_DIR = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../paddlespeech/t2s/exps/fastspeech2")) + self.PYTHONPATH = os.path.join(MAIN_ROOT, + "examples/other/tts_finetune/tts3") + self.BIN_DIR = os.path.join(MAIN_ROOT, + "paddlespeech/t2s/exps/fastspeech2") self.pretrained_model_dir = os.path.realpath( "source/model/fastspeech2_aishell3_ckpt_1.1.0") self.voc_model_dir = os.path.realpath( diff --git a/demos/speech_web/speech_server/src/tdnn_clone.py b/demos/speech_web/speech_server/src/tdnn_clone.py index 4c3b3e8b03689d84514041c5cb465cc3b0a5319c..c24b9b077cc81077350e0f4ae8c8bf936e1f6dda 100644 --- a/demos/speech_web/speech_server/src/tdnn_clone.py +++ b/demos/speech_web/speech_server/src/tdnn_clone.py @@ -1,16 +1,14 @@ import os import shutil +from .util import MAIN_ROOT from .util import run_cmd class VoiceCloneTDNN(): def __init__(self): # Path 到指定路径上 - self.now_file_path = os.path.dirname(__file__) - self.BIN_DIR = os.path.realpath( - os.path.join(self.now_file_path, - "../../../../paddlespeech/t2s/exps")) + self.BIN_DIR = os.path.join(MAIN_ROOT, "paddlespeech/t2s/exps") self.am = "fastspeech2_aishell3" self.am_config = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/default.yaml" diff --git a/demos/speech_web/speech_server/src/util.py b/demos/speech_web/speech_server/src/util.py index 773d6ec9e9d96f7ccfd99a509dcee200f2b12237..a69e6c42f6259c4d20a3460818a295d0f3cacfb3 100644 --- a/demos/speech_web/speech_server/src/util.py +++ b/demos/speech_web/speech_server/src/util.py @@ -2,6 +2,9 @@ import os import random import subprocess +NOW_FILE_PATH = os.path.dirname(__file__) +MAIN_ROOT = os.path.realpath(os.path.join(NOW_FILE_PATH, "../../../../")) + def randName(n=5): return "".join(random.sample('zyxwvutsrqponmlkjihgfedcba', n))