diff --git a/examples/csmsc/vits/local/synthesize.sh b/examples/csmsc/vits/local/synthesize.sh index c15d5f99ff2f6a51c02630b72230999809cefcde..a4b35ec0aaa9f2ba830d2a35375283c0cea9389a 100755 --- a/examples/csmsc/vits/local/synthesize.sh +++ b/examples/csmsc/vits/local/synthesize.sh @@ -15,4 +15,4 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --phones_dict=dump/phone_id_map.txt \ --test_metadata=dump/test/norm/metadata.jsonl \ --output_dir=${train_output_path}/test -fi \ No newline at end of file +fi diff --git a/examples/csmsc/vits/local/train.sh b/examples/csmsc/vits/local/train.sh index 42fff26cadd03ee0eddcecec634438fd7482fef7..289837a5d10af58b3c6fcb679e0ea895040d5d64 100755 --- a/examples/csmsc/vits/local/train.sh +++ b/examples/csmsc/vits/local/train.sh @@ -3,6 +3,11 @@ config_path=$1 train_output_path=$2 +# install monotonic_align +cd ${MAIN_ROOT}/paddlespeech/t2s/models/vits/monotonic_align +python3 setup.py build_ext --inplace +cd - + python3 ${BIN_DIR}/train.py \ --train-metadata=dump/train/norm/metadata.jsonl \ --dev-metadata=dump/dev/norm/metadata.jsonl \