diff --git a/README.md b/README.md
index 26854ae6fa93d8e9559d8c7594ef04ad6f6a9632..3fdb2945f55eb44db902a2b97c1b7f4382892b59 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+
([简体中文](./README_cn.md)|English)
@@ -494,6 +495,14 @@ PaddleSpeech supports a series of most popular models. They are summarized in [r
ge2e-fastspeech2-aishell3
+
+ End-to-End |
+ VITS |
+ CSMSC |
+
+ VITS-csmsc
+ |
+
diff --git a/README_cn.md b/README_cn.md
index 8c018a08e3d99a3e33d9750c3eacebb41cc17d80..91a01d71047c1aa38cd8c9d059b39e9ca5245d7a 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -1,3 +1,4 @@
+
(简体中文|[English](./README.md))
@@ -481,6 +482,15 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块:文本前端、声
ge2e-fastspeech2-aishell3
+
+
+ 端到端 |
+ VITS |
+ CSMSC |
+
+ VITS-csmsc
+ |
+
diff --git a/examples/csmsc/vits/README.md b/examples/csmsc/vits/README.md
index 0c16840a04e32be8fefb3bae6c23fb4bd853be9f..5ca57e3a3603eb53fe4bf7c16fc1ba51bbc14147 100644
--- a/examples/csmsc/vits/README.md
+++ b/examples/csmsc/vits/README.md
@@ -144,3 +144,34 @@ optional arguments:
6. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
## Pretrained Model
+
+The pretrained model can be downloaded here:
+
+- [vits_csmsc_ckpt_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/vits/vits_csmsc_ckpt_1.1.0.zip) (add_blank=true)
+
+The VITS checkpoint contains the files listed below.
+```text
+vits_csmsc_ckpt_1.1.0
+├── default.yaml # default config used to train vits
+├── phone_id_map.txt # phone vocabulary file when training vits
+└── snapshot_iter_350000.pdz # model parameters and optimizer states
+```
+
+Note: this checkpoint is not good enough yet; a better model is still being trained.
+
+You can use the following script to synthesize speech for the sentences in `${BIN_DIR}/../sentences.txt` using the pretrained VITS model.
+
+```bash
+source path.sh
+add_blank=true
+
+FLAGS_allocator_strategy=naive_best_fit \
+FLAGS_fraction_of_gpu_memory_to_use=0.01 \
+python3 ${BIN_DIR}/synthesize_e2e.py \
+ --config=vits_csmsc_ckpt_1.1.0/default.yaml \
+ --ckpt=vits_csmsc_ckpt_1.1.0/snapshot_iter_350000.pdz \
+ --phones_dict=vits_csmsc_ckpt_1.1.0/phone_id_map.txt \
+ --output_dir=exp/default/test_e2e \
+ --text=${BIN_DIR}/../sentences.txt \
+ --add-blank=${add_blank}
+```
diff --git a/examples/csmsc/vits/local/synthesize.sh b/examples/csmsc/vits/local/synthesize.sh
index c15d5f99ff2f6a51c02630b72230999809cefcde..a4b35ec0aaa9f2ba830d2a35375283c0cea9389a 100755
--- a/examples/csmsc/vits/local/synthesize.sh
+++ b/examples/csmsc/vits/local/synthesize.sh
@@ -15,4 +15,4 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--phones_dict=dump/phone_id_map.txt \
--test_metadata=dump/test/norm/metadata.jsonl \
--output_dir=${train_output_path}/test
-fi
\ No newline at end of file
+fi
diff --git a/examples/csmsc/vits/local/train.sh b/examples/csmsc/vits/local/train.sh
index 42fff26cadd03ee0eddcecec634438fd7482fef7..289837a5d10af58b3c6fcb679e0ea895040d5d64 100755
--- a/examples/csmsc/vits/local/train.sh
+++ b/examples/csmsc/vits/local/train.sh
@@ -3,6 +3,11 @@
config_path=$1
train_output_path=$2
+# build the monotonic_align extension in place (required before training)
+cd ${MAIN_ROOT}/paddlespeech/t2s/models/vits/monotonic_align
+python3 setup.py build_ext --inplace
+cd -
+
python3 ${BIN_DIR}/train.py \
--train-metadata=dump/train/norm/metadata.jsonl \
--dev-metadata=dump/dev/norm/metadata.jsonl \