Merge branch 'develop' of github.com:iftaken/PaddleSpeech into dev-web-cmd

d5d94cbd · iftaken · a488ec83 · c6361a5f · d5d94cbd · a488ec83
52 changed files
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -20,6 +20,7 @@ onnxruntime==1.10.0
 opencc
 paddlenlp
 paddlepaddle>=2.2.2
+paddlespeech_ctcdecoders
 paddlespeech_feat
 pandas
 pathos == 0.2.8
@@ -27,8 +28,8 @@ pattern_singleton
 Pillow>=9.0.0
 praatio==5.0.0
 prettytable
-pypinyin<=0.44.0
 pypinyin-dict
+pypinyin<=0.44.0
 python-dateutil
 pyworld==0.2.12
 recommonmark>=0.5.0

--- a/docs/source/api/paddlespeech.cls.exps.panns.deploy.predict.rst
+++ b/docs/source/api/paddlespeech.cls.exps.panns.deploy.predict.rst
-paddlespeech.cls.exps.panns.deploy.predict module
-=================================================
-
-.. automodule:: paddlespeech.cls.exps.panns.deploy.predict
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.cls.exps.panns.deploy.rst
+++ b/docs/source/api/paddlespeech.cls.exps.panns.deploy.rst
@@ -12,4 +12,3 @@ Submodules
 .. toctree::
   :maxdepth: 4

-   paddlespeech.cls.exps.panns.deploy.predict
--- a/docs/source/api/paddlespeech.cls.exps.panns.export_model.rst
+++ b/docs/source/api/paddlespeech.cls.exps.panns.export_model.rst
-paddlespeech.cls.exps.panns.export\_model module
-================================================
-
-.. automodule:: paddlespeech.cls.exps.panns.export_model
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.cls.exps.panns.predict.rst
+++ b/docs/source/api/paddlespeech.cls.exps.panns.predict.rst
-paddlespeech.cls.exps.panns.predict module
-==========================================
-
-.. automodule:: paddlespeech.cls.exps.panns.predict
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.cls.exps.panns.rst
+++ b/docs/source/api/paddlespeech.cls.exps.panns.rst
@@ -20,6 +20,3 @@ Submodules
 .. toctree::
   :maxdepth: 4

-   paddlespeech.cls.exps.panns.export_model
-   paddlespeech.cls.exps.panns.predict
-   paddlespeech.cls.exps.panns.train
--- a/docs/source/api/paddlespeech.cls.exps.panns.train.rst
+++ b/docs/source/api/paddlespeech.cls.exps.panns.train.rst
-paddlespeech.cls.exps.panns.train module
-========================================
-
-.. automodule:: paddlespeech.cls.exps.panns.train
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.kws.exps.mdtc.plot_det_curve.rst
+++ b/docs/source/api/paddlespeech.kws.exps.mdtc.plot_det_curve.rst
-paddlespeech.kws.exps.mdtc.plot\_det\_curve module
-==================================================
-
-.. automodule:: paddlespeech.kws.exps.mdtc.plot_det_curve
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.kws.exps.mdtc.rst
+++ b/docs/source/api/paddlespeech.kws.exps.mdtc.rst
@@ -14,6 +14,5 @@ Submodules

   paddlespeech.kws.exps.mdtc.collate
   paddlespeech.kws.exps.mdtc.compute_det
-   paddlespeech.kws.exps.mdtc.plot_det_curve
   paddlespeech.kws.exps.mdtc.score
   paddlespeech.kws.exps.mdtc.train
--- a/docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.rst
+++ b/docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.rst
@@ -13,5 +13,4 @@ Submodules
   :maxdepth: 4

   paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated
-   paddlespeech.s2t.decoders.ctcdecoder.scorer_deprecated
   paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper
--- a/docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.scorer_deprecated.rst
+++ b/docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.scorer_deprecated.rst
-paddlespeech.s2t.decoders.ctcdecoder.scorer\_deprecated module
-==============================================================
-
-.. automodule:: paddlespeech.s2t.decoders.ctcdecoder.scorer_deprecated
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.decoders.recog_bin.rst
+++ b/docs/source/api/paddlespeech.s2t.decoders.recog_bin.rst
-paddlespeech.s2t.decoders.recog\_bin module
-===========================================
-
-.. automodule:: paddlespeech.s2t.decoders.recog_bin
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.decoders.rst
+++ b/docs/source/api/paddlespeech.s2t.decoders.rst
@@ -23,5 +23,4 @@ Submodules
   :maxdepth: 4

   paddlespeech.s2t.decoders.recog
-   paddlespeech.s2t.decoders.recog_bin
   paddlespeech.s2t.decoders.utils
--- a/docs/source/api/paddlespeech.s2t.decoders.scorers.ngram.rst
+++ b/docs/source/api/paddlespeech.s2t.decoders.scorers.ngram.rst
-paddlespeech.s2t.decoders.scorers.ngram module
-==============================================
-
-.. automodule:: paddlespeech.s2t.decoders.scorers.ngram
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.decoders.scorers.rst
+++ b/docs/source/api/paddlespeech.s2t.decoders.scorers.rst
@@ -15,5 +15,4 @@ Submodules
   paddlespeech.s2t.decoders.scorers.ctc
   paddlespeech.s2t.decoders.scorers.ctc_prefix_score
   paddlespeech.s2t.decoders.scorers.length_bonus
-   paddlespeech.s2t.decoders.scorers.ngram
   paddlespeech.s2t.decoders.scorers.scorer_interface
--- a/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.client.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.client.rst
-paddlespeech.s2t.exps.deepspeech2.bin.deploy.client module
-==========================================================
-
-.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.client
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.record.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.record.rst
-paddlespeech.s2t.exps.deepspeech2.bin.deploy.record module
-==========================================================
-
-.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.record
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.rst
@@ -12,8 +12,5 @@ Submodules
 .. toctree::
   :maxdepth: 4

-   paddlespeech.s2t.exps.deepspeech2.bin.deploy.client
-   paddlespeech.s2t.exps.deepspeech2.bin.deploy.record
   paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime
-   paddlespeech.s2t.exps.deepspeech2.bin.deploy.send
   paddlespeech.s2t.exps.deepspeech2.bin.deploy.server
--- a/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.send.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.send.rst
-paddlespeech.s2t.exps.deepspeech2.bin.deploy.send module
-========================================================
-
-.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.send
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.exps.u2.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.u2.rst
@@ -21,4 +21,3 @@ Submodules
   :maxdepth: 4

   paddlespeech.s2t.exps.u2.model
-   paddlespeech.s2t.exps.u2.trainer
--- a/docs/source/api/paddlespeech.s2t.exps.u2.trainer.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.u2.trainer.rst
-paddlespeech.s2t.exps.u2.trainer module
-=======================================
-
-.. automodule:: paddlespeech.s2t.exps.u2.trainer
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.recog.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.recog.rst
-paddlespeech.s2t.exps.u2\_kaldi.bin.recog module
-================================================
-
-.. automodule:: paddlespeech.s2t.exps.u2_kaldi.bin.recog
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.rst
+++ b/docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.rst
@@ -12,6 +12,5 @@ Submodules
 .. toctree::
   :maxdepth: 4

-   paddlespeech.s2t.exps.u2_kaldi.bin.recog
   paddlespeech.s2t.exps.u2_kaldi.bin.test
   paddlespeech.s2t.exps.u2_kaldi.bin.train
--- a/docs/source/api/paddlespeech.s2t.training.extensions.rst
+++ b/docs/source/api/paddlespeech.s2t.training.extensions.rst
@@ -15,5 +15,3 @@ Submodules
   paddlespeech.s2t.training.extensions.evaluator
   paddlespeech.s2t.training.extensions.extension
   paddlespeech.s2t.training.extensions.plot
-   paddlespeech.s2t.training.extensions.snapshot
-   paddlespeech.s2t.training.extensions.visualizer
--- a/docs/source/api/paddlespeech.s2t.training.extensions.snapshot.rst
+++ b/docs/source/api/paddlespeech.s2t.training.extensions.snapshot.rst
-paddlespeech.s2t.training.extensions.snapshot module
-====================================================
-
-.. automodule:: paddlespeech.s2t.training.extensions.snapshot
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.training.extensions.visualizer.rst
+++ b/docs/source/api/paddlespeech.s2t.training.extensions.visualizer.rst
-paddlespeech.s2t.training.extensions.visualizer module
-======================================================
-
-.. automodule:: paddlespeech.s2t.training.extensions.visualizer
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.training.updaters.rst
+++ b/docs/source/api/paddlespeech.s2t.training.updaters.rst
@@ -13,5 +13,4 @@ Submodules
   :maxdepth: 4

   paddlespeech.s2t.training.updaters.standard_updater
-   paddlespeech.s2t.training.updaters.trainer
   paddlespeech.s2t.training.updaters.updater
--- a/docs/source/api/paddlespeech.s2t.training.updaters.trainer.rst
+++ b/docs/source/api/paddlespeech.s2t.training.updaters.trainer.rst
-paddlespeech.s2t.training.updaters.trainer module
-=================================================
-
-.. automodule:: paddlespeech.s2t.training.updaters.trainer
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.add_deltas.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.add_deltas.rst
-paddlespeech.s2t.transform.add\_deltas module
-=============================================
-
-.. automodule:: paddlespeech.s2t.transform.add_deltas
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.channel_selector.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.channel_selector.rst
-paddlespeech.s2t.transform.channel\_selector module
-===================================================
-
-.. automodule:: paddlespeech.s2t.transform.channel_selector
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.cmvn.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.cmvn.rst
-paddlespeech.s2t.transform.cmvn module
-======================================
-
-.. automodule:: paddlespeech.s2t.transform.cmvn
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.functional.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.functional.rst
-paddlespeech.s2t.transform.functional module
-============================================
-
-.. automodule:: paddlespeech.s2t.transform.functional
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.perturb.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.perturb.rst
-paddlespeech.s2t.transform.perturb module
-=========================================
-
-.. automodule:: paddlespeech.s2t.transform.perturb
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.rst
-paddlespeech.s2t.transform package
-==================================
-
-.. automodule:: paddlespeech.s2t.transform
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Submodules
----------
-
-.. toctree::
-   :maxdepth: 4
-
-   paddlespeech.s2t.transform.add_deltas
-   paddlespeech.s2t.transform.channel_selector
-   paddlespeech.s2t.transform.cmvn
-   paddlespeech.s2t.transform.functional
-   paddlespeech.s2t.transform.perturb
-   paddlespeech.s2t.transform.spec_augment
-   paddlespeech.s2t.transform.spectrogram
-   paddlespeech.s2t.transform.transform_interface
-   paddlespeech.s2t.transform.transformation
-   paddlespeech.s2t.transform.wpe
--- a/docs/source/api/paddlespeech.s2t.transform.spec_augment.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.spec_augment.rst
-paddlespeech.s2t.transform.spec\_augment module
-===============================================
-
-.. automodule:: paddlespeech.s2t.transform.spec_augment
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.spectrogram.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.spectrogram.rst
-paddlespeech.s2t.transform.spectrogram module
-=============================================
-
-.. automodule:: paddlespeech.s2t.transform.spectrogram
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.transform_interface.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.transform_interface.rst
-paddlespeech.s2t.transform.transform\_interface module
-======================================================
-
-.. automodule:: paddlespeech.s2t.transform.transform_interface
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.transformation.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.transformation.rst
-paddlespeech.s2t.transform.transformation module
-================================================
-
-.. automodule:: paddlespeech.s2t.transform.transformation
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.s2t.transform.wpe.rst
+++ b/docs/source/api/paddlespeech.s2t.transform.wpe.rst
-paddlespeech.s2t.transform.wpe module
-=====================================
-
-.. automodule:: paddlespeech.s2t.transform.wpe
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.server.engine.acs.python.acs_engine.rst
+++ b/docs/source/api/paddlespeech.server.engine.acs.python.acs_engine.rst
-paddlespeech.server.engine.acs.python.acs\_engine module
-========================================================
-
-.. automodule:: paddlespeech.server.engine.acs.python.acs_engine
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.server.engine.acs.python.rst
+++ b/docs/source/api/paddlespeech.server.engine.acs.python.rst
@@ -12,4 +12,3 @@ Submodules
 .. toctree::
   :maxdepth: 4

-   paddlespeech.server.engine.acs.python.acs_engine
--- a/docs/source/api/paddlespeech.server.utils.log.rst
+++ b/docs/source/api/paddlespeech.server.utils.log.rst
-paddlespeech.server.utils.log module
-====================================
-
-.. automodule:: paddlespeech.server.utils.log
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.t2s.exps.rst
+++ b/docs/source/api/paddlespeech.t2s.exps.rst
@@ -30,10 +30,10 @@ Submodules

   paddlespeech.t2s.exps.inference
   paddlespeech.t2s.exps.inference_streaming
+   paddlespeech.t2s.models.vits.monotonic_align
   paddlespeech.t2s.exps.ort_predict
   paddlespeech.t2s.exps.ort_predict_e2e
   paddlespeech.t2s.exps.ort_predict_streaming
-   paddlespeech.t2s.exps.stream_play_tts
   paddlespeech.t2s.exps.syn_utils
   paddlespeech.t2s.exps.synthesize
   paddlespeech.t2s.exps.synthesize_e2e

--- a/docs/source/api/paddlespeech.t2s.exps.stream_play_tts.rst
+++ b/docs/source/api/paddlespeech.t2s.exps.stream_play_tts.rst
-paddlespeech.t2s.exps.stream\_play\_tts module
-==============================================
-
-.. automodule:: paddlespeech.t2s.exps.stream_play_tts
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.t2s.models.ernie_sat.mlm.rst
+++ b/docs/source/api/paddlespeech.t2s.models.ernie_sat.mlm.rst
-paddlespeech.t2s.models.ernie\_sat.mlm module
-=============================================
-
-.. automodule:: paddlespeech.t2s.models.ernie_sat.mlm
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.core.rst
+++ b/docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.core.rst
-paddlespeech.t2s.models.vits.monotonic\_align.core module
-=========================================================
-
-.. automodule:: paddlespeech.t2s.models.vits.monotonic_align.core
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.rst
+++ b/docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.rst
-paddlespeech.t2s.models.vits.monotonic\_align package
-=====================================================
-
-.. automodule:: paddlespeech.t2s.models.vits.monotonic_align
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
-Submodules
----------
-
-.. toctree::
-   :maxdepth: 4
-
-   paddlespeech.t2s.models.vits.monotonic_align.core
-   paddlespeech.t2s.models.vits.monotonic_align.setup
--- a/docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.setup.rst
+++ b/docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.setup.rst
-paddlespeech.t2s.models.vits.monotonic\_align.setup module
-==========================================================
-
-.. automodule:: paddlespeech.t2s.models.vits.monotonic_align.setup
-   :members:
-   :undoc-members:
-   :show-inheritance:
--- a/docs/source/api/paddlespeech.t2s.models.vits.rst
+++ b/docs/source/api/paddlespeech.t2s.models.vits.rst
@@ -12,7 +12,6 @@ Subpackages
 .. toctree::
   :maxdepth: 4

-   paddlespeech.t2s.models.vits.monotonic_align
   paddlespeech.t2s.models.vits.wavenet

 Submodules

--- a/docs/source/tts/demo.rst
+++ b/docs/source/tts/demo.rst
@@ -42,7 +42,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td >Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0001.wav"
                        type="audio/wav">
@@ -50,7 +50,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                
            
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_0.wav"
                        type="audio/wav">
@@ -61,7 +61,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>in being comparatively modern.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0002.wav"
                        type="audio/wav">
@@ -70,7 +70,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.

            </td>
            <td>
-             <audio controls="controls">
+             <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_1.wav"
                        type="audio/wav">
@@ -81,7 +81,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0003.wav"
                        type="audio/wav">
@@ -89,7 +89,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_2.wav"
                        type="audio/wav">
@@ -100,7 +100,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>produced the block books, which were the immediate predecessors of the true printed book</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0004.wav"
                        type="audio/wav">
@@ -108,7 +108,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_3.wav"
                        type="audio/wav">
@@ -119,7 +119,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0005.wav"
                        type="audio/wav">
@@ -127,7 +127,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_4.wav"
                        type="audio/wav">
@@ -153,7 +153,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>昨日，这名“伤者”与医生全部被警方依法刑事拘留</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009901.wav"
                        type="audio/wav">
@@ -161,7 +161,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009901.wav"
                        type="audio/wav">
@@ -172,7 +172,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>钱伟长想到上海来办学校是经过深思熟虑的。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009902.wav"
                        type="audio/wav">
@@ -180,7 +180,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009902.wav"
                        type="audio/wav">
@@ -191,7 +191,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>她见我一进门就骂，吃饭时也骂，骂得我抬不起头。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009903.wav"
                        type="audio/wav">
@@ -199,7 +199,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009903.wav"
                        type="audio/wav">
@@ -210,7 +210,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>李述德在离开之前，只说了一句“柱驼杀父亲了”</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009904.wav"
                        type="audio/wav">
@@ -218,7 +218,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009904.wav"
                        type="audio/wav">
@@ -230,7 +230,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
        <tr>
            <td>这种车票和保险单捆绑出售属于重复性购买。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009905.wav"
                        type="audio/wav">
@@ -238,7 +238,7 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009905.wav"
                        type="audio/wav">
@@ -271,7 +271,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>Life was like a box of chocolates, you never know what you're gonna get.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                        <source
                            src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/001.wav"
                            type="audio/wav">
@@ -279,7 +279,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td> 
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                        <source
                            src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_1.wav"
                            type="audio/wav">
@@ -290,7 +290,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>With great power there must come great responsibility.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                        <source
                            src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/002.wav"
                            type="audio/wav">
@@ -298,7 +298,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td> 
-            <audio controls="controls">
+            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_2.wav"
                        type="audio/wav">
@@ -309,7 +309,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>To be or not to be, that’s a question.</td>
            <td>
-            <audio controls="controls">
+            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/003.wav"
                        type="audio/wav">
@@ -318,7 +318,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-            <audio controls="controls">
+            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_3.wav"
                        type="audio/wav">
@@ -330,7 +330,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>A man can be destroyed but not defeated.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/004.wav"
                        type="audio/wav">
@@ -339,7 +339,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_4.wav"
                        type="audio/wav">
@@ -350,7 +350,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>Do not, for one repulse, give up the purpose that you resolved to effort.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/005.wav"
                        type="audio/wav">
@@ -359,7 +359,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-            <audio controls="controls">
+            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_5.wav"
                        type="audio/wav">
@@ -370,7 +370,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>Death is just a part of life, something we're all destined to do.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/006.wav"
                        type="audio/wav">
@@ -379,7 +379,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_6.wav"
                        type="audio/wav">
@@ -390,7 +390,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>I think it's hard winning a war with words. </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/007.wav"
                        type="audio/wav">
@@ -399,7 +399,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-            <audio controls="controls">
+            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_7.wav"
                        type="audio/wav">
@@ -410,7 +410,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>Don’t argue with the people of strong determination, because they may change the fact!</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/008.wav"
                        type="audio/wav">
@@ -419,7 +419,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_8.wav"
                        type="audio/wav">
@@ -430,7 +430,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>Love you three thousand times.</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/009.wav"
                        type="audio/wav">
@@ -439,7 +439,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
            </td>

            <td> 
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_9.wav"
                        type="audio/wav">
@@ -465,7 +465,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>凯莫瑞安联合体的经济崩溃，迫在眉睫。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/001.wav"
                        type="audio/wav">
@@ -473,7 +473,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/001.wav"
                        type="audio/wav">
@@ -484,7 +484,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>对于所有想要离开那片废土，去寻找更美好生活的人来说。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/002.wav"
                        type="audio/wav">
@@ -492,7 +492,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/002.wav"
                        type="audio/wav">
@@ -503,7 +503,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>克哈，是你们所有人安全的港湾。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/003.wav"
                        type="audio/wav">
@@ -511,7 +511,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/003.wav"
                        type="audio/wav">
@@ -523,7 +523,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>为了保护尤摩扬人民不受异虫的残害，我所做的，比他们自己的领导委员会都多。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/004.wav"
                        type="audio/wav">
@@ -531,7 +531,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/004.wav"
                        type="audio/wav">
@@ -542,7 +542,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>无论他们如何诽谤我，我将继续为所有泰伦人的最大利益，而努力奋斗。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/005.wav"
                        type="audio/wav">
@@ -550,7 +550,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/005.wav"
                        type="audio/wav">
@@ -561,7 +561,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>身为你们的元首，我带领泰伦人实现了人类统治领地和经济的扩张。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/006.wav"
                        type="audio/wav">
@@ -569,7 +569,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/006.wav"
                        type="audio/wav">
@@ -580,7 +580,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>我们将继续成长，用行动回击那些只会说风凉话，不愿意和我们相向而行的害群之马。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/007.wav"
                        type="audio/wav">
@@ -588,7 +588,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/007.wav"
                        type="audio/wav">
@@ -599,7 +599,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>帝国武装力量，无数的优秀儿女，正时刻守卫着我们的家园大门，但是他们孤木难支。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/008.wav"
                        type="audio/wav">
@@ -607,7 +607,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/008.wav"
                        type="audio/wav">
@@ -618,7 +618,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        <tr>
            <td>凡是今天应征入伍者，所获的所有刑罚罪责，减半。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/009.wav"
                        type="audio/wav">
@@ -626,7 +626,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/009.wav"
                        type="audio/wav">
@@ -641,11 +641,11 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog

    <table border="2" cellspacing="1" cellpadding="1"> 
        <tr>
-            <th align="center"> FastSpeech2-Conformer + ParallelWaveGAN </th>
+            <th align="center"> FastSpeech2-Conformer + <br>ParallelWaveGAN </th>
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/001.wav"
                        type="audio/wav">
@@ -655,7 +655,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/002.wav"
                        type="audio/wav">
@@ -665,7 +665,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/003.wav"
                        type="audio/wav">
@@ -676,7 +676,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog

        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/004.wav"
                        type="audio/wav">
@@ -686,7 +686,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/005.wav"
                        type="audio/wav">
@@ -696,7 +696,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/006.wav"
                        type="audio/wav">
@@ -706,7 +706,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/007.wav"
                        type="audio/wav">
@@ -716,7 +716,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/008.wav"
                        type="audio/wav">
@@ -726,7 +726,7 @@ Audio samples generated by a TTS system. Text is first transformed into spectrog
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/009.wav"
                        type="audio/wav">
@@ -756,7 +756,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/0.wav"
                        type="audio/wav">
@@ -764,7 +764,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/0_002.wav"
                        type="audio/wav">
@@ -774,7 +774,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/1.wav"
                        type="audio/wav">
@@ -782,7 +782,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/1_002.wav"
                        type="audio/wav">
@@ -792,7 +792,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/2.wav"
                        type="audio/wav">
@@ -800,7 +800,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/2_002.wav"
                        type="audio/wav">
@@ -810,7 +810,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/3.wav"
                        type="audio/wav">
@@ -818,7 +818,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/3_002.wav"
                        type="audio/wav">
@@ -828,7 +828,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/4.wav"
                        type="audio/wav">
@@ -836,7 +836,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/4_002.wav"
                        type="audio/wav">
@@ -846,7 +846,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/5.wav"
                        type="audio/wav">
@@ -854,7 +854,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/5_002.wav"
                        type="audio/wav">
@@ -864,7 +864,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/6.wav"
                        type="audio/wav">
@@ -872,7 +872,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/6_002.wav"
                        type="audio/wav">
@@ -882,7 +882,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/7.wav"
                        type="audio/wav">
@@ -890,7 +890,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/7_002.wav"
                        type="audio/wav">
@@ -900,7 +900,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/8.wav"
                        type="audio/wav">
@@ -908,7 +908,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/8_002.wav"
                        type="audio/wav">
@@ -918,7 +918,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/9.wav"
                        type="audio/wav">
@@ -926,7 +926,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/9_002.wav"
                        type="audio/wav">
@@ -936,7 +936,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/10.wav"
                        type="audio/wav">
@@ -944,7 +944,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/10_002.wav"
                        type="audio/wav">
@@ -954,7 +954,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/11.wav"
                        type="audio/wav">
@@ -962,7 +962,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/11_002.wav"
                        type="audio/wav">
@@ -972,7 +972,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/12.wav"
                        type="audio/wav">
@@ -980,7 +980,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/12_002.wav"
                        type="audio/wav">
@@ -990,7 +990,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/13.wav"
                        type="audio/wav">
@@ -998,7 +998,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/13_002.wav"
                        type="audio/wav">
@@ -1008,7 +1008,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/14.wav"
                        type="audio/wav">
@@ -1016,7 +1016,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/14_002.wav"
                        type="audio/wav">
@@ -1026,7 +1026,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/15.wav"
                        type="audio/wav">
@@ -1034,7 +1034,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/15_002.wav"
                        type="audio/wav">
@@ -1044,7 +1044,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/16.wav"
                        type="audio/wav">
@@ -1052,7 +1052,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/16_002.wav"
                        type="audio/wav">
@@ -1062,7 +1062,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/17.wav"
                        type="audio/wav">
@@ -1070,7 +1070,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/17_002.wav"
                        type="audio/wav">
@@ -1080,7 +1080,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/18.wav"
                        type="audio/wav">
@@ -1088,7 +1088,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/18_002.wav"
                        type="audio/wav">
@@ -1098,7 +1098,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/19.wav"
                        type="audio/wav">
@@ -1106,7 +1106,7 @@ PaddleSpeech also support Multi-Speaker TTS, we provide the audio demos generate
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/19_002.wav"
                        type="audio/wav">
@@ -1142,7 +1142,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_001.wav"
                        type="audio/wav">
@@ -1150,7 +1150,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_001.wav"
                        type="audio/wav">
@@ -1158,7 +1158,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_001.wav"
                        type="audio/wav">
@@ -1168,7 +1168,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_002.wav"
                        type="audio/wav">
@@ -1176,7 +1176,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_002.wav"
                        type="audio/wav">
@@ -1184,7 +1184,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_002.wav"
                        type="audio/wav">
@@ -1194,7 +1194,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_003.wav"
                        type="audio/wav">
@@ -1202,7 +1202,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_003.wav"
                        type="audio/wav">
@@ -1210,7 +1210,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_003.wav"
                        type="audio/wav">
@@ -1220,7 +1220,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_004.wav"
                        type="audio/wav">
@@ -1228,7 +1228,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_004.wav"
                        type="audio/wav">
@@ -1236,7 +1236,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_004.wav"
                        type="audio/wav">
@@ -1246,7 +1246,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_005.wav"
                        type="audio/wav">
@@ -1254,7 +1254,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_005.wav"
                        type="audio/wav">
@@ -1262,7 +1262,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_005.wav"
                        type="audio/wav">
@@ -1272,7 +1272,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_007.wav"
                        type="audio/wav">
@@ -1280,7 +1280,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_007.wav"
                        type="audio/wav">
@@ -1288,7 +1288,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_007.wav"
                        type="audio/wav">
@@ -1298,7 +1298,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_008.wav"
                        type="audio/wav">
@@ -1306,7 +1306,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_008.wav"
                        type="audio/wav">
@@ -1314,7 +1314,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_008.wav"
                        type="audio/wav">
@@ -1324,7 +1324,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
        </tr>
        <tr>
             <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x0.8_009.wav"
                        type="audio/wav">
@@ -1332,7 +1332,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1_009.wav"
                        type="audio/wav">
@@ -1340,7 +1340,7 @@ The duration control in FastSpeech2 can control the speed of audios will keep th
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/speed/x1.2_009.wav"
                        type="audio/wav">
@@ -1374,7 +1374,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/001.wav"
                        type="audio/wav">
@@ -1382,7 +1382,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice/001.wav"
                        type="audio/wav">
@@ -1392,7 +1392,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/002.wav"
                        type="audio/wav">
@@ -1400,7 +1400,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice/002.wav"
                        type="audio/wav">
@@ -1410,7 +1410,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/003.wav"
                        type="audio/wav">
@@ -1418,7 +1418,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice/003.wav"
                        type="audio/wav">
@@ -1428,7 +1428,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/004.wav"
                        type="audio/wav">
@@ -1436,7 +1436,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice//004.wav"
                        type="audio/wav">
@@ -1446,7 +1446,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/005.wav"
                        type="audio/wav">
@@ -1454,7 +1454,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice//005.wav"
                        type="audio/wav">
@@ -1464,7 +1464,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/007.wav"
                        type="audio/wav">
@@ -1472,7 +1472,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice//007.wav"
                        type="audio/wav">
@@ -1482,7 +1482,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/008.wav"
                        type="audio/wav">
@@ -1490,7 +1490,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice//008.wav"
                        type="audio/wav">
@@ -1500,7 +1500,7 @@ The nomal audios are in the second column of the previous table.
        </tr>
        <tr>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/robot/009.wav"
                        type="audio/wav">
@@ -1508,7 +1508,7 @@ The nomal audios are in the second column of the previous table.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/child_voice//009.wav"
                        type="audio/wav">
@@ -1542,7 +1542,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>他只是一个纸老虎。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/001.wav"
                        type="audio/wav">
@@ -1550,7 +1550,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/001.wav"
                        type="audio/wav">
@@ -1561,7 +1561,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>手表厂有五种好产品。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/002.wav"
                        type="audio/wav">
@@ -1569,7 +1569,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/002.wav"
                        type="audio/wav">
@@ -1580,7 +1580,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>老板的轿车需要保养。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/003.wav"
                        type="audio/wav">
@@ -1588,7 +1588,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/003.wav"
                        type="audio/wav">
@@ -1599,7 +1599,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>我们所有人都好喜欢你呀。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/004.wav"
                        type="audio/wav">
@@ -1607,7 +1607,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/004.wav"
                        type="audio/wav">
@@ -1618,7 +1618,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>岂有此理。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/005.wav"
                        type="audio/wav">
@@ -1626,7 +1626,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/005.wav"
                        type="audio/wav">
@@ -1637,7 +1637,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>虎骨酒多少钱一瓶。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/006.wav"
                        type="audio/wav">
@@ -1645,7 +1645,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/006.wav"
                        type="audio/wav">
@@ -1656,7 +1656,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>这件事情需要冷处理。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/007.wav"
                        type="audio/wav">
@@ -1664,7 +1664,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/007.wav"
                        type="audio/wav">
@@ -1675,7 +1675,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>这个老奶奶是个大喇叭。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/008.wav"
                        type="audio/wav">
@@ -1683,7 +1683,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/008.wav"
                        type="audio/wav">
@@ -1694,7 +1694,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>我喜欢说相声。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/009.wav"
                        type="audio/wav">
@@ -1702,7 +1702,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/009.wav"
                        type="audio/wav">
@@ -1713,7 +1713,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
        <tr>
            <td>有一天，我路过了一栋楼。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/with_frontend/010.wav"
                        type="audio/wav">
@@ -1721,7 +1721,7 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/without_frontend/010.wav"
                        type="audio/wav">
@@ -1736,3 +1736,141 @@ We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.
    <br> 


+Finetune FastSpeech2 for CSMSC
+--------------------------------------
+
+Finetuning demos of `tts_finetune/tts3 <https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/tts_finetune/tts3>`_ for the CSMSC dataset.
+
+When finetuning for CSMSC, we found that the audio quality ranks as ``Freeze encoder`` > ``Non Frozen`` > ``Freeze encoder && duration_predictor``.
+
+.. raw:: html
+
+    <div class="table">
+    CSMSC reference audio (fastspeech2_csmsc + hifigan_aishell3 in CLI): 欢迎使用飞桨语音套件。
+    <br>
+    <br>
+    <audio controls="controls" style="width: 220px;">
+        <source
+            src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/ref_fastspeech2_csmsc_hifigan_aishell3.wav"
+            type="audio/wav">
+        Your browser does not support the <code>audio</code> element.
+    </audio>
+    <br>
+    <br>
+    <table border="2" cellspacing="1" cellpadding="1">
+        <tr>
+            <th align="center"> Frozen Method</th>
+            <th align="center"> train_num=10, <br> bs=10, <br> epoch=100, <br> lr=1e-4 </th>
+            <th align="center"> train_num=18, <br> bs=18, <br> epoch=100, <br> lr=1e-4 </th>
+            <th align="center"> train_num=97, <br> bs=64, <br> epoch=100, <br> lr=1e-4 </th>
+            <th align="center"> train_num=196, <br> bs=64, <br> epoch=100, <br> lr=1e-4 </th>
+        </tr>
+        <tr>
+            <td>Non Frozen</td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train10_bn10_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train18_bn18_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train97_bn64_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train196_bn64_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>Freeze encoder</td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train10_fr_encoder_bn10_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train18_fr_encoder_bn18_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train97_fr_encoder_bn64_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train196_fr_encoder_bn64_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+        <tr>
+            <td>Freeze encoder && <br> duration_predictor</td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train10_fr_encoder_duration_bn10_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train18_fr_encoder_duration_bn18_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train97_fr_encoder_duration_bn64_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+            <td>
+                <audio controls="controls" style="width: 150px;">
+                    <source
+                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/finetune/train196_fr_encoder_duration_bn64_epoch100_lr0.0001.wav"
+                        type="audio/wav">
+                    Your browser does not support the <code>audio</code> element.
+                </audio>
+            </td>
+        </tr>
+    </table>
+    </div>
+    <br>
+    <br> 
--- a/docs/source/tts/demo_2.rst
+++ b/docs/source/tts/demo_2.rst
@@ -19,7 +19,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>早上好，今天是2020/10/29，最低温度是-3°C。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/001.wav"
                        type="audio/wav">
@@ -27,7 +27,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/001.wav"
                        type="audio/wav">
@@ -38,7 +38,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>你好，我的编号是37249，很高兴为您服务。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/002.wav"
                        type="audio/wav">
@@ -46,7 +46,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/002.wav"
                        type="audio/wav">
@@ -57,7 +57,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>我们公司有37249个人。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/003.wav"
                        type="audio/wav">
@@ -65,7 +65,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/003.wav"
                        type="audio/wav">
@@ -76,7 +76,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>我出生于2005年10月8日。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/004.wav"
                        type="audio/wav">
@@ -84,7 +84,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/004.wav"
                        type="audio/wav">
@@ -95,7 +95,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>我们习惯在12:30吃中午饭。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/005.wav"
                        type="audio/wav">
@@ -103,7 +103,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/005.wav"
                        type="audio/wav">
@@ -114,7 +114,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>只要有超过3/4的人投票同意，你就会成为我们的新班长。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/006.wav"
                        type="audio/wav">
@@ -122,7 +122,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/006.wav"
                        type="audio/wav">
@@ -133,7 +133,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>我要买一只价值999.9元的手表。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/007.wav"
                        type="audio/wav">
@@ -141,7 +141,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/007.wav"
                        type="audio/wav">
@@ -152,7 +152,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>我的手机号是18544139121，欢迎来电。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/008.wav"
                        type="audio/wav">
@@ -160,7 +160,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/008.wav"
                        type="audio/wav">
@@ -171,7 +171,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>明天有62%的概率降雨。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/009.wav"
                        type="audio/wav">
@@ -179,7 +179,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/009.wav"
                        type="audio/wav">
@@ -190,7 +190,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>手表厂有五种好产品。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/010.wav"
                        type="audio/wav">
@@ -198,7 +198,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/010.wav"
                        type="audio/wav">
@@ -209,7 +209,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>跑马场有五百匹很勇敢的千里马。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/011.wav"
                        type="audio/wav">
@@ -217,7 +217,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/011.wav"
                        type="audio/wav">
@@ -228,7 +228,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>有一天，我看到了一栋楼，我顿感不妙，因为我看不清里面有没有人。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/012.wav"
                        type="audio/wav">
@@ -236,7 +236,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/012.wav"
                        type="audio/wav">
@@ -247,7 +247,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>史小姐拿着小雨伞去找她的老保姆了。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/013.wav"
                        type="audio/wav">
@@ -255,7 +255,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/013.wav"
                        type="audio/wav">
@@ -266,7 +266,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
        <tr>
            <td>不要相信这个老奶奶说的话，她一点儿也不好。</td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/014.wav"
                        type="audio/wav">
@@ -274,7 +274,7 @@ FastSpeech2 + Parallel WaveGAN in CSMSC
                </audio>
            </td>
            <td>
-                <audio controls="controls">
+                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.bj.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/014.wav"
                        type="audio/wav">

--- a/examples/other/mfa/run.sh
+++ b/examples/other/mfa/run.sh
@@ -39,7 +39,7 @@ fi
 export PATH="$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin"
 if [ ! -d "$EXP_DIR/baker_alignment" ]; then
    echo "Start MFA training..."
-    mfa_train_and_align $EXP_DIR/baker_corpus "$EXP_DIR/$LEXICON_NAME.lexicon" $EXP_DIR/baker_alignment -o $EXP_DIR/baker_model --clean --verbose --temp_directory exp/.mfa_train_and_align
+    mfa_train_and_align $EXP_DIR/baker_corpus "$EXP_DIR/$LEXICON_NAME.lexicon" $EXP_DIR/baker_alignment -o $EXP_DIR/baker_model --clean --verbose --temp_directory $EXP_DIR/.mfa_train_and_align
    echo "training done!"
    echo "results: $EXP_DIR/baker_alignment"
    echo "model: $EXP_DIR/baker_model"