diff --git a/setup.py b/setup.py
index a6b18f9793163f65f8531be330586f9e26c778e1..cdb899e426ce82f6ea356fddf8b53d29cf17ed0b 100644
--- a/setup.py
+++ b/setup.py
@@ -61,6 +61,9 @@ requirements = {
         "visualdl",
         "webrtcvad",
         "yacs~=0.1.8",
+        # fastapi server
+        "fastapi",
+        "uvicorn",
     ],
     "develop": [
         "ConfigArgParse",
diff --git a/speechserving/speechserving/conf/application.yaml b/speechserving/speechserving/conf/application.yaml
index 29b40b15826cd3d1f18eaa427766e1610484dc7a..8c4d9bc62d2ca4dac2676c48ca7154e85e414c35 100644
--- a/speechserving/speechserving/conf/application.yaml
+++ b/speechserving/speechserving/conf/application.yaml
@@ -11,4 +11,5 @@ port: 8090
 ##################################################################
 # add engine type (Options: asr, tts) and config file here.
 engine_backend:
-    asr: 'conf/asr/asr.yaml'
\ No newline at end of file
+    asr: 'conf/asr/asr.yaml'
+    tts: 'conf/tts/tts.yaml'
diff --git a/speechserving/speechserving/engine/tts/python/tts_engine.py b/speechserving/speechserving/engine/tts/python/tts_engine.py
index 65e35fb8fe77bd86b33d9fee91de3a70499c1fc1..e8d42619bdddcbedf67831d47f9831680dbfe3f2 100644
--- a/speechserving/speechserving/engine/tts/python/tts_engine.py
+++ b/speechserving/speechserving/engine/tts/python/tts_engine.py
@@ -13,19 +13,18 @@
 # limitations under the License.
 import argparse
 import base64
-import os
-import random
+import io
 
 import librosa
 import numpy as np
 import soundfile as sf
 import yaml
 from engine.base_engine import BaseEngine
-from ffmpeg import audio
+from scipy.io import wavfile
 
 from paddlespeech.cli.log import logger
 from paddlespeech.cli.tts.infer import TTSExecutor
-from utils.audio_types import wav2pcm
+from utils.audio_process import change_speed
 from utils.errors import ErrorCode
 from utils.exception import ServerBaseException
 
@@ -107,26 +106,27 @@ class TTSEngine(BaseEngine):
         wav_vol = wav_tar_fs * volume
 
         # transform speed
-        hash = random.getrandbits(128)
-        temp_wav = str(hash) + ".wav"
-        temp_speed_wav = str(hash + 1) + ".wav"
-        sf.write(temp_wav, wav_vol.reshape(-1, 1), target_fs)
-        audio.a_speed(temp_wav, speed, temp_speed_wav)
-        os.system("rm %s" % (temp_wav))
+        try:  # windows not support soxbindings
+            wav_speed = change_speed(wav_vol, speed, target_fs)
+        except:
+            raise ServerBaseException(
+                ErrorCode.SERVER_INTERNAL_ERR,
+                "Can not install soxbindings on your system.")
 
         # wav to base64
-        with open(temp_speed_wav, 'rb') as f:
-            base64_bytes = base64.b64encode(f.read())
-            wav_base64 = base64_bytes.decode('utf-8')
+        buf = io.BytesIO()
+        wavfile.write(buf, target_fs, wav_speed)
+        base64_bytes = base64.b64encode(buf.read())
+        wav_base64 = base64_bytes.decode('utf-8')
 
         # save audio
         if audio_path is not None and audio_path.endswith(".wav"):
-            os.system("mv %s %s" % (temp_speed_wav, audio_path))
+            sf.write(audio_path, wav_speed, target_fs)
         elif audio_path is not None and audio_path.endswith(".pcm"):
-            wav2pcm(temp_speed_wav, audio_path, data_type=np.int16)
-            os.system("rm %s" % (temp_speed_wav))
-        else:
-            os.system("rm %s" % (temp_speed_wav))
+            wav_norm = wav_speed * (32767 / max(0.001,
+                                                np.max(np.abs(wav_speed))))
+            with open(audio_path, "wb") as f:
+                f.write(wav_norm.astype(np.int16))
 
         return target_fs, wav_base64
 
diff --git a/speechserving/speechserving/utils/audio_process.py b/speechserving/speechserving/utils/audio_process.py
new file mode 100644
index 0000000000000000000000000000000000000000..51a19b3600a5295bde1a41e92c6dd32fe681553f
--- /dev/null
+++ b/speechserving/speechserving/utils/audio_process.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import wave
+
+import numpy as np
+
+
+def wav2pcm(wavfile, pcmfile, data_type=np.int16):
+    """Write a WAV file's samples to ``pcmfile``, assuming a 44-byte header."""
+    with open(wavfile, "rb") as f:  # context manager closes the handle
+        f.read(44)  # skip the header bytes; the rest is read as samples
+        data = np.fromfile(f, dtype=data_type)
+    data.tofile(pcmfile)
+
+
+def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
+    """Write the raw PCM bytes of ``pcm_file`` to ``wav_file`` as a WAV file.
+
+    :raises ValueError: If ``bits`` is not a multiple of 8.
+    """
+    with open(pcm_file, 'rb') as pcmf:
+        pcmdata = pcmf.read()
+    if bits % 8 != 0:
+        raise ValueError("bits % 8 must == 0. now bits:" + str(bits))
+    with wave.open(wav_file, 'wb') as wavf:  # closed even if a write fails
+        wavf.setnchannels(channels)
+        wavf.setsampwidth(bits // 8)
+        wavf.setframerate(sample_rate)
+        wavf.writeframes(pcmdata)
+
+
+def change_speed(sample_raw, speed_rate, sample_rate):
+    """Change the audio tempo with sox and return the resulting samples.
+
+    The input is not modified. When speed_rate is exactly 1.0 the input
+    array itself is returned, so callers always get usable audio back.
+
+    :param sample_raw: Audio samples, passed to sox as ``input_array``.
+    :param speed_rate: Rate of speed change:
+                       speed_rate > 1.0, speed up the audio;
+                       speed_rate = 1.0, unchanged;
+                       speed_rate < 1.0, slow down the audio;
+                       speed_rate <= 0.0, not allowed, raise ValueError.
+    :type speed_rate: float
+    :param sample_rate: Sample rate of ``sample_raw``, passed to sox.
+    :return: ``sample_raw`` if speed_rate == 1.0, else new float32 samples.
+    :raises ValueError: If speed_rate <= 0.0.
+    :raises RuntimeError: If soxbindings is missing and cannot be installed.
+    """
+    if speed_rate <= 0:
+        raise ValueError("speed_rate should be greater than zero.")
+    if speed_rate == 1.0:
+        return sample_raw  # no-op; returning None would break callers
+
+    # soxbindings is imported lazily and pip-installed on first use if absent.
+    try:
+        import soxbindings as sox
+    except ImportError:
+        try:
+            from paddlespeech.s2t.utils import dynamic_pip_install
+            dynamic_pip_install.install("sox")
+            dynamic_pip_install.install("soxbindings")
+            import soxbindings as sox
+        except Exception as err:
+            raise RuntimeError(
+                "Can not install soxbindings on your system.") from err
+
+    # Run the tempo effect single-threaded, then drop the size-1 last axis.
+    tfm = sox.Transformer()
+    tfm.set_globals(multithread=False)
+    tfm.tempo(speed_rate)
+    sample_speed = tfm.build_array(
+        input_array=sample_raw,
+        sample_rate_in=sample_rate).squeeze(-1).astype(np.float32).copy()
+
+    return sample_speed
diff --git a/speechserving/speechserving/utils/audio_types.py b/speechserving/speechserving/utils/audio_types.py
deleted file mode 100644
index eb655ddd5902d27fb91fbc0718f7362400af91b4..0000000000000000000000000000000000000000
--- a/speechserving/speechserving/utils/audio_types.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import wave
-
-import numpy as np
-
-
-def wav2pcm(wavfile, pcmfile, data_type=np.int16):
-    f = open(wavfile, "rb")
-    f.seek(0)
-    f.read(44)
-    data = np.fromfile(f, dtype=data_type)
-    data.tofile(pcmfile)
-
-
-def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
-    pcmf = open(pcm_file, 'rb')
-    pcmdata = pcmf.read()
-    pcmf.close()
-
-    if bits % 8 != 0:
-        raise ValueError("bits % 8 must == 0. now bits:" + str(bits))
-
-    wavfile = wave.open(wav_file, 'wb')
-    wavfile.setnchannels(channels)
-    wavfile.setsampwidth(bits // 8)
-    wavfile.setframerate(sample_rate)
-    wavfile.writeframes(pcmdata)
-    wavfile.close()