fix the asr online client bug, return None, test=doc

48fa84be · xiongxinlei · babac27a · 48fa84be · 48fa84be · 48fa84be
7 changed files
--- a/paddlespeech/s2t/modules/encoder.py
+++ b/paddlespeech/s2t/modules/encoder.py
@@ -317,8 +317,6 @@ class BaseEncoder(nn.Layer):
        outputs = []
        offset = 0
        # Feed forward overlap input step by step
-        print(f"context: {context}")
-        print(f"stride: {stride}")
        for cur in range(0, num_frames - context + 1, stride):
            end = min(cur + decoding_window, num_frames)
            chunk_xs = xs[:, cur:end, :]

--- a/paddlespeech/server/README.md
+++ b/paddlespeech/server/README.md
@@ -35,3 +35,16 @@
 ```bash
 paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
 ```
+
+## Online ASR Server
+
+### Launch online asr server
+```
+paddlespeech_server start --config_file conf/ws_conformer_application.yaml
+```
+
+### Access online asr server
+
+```
+paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
+```
\ No newline at end of file
--- a/paddlespeech/server/README_cn.md
+++ b/paddlespeech/server/README_cn.md
@@ -35,3 +35,17 @@
 ```bash
 paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
 ```
+
+## 流式ASR
+
+### 启动流式语音识别服务
+
+```
+paddlespeech_server start --config_file conf/ws_conformer_application.yaml
+```
+
+### 访问流式语音识别服务
+
+```
+paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input zh.wav
+```
\ No newline at end of file
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@@ -277,11 +277,12 @@ class ASRClientExecutor(BaseExecutor):
                lang=lang,
                audio_format=audio_format)
            time_end = time.time()
-            logger.info(res.json())
+            logger.info(res)
            logger.info("Response time %f s." % (time_end - time_start))
            return True
        except Exception as e:
            logger.error("Failed to speech recognition.")
+            logger.error(e)
            return False

    @stats_wrapper
@@ -299,9 +300,10 @@ class ASRClientExecutor(BaseExecutor):
        logging.info("asr websocket client start")
        handler = ASRAudioHandler(server_ip, port)
        loop = asyncio.get_event_loop()
-        loop.run_until_complete(handler.run(input))
+        res = loop.run_until_complete(handler.run(input))
        logging.info("asr websocket client finished")

+        return res['asr_results']

 @cli_client_register(
    name='paddlespeech_client.cls', description='visit cls service')

--- a/paddlespeech/server/engine/asr/online/asr_engine.py
+++ b/paddlespeech/server/engine/asr/online/asr_engine.py
@@ -473,7 +473,7 @@ class PaddleASRConnectionHanddler:
        ctc_probs = self.model.ctc.log_softmax(ys)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)

-        self.searcher.search(None, ctc_probs, self.cached_feat.place)
+        self.searcher.search(ctc_probs, self.cached_feat.place)

        self.hyps = self.searcher.get_one_best_hyps()
        assert self.cached_feat.shape[0] == 1
@@ -823,7 +823,7 @@ class ASRServerExecutor(ASRExecutor):
        ctc_probs = self.model.ctc.log_softmax(
            encoder_out)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)
-        self.searcher.search(xs, ctc_probs, xs.place)
+        self.searcher.search(ctc_probs, xs.place)
        # update the one best result
        self.hyps = self.searcher.get_one_best_hyps()


--- a/paddlespeech/server/engine/asr/online/ctc_search.py
+++ b/paddlespeech/server/engine/asr/online/ctc_search.py
@@ -24,19 +24,18 @@ class CTCPrefixBeamSearch:
        """Implement the ctc prefix beam search

        Args:
-            config (_type_): _description_
+            config (yacs.config.CfgNode): _description_
        """
        self.config = config
        self.reset()

-    def search(self, xs, ctc_probs, device, blank_id=0):
+    def search(self, ctc_probs, device, blank_id=0):
        """ctc prefix beam search method decode a chunk feature

        Args:
            xs (paddle.Tensor): feature data
            ctc_probs (paddle.Tensor): the ctc probability of all the tokens
-            encoder_out (paddle.Tensor): _description_
-            encoder_mask (_type_): _description_
+            device (paddle.fluid.core_avx.Place): the feature host device, such as CUDAPlace(0).
            blank_id (int, optional): the blank id in the vocab. Defaults to 0.

        Returns:
@@ -45,7 +44,6 @@ class CTCPrefixBeamSearch:
        # decode 
        logger.info("start to ctc prefix search")

-        # device = xs.place
        batch_size = 1
        beam_size = self.config.beam_size
        maxlen = ctc_probs.shape[0]

--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@@ -34,10 +34,9 @@ class ASRAudioHandler:
    def read_wave(self, wavfile_path: str):
        samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
        x_len = len(samples)
-        # chunk_stride = 40 * 16  #40ms, sample_rate = 16kHz
-        chunk_size = 80 * 16  #80ms, sample_rate = 16kHz

-        if x_len % chunk_size != 0:
+        chunk_size = 85 * 16  #80ms, sample_rate = 16kHz
+        if x_len % chunk_size!= 0:
            padding_len_x = chunk_size - x_len % chunk_size
        else:
            padding_len_x = 0
@@ -48,7 +47,6 @@ class ASRAudioHandler:
        assert (x_len + padding_len_x) % chunk_size == 0
        num_chunk = (x_len + padding_len_x) / chunk_size
        num_chunk = int(num_chunk)
-
        for i in range(0, num_chunk):
            start = i * chunk_size
            end = start + chunk_size
@@ -82,7 +80,6 @@ class ASRAudioHandler:
                msg = json.loads(msg)
                logging.info("receive msg={}".format(msg))

-            result = msg
            # finished 
            audio_info = json.dumps(
                {
@@ -98,8 +95,8 @@ class ASRAudioHandler:
            
            # decode the bytes to str
            msg = json.loads(msg)
-            logging.info("receive msg={}".format(msg))
-
+            logging.info("final receive msg={}".format(msg))
+            result = msg
            return result