From d923a930fa7b1af139220b0babda5b7c807a13e6 Mon Sep 17 00:00:00 2001 From: Xinghai Sun Date: Thu, 3 Aug 2017 12:26:33 +0800 Subject: [PATCH] Add ASR demo usage to README.md for DS2. --- deep_speech_2/README.md | 16 ++++++++++++++++ deep_speech_2/demo_client.py | 17 +++++++++++++++-- deep_speech_2/demo_server.py | 3 ++- 3 files changed, 33 insertions(+), 3 deletions(-) mode change 100644 => 100755 deep_speech_2/README.md diff --git a/deep_speech_2/README.md b/deep_speech_2/README.md old mode 100644 new mode 100755 index 62b05171..27dc6488 --- a/deep_speech_2/README.md +++ b/deep_speech_2/README.md @@ -143,3 +143,19 @@ python tune.py --help ``` Then reset parameters with the tuning result before inference or evaluating. + +### Playing with the ASR Demo + +A real-time ASR demo (`demo_server.py` and `demo_client.py`) is provided for users to try out the ASR model with their own voice. After a model and a language model are prepared, we can first start the demo server: + +``` +CUDA_VISIBLE_DEVICES=0 python demo_server.py +``` +And then in another console, start the client: + +``` +python demo_client.py +``` +On the client console, press and hold the "white-space" key and start talking, then release the "white-space" key when you finish your speech. The decoding results (inferred transcription) will be displayed. + +If you would like to start the server and the client on two different machines, please use `--host_ip` and `--host_port` to indicate the actual IP address and port, for both `demo_server.py` and `demo_client.py`. 
diff --git a/deep_speech_2/demo_client.py b/deep_speech_2/demo_client.py index a789d816..ddf4dd1b 100644 --- a/deep_speech_2/demo_client.py +++ b/deep_speech_2/demo_client.py @@ -1,10 +1,23 @@ +"""Client-end for the ASR demo.""" from pynput import keyboard import struct import socket import sys +import argparse import pyaudio -HOST, PORT = "10.104.18.14", 8086 +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--host_ip", + default="localhost", + type=str, + help="Server IP address. (default: %(default)s)") +parser.add_argument( + "--host_port", + default=8086, + type=int, + help="Server Port. (default: %(default)s)") +args = parser.parse_args() is_recording = False enable_trigger_record = True @@ -42,7 +55,7 @@ def callback(in_data, frame_count, time_info, status): elif len(data_list) > 0: # Connect to server and send data sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((HOST, PORT)) + sock.connect((args.host_ip, args.host_port)) sent = ''.join(data_list) sock.sendall(struct.pack('>i', len(sent)) + sent) print('Speech[length=%d] Sent.' % len(sent)) diff --git a/deep_speech_2/demo_server.py b/deep_speech_2/demo_server.py index d6c0de40..8a55e726 100644 --- a/deep_speech_2/demo_server.py +++ b/deep_speech_2/demo_server.py @@ -1,3 +1,4 @@ +"""Server-end for the ASR demo.""" import os import time import random @@ -17,7 +18,7 @@ from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--host_ip", - default="10.104.18.14", + default="localhost", type=str, help="Server IP address. (default: %(default)s)") parser.add_argument( -- GitLab