diff --git a/deep_speech_2/README.md b/deep_speech_2/README.md old mode 100644 new mode 100755 index 62b051714becd7276e99bd564ce57586a8d02dca..27dc648873fcd9c3767010cba3de839d53c56352 --- a/deep_speech_2/README.md +++ b/deep_speech_2/README.md @@ -143,3 +143,19 @@ python tune.py --help ``` Then reset parameters with the tuning result before inference or evaluating. + +### Playing with the ASR Demo + +A real-time ASR demo (`demo_server.py` and `demo_client.py`) is prepared for users to try out the ASR model with their own voice. After a model and a language model are prepared, we can first start the demo server: + +``` +CUDA_VISIBLE_DEVICES=0 python demo_server.py +``` +And then in another console, start the client: + +``` +python demo_client.py +``` +On the client console, press and hold the "white-space" key and start talking, then release the "white-space" key when you finish your speech. The decoding results (inferred transcription) will be displayed. + +If you would like to start the server and client on two different machines, please use `--host_ip` and `--host_port` to indicate the actual IP address and port, for both `demo_server.py` and `demo_client.py`. diff --git a/deep_speech_2/demo_client.py b/deep_speech_2/demo_client.py index a789d816479d626c86630ac9760db42130ff6572..ddf4dd1bf3f5ea62661e181e0dd2fb3f3b1379c6 100644 --- a/deep_speech_2/demo_client.py +++ b/deep_speech_2/demo_client.py @@ -1,10 +1,23 @@ +"""Client-end for the ASR demo.""" from pynput import keyboard import struct import socket import sys +import argparse import pyaudio -HOST, PORT = "10.104.18.14", 8086 +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument( + "--host_ip", + default="localhost", + type=str, + help="Server IP address. (default: %(default)s)") +parser.add_argument( + "--host_port", + default=8086, + type=int, + help="Server Port. 
(default: %(default)s)") +args = parser.parse_args() is_recording = False enable_trigger_record = True @@ -42,7 +55,7 @@ def callback(in_data, frame_count, time_info, status): elif len(data_list) > 0: # Connect to server and send data sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((HOST, PORT)) + sock.connect((args.host_ip, args.host_port)) sent = ''.join(data_list) sock.sendall(struct.pack('>i', len(sent)) + sent) print('Speech[length=%d] Sent.' % len(sent)) diff --git a/deep_speech_2/demo_server.py b/deep_speech_2/demo_server.py index d6c0de40acf1a2e8e7ceb7cb1aae9c61f30f0bf3..8a55e726570f06f55b4d19bd6af06dd42ed01ae1 100644 --- a/deep_speech_2/demo_server.py +++ b/deep_speech_2/demo_server.py @@ -1,3 +1,4 @@ +"""Server-end for the ASR demo.""" import os import time import random @@ -17,7 +18,7 @@ from data_utils.utils import read_manifest parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--host_ip", - default="10.104.18.14", + default="localhost", type=str, help="Server IP address. (default: %(default)s)") parser.add_argument(