Floating point exception
Created by: johntyty912
I tried to write a script that records my voice and stops recording when I finish speaking, but it didn't work: when the script reaches feature = data_generator.process_utterance(filename, "")
in test.py, I get a Floating point exception. However, when I run demo_server.py and demo_client.py, everything works perfectly. Please help!
Update: I found that my speech_segment.samples contains only 0.0s. What is the problem?
Update 2: I found that this is because my record2.py cannot record anything. Any suggestions?
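To check whether the microphone is capturing anything at all, here is a minimal sketch (same pyaudio settings as record2.py below; it assumes the default input device) that prints the peak amplitude of each captured chunk. If it only ever prints 0, the capture device or its settings are the problem rather than the recording logic:

import pyaudio
from array import array

CHUNK_SIZE = 1024
RATE = 16000

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt32, channels=1, rate=RATE,
                input=True, frames_per_buffer=CHUNK_SIZE)
for _ in range(int(RATE / CHUNK_SIZE * 3)):  # read roughly 3 seconds
    chunk = array('i', stream.read(CHUNK_SIZE))
    print(max(abs(s) for s in chunk))  # peak amplitude; stays 0 for pure silence
stream.stop_stream()
stream.close()
p.terminate()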
here's my code:
record2.py
from sys import byteorder
from array import array
from struct import pack

import pyaudio
import wave

THRESHOLD = 1000000000  # silence threshold for signed 32-bit samples (int32 max is 2147483647)
CHUNK_SIZE = 1024
FORMAT = pyaudio.paInt32
RATE = 16000


def is_silent(snd_data):
    "Returns 'True' if below the 'silent' threshold"
    return max(snd_data) < THRESHOLD


def normalize(snd_data):
    "Average the volume out"
    MAXIMUM = 2147483648
    times = float(MAXIMUM) / max(abs(i) for i in snd_data)
    r = array('i')
    for i in snd_data:
        r.append(int(i * times))
    return r


def trim(snd_data):
    "Trim the blank spots at the start and end"
    def _trim(snd_data):
        snd_started = False
        r = array('i')
        for i in snd_data:
            if not snd_started and abs(i) > THRESHOLD:
                snd_started = True
                r.append(i)
            elif snd_started:
                r.append(i)
        return r

    # Trim to the left
    snd_data = _trim(snd_data)

    # Trim to the right
    snd_data.reverse()
    snd_data = _trim(snd_data)
    snd_data.reverse()
    return snd_data


def add_silence(snd_data, seconds):
    "Add silence to the start and end of 'snd_data' of length 'seconds' (float)"
    r = array('i', [0 for i in xrange(int(seconds * RATE))])
    r.extend(snd_data)
    r.extend([0 for i in xrange(int(seconds * RATE))])
    return r


def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed 32-bit ints.

    Normalizes the audio, trims silence from the
    start and end, and pads with 0.5 seconds of
    blank sound to make sure VLC et al can play
    it without getting chopped off.
    """
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=1, rate=RATE,
                    input=True, output=True,
                    frames_per_buffer=CHUNK_SIZE)

    num_silent = 0
    snd_started = False
    r = array('i')

    print('listening')
    while True:
        # little endian, signed 32-bit ints
        snd_data = array('i', stream.read(CHUNK_SIZE))
        print(max(snd_data))
        if byteorder == 'big':
            snd_data.byteswap()
        r.extend(snd_data)

        silent = is_silent(snd_data)
        if silent and snd_started:
            num_silent += 1
        elif not silent and not snd_started:
            snd_started = True
        if snd_started and num_silent > 10:
            break

    sample_width = p.get_sample_size(FORMAT)
    stream.stop_stream()
    stream.close()
    p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r


def record_to_file(path):
    "Records from the microphone and outputs the resulting data to 'path'"
    sample_width, data = record()
    data = pack('<' + ('i' * len(data)), *data)

    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()


if __name__ == '__main__':
    print("please speak a word into the microphone")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")
test.py
"""Record from the microphone and run ASR inference (adapted from the demo server)."""
import argparse
import functools

import paddle.v2 as paddle
import _init_paths
from data_utils.data import DataGenerator
from model_utils.model import DeepSpeech2Model
from utils.utility import add_arguments, print_arguments
from record2 import record_to_file

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('host_port',        int,    8086,  "Server's IP port.")
add_arg('beam_size',        int,    300,   "Beam search width.")
add_arg('num_conv_layers',  int,    2,     "# of convolution layers.")
add_arg('num_rnn_layers',   int,    3,     "# of recurrent layers.")
add_arg('rnn_layer_size',   int,    1024,  "# of recurrent cells per layer.")
add_arg('alpha',            float,  2.6,   "Coef of LM for beam search.")
add_arg('beta',             float,  5.0,   "Coef of WC for beam search.")
add_arg('cutoff_prob',      float,  0.99,  "Cutoff probability for pruning.")
add_arg('cutoff_top_n',     int,    40,    "Cutoff number for pruning.")
add_arg('use_gru',          bool,   True,  "Use GRUs instead of simple RNNs.")
add_arg('use_gpu',          bool,   False, "Use GPU or not.")
add_arg('share_rnn_weights', bool,  False, "Share input-hidden weights across "
                                           "bi-directional RNNs. Not for GRU.")
add_arg('host_ip',          str,
        'localhost',
        "Server's IP address.")
add_arg('speech_save_dir',  str,
        'demo_cache',
        "Directory to save demo audios.")
add_arg('warmup_manifest',  str,
        'data/aishell/manifest.test',
        "Filepath of manifest to warm up.")
add_arg('mean_std_path',    str,
        'models/aishell/mean_std.npz',
        "Filepath of normalizer's mean & std.")
add_arg('vocab_path',       str,
        'models/aishell/vocab.txt',
        "Filepath of vocabulary.")
add_arg('model_path',       str,
        'models/aishell/params.tar.gz',
        "If None, the training starts from scratch, "
        "otherwise, it resumes from the pre-trained model.")
add_arg('lang_model_path',  str,
        'models/lm/zh_giga.no_cna_cmn.prune01244.klm',
        "Filepath for language model.")
add_arg('decoding_method',  str,
        'ctc_beam_search',
        "Decoding method. Options: ctc_beam_search, ctc_greedy",
        choices=['ctc_beam_search', 'ctc_greedy'])
add_arg('specgram_type',    str,
        'linear',
        "Audio feature type. Options: linear, mfcc.",
        choices=['linear', 'mfcc'])
# yapf: enable
args = parser.parse_args()

paddle.init(use_gpu=args.use_gpu, trainer_count=1)

# prepare data generator
data_generator = DataGenerator(
    vocab_filepath=args.vocab_path,
    mean_std_filepath=args.mean_std_path,
    augmentation_config='{}',
    specgram_type=args.specgram_type,
    num_threads=1,
    keep_transcription_text=True)

# prepare ASR model
ds2_model = DeepSpeech2Model(
    vocab_size=data_generator.vocab_size,
    num_conv_layers=args.num_conv_layers,
    num_rnn_layers=args.num_rnn_layers,
    rnn_layer_size=args.rnn_layer_size,
    use_gru=args.use_gru,
    pretrained_model_path=args.model_path,
    share_rnn_weights=args.share_rnn_weights)

vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]

if args.decoding_method == "ctc_beam_search":
    ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path,
                              vocab_list)


# prepare ASR inference handler
def file_to_transcript(filename):
    feature = data_generator.process_utterance(filename, "")
    probs_split = ds2_model.infer_batch_probs(
        infer_data=[feature],
        feeding_dict=data_generator.feeding)

    if args.decoding_method == "ctc_greedy":
        result_transcript = ds2_model.decode_batch_greedy(
            probs_split=probs_split,
            vocab_list=vocab_list)
    else:
        result_transcript = ds2_model.decode_batch_beam_search(
            probs_split=probs_split,
            beam_alpha=args.alpha,
            beam_beta=args.beta,
            beam_size=args.beam_size,
            cutoff_prob=args.cutoff_prob,
            cutoff_top_n=args.cutoff_top_n,
            vocab_list=vocab_list,
            num_processes=1)
    return result_transcript[0]


if __name__ == '__main__':
    record_to_file('demo.wav')
    print(file_to_transcript('demo.wav'))
and I got this:
*** Aborted at 1534487245 (unix time) try "date -d @1534487245" if you are using GNU date ***
PC: @ 0x0 (unknown)
*** SIGFPE (@0x7f2329804bc3) received by PID 30608 (TID 0x7f232a8bc700) from PID 696273859; stack trace: ***
@ 0x7f232a4d2390 (unknown)
@ 0x7f2329804bc3 log10f
@ 0x7f231599225a (unknown)
@ 0x7f2315aa550c (unknown)
@ 0x7f2315aaed62 (unknown)
@ 0x7f2315aaf46e (unknown)
@ 0x4c15bf PyEval_EvalFrameEx
@ 0x4b9ab6 PyEval_EvalCodeEx
@ 0x4d54b9 (unknown)
@ 0x4a5371 PyObject_CallFunction
@ 0x41cdd7 _PyObject_GenericGetAttrWithDict
@ 0x4bc24b PyEval_EvalFrameEx
@ 0x4b9ab6 PyEval_EvalCodeEx
@ 0x4c16e7 PyEval_EvalFrameEx
@ 0x4b9ab6 PyEval_EvalCodeEx
@ 0x4c16e7 PyEval_EvalFrameEx
@ 0x4b9ab6 PyEval_EvalCodeEx
@ 0x4c1e6f PyEval_EvalFrameEx
@ 0x4b9ab6 PyEval_EvalCodeEx
@ 0x4c1e6f PyEval_EvalFrameEx
@ 0x4c136f PyEval_EvalFrameEx
@ 0x4b9ab6 PyEval_EvalCodeEx
@ 0x4eb30f (unknown)
@ 0x4e5422 PyRun_FileExFlags
@ 0x4e3cd6 PyRun_SimpleFileExFlags
@ 0x493ae2 Py_Main
@ 0x7f232a117830 __libc_start_main
@ 0x4933e9 _start
@ 0x0 (unknown)
Floating point exception (core dumped)
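For what it's worth, the trace dies inside log10f, which fits the all-zero samples: the feature extraction presumably ends up taking the log of zero. As a stopgap, a guard along these lines in test.py (assert_not_silent is a hypothetical helper, not part of DeepSpeech) would fail with a readable error instead of a core dump, though it obviously does not fix the silent recording itself:

import wave
from array import array

def assert_not_silent(wav_path):
    # Hypothetical guard: refuse to extract features from an all-zero
    # recording, which otherwise crashes somewhere inside log10f.
    wf = wave.open(wav_path, 'rb')
    frames = wf.readframes(wf.getnframes())
    wf.close()
    samples = array('i', frames)
    if not len(samples) or max(abs(s) for s in samples) == 0:
        raise ValueError('%s contains only silence; check the recorder' % wav_path)

if __name__ == '__main__':
    record_to_file('demo.wav')
    assert_not_silent('demo.wav')
    print(file_to_transcript('demo.wav'))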