Skip to content

  • 体验新版
    • 正在加载...
  • 登录
  • PaddlePaddle
  • DeepSpeech
  • Issue
  • #272

D
DeepSpeech
  • 项目概览

PaddlePaddle / DeepSpeech
大约 2 年 前同步成功

通知 210
Star 8425
Fork 1598
  • 代码
    • 文件
    • 提交
    • 分支
    • Tags
    • 贡献者
    • 分支图
    • Diff
  • Issue 245
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 3
  • Wiki 0
    • Wiki
  • 分析
    • 仓库
    • DevOps
  • 项目成员
  • Pages
D
DeepSpeech
  • 项目概览
    • 项目概览
    • 详情
    • 发布
  • 仓库
    • 仓库
    • 文件
    • 提交
    • 分支
    • 标签
    • 贡献者
    • 分支图
    • 比较
  • Issue 245
    • Issue 245
    • 列表
    • 看板
    • 标记
    • 里程碑
  • 合并请求 3
    • 合并请求 3
  • Pages
  • 分析
    • 分析
    • 仓库分析
    • DevOps
  • Wiki 0
    • Wiki
  • 成员
    • 成员
  • 收起侧边栏
  • 动态
  • 分支图
  • 创建新Issue
  • 提交
  • Issue看板
已关闭
开放中
Opened 8月 17, 2018 by saxon_zh@saxon_zhGuest

Floating point exception

Created by: johntyty912

I tried to write a script that records my voice and stops recording when I finish speaking, but it does not work: when the script runs feature = data_generator.process_utterance(filename, "") in test.py, I get a floating point exception. However, when I run demo_server.py and demo_client.py, they work perfectly. Please help! Update: I found that my speech_segment.samples contains only 0.0s. What is the problem? Update 2: I found that this happens because my record2.py does not record anything. Any suggestions? Here is my code:

record2.py

from sys import byteorder
from array import array
from struct import pack

import pyaudio
import wave

# Amplitude cutoff that separates silence from sound; sample values are
# compared against it in is_silent() and trim().
THRESHOLD = 1000000000
# Number of frames requested from the input stream per read.
CHUNK_SIZE = 1024
# Sample format handed to PyAudio when the stream is opened.
FORMAT = pyaudio.paInt32
# Sample rate in Hz, used both for capture and for the WAV header.
RATE = 16000

def is_silent(snd_data, threshold=None):
    """Return True if every sample in 'snd_data' is below the threshold.

    The comparison uses the absolute value of each sample, so a chunk whose
    loud samples are all negative is not mistaken for silence.  This is the
    same magnitude test that trim() applies.

    Args:
        snd_data: iterable of integer samples.
        threshold: amplitude cutoff; defaults to the module-level THRESHOLD.

    Returns:
        True when abs(sample) < threshold holds for every sample.  An empty
        chunk counts as silent instead of raising ValueError.
    """
    if threshold is None:
        threshold = THRESHOLD
    return all(abs(sample) < threshold for sample in snd_data)

def normalize(snd_data):
    """Scale 'snd_data' so that its loudest sample reaches full scale.

    Args:
        snd_data: sequence of integer samples (iterated twice).

    Returns:
        A new array('i') holding the scaled samples.  Empty or all-zero
        input is returned as an unscaled copy, because there is no peak to
        scale against.
    """
    # Largest magnitude used as the scaling target.  2**31 would overflow
    # array('i') whenever the loudest sample is positive.
    MAXIMUM = 2147483647
    peak = max([abs(i) for i in snd_data] or [0])
    if peak == 0:
        # Silent or empty capture: dividing by the peak would fail.
        return array('i', snd_data)

    times = float(MAXIMUM) / peak
    # Clamp each value so float rounding can never leave the 32-bit range.
    return array('i', (max(-MAXIMUM - 1, min(MAXIMUM, int(i * times)))
                       for i in snd_data))

def trim(snd_data, threshold=None):
    """Trim the quiet samples from the start and end of 'snd_data'.

    A sample counts as sound when abs(sample) > threshold.  Everything
    before the first such sample and after the last one is dropped; quiet
    samples between them are kept.

    Args:
        snd_data: iterable of integer samples; it is not modified.
        threshold: amplitude cutoff; defaults to the module-level THRESHOLD.

    Returns:
        A new array('i') with the trimmed samples, empty when no sample
        exceeds the threshold.
    """
    if threshold is None:
        threshold = THRESHOLD

    samples = array('i', snd_data)

    # Index of the first loud sample, scanning from the left.
    first = next(
        (idx for idx, value in enumerate(samples) if abs(value) > threshold),
        None)
    if first is None:
        return array('i')

    # Index of the last loud sample, scanning from the right.  One is
    # guaranteed to exist because 'first' was found.
    last = next(idx for idx in range(len(samples) - 1, -1, -1)
                if abs(samples[idx]) > threshold)
    return samples[first:last + 1]

def add_silence(snd_data, seconds, rate=None):
    """Pad 'snd_data' with silence at both the start and the end.

    Args:
        snd_data: samples to pad; it is not modified.
        seconds: length of each padding in seconds (float).
        rate: samples per second; defaults to the module-level RATE.

    Returns:
        A new array('i') made of int(seconds * rate) zero samples, then the
        samples of 'snd_data', then the same number of zero samples again.
    """
    if rate is None:
        rate = RATE
    # Array repetition builds the padding without a per-sample Python loop
    # and behaves the same on Python 2 and Python 3.
    padding = array('i', [0]) * int(seconds * rate)
    padded = array('i', padding)
    padded.extend(snd_data)
    padded.extend(padding)
    return padded

def record():
    """
    Record a word or words from the microphone and
    return the sample width together with the data as an
    array of signed 32-bit integers.

    Recording stops once sound has been detected and more
    than 10 silent chunks have been read afterwards.

    Normalizes the audio, trims silence from the
    start and end, and pads with 0.5 seconds of
    blank sound to make sure VLC et al can play
    it without getting chopped off.

    Returns:
        A (sample_width, samples) tuple, where sample_width is the value
        reported by PyAudio for FORMAT and samples is an array('i').
    """
    p = pyaudio.PyAudio()
    try:
        sample_width = p.get_sample_size(FORMAT)
        # NOTE(review): output=True also opens a playback stream, which
        # capture does not appear to need -- confirm before removing.
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True, output=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False

            r = array('i')
            print('listening')
            while True:
                # Reinterpret the raw bytes of one chunk as 'i' samples.
                snd_data = array('i', stream.read(CHUNK_SIZE))
                # Debug output: the loudest positive sample in this chunk.
                print(max(snd_data))
                # NOTE(review): assumes the stream delivers little-endian
                # data that must be swapped on big-endian hosts -- confirm.
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)
                silent = is_silent(snd_data)

                if silent and snd_started:
                    num_silent += 1
                elif not silent and not snd_started:
                    snd_started = True

                if snd_started and num_silent > 10:
                    break
        finally:
            # Release the stream even when a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r

def record_to_file(path):
    """Record from the microphone and write the resulting data to 'path'.

    The samples returned by record() are packed as little-endian 'i' values
    and written to a mono WAV file with the sample width reported by
    record() and a frame rate of RATE.

    Args:
        path: destination passed to wave.open() in 'wb' mode.
    """
    sample_width, data = record()
    # A repeat count (e.g. "<16000i") keeps the format string short instead
    # of spelling out one 'i' character per sample.
    data = pack('<%di' % len(data), *data)

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(data)
    finally:
        # Close the file even if writing the frames fails.
        wf.close()

# Script entry point: prompt the user, record one utterance from the
# microphone, and save it as demo.wav in the current directory.
if __name__ == '__main__':
    print("please speak a word into the microphone")
    record_to_file('demo.wav')
    print("done - result written to demo.wav")

test.py

"""Server-end for the ASR demo."""
# The docstring has to be the first statement of the module; otherwise
# __doc__ is None and the argparse description built from it is empty.

from record2 import record_to_file
import os
import time
import random
import argparse
import functools
from time import gmtime, strftime
import SocketServer
import struct
import wave
import paddle.v2 as paddle
import _init_paths
from data_utils.data import DataGenerator
from model_utils.model import DeepSpeech2Model
from data_utils.utility import read_manifest
from utils.utility import add_arguments, print_arguments

# Command-line configuration.  Every option is registered through the
# project helper add_arguments(), bound to this parser with
# functools.partial; each call passes (name, type, default, help).
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# NOTE(review): host_port, host_ip, speech_save_dir and warmup_manifest are
# not read anywhere in the visible part of this script -- presumably left
# over from a server script; confirm before removing.
# yapf: disable
add_arg('host_port',        int,    8086,    "Server's IP port.")
add_arg('beam_size',        int,    300,    "Beam search width.")
add_arg('num_conv_layers',  int,    2,      "# of convolution layers.")
add_arg('num_rnn_layers',   int,    3,      "# of recurrent layers.")
add_arg('rnn_layer_size',   int,    1024,   "# of recurrent cells per layer.")
add_arg('alpha',            float,  2.6,   "Coef of LM for beam search.")
add_arg('beta',             float,  5.0,   "Coef of WC for beam search.")
add_arg('cutoff_prob',      float,  0.99,    "Cutoff probability for pruning.")
add_arg('cutoff_top_n',     int,    40,     "Cutoff number for pruning.")
add_arg('use_gru',          bool,   True,  "Use GRUs instead of simple RNNs.")
add_arg('use_gpu',          bool,   False,   "Use GPU or not.")
add_arg('share_rnn_weights',bool,   False,   "Share input-hidden weights across "
                                            "bi-directional RNNs. Not for GRU.")
add_arg('host_ip',          str,
        'localhost',
        "Server's IP address.")
add_arg('speech_save_dir',  str,
        'demo_cache',
        "Directory to save demo audios.")
add_arg('warmup_manifest',  str,
        'data/aishell/manifest.test',
        "Filepath of manifest to warm up.")
add_arg('mean_std_path',    str,
        'models/aishell/mean_std.npz',
        "Filepath of normalizer's mean & std.")
add_arg('vocab_path',       str,
        'models/aishell/vocab.txt',
        "Filepath of vocabulary.")
add_arg('model_path',       str,
        'models/aishell/params.tar.gz',
        "If None, the training starts from scratch, "
        "otherwise, it resumes from the pre-trained model.")
add_arg('lang_model_path',  str,
        'models/lm/zh_giga.no_cna_cmn.prune01244.klm',
        "Filepath for language model.")
add_arg('decoding_method',  str,
        'ctc_beam_search',
        "Decoding method. Options: ctc_beam_search, ctc_greedy",
        choices = ['ctc_beam_search', 'ctc_greedy'])
add_arg('specgram_type',    str,
        'linear',
        "Audio feature type. Options: linear, mfcc.",
        choices=['linear', 'mfcc'])
# yapf: enable
# Parsing happens at import time, so the options are read from sys.argv as
# soon as this module is loaded.
args = parser.parse_args()

# Prepare the data generator that turns an audio file into model input.
# It is configured from the vocabulary and mean/std files given on the
# command line, with an empty augmentation config and a single thread.
data_generator = DataGenerator(
    vocab_filepath=args.vocab_path,
    mean_std_filepath=args.mean_std_path,
    augmentation_config='{}',
    specgram_type=args.specgram_type,
    num_threads=1,
    keep_transcription_text=True)

# Prepare the ASR model; the layer sizes come from the command line and the
# weights are loaded from args.model_path.
ds2_model = DeepSpeech2Model(
    vocab_size=data_generator.vocab_size,
    num_conv_layers=args.num_conv_layers,
    num_rnn_layers=args.num_rnn_layers,
    rnn_layer_size=args.rnn_layer_size,
    use_gru=args.use_gru,
    pretrained_model_path=args.model_path,
    share_rnn_weights=args.share_rnn_weights)

# UTF-8 encoded copy of the vocabulary, handed to the scorer and decoders.
vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]

# The external language-model scorer is only set up for beam search.
if args.decoding_method == "ctc_beam_search":
    ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path,
                              vocab_list)

# prepare ASR inference handler
def file_to_transcript(filename):
    """Transcribe the audio file 'filename' and return the first result.

    The file is converted to features by the module-level data generator,
    run through the module-level model, and decoded with the method chosen
    by args.decoding_method (greedy, otherwise beam search).
    """
    utterance = data_generator.process_utterance(filename, "")
    batch_probs = ds2_model.infer_batch_probs(
        infer_data=[utterance],
        feeding_dict=data_generator.feeding)

    # Greedy decoding needs only the probabilities and the vocabulary.
    if args.decoding_method == "ctc_greedy":
        greedy_results = ds2_model.decode_batch_greedy(
            probs_split=batch_probs,
            vocab_list=vocab_list)
        return greedy_results[0]

    # Every other setting falls through to beam search in a single process.
    beam_results = ds2_model.decode_batch_beam_search(
        probs_split=batch_probs,
        beam_alpha=args.alpha,
        beam_beta=args.beta,
        beam_size=args.beam_size,
        cutoff_prob=args.cutoff_prob,
        cutoff_top_n=args.cutoff_top_n,
        vocab_list=vocab_list,
        num_processes=1)
    return beam_results[0]

# Initialize PaddlePaddle with a single trainer on the selected device.
# NOTE(review): this runs after the model and scorer above have already
# been constructed at import time -- confirm that Paddle does not need to
# be initialized first.
paddle.init(use_gpu=args.use_gpu, trainer_count=1)

# Script entry point: record one utterance to demo.wav, then print its
# transcript.  print() is called as a function, matching record2.py, and
# gives the same output for a single argument on Python 2.
if __name__ == '__main__':
    record_to_file('demo.wav')
    print(file_to_transcript('demo.wav'))

and I got this:

*** Aborted at 1534487245 (unix time) try "date -d @1534487245" if you are using GNU date ***
PC: @                0x0 (unknown)
*** SIGFPE (@0x7f2329804bc3) received by PID 30608 (TID 0x7f232a8bc700) from PID 696273859; stack trace: ***
    @     0x7f232a4d2390 (unknown)
    @     0x7f2329804bc3 log10f
    @     0x7f231599225a (unknown)
    @     0x7f2315aa550c (unknown)
    @     0x7f2315aaed62 (unknown)
    @     0x7f2315aaf46e (unknown)
    @           0x4c15bf PyEval_EvalFrameEx
    @           0x4b9ab6 PyEval_EvalCodeEx
    @           0x4d54b9 (unknown)
    @           0x4a5371 PyObject_CallFunction
    @           0x41cdd7 _PyObject_GenericGetAttrWithDict
    @           0x4bc24b PyEval_EvalFrameEx
    @           0x4b9ab6 PyEval_EvalCodeEx
    @           0x4c16e7 PyEval_EvalFrameEx
    @           0x4b9ab6 PyEval_EvalCodeEx
    @           0x4c16e7 PyEval_EvalFrameEx
    @           0x4b9ab6 PyEval_EvalCodeEx
    @           0x4c1e6f PyEval_EvalFrameEx
    @           0x4b9ab6 PyEval_EvalCodeEx
    @           0x4c1e6f PyEval_EvalFrameEx
    @           0x4c136f PyEval_EvalFrameEx
    @           0x4b9ab6 PyEval_EvalCodeEx
    @           0x4eb30f (unknown)
    @           0x4e5422 PyRun_FileExFlags
    @           0x4e3cd6 PyRun_SimpleFileExFlags
    @           0x493ae2 Py_Main
    @     0x7f232a117830 __libc_start_main
    @           0x4933e9 _start
    @                0x0 (unknown)
Floating point exception (core dumped)
指派人
分配到
无
里程碑
无
分配里程碑
工时统计
无
截止日期
无
标识: paddlepaddle/DeepSpeech#272
渝ICP备2023009037号

京公网安备11010502055752号

网络110报警服务 Powered by GitLab CE v13.7
开源知识
Git 入门 Pro Git 电子书 在线学 Git
Markdown 基础入门 IT 技术知识开源图谱
帮助
使用手册 反馈建议 博客
《GitCode 隐私声明》 《GitCode 服务条款》 关于GitCode
Powered by GitLab CE v13.7