提交 28dafea0 编写于 作者: H Hui Zhang

Add fill_zero option for the frontend (fill zeros in the last chunk when it is shorter than chunk_size)

上级 83f885c6
......@@ -20,6 +20,7 @@
// feature
DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");
DEFINE_bool(fill_zero, false, "fill zero at last chunk, when chunk < chunk_size");
// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear
// feature, or fbank");
DEFINE_int32(num_bins, 161, "num bins of mel");
......
......@@ -47,17 +47,16 @@ bool Assembler::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
// read frame by frame from base_feature_extractor_ into cache_
bool Assembler::Compute(Vector<BaseFloat>* feats) {
// compute and feed frame by frame
bool result = false;
while (feature_cache_.size() < frame_chunk_size_) {
Vector<BaseFloat> feature;
result = base_extractor_->Read(&feature);
bool result = base_extractor_->Read(&feature);
if (result == false || feature.Dim() == 0) {
VLOG(1) << "result: " << result << "feature dim: " << feature.Dim();
VLOG(1) << "result: " << result << " feature dim: " << feature.Dim();
if (IsFinished() == false) {
LOG(INFO) << "finished reading feature. cache size: " << feature_cache_.size();
VLOG(1) << "finished reading feature. cache size: " << feature_cache_.size();
return false;
} else {
LOG(INFO) << "break";
VLOG(1) << "break";
break;
}
}
......@@ -103,7 +102,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
counter++;
}
CHECK(feature_cache_.size() == cache_size_ );
CHECK(feature_cache_.size() == cache_size_);
return true;
}
......
......@@ -27,6 +27,7 @@
// feature
DECLARE_bool(use_fbank);
DECLARE_bool(fill_zero);
DECLARE_int32(num_bins);
DECLARE_string(cmvn_file);
......@@ -80,15 +81,18 @@ struct FeaturePipelineOptions {
// assembler opts
opts.assembler_opts.subsampling_rate = FLAGS_subsampling_rate;
LOG(INFO) << "subsampling rate: "
<< opts.assembler_opts.subsampling_rate;
opts.assembler_opts.receptive_filed_length =
FLAGS_receptive_field_length;
opts.assembler_opts.nnet_decoder_chunk = FLAGS_nnet_decoder_chunk;
opts.assembler_opts.fill_zero = FLAGS_fill_zero;
LOG(INFO) << "subsampling rate: "
<< opts.assembler_opts.subsampling_rate;
LOG(INFO) << "nnet receptive filed length: "
<< opts.assembler_opts.receptive_filed_length;
opts.assembler_opts.nnet_decoder_chunk = FLAGS_nnet_decoder_chunk;
LOG(INFO) << "nnet chunk size: "
<< opts.assembler_opts.nnet_decoder_chunk;
LOG(INFO) << "frontend fill zeros: "
<< opts.assembler_opts.fill_zero;
return opts;
}
};
......
......@@ -114,7 +114,7 @@ bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
// read one frame likelihood
bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
if (EnsureFrameHaveComputed(frame) == false) {
LOG(INFO) << "framelikehood exit.";
VLOG(1) << "framelikehood exit.";
return false;
}
......
......@@ -38,6 +38,8 @@ struct RecognizerResource {
resource.acoustic_scale = FLAGS_acoustic_scale;
resource.feature_pipeline_opts =
FeaturePipelineOptions::InitFromFlags();
resource.feature_pipeline_opts.assembler_opts.fill_zero = true;
LOG(INFO) << "ds2 need fill zero be true: " << resource.feature_pipeline_opts.assembler_opts.fill_zero;
resource.model_opts = ModelOptions::InitFromFlags();
resource.tlg_opts = TLGDecoderOptions::InitFromFlags();
return resource;
......
......@@ -101,6 +101,8 @@ struct U2RecognizerResource {
resource.feature_pipeline_opts =
ppspeech::FeaturePipelineOptions::InitFromFlags();
resource.feature_pipeline_opts.assembler_opts.fill_zero = false;
LOG(INFO) << "u2 need fill zero be false: " << resource.feature_pipeline_opts.assembler_opts.fill_zero;
resource.model_opts = ppspeech::ModelOptions::InitFromFlags();
resource.decoder_opts = ppspeech::DecodeOptions::InitFromFlags();
return resource;
......
......@@ -85,9 +85,6 @@ int main(int argc, char* argv[]) {
cnt++;
}
CHECK(sample_offset == tot_samples);
VLOG(1) << "num decode: " << cnt;
// recognizer.SetFinished();
// second pass decoding
recognizer.Rescoring();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册