提交 fc72ab1e 编写于 作者: H Hui Zhang

more debug info

上级 48271260
......@@ -20,4 +20,4 @@ fi
mkdir -p build
cmake -B build -DBOOST_ROOT:STRING=${boost_SOURCE_DIR}
cmake --build build
cmake --build build -j
......@@ -76,11 +76,15 @@ void CTCPrefixBeamSearch::AdvanceDecode(
// forward frame by frame
std::vector<kaldi::BaseFloat> frame_prob;
bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
if (flag == false) break;
if (flag == false) {
LOG(INFO) << "decoder advance decode exit." << frame_prob.size();
break;
}
std::vector<std::vector<kaldi::BaseFloat>> likelihood;
likelihood.push_back(frame_prob);
AdvanceDecoding(likelihood);
VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_;
}
}
......@@ -114,7 +118,11 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
std::vector<float> topk_score;
std::vector<int32_t> topk_index;
TopK(logp_t, first_beam_size, &topk_score, &topk_index);
VLOG(2) << "topk: " << num_frame_decoded_ << " " << *std::max_element(logp_t.begin(), logp_t.end()) << " " << topk_score[0];
for (int i = 0; i < topk_score.size(); i++){
VLOG(2) << "topk: " << num_frame_decoded_ << " " << topk_score[i];
}
// 2. token passing
for (int i = 0; i < topk_index.size(); ++i) {
int id = topk_index[i];
......@@ -295,7 +303,18 @@ void CTCPrefixBeamSearch::UpdateOutputs(
outputs_.emplace_back(output);
}
// NOTE(review): diff fragment — the next line is the pre-change (removed)
// one-line definition; the block after it is the post-change (added) version.
void CTCPrefixBeamSearch::FinalizeSearch() { UpdateFinalContext(); }
// Finalize the beam search: apply the final context bias, then dump each
// hypothesis (length, CTC score, and every token id) at verbose level 2.
void CTCPrefixBeamSearch::FinalizeSearch() {
UpdateFinalContext();
VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_;
// NOTE(review): cnt is never incremented, so every hypothesis logs as
// "hyp 0" — presumably the loop index i was intended; confirm upstream.
int cnt = 0;
for (int i = 0; i < hypotheses_.size(); i ++){
// One summary line per hypothesis: token count and its CTC likelihood.
VLOG(2) << "hyp " << cnt << " len: " << hypotheses_[i].size() << " ctc score: " << likelihood_[i];
for (int j = 0; j < hypotheses_[i].size(); j ++){
// One line per token id in the hypothesis.
VLOG(2) << hypotheses_[i][j];
}
}
}
void CTCPrefixBeamSearch::UpdateFinalContext() {
if (context_graph_ == nullptr) return;
......
......@@ -52,15 +52,21 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
Vector<BaseFloat> feature;
result = base_extractor_->Read(&feature);
if (result == false || feature.Dim() == 0) {
if (IsFinished() == false) return false;
break;
VLOG(1) << "result: " << result << "feature dim: " << feature.Dim();
if (IsFinished() == false) {
LOG(INFO) << "finished reading feature. cache size: " << feature_cache_.size();
return false;
} else {
LOG(INFO) << "break";
break;
}
}
CHECK(feature.Dim() == dim_);
feature_cache_.push(feature);
nframes_ += 1;
VLOG(1) << "nframes: " << nframes_;
feature_cache_.push(feature);
}
if (feature_cache_.size() < receptive_filed_length_) {
......@@ -68,8 +74,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
return false;
}
if (fill_zero_){
if (fill_zero_) {
while (feature_cache_.size() < frame_chunk_size_) {
Vector<BaseFloat> feature(dim_, kaldi::kSetZero);
nframes_ += 1;
......@@ -79,6 +84,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
int32 this_chunk_size = std::min(static_cast<int32>(feature_cache_.size()), frame_chunk_size_);
feats->Resize(dim_ * this_chunk_size);
VLOG(1) << "read " << this_chunk_size << " feat.";
int32 counter = 0;
while (counter < this_chunk_size) {
......@@ -97,6 +103,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
counter++;
}
CHECK(feature_cache_.size() == cache_size_ );
return result;
}
......
......@@ -41,12 +41,14 @@ class FeatureCache : public FrontendInterface {
// Returns the feature dimension this cache was configured with.
virtual size_t Dim() const { return dim_; }
// Marks the stream as finished: propagates the flag to the upstream
// extractor, then runs Compute() once more to flush the last chunk of data.
virtual void SetFinished() {
// NOTE(review): diff fragment — the two identical "set finished" logs are
// the removed (old-position) and added (new-position) lines of the commit.
LOG(INFO) << "set finished";
// std::unique_lock<std::mutex> lock(mutex_);
base_extractor_->SetFinished();
LOG(INFO) << "set finished";
// read the last chunk data
Compute();
// ready_feed_condition_.notify_one();
LOG(INFO) << "compute last feats done.";
}
// Finished iff the upstream extractor reports finished.
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
......
......@@ -36,8 +36,6 @@ void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
frames_ready_ += likelihood.NumRows();
}
// Decodable::Init(DecodableConfig config) {
//}
// Returns the number of frames whose likelihoods have been computed so far.
int32 Decodable::NumFramesReady() const { return frames_ready_; }
......@@ -70,9 +68,10 @@ bool Decodable::AdvanceChunk() {
Vector<BaseFloat> features;
if (frontend_ == NULL || frontend_->Read(&features) == false) {
// no feat or frontend_ not init.
VLOG(1) << "decodable exit;";
return false;
}
VLOG(2) << "Forward with " << features.Dim() << " frames.";
VLOG(2) << "Forward in " << features.Dim() / frontend_->Dim() << " feats.";
// forward feats
NnetOut out;
......@@ -80,6 +79,7 @@ bool Decodable::AdvanceChunk() {
int32& vocab_dim = out.vocab_dim;
Vector<BaseFloat>& logprobs = out.logprobs;
VLOG(2) << "Forward out " << logprobs.Dim() / vocab_dim << " decoder frames.";
// cache nnet outupts
nnet_out_cache_.Resize(logprobs.Dim() / vocab_dim, vocab_dim);
nnet_out_cache_.CopyRowsFromVec(logprobs);
......@@ -114,15 +114,20 @@ bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
// Reads the scaled log-likelihood row for one frame out of the nnet output
// cache into *likelihood (resized to vocab size). Returns false when the
// frame has not been computed yet (e.g. no more input); true on success.
bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
if (EnsureFrameHaveComputed(frame) == false) {
LOG(INFO) << "framelikehood exit.";
return false;
}
// The cache holds rows [frame_offset_, frames_ready_); sanity-check that.
int nrows = nnet_out_cache_.NumRows();
CHECK(nrows == (frames_ready_ - frame_offset_));
int vocab_size = nnet_out_cache_.NumCols();
likelihood->resize(vocab_size);
for (int32 idx = 0; idx < vocab_size; ++idx) {
// Index into the cache relative to the cache's first frame, and apply
// the acoustic scale to each log-probability.
(*likelihood)[idx] =
nnet_out_cache_(frame - frame_offset_, idx) * acoustic_scale_;
VLOG(4) << "nnet out: " << frame << " offset:" << frame_offset_ << " " << nnet_out_cache_.NumRows() << " logprob: " << nnet_out_cache_(frame - frame_offset_, idx);
}
return true;
}
......
......@@ -440,6 +440,7 @@ void U2Nnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
max_hyps_len = std::max(max_hyps_len, len);
hyps_len_ptr[i] = static_cast<int64_t>(len);
}
VLOG(2) << "max_hyps_len: " << max_hyps_len;
paddle::Tensor hyps_tensor =
paddle::full({num_hyps, max_hyps_len}, eos_, paddle::DataType::INT64);
......@@ -625,8 +626,8 @@ void U2Nnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
// combinded left-to-right and right-to-lfet score
(*rescoring_score)[i] =
score * (1 - reverse_weight) + r_score * reverse_weight;
VLOG(1) << "hyp " << i << " score: " << score << " r_score: " << r_score
<< " reverse_weight: " << reverse_weight;
VLOG(1) << "hyp " << i << " " << hyp.size() << " score: " << score << " r_score: " << r_score
<< " reverse_weight: " << reverse_weight << " final score: " << (*rescoring_score)[i];
}
}
......
......@@ -52,7 +52,6 @@ void U2Recognizer::Reset() {
num_frames_ = 0;
result_.clear();
feature_pipeline_->Reset();
decodable_->Reset();
decoder_->Reset();
}
......@@ -62,7 +61,6 @@ void U2Recognizer::ResetContinuousDecoding() {
num_frames_ = 0;
result_.clear();
feature_pipeline_->Reset();
decodable_->Reset();
decoder_->Reset();
}
......@@ -192,10 +190,12 @@ void U2Recognizer::AttentionRescoring() {
// combine ctc score and rescoring score
for (size_t i = 0; i < num_hyps; i++) {
VLOG(1) << "hyp " << i << " rescoring_score: " << rescoring_score[i]
<< " ctc_score: " << result_[i].score;
<< " ctc_score: " << result_[i].score << " rescoring_weight: " << opts_.decoder_opts.rescoring_weight << " ctc_weight: " << opts_.decoder_opts.ctc_weight;
result_[i].score =
opts_.decoder_opts.rescoring_weight * rescoring_score[i] +
opts_.decoder_opts.ctc_weight * result_[i].score;
VLOG(1) << "hyp: " << result_[0].sentence << " score: " << result_[0].score;
}
std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc);
......
......@@ -62,6 +62,7 @@ int main(int argc, char* argv[]) {
LOG(INFO) << "wav len (sample): " << tot_samples;
int sample_offset = 0;
int cnt = 0;
while (sample_offset < tot_samples) {
int cur_chunk_size =
std::min(chunk_sample_size, tot_samples - sample_offset);
......@@ -77,12 +78,14 @@ int main(int argc, char* argv[]) {
recognizer.SetFinished();
}
recognizer.Decode();
LOG(INFO) << "Pratial result: " << recognizer.GetPartialResult();
LOG(INFO) << "Pratial result: " << cnt << " " << recognizer.GetPartialResult();
// no overlap
sample_offset += cur_chunk_size;
cnt++;
}
CHECK(sample_offset == tot_samples);
VLOG(1) << "num decode: " << cnt;
// recognizer.SetFinished();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册