// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "base/kaldi-common.h" namespace paddleaudio { template StreamingFeatureTpl::StreamingFeatureTpl( const Options& opts) : opts_(opts), computer_(opts), window_function_(opts.frame_opts) { //window_function_(computer_.GetFrameOptions()) { the opt set to zero } template bool StreamingFeatureTpl::ComputeFeature(const kaldi::VectorBase& wav, kaldi::Vector* feats) { // append remaned waves kaldi::int32 wav_len = wav.Dim(); if (wav_len == 0) return false; kaldi::int32 left_len = remained_wav_.Dim(); kaldi::Vector waves(left_len + wav_len); waves.Range(0, left_len).CopyFromVec(remained_wav_); waves.Range(left_len, wav_len).CopyFromVec(wav); // cache remaned waves kaldi::FrameExtractionOptions frame_opts = computer_.GetFrameOptions(); kaldi::int32 num_frames = kaldi::NumFrames(waves.Dim(), frame_opts); kaldi::int32 frame_shift = frame_opts.WindowShift(); kaldi::int32 left_samples = waves.Dim() - frame_shift * num_frames; remained_wav_.Resize(left_samples); remained_wav_.CopyFromVec( waves.Range(frame_shift * num_frames, left_samples)); // compute speech feature Compute(waves, feats); return true; } // Compute feat template bool StreamingFeatureTpl::Compute( const kaldi::Vector& waves, kaldi::Vector* feats) { kaldi::BaseFloat vtln_warp = 1.0; const kaldi::FrameExtractionOptions& frame_opts = computer_.GetFrameOptions(); kaldi::int32 num_samples = waves.Dim(); kaldi::int32 frame_length = frame_opts.WindowSize(); kaldi::int32 sample_rate = frame_opts.samp_freq; if (num_samples < frame_length) { return false; } kaldi::int32 num_frames = kaldi::NumFrames(num_samples, frame_opts); feats->Resize(num_frames * Dim()); kaldi::Vector window; bool need_raw_log_energy = computer_.NeedRawLogEnergy(); for (kaldi::int32 frame = 0; frame < num_frames; frame++) { kaldi::BaseFloat raw_log_energy = 0.0; kaldi::ExtractWindow(0, waves, frame, frame_opts, window_function_, &window, need_raw_log_energy ? &raw_log_energy : NULL); kaldi::Vector this_feature(computer_.Dim(), kaldi::kUndefined); computer_.Compute(raw_log_energy, vtln_warp, &window, &this_feature); kaldi::SubVector output_row( feats->Data() + frame * Dim(), Dim()); output_row.CopyFromVec(this_feature); } return true; } } // namespace paddleaudio