diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc index e1f0a89548ad0362950a758ceed445f2c61ee9d8..c29d2b21fd0e58192bc62e1f55110f691eb56d91 100644 --- a/speechx/examples/feat/linear_spectrogram_main.cc +++ b/speechx/examples/feat/linear_spectrogram_main.cc @@ -20,7 +20,7 @@ #include "frontend/audio_cache.h" #include "frontend/data_cache.h" #include "frontend/feature_cache.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" #include "frontend/normalizer.h" #include "kaldi/feat/wave-reader.h" #include "kaldi/util/kaldi-io.h" @@ -170,13 +170,13 @@ int main(int argc, char* argv[]) { // feature pipeline: wave cache --> decibel_normalizer --> hanning // window -->linear_spectrogram --> global cmvn -> feat cache - // std::unique_ptr data_source(new + // std::unique_ptr data_source(new // ppspeech::DataCache()); - std::unique_ptr data_source( + std::unique_ptr data_source( new ppspeech::AudioCache()); ppspeech::DecibelNormalizerOptions db_norm_opt; - std::unique_ptr db_norm( + std::unique_ptr db_norm( new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source))); ppspeech::LinearSpectrogramOptions opt; @@ -185,10 +185,10 @@ int main(int argc, char* argv[]) { LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms; LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms; - std::unique_ptr linear_spectrogram( + std::unique_ptr linear_spectrogram( new ppspeech::LinearSpectrogram(opt, std::move(db_norm))); - std::unique_ptr cmvn( + std::unique_ptr cmvn( new ppspeech::CMVN(FLAGS_cmvn_write_path, std::move(linear_spectrogram))); diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio_cache.h index b6c82c69e13bac6cce47bb421724c50b2b43267b..f48da12b7d77052980cbe42afd6cefdf090f8288 100644 --- a/speechx/speechx/frontend/audio_cache.h +++ b/speechx/speechx/frontend/audio_cache.h @@ -16,12 +16,12 @@ #pragma once #include "base/common.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" namespace ppspeech { // waves cache -class AudioCache : public FeatureExtractorInterface { +class AudioCache : public FrontendInterface { public: explicit AudioCache(int buffer_size = kint16max); diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/data_cache.h index dea51d76ee63cde1cae9fc87e3c04c321e5538e0..b8ce6bf65ee311c4f414ae27b0045d437ebcec38 100644 --- a/speechx/speechx/frontend/data_cache.h +++ b/speechx/speechx/frontend/data_cache.h @@ -17,13 +17,13 @@ #include "base/common.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" namespace ppspeech { // A data source for testing different frontend module. // It accepts waves or feats. -class DataCache : public FeatureExtractorInterface { +class DataCache : public FrontendInterface { public: explicit DataCache() { finished_ = false; } diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/fbank.h index 7d9cf4221254acb1f0eec4430b4f778cd8e1f255..68267b3d0efee480c89b0816489c6014baceb13f 100644 --- a/speechx/speechx/frontend/fbank.h +++ b/speechx/speechx/frontend/fbank.h @@ -20,10 +20,10 @@ namespace ppspeech { -class FbankExtractor : FeatureExtractorInterface { +class FbankExtractor : FrontendInterface { public: explicit FbankExtractor(const FbankOptions& opts, - share_ptr pre_extractor); + share_ptr pre_extractor); virtual void AcceptWaveform( const kaldi::Vector& input) = 0; virtual void Read(kaldi::Vector* feat) = 0; diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/feature_cache.cc index dad6907ce4985563682d80039d500450e8c96ccc..53b7076d56d7db08c5f1ade5d2f896bf87c6c0d2 100644 --- a/speechx/speechx/frontend/feature_cache.cc +++ b/speechx/speechx/frontend/feature_cache.cc @@ -24,7 +24,7 @@ using kaldi::SubVector; using std::unique_ptr; FeatureCache::FeatureCache( - int max_size, unique_ptr base_extractor) { + int max_size, unique_ptr base_extractor) { max_size_ = max_size; base_extractor_ = std::move(base_extractor); } diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/feature_cache.h index f52b9b0f6943096c399a7ce8f79076fa5efa72d4..1281ec35a4652f99d26f388b9f786cccb9107e32 100644 --- a/speechx/speechx/frontend/feature_cache.h +++ b/speechx/speechx/frontend/feature_cache.h @@ -15,15 +15,15 @@ #pragma once #include "base/common.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" namespace ppspeech { -class FeatureCache : public FeatureExtractorInterface { +class FeatureCache : public FrontendInterface { public: explicit FeatureCache( int32 max_size = kint16max, - std::unique_ptr base_extractor = NULL); + std::unique_ptr base_extractor = NULL); // Feed feats or waves virtual void Accept(const kaldi::VectorBase& inputs); @@ -53,7 +53,7 @@ class FeatureCache : public FeatureExtractorInterface { bool Compute(); size_t max_size_; - std::unique_ptr base_extractor_; + std::unique_ptr base_extractor_; std::mutex mutex_; std::queue> cache_; diff --git a/speechx/speechx/frontend/feature_extractor_controller.h b/speechx/speechx/frontend/feature_extractor_controller.h deleted file mode 100644 index 0544a1e298b8e7dc871d13f546398a5c28308b0e..0000000000000000000000000000000000000000 --- a/speechx/speechx/frontend/feature_extractor_controller.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. diff --git a/speechx/speechx/frontend/feature_extractor_controller_impl.h b/speechx/speechx/frontend/feature_extractor_controller_impl.h deleted file mode 100644 index 0544a1e298b8e7dc871d13f546398a5c28308b0e..0000000000000000000000000000000000000000 --- a/speechx/speechx/frontend/feature_extractor_controller_impl.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. diff --git a/speechx/speechx/frontend/feature_extractor_interface.h b/speechx/speechx/frontend/frontend_itf.h similarity index 97% rename from speechx/speechx/frontend/feature_extractor_interface.h rename to speechx/speechx/frontend/frontend_itf.h index 5da2526b98c2107992936b0736837feb2ec47652..7913cc7c086564bd01981f46eac5701a1a799ff1 100644 --- a/speechx/speechx/frontend/feature_extractor_interface.h +++ b/speechx/speechx/frontend/frontend_itf.h @@ -19,7 +19,7 @@ namespace ppspeech { -class FeatureExtractorInterface { +class FrontendInterface { public: // Feed inputs: features(2D saved in 1D) or waveforms(1D). virtual void Accept(const kaldi::VectorBase& inputs) = 0; diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/linear_spectrogram.cc index 41bc8743a939e4f45b5607d71435e246fc480ca7..2ba00785aa70f578025f2b59ec233c4acb855e83 100644 --- a/speechx/speechx/frontend/linear_spectrogram.cc +++ b/speechx/speechx/frontend/linear_spectrogram.cc @@ -27,7 +27,7 @@ using std::vector; LinearSpectrogram::LinearSpectrogram( const LinearSpectrogramOptions& opts, - std::unique_ptr base_extractor) { + std::unique_ptr base_extractor) { opts_ = opts; base_extractor_ = std::move(base_extractor); int32 window_size = opts.frame_opts.WindowSize(); diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/linear_spectrogram.h index 10853904d1dc3417938973aa7ea5018c9e5ae590..136441efeec99d6936d73043e4147c5783113779 100644 --- a/speechx/speechx/frontend/linear_spectrogram.h +++ b/speechx/speechx/frontend/linear_spectrogram.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" #include "kaldi/feat/feature-window.h" namespace ppspeech { @@ -35,11 +35,11 @@ struct LinearSpectrogramOptions { } }; -class LinearSpectrogram : public FeatureExtractorInterface { +class LinearSpectrogram : public FrontendInterface { public: explicit LinearSpectrogram( const LinearSpectrogramOptions& opts, - std::unique_ptr base_extractor); + std::unique_ptr base_extractor); virtual void Accept(const kaldi::VectorBase& inputs); virtual bool Read(kaldi::Vector* feats); // the dim_ is the dim of single frame feature @@ -61,7 +61,7 @@ class LinearSpectrogram : public FeatureExtractorInterface { std::vector hanning_window_; kaldi::BaseFloat hanning_window_energy_; LinearSpectrogramOptions opts_; - std::unique_ptr base_extractor_; + std::unique_ptr base_extractor_; int chunk_sample_size_; DISALLOW_COPY_AND_ASSIGN(LinearSpectrogram); }; diff --git a/speechx/speechx/frontend/normalizer.cc b/speechx/speechx/frontend/normalizer.cc index 524125619a74f8e12b7d9932b7529a4ab3bfde0f..26f11b692d2d5c022e26860f2b6f693f904b0741 100644 --- a/speechx/speechx/frontend/normalizer.cc +++ b/speechx/speechx/frontend/normalizer.cc @@ -28,7 +28,7 @@ using std::unique_ptr; DecibelNormalizer::DecibelNormalizer( const DecibelNormalizerOptions& opts, - std::unique_ptr base_extractor) { + std::unique_ptr base_extractor) { base_extractor_ = std::move(base_extractor); opts_ = opts; dim_ = 1; @@ -92,7 +92,7 @@ bool DecibelNormalizer::Compute(VectorBase* waves) const { } CMVN::CMVN(std::string cmvn_file, - unique_ptr base_extractor) + unique_ptr base_extractor) : var_norm_(true) { base_extractor_ = std::move(base_extractor); bool binary; diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h index 352d1e1677e7e9566f3c3ea08d65235ae897a73a..df181961248b4581337eccf4962874dde3e619e7 100644 --- a/speechx/speechx/frontend/normalizer.h +++ b/speechx/speechx/frontend/normalizer.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "kaldi/util/options-itf.h" @@ -40,11 +40,11 @@ struct DecibelNormalizerOptions { } }; -class DecibelNormalizer : public FeatureExtractorInterface { +class DecibelNormalizer : public FrontendInterface { public: explicit DecibelNormalizer( const DecibelNormalizerOptions& opts, - std::unique_ptr base_extractor); + std::unique_ptr base_extractor); virtual void Accept(const kaldi::VectorBase& waves); virtual bool Read(kaldi::Vector* waves); // noramlize audio, the dim is 1. @@ -57,15 +57,15 @@ class DecibelNormalizer : public FeatureExtractorInterface { bool Compute(kaldi::VectorBase* waves) const; DecibelNormalizerOptions opts_; size_t dim_; - std::unique_ptr base_extractor_; + std::unique_ptr base_extractor_; kaldi::Vector waveform_; }; -class CMVN : public FeatureExtractorInterface { +class CMVN : public FrontendInterface { public: explicit CMVN(std::string cmvn_file, - std::unique_ptr base_extractor); + std::unique_ptr base_extractor); virtual void Accept(const kaldi::VectorBase& inputs); // the length of feats = feature_row * feature_dim, @@ -81,7 +81,7 @@ class CMVN : public FeatureExtractorInterface { void Compute(kaldi::VectorBase* feats) const; void ApplyCMVN(kaldi::MatrixBase* feats); kaldi::Matrix stats_; - std::unique_ptr base_extractor_; + std::unique_ptr base_extractor_; size_t dim_; bool var_norm_; }; diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/nnet/decodable.cc index e6315d07a670e5a555ada56aa5014d394b4b98c7..542168d245950c253f42ed9d87014646d547eb67 100644 --- a/speechx/speechx/nnet/decodable.cc +++ b/speechx/speechx/nnet/decodable.cc @@ -22,7 +22,7 @@ using std::vector; using kaldi::Vector; Decodable::Decodable(const std::shared_ptr& nnet, - const std::shared_ptr& frontend) + const std::shared_ptr& frontend) : frontend_(frontend), nnet_(nnet), frame_offset_(0), frames_ready_(0) {} void Decodable::Acceptlikelihood(const Matrix& likelihood) { diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h index 7938b5823e8aa4e75816c025a79e94816a96ba04..ef17601fac992554fae25428d9a2d27063c3da10 100644 --- a/speechx/speechx/nnet/decodable.h +++ b/speechx/speechx/nnet/decodable.h @@ -13,7 +13,7 @@ // limitations under the License. #include "base/common.h" -#include "frontend/feature_extractor_interface.h" +#include "frontend/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "nnet/decodable-itf.h" #include "nnet/nnet_interface.h" @@ -26,7 +26,7 @@ class Decodable : public kaldi::DecodableInterface { public: explicit Decodable( const std::shared_ptr& nnet, - const std::shared_ptr& frontend); + const std::shared_ptr& frontend); // void Init(DecodableOpts config); virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index); virtual bool IsLastFrame(int32 frame) const; @@ -41,7 +41,7 @@ class Decodable : public kaldi::DecodableInterface { private: bool AdvanceChunk(); - std::shared_ptr frontend_; + std::shared_ptr frontend_; std::shared_ptr nnet_; kaldi::Matrix nnet_cache_; // std::vector> nnet_cache_;