diff --git a/speechx/examples/decoder/offline_decoder_main.cc b/speechx/examples/decoder/offline_decoder_main.cc index 6bd83b9b156ee0e452fe0d9c1bda6e6f960090ec..9a9c14a0cff9f2b239a45ad7a12c2a2f20f4d601 100644 --- a/speechx/examples/decoder/offline_decoder_main.cc +++ b/speechx/examples/decoder/offline_decoder_main.cc @@ -17,7 +17,7 @@ #include "base/flags.h" #include "base/log.h" #include "decoder/ctc_beam_search_decoder.h" -#include "frontend/data_cache.h" +#include "frontend/audio/data_cache.h" #include "kaldi/util/table-types.h" #include "nnet/decodable.h" #include "nnet/paddle_nnet.h" diff --git a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc index 4d5ffe145ac91796e0376344f07996e9ed9e6cfd..7f6c572ca2ff6675d98474a5eb85381d1703e641 100644 --- a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc +++ b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc @@ -17,7 +17,7 @@ #include "base/flags.h" #include "base/log.h" #include "decoder/ctc_beam_search_decoder.h" -#include "frontend/data_cache.h" +#include "frontend/audio/data_cache.h" #include "kaldi/util/table-types.h" #include "nnet/decodable.h" #include "nnet/paddle_nnet.h" diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc index c29d2b21fd0e58192bc62e1f55110f691eb56d91..8f32bac2a861534062d34bf948af46a2da49bfb9 100644 --- a/speechx/examples/feat/linear_spectrogram_main.cc +++ b/speechx/examples/feat/linear_spectrogram_main.cc @@ -14,19 +14,18 @@ // todo refactor, repalce with gtest -#include "frontend/linear_spectrogram.h" #include "base/flags.h" #include "base/log.h" -#include "frontend/audio_cache.h" -#include "frontend/data_cache.h" -#include "frontend/feature_cache.h" -#include "frontend/frontend_itf.h" -#include "frontend/normalizer.h" #include "kaldi/feat/wave-reader.h" #include "kaldi/util/kaldi-io.h" #include "kaldi/util/table-types.h" -#include +#include "frontend/audio/linear_spectrogram.h" +#include "frontend/audio/audio_cache.h" +#include "frontend/audio/data_cache.h" +#include "frontend/audio/feature_cache.h" +#include "frontend/audio/frontend_itf.h" +#include "frontend/audio/normalizer.h" DEFINE_string(wav_rspecifier, "", "test wav scp path"); DEFINE_string(feature_wspecifier, "", "output feats wspecifier"); diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/frontend/CMakeLists.txt index 35243b6e3ce2a39333c99f77f709dea2d06e47e5..7d10fdec9e1d5529c1b5d9a83bc2d8e798abc9ee 100644 --- a/speechx/speechx/frontend/CMakeLists.txt +++ b/speechx/speechx/frontend/CMakeLists.txt @@ -1,11 +1,2 @@ -project(frontend) -add_library(frontend STATIC - cmvn.cc - db_norm.cc - linear_spectrogram.cc - audio_cache.cc - feature_cache.cc -) - -target_link_libraries(frontend PUBLIC kaldi-matrix) \ No newline at end of file +add_subdirectory(audio) \ No newline at end of file diff --git a/speechx/speechx/frontend/audio/CMakeLists.txt b/speechx/speechx/frontend/audio/CMakeLists.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..35243b6e3ce2a39333c99f77f709dea2d06e47e5 100644 --- a/speechx/speechx/frontend/audio/CMakeLists.txt +++ b/speechx/speechx/frontend/audio/CMakeLists.txt @@ -0,0 +1,11 @@ +project(frontend) + +add_library(frontend STATIC + cmvn.cc + db_norm.cc + linear_spectrogram.cc + audio_cache.cc + feature_cache.cc +) + +target_link_libraries(frontend PUBLIC kaldi-matrix) \ No newline at end of file diff --git a/speechx/speechx/frontend/audio_cache.cc b/speechx/speechx/frontend/audio/audio_cache.cc similarity index 98% rename from speechx/speechx/frontend/audio_cache.cc rename to speechx/speechx/frontend/audio/audio_cache.cc index d44ed592c2c07af0b82b3275b3b8e5b8f62f7573..c3233e595d874adc4b21000e94881f670892b525 100644 --- a/speechx/speechx/frontend/audio_cache.cc +++ b/speechx/speechx/frontend/audio/audio_cache.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "frontend/audio_cache.h" +#include "frontend/audio/audio_cache.h" #include "kaldi/base/timer.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio/audio_cache.h similarity index 97% rename from speechx/speechx/frontend/audio_cache.h rename to speechx/speechx/frontend/audio/audio_cache.h index f48da12b7d77052980cbe42afd6cefdf090f8288..17e1a83895b9f24eb3a6a24d5b73bd72bb53b11b 100644 --- a/speechx/speechx/frontend/audio_cache.h +++ b/speechx/speechx/frontend/audio/audio_cache.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/cmvn.cc b/speechx/speechx/frontend/audio/cmvn.cc similarity index 98% rename from speechx/speechx/frontend/cmvn.cc rename to speechx/speechx/frontend/audio/cmvn.cc index d9bba943548b5fee262104ce86ad48fcf303e9f6..706492b7ca613a225bba4956258838c9c0ea6c4f 100644 --- a/speechx/speechx/frontend/cmvn.cc +++ b/speechx/speechx/frontend/audio/cmvn.cc @@ -1,5 +1,5 @@ -#include "frontend/normalizer.h" +#include "frontend/audio/cmvn.h" #include "kaldi/feat/cmvn.h" #include "kaldi/util/kaldi-io.h" @@ -13,7 +13,6 @@ using kaldi::SubVector; using std::unique_ptr; - CMVN::CMVN(std::string cmvn_file, unique_ptr base_extractor) : var_norm_(true) { diff --git a/speechx/speechx/frontend/cmvn.h b/speechx/speechx/frontend/audio/cmvn.h similarity index 94% rename from speechx/speechx/frontend/cmvn.h rename to speechx/speechx/frontend/audio/cmvn.h index fdf2a87a435410dad1ebf6808eda2e1fe942e08f..b3cfbb11a6e7cdb2d159eab913b49f942e344205 100644 --- a/speechx/speechx/frontend/cmvn.h +++ b/speechx/speechx/frontend/audio/cmvn.h @@ -1,7 +1,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "kaldi/util/options-itf.h" diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/audio/data_cache.h similarity index 97% rename from speechx/speechx/frontend/data_cache.h rename to speechx/speechx/frontend/audio/data_cache.h index b8ce6bf65ee311c4f414ae27b0045d437ebcec38..a812278ce2e1aa8fb66c57885e36f324e25fe078 100644 --- a/speechx/speechx/frontend/data_cache.h +++ b/speechx/speechx/frontend/audio/data_cache.h @@ -17,7 +17,7 @@ #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/db_norm.cc b/speechx/speechx/frontend/audio/db_norm.cc similarity index 98% rename from speechx/speechx/frontend/db_norm.cc rename to speechx/speechx/frontend/audio/db_norm.cc index 830af13bec6f08561b8fafdd66dc1d772212ce39..931e932d6d6aa725655ddc2cccf933529425c6f8 100644 --- a/speechx/speechx/frontend/db_norm.cc +++ b/speechx/speechx/frontend/audio/db_norm.cc @@ -13,7 +13,7 @@ // limitations under the License. -#include "frontend/normalizer.h" +#include "frontend/audio/db_norm.h" #include "kaldi/feat/cmvn.h" #include "kaldi/util/kaldi-io.h" diff --git a/speechx/speechx/frontend/db_norm.h b/speechx/speechx/frontend/audio/db_norm.h similarity index 97% rename from speechx/speechx/frontend/db_norm.h rename to speechx/speechx/frontend/audio/db_norm.h index 3d37107150fb2b8581f895c9e911526f67573a58..425971437d1d2b154d645727720c460140d5e117 100644 --- a/speechx/speechx/frontend/db_norm.h +++ b/speechx/speechx/frontend/audio/db_norm.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "kaldi/util/options-itf.h" diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/audio/fbank.h similarity index 100% rename from speechx/speechx/frontend/fbank.h rename to speechx/speechx/frontend/audio/fbank.h diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/audio/feature_cache.cc similarity index 97% rename from speechx/speechx/frontend/feature_cache.cc rename to speechx/speechx/frontend/audio/feature_cache.cc index 53b7076d56d7db08c5f1ade5d2f896bf87c6c0d2..d7bea61ad62908d2e55b0a845de1faaa57d6a49c 100644 --- a/speechx/speechx/frontend/feature_cache.cc +++ b/speechx/speechx/frontend/audio/feature_cache.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "frontend/feature_cache.h" +#include "frontend/audio/feature_cache.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/audio/feature_cache.h similarity index 97% rename from speechx/speechx/frontend/feature_cache.h rename to speechx/speechx/frontend/audio/feature_cache.h index 1281ec35a4652f99d26f388b9f786cccb9107e32..99961b5e28252691ecb81c60a5e8448a541e52f0 100644 --- a/speechx/speechx/frontend/feature_cache.h +++ b/speechx/speechx/frontend/audio/feature_cache.h @@ -15,7 +15,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/frontend_itf.h b/speechx/speechx/frontend/audio/frontend_itf.h similarity index 100% rename from speechx/speechx/frontend/frontend_itf.h rename to speechx/speechx/frontend/audio/frontend_itf.h diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/audio/linear_spectrogram.cc similarity index 98% rename from speechx/speechx/frontend/linear_spectrogram.cc rename to speechx/speechx/frontend/audio/linear_spectrogram.cc index 2ba00785aa70f578025f2b59ec233c4acb855e83..827b8eccff7c82a75eaa8fdb51d127c72f6565ba 100644 --- a/speechx/speechx/frontend/linear_spectrogram.cc +++ b/speechx/speechx/frontend/audio/linear_spectrogram.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "frontend/linear_spectrogram.h" +#include "frontend/audio/linear_spectrogram.h" #include "kaldi/base/kaldi-math.h" #include "kaldi/matrix/matrix-functions.h" diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/audio/linear_spectrogram.h similarity index 97% rename from speechx/speechx/frontend/linear_spectrogram.h rename to speechx/speechx/frontend/audio/linear_spectrogram.h index 136441efeec99d6936d73043e4147c5783113779..bbf8d6853d5c644361d3aa4441fb7172dabc3083 100644 --- a/speechx/speechx/frontend/linear_spectrogram.h +++ b/speechx/speechx/frontend/audio/linear_spectrogram.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/feat/feature-window.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/mfcc.h b/speechx/speechx/frontend/audio/mfcc.h similarity index 100% rename from speechx/speechx/frontend/mfcc.h rename to speechx/speechx/frontend/audio/mfcc.h diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/frontend/audio/normalizer.h new file mode 100644 index 0000000000000000000000000000000000000000..df9e4b751427248c8c8c920cb2196ae3f697ed07 --- /dev/null +++ b/speechx/speechx/frontend/audio/normalizer.h @@ -0,0 +1,4 @@ +#pragma once + +#include "frontend/audio/cmvn.h" +#include "frontend/audio/db_norm.h" \ No newline at end of file diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h deleted file mode 100644 index 89599c2a624113b26b709f4a63158139e9c0691b..0000000000000000000000000000000000000000 --- a/speechx/speechx/frontend/normalizer.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -#include "frontend/cmvn.h" -#include "frontend/db_norm.h" \ No newline at end of file diff --git a/speechx/speechx/frontend/window.h b/speechx/speechx/frontend/window.h deleted file mode 100644 index 70d6307ec0c5d0bbe3a0e8847247ce7f26bdc5db..0000000000000000000000000000000000000000 --- a/speechx/speechx/frontend/window.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// extract the window of kaldi feat. diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h index ef17601fac992554fae25428d9a2d27063c3da10..c75a0f4de911eb236ea9f0b9a2b9079daac12f3f 100644 --- a/speechx/speechx/nnet/decodable.h +++ b/speechx/speechx/nnet/decodable.h @@ -13,7 +13,7 @@ // limitations under the License. #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "nnet/decodable-itf.h" #include "nnet/nnet_interface.h"