From 42c7537ce629a7fc717d27e5fc36073d86f8fce6 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 1 Apr 2022 10:43:39 +0000 Subject: [PATCH] frontend to audio dir --- speechx/examples/decoder/offline_decoder_main.cc | 2 +- .../decoder/offline_decoder_sliding_chunk_main.cc | 2 +- speechx/examples/feat/linear_spectrogram_main.cc | 13 ++++++------- speechx/speechx/frontend/CMakeLists.txt | 11 +---------- speechx/speechx/frontend/audio/CMakeLists.txt | 11 +++++++++++ .../speechx/frontend/{ => audio}/audio_cache.cc | 2 +- .../speechx/frontend/{ => audio}/audio_cache.h | 2 +- speechx/speechx/frontend/{ => audio}/cmvn.cc | 3 +-- speechx/speechx/frontend/{ => audio}/cmvn.h | 2 +- speechx/speechx/frontend/{ => audio}/data_cache.h | 2 +- speechx/speechx/frontend/{ => audio}/db_norm.cc | 2 +- speechx/speechx/frontend/{ => audio}/db_norm.h | 2 +- speechx/speechx/frontend/{ => audio}/fbank.h | 0 .../speechx/frontend/{ => audio}/feature_cache.cc | 2 +- .../speechx/frontend/{ => audio}/feature_cache.h | 2 +- .../speechx/frontend/{ => audio}/frontend_itf.h | 0 .../frontend/{ => audio}/linear_spectrogram.cc | 2 +- .../frontend/{ => audio}/linear_spectrogram.h | 2 +- speechx/speechx/frontend/{ => audio}/mfcc.h | 0 speechx/speechx/frontend/audio/normalizer.h | 4 ++++ speechx/speechx/frontend/normalizer.h | 4 ---- speechx/speechx/frontend/window.h | 15 --------------- speechx/speechx/nnet/decodable.h | 2 +- 23 files changed, 36 insertions(+), 51 deletions(-) rename speechx/speechx/frontend/{ => audio}/audio_cache.cc (98%) rename speechx/speechx/frontend/{ => audio}/audio_cache.h (97%) rename speechx/speechx/frontend/{ => audio}/cmvn.cc (98%) rename speechx/speechx/frontend/{ => audio}/cmvn.h (94%) rename speechx/speechx/frontend/{ => audio}/data_cache.h (97%) rename speechx/speechx/frontend/{ => audio}/db_norm.cc (98%) rename speechx/speechx/frontend/{ => audio}/db_norm.h (97%) rename speechx/speechx/frontend/{ => audio}/fbank.h (100%) rename speechx/speechx/frontend/{ => audio}/feature_cache.cc (97%) rename speechx/speechx/frontend/{ => audio}/feature_cache.h (97%) rename speechx/speechx/frontend/{ => audio}/frontend_itf.h (100%) rename speechx/speechx/frontend/{ => audio}/linear_spectrogram.cc (98%) rename speechx/speechx/frontend/{ => audio}/linear_spectrogram.h (97%) rename speechx/speechx/frontend/{ => audio}/mfcc.h (100%) create mode 100644 speechx/speechx/frontend/audio/normalizer.h delete mode 100644 speechx/speechx/frontend/normalizer.h delete mode 100644 speechx/speechx/frontend/window.h diff --git a/speechx/examples/decoder/offline_decoder_main.cc b/speechx/examples/decoder/offline_decoder_main.cc index 6bd83b9b..9a9c14a0 100644 --- a/speechx/examples/decoder/offline_decoder_main.cc +++ b/speechx/examples/decoder/offline_decoder_main.cc @@ -17,7 +17,7 @@ #include "base/flags.h" #include "base/log.h" #include "decoder/ctc_beam_search_decoder.h" -#include "frontend/data_cache.h" +#include "frontend/audio/data_cache.h" #include "kaldi/util/table-types.h" #include "nnet/decodable.h" #include "nnet/paddle_nnet.h" diff --git a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc index 4d5ffe14..7f6c572c 100644 --- a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc +++ b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc @@ -17,7 +17,7 @@ #include "base/flags.h" #include "base/log.h" #include "decoder/ctc_beam_search_decoder.h" -#include "frontend/data_cache.h" +#include "frontend/audio/data_cache.h" #include "kaldi/util/table-types.h" #include "nnet/decodable.h" #include "nnet/paddle_nnet.h" diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc index c29d2b21..8f32bac2 100644 --- a/speechx/examples/feat/linear_spectrogram_main.cc +++ b/speechx/examples/feat/linear_spectrogram_main.cc @@ -14,19 +14,18 @@ // todo refactor, repalce with gtest -#include "frontend/linear_spectrogram.h" #include "base/flags.h" #include "base/log.h" -#include "frontend/audio_cache.h" -#include "frontend/data_cache.h" -#include "frontend/feature_cache.h" -#include "frontend/frontend_itf.h" -#include "frontend/normalizer.h" #include "kaldi/feat/wave-reader.h" #include "kaldi/util/kaldi-io.h" #include "kaldi/util/table-types.h" -#include +#include "frontend/audio/linear_spectrogram.h" +#include "frontend/audio/audio_cache.h" +#include "frontend/audio/data_cache.h" +#include "frontend/audio/feature_cache.h" +#include "frontend/audio/frontend_itf.h" +#include "frontend/audio/normalizer.h" DEFINE_string(wav_rspecifier, "", "test wav scp path"); DEFINE_string(feature_wspecifier, "", "output feats wspecifier"); diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/frontend/CMakeLists.txt index 35243b6e..7d10fdec 100644 --- a/speechx/speechx/frontend/CMakeLists.txt +++ b/speechx/speechx/frontend/CMakeLists.txt @@ -1,11 +1,2 @@ -project(frontend) -add_library(frontend STATIC - cmvn.cc - db_norm.cc - linear_spectrogram.cc - audio_cache.cc - feature_cache.cc -) - -target_link_libraries(frontend PUBLIC kaldi-matrix) \ No newline at end of file +add_subdirectory(audio) \ No newline at end of file diff --git a/speechx/speechx/frontend/audio/CMakeLists.txt b/speechx/speechx/frontend/audio/CMakeLists.txt index e69de29b..35243b6e 100644 --- a/speechx/speechx/frontend/audio/CMakeLists.txt +++ b/speechx/speechx/frontend/audio/CMakeLists.txt @@ -0,0 +1,11 @@ +project(frontend) + +add_library(frontend STATIC + cmvn.cc + db_norm.cc + linear_spectrogram.cc + audio_cache.cc + feature_cache.cc +) + +target_link_libraries(frontend PUBLIC kaldi-matrix) \ No newline at end of file diff --git a/speechx/speechx/frontend/audio_cache.cc b/speechx/speechx/frontend/audio/audio_cache.cc similarity index 98% rename from speechx/speechx/frontend/audio_cache.cc rename to speechx/speechx/frontend/audio/audio_cache.cc index d44ed592..c3233e59 100644 --- a/speechx/speechx/frontend/audio_cache.cc +++ b/speechx/speechx/frontend/audio/audio_cache.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "frontend/audio_cache.h" +#include "frontend/audio/audio_cache.h" #include "kaldi/base/timer.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio/audio_cache.h similarity index 97% rename from speechx/speechx/frontend/audio_cache.h rename to speechx/speechx/frontend/audio/audio_cache.h index f48da12b..17e1a838 100644 --- a/speechx/speechx/frontend/audio_cache.h +++ b/speechx/speechx/frontend/audio/audio_cache.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/cmvn.cc b/speechx/speechx/frontend/audio/cmvn.cc similarity index 98% rename from speechx/speechx/frontend/cmvn.cc rename to speechx/speechx/frontend/audio/cmvn.cc index d9bba943..706492b7 100644 --- a/speechx/speechx/frontend/cmvn.cc +++ b/speechx/speechx/frontend/audio/cmvn.cc @@ -1,5 +1,5 @@ -#include "frontend/normalizer.h" +#include "frontend/audio/cmvn.h" #include "kaldi/feat/cmvn.h" #include "kaldi/util/kaldi-io.h" @@ -13,7 +13,6 @@ using kaldi::SubVector; using std::unique_ptr; - CMVN::CMVN(std::string cmvn_file, unique_ptr base_extractor) : var_norm_(true) { diff --git a/speechx/speechx/frontend/cmvn.h b/speechx/speechx/frontend/audio/cmvn.h similarity index 94% rename from speechx/speechx/frontend/cmvn.h rename to speechx/speechx/frontend/audio/cmvn.h index fdf2a87a..b3cfbb11 100644 --- a/speechx/speechx/frontend/cmvn.h +++ b/speechx/speechx/frontend/audio/cmvn.h @@ -1,7 +1,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "kaldi/util/options-itf.h" diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/audio/data_cache.h similarity index 97% rename from speechx/speechx/frontend/data_cache.h rename to speechx/speechx/frontend/audio/data_cache.h index b8ce6bf6..a812278c 100644 --- a/speechx/speechx/frontend/data_cache.h +++ b/speechx/speechx/frontend/audio/data_cache.h @@ -17,7 +17,7 @@ #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/db_norm.cc b/speechx/speechx/frontend/audio/db_norm.cc similarity index 98% rename from speechx/speechx/frontend/db_norm.cc rename to speechx/speechx/frontend/audio/db_norm.cc index 830af13b..931e932d 100644 --- a/speechx/speechx/frontend/db_norm.cc +++ b/speechx/speechx/frontend/audio/db_norm.cc @@ -13,7 +13,7 @@ // limitations under the License. -#include "frontend/normalizer.h" +#include "frontend/audio/db_norm.h" #include "kaldi/feat/cmvn.h" #include "kaldi/util/kaldi-io.h" diff --git a/speechx/speechx/frontend/db_norm.h b/speechx/speechx/frontend/audio/db_norm.h similarity index 97% rename from speechx/speechx/frontend/db_norm.h rename to speechx/speechx/frontend/audio/db_norm.h index 3d371071..42597143 100644 --- a/speechx/speechx/frontend/db_norm.h +++ b/speechx/speechx/frontend/audio/db_norm.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "kaldi/util/options-itf.h" diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/audio/fbank.h similarity index 100% rename from speechx/speechx/frontend/fbank.h rename to speechx/speechx/frontend/audio/fbank.h diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/audio/feature_cache.cc similarity index 97% rename from speechx/speechx/frontend/feature_cache.cc rename to speechx/speechx/frontend/audio/feature_cache.cc index 53b7076d..d7bea61a 100644 --- a/speechx/speechx/frontend/feature_cache.cc +++ b/speechx/speechx/frontend/audio/feature_cache.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "frontend/feature_cache.h" +#include "frontend/audio/feature_cache.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/audio/feature_cache.h similarity index 97% rename from speechx/speechx/frontend/feature_cache.h rename to speechx/speechx/frontend/audio/feature_cache.h index 1281ec35..99961b5e 100644 --- a/speechx/speechx/frontend/feature_cache.h +++ b/speechx/speechx/frontend/audio/feature_cache.h @@ -15,7 +15,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/frontend_itf.h b/speechx/speechx/frontend/audio/frontend_itf.h similarity index 100% rename from speechx/speechx/frontend/frontend_itf.h rename to speechx/speechx/frontend/audio/frontend_itf.h diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/audio/linear_spectrogram.cc similarity index 98% rename from speechx/speechx/frontend/linear_spectrogram.cc rename to speechx/speechx/frontend/audio/linear_spectrogram.cc index 2ba00785..827b8ecc 100644 --- a/speechx/speechx/frontend/linear_spectrogram.cc +++ b/speechx/speechx/frontend/audio/linear_spectrogram.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "frontend/linear_spectrogram.h" +#include "frontend/audio/linear_spectrogram.h" #include "kaldi/base/kaldi-math.h" #include "kaldi/matrix/matrix-functions.h" diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/audio/linear_spectrogram.h similarity index 97% rename from speechx/speechx/frontend/linear_spectrogram.h rename to speechx/speechx/frontend/audio/linear_spectrogram.h index 136441ef..bbf8d685 100644 --- a/speechx/speechx/frontend/linear_spectrogram.h +++ b/speechx/speechx/frontend/audio/linear_spectrogram.h @@ -16,7 +16,7 @@ #pragma once #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/feat/feature-window.h" namespace ppspeech { diff --git a/speechx/speechx/frontend/mfcc.h b/speechx/speechx/frontend/audio/mfcc.h similarity index 100% rename from speechx/speechx/frontend/mfcc.h rename to speechx/speechx/frontend/audio/mfcc.h diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/frontend/audio/normalizer.h new file mode 100644 index 00000000..df9e4b75 --- /dev/null +++ b/speechx/speechx/frontend/audio/normalizer.h @@ -0,0 +1,4 @@ +#pragma once + +#include "frontend/audio/cmvn.h" +#include "frontend/audio/db_norm.h" \ No newline at end of file diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h deleted file mode 100644 index 89599c2a..00000000 --- a/speechx/speechx/frontend/normalizer.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -#include "frontend/cmvn.h" -#include "frontend/db_norm.h" \ No newline at end of file diff --git a/speechx/speechx/frontend/window.h b/speechx/speechx/frontend/window.h deleted file mode 100644 index 70d6307e..00000000 --- a/speechx/speechx/frontend/window.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// extract the window of kaldi feat. diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h index ef17601f..c75a0f4d 100644 --- a/speechx/speechx/nnet/decodable.h +++ b/speechx/speechx/nnet/decodable.h @@ -13,7 +13,7 @@ // limitations under the License. #include "base/common.h" -#include "frontend/frontend_itf.h" +#include "frontend/audio/frontend_itf.h" #include "kaldi/matrix/kaldi-matrix.h" #include "nnet/decodable-itf.h" #include "nnet/nnet_interface.h" -- GitLab