diff --git a/speechx/examples/decoder/offline_decoder_main.cc b/speechx/examples/decoder/offline_decoder_main.cc
index 6bd83b9b156ee0e452fe0d9c1bda6e6f960090ec..9a9c14a0cff9f2b239a45ad7a12c2a2f20f4d601 100644
--- a/speechx/examples/decoder/offline_decoder_main.cc
+++ b/speechx/examples/decoder/offline_decoder_main.cc
@@ -17,7 +17,7 @@
 #include "base/flags.h"
 #include "base/log.h"
 #include "decoder/ctc_beam_search_decoder.h"
-#include "frontend/data_cache.h"
+#include "frontend/audio/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
 #include "nnet/paddle_nnet.h"
diff --git a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
index 4d5ffe145ac91796e0376344f07996e9ed9e6cfd..7f6c572ca2ff6675d98474a5eb85381d1703e641 100644
--- a/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
+++ b/speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
@@ -17,7 +17,7 @@
 #include "base/flags.h"
 #include "base/log.h"
 #include "decoder/ctc_beam_search_decoder.h"
-#include "frontend/data_cache.h"
+#include "frontend/audio/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
 #include "nnet/paddle_nnet.h"
diff --git a/speechx/examples/feat/linear_spectrogram_main.cc b/speechx/examples/feat/linear_spectrogram_main.cc
index c29d2b21fd0e58192bc62e1f55110f691eb56d91..8f32bac2a861534062d34bf948af46a2da49bfb9 100644
--- a/speechx/examples/feat/linear_spectrogram_main.cc
+++ b/speechx/examples/feat/linear_spectrogram_main.cc
@@ -14,19 +14,18 @@
 
 // todo refactor, repalce with gtest
 
-#include "frontend/linear_spectrogram.h"
 #include "base/flags.h"
 #include "base/log.h"
-#include "frontend/audio_cache.h"
-#include "frontend/data_cache.h"
-#include "frontend/feature_cache.h"
-#include "frontend/frontend_itf.h"
-#include "frontend/normalizer.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"
 
-#include <glog/logging.h>
+#include "frontend/audio/linear_spectrogram.h"
+#include "frontend/audio/audio_cache.h"
+#include "frontend/audio/data_cache.h"
+#include "frontend/audio/feature_cache.h"
+#include "frontend/audio/frontend_itf.h"
+#include "frontend/audio/normalizer.h"
 
 DEFINE_string(wav_rspecifier, "", "test wav scp path");
 DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/frontend/CMakeLists.txt
index 35243b6e3ce2a39333c99f77f709dea2d06e47e5..7d10fdec9e1d5529c1b5d9a83bc2d8e798abc9ee 100644
--- a/speechx/speechx/frontend/CMakeLists.txt
+++ b/speechx/speechx/frontend/CMakeLists.txt
@@ -1,11 +1,2 @@
-project(frontend)
 
-add_library(frontend STATIC
-  cmvn.cc
-  db_norm.cc
-  linear_spectrogram.cc
-  audio_cache.cc
-  feature_cache.cc
-)
-
-target_link_libraries(frontend PUBLIC kaldi-matrix)
\ No newline at end of file
+add_subdirectory(audio)
\ No newline at end of file
diff --git a/speechx/speechx/frontend/audio/CMakeLists.txt b/speechx/speechx/frontend/audio/CMakeLists.txt
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..35243b6e3ce2a39333c99f77f709dea2d06e47e5 100644
--- a/speechx/speechx/frontend/audio/CMakeLists.txt
+++ b/speechx/speechx/frontend/audio/CMakeLists.txt
@@ -0,0 +1,11 @@
+project(frontend)
+
+add_library(frontend STATIC
+  cmvn.cc
+  db_norm.cc
+  linear_spectrogram.cc
+  audio_cache.cc
+  feature_cache.cc
+)
+
+target_link_libraries(frontend PUBLIC kaldi-matrix)
\ No newline at end of file
diff --git a/speechx/speechx/frontend/audio_cache.cc b/speechx/speechx/frontend/audio/audio_cache.cc
similarity index 98%
rename from speechx/speechx/frontend/audio_cache.cc
rename to speechx/speechx/frontend/audio/audio_cache.cc
index d44ed592c2c07af0b82b3275b3b8e5b8f62f7573..c3233e595d874adc4b21000e94881f670892b525 100644
--- a/speechx/speechx/frontend/audio_cache.cc
+++ b/speechx/speechx/frontend/audio/audio_cache.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "frontend/audio_cache.h"
+#include "frontend/audio/audio_cache.h"
 #include "kaldi/base/timer.h"
 
 namespace ppspeech {
diff --git a/speechx/speechx/frontend/audio_cache.h b/speechx/speechx/frontend/audio/audio_cache.h
similarity index 97%
rename from speechx/speechx/frontend/audio_cache.h
rename to speechx/speechx/frontend/audio/audio_cache.h
index f48da12b7d77052980cbe42afd6cefdf090f8288..17e1a83895b9f24eb3a6a24d5b73bd72bb53b11b 100644
--- a/speechx/speechx/frontend/audio_cache.h
+++ b/speechx/speechx/frontend/audio/audio_cache.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 
 namespace ppspeech {
 
diff --git a/speechx/speechx/frontend/cmvn.cc b/speechx/speechx/frontend/audio/cmvn.cc
similarity index 98%
rename from speechx/speechx/frontend/cmvn.cc
rename to speechx/speechx/frontend/audio/cmvn.cc
index d9bba943548b5fee262104ce86ad48fcf303e9f6..706492b7ca613a225bba4956258838c9c0ea6c4f 100644
--- a/speechx/speechx/frontend/cmvn.cc
+++ b/speechx/speechx/frontend/audio/cmvn.cc
@@ -1,5 +1,5 @@
 
-#include "frontend/normalizer.h"
+#include "frontend/audio/cmvn.h"
 #include "kaldi/feat/cmvn.h"
 #include "kaldi/util/kaldi-io.h"
 
@@ -13,7 +13,6 @@ using kaldi::SubVector;
 using std::unique_ptr;
 
 
-
 CMVN::CMVN(std::string cmvn_file,
            unique_ptr<FrontendInterface> base_extractor)
     : var_norm_(true) {
diff --git a/speechx/speechx/frontend/cmvn.h b/speechx/speechx/frontend/audio/cmvn.h
similarity index 94%
rename from speechx/speechx/frontend/cmvn.h
rename to speechx/speechx/frontend/audio/cmvn.h
index fdf2a87a435410dad1ebf6808eda2e1fe942e08f..b3cfbb11a6e7cdb2d159eab913b49f942e344205 100644
--- a/speechx/speechx/frontend/cmvn.h
+++ b/speechx/speechx/frontend/audio/cmvn.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
diff --git a/speechx/speechx/frontend/data_cache.h b/speechx/speechx/frontend/audio/data_cache.h
similarity index 97%
rename from speechx/speechx/frontend/data_cache.h
rename to speechx/speechx/frontend/audio/data_cache.h
index b8ce6bf65ee311c4f414ae27b0045d437ebcec38..a812278ce2e1aa8fb66c57885e36f324e25fe078 100644
--- a/speechx/speechx/frontend/data_cache.h
+++ b/speechx/speechx/frontend/audio/data_cache.h
@@ -17,7 +17,7 @@
 
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 
 
 namespace ppspeech {
diff --git a/speechx/speechx/frontend/db_norm.cc b/speechx/speechx/frontend/audio/db_norm.cc
similarity index 98%
rename from speechx/speechx/frontend/db_norm.cc
rename to speechx/speechx/frontend/audio/db_norm.cc
index 830af13bec6f08561b8fafdd66dc1d772212ce39..931e932d6d6aa725655ddc2cccf933529425c6f8 100644
--- a/speechx/speechx/frontend/db_norm.cc
+++ b/speechx/speechx/frontend/audio/db_norm.cc
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 
-#include "frontend/normalizer.h"
+#include "frontend/audio/db_norm.h"
 #include "kaldi/feat/cmvn.h"
 #include "kaldi/util/kaldi-io.h"
 
diff --git a/speechx/speechx/frontend/db_norm.h b/speechx/speechx/frontend/audio/db_norm.h
similarity index 97%
rename from speechx/speechx/frontend/db_norm.h
rename to speechx/speechx/frontend/audio/db_norm.h
index 3d37107150fb2b8581f895c9e911526f67573a58..425971437d1d2b154d645727720c460140d5e117 100644
--- a/speechx/speechx/frontend/db_norm.h
+++ b/speechx/speechx/frontend/audio/db_norm.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
 
diff --git a/speechx/speechx/frontend/fbank.h b/speechx/speechx/frontend/audio/fbank.h
similarity index 100%
rename from speechx/speechx/frontend/fbank.h
rename to speechx/speechx/frontend/audio/fbank.h
diff --git a/speechx/speechx/frontend/feature_cache.cc b/speechx/speechx/frontend/audio/feature_cache.cc
similarity index 97%
rename from speechx/speechx/frontend/feature_cache.cc
rename to speechx/speechx/frontend/audio/feature_cache.cc
index 53b7076d56d7db08c5f1ade5d2f896bf87c6c0d2..d7bea61ad62908d2e55b0a845de1faaa57d6a49c 100644
--- a/speechx/speechx/frontend/feature_cache.cc
+++ b/speechx/speechx/frontend/audio/feature_cache.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "frontend/feature_cache.h"
+#include "frontend/audio/feature_cache.h"
 
 namespace ppspeech {
 
diff --git a/speechx/speechx/frontend/feature_cache.h b/speechx/speechx/frontend/audio/feature_cache.h
similarity index 97%
rename from speechx/speechx/frontend/feature_cache.h
rename to speechx/speechx/frontend/audio/feature_cache.h
index 1281ec35a4652f99d26f388b9f786cccb9107e32..99961b5e28252691ecb81c60a5e8448a541e52f0 100644
--- a/speechx/speechx/frontend/feature_cache.h
+++ b/speechx/speechx/frontend/audio/feature_cache.h
@@ -15,7 +15,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 
 namespace ppspeech {
 
diff --git a/speechx/speechx/frontend/frontend_itf.h b/speechx/speechx/frontend/audio/frontend_itf.h
similarity index 100%
rename from speechx/speechx/frontend/frontend_itf.h
rename to speechx/speechx/frontend/audio/frontend_itf.h
diff --git a/speechx/speechx/frontend/linear_spectrogram.cc b/speechx/speechx/frontend/audio/linear_spectrogram.cc
similarity index 98%
rename from speechx/speechx/frontend/linear_spectrogram.cc
rename to speechx/speechx/frontend/audio/linear_spectrogram.cc
index 2ba00785aa70f578025f2b59ec233c4acb855e83..827b8eccff7c82a75eaa8fdb51d127c72f6565ba 100644
--- a/speechx/speechx/frontend/linear_spectrogram.cc
+++ b/speechx/speechx/frontend/audio/linear_spectrogram.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "frontend/linear_spectrogram.h"
+#include "frontend/audio/linear_spectrogram.h"
 #include "kaldi/base/kaldi-math.h"
 #include "kaldi/matrix/matrix-functions.h"
 
diff --git a/speechx/speechx/frontend/linear_spectrogram.h b/speechx/speechx/frontend/audio/linear_spectrogram.h
similarity index 97%
rename from speechx/speechx/frontend/linear_spectrogram.h
rename to speechx/speechx/frontend/audio/linear_spectrogram.h
index 136441efeec99d6936d73043e4147c5783113779..bbf8d6853d5c644361d3aa4441fb7172dabc3083 100644
--- a/speechx/speechx/frontend/linear_spectrogram.h
+++ b/speechx/speechx/frontend/audio/linear_spectrogram.h
@@ -16,7 +16,7 @@
 #pragma once
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/feat/feature-window.h"
 
 namespace ppspeech {
diff --git a/speechx/speechx/frontend/mfcc.h b/speechx/speechx/frontend/audio/mfcc.h
similarity index 100%
rename from speechx/speechx/frontend/mfcc.h
rename to speechx/speechx/frontend/audio/mfcc.h
diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/frontend/audio/normalizer.h
new file mode 100644
index 0000000000000000000000000000000000000000..df9e4b751427248c8c8c920cb2196ae3f697ed07
--- /dev/null
+++ b/speechx/speechx/frontend/audio/normalizer.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "frontend/audio/cmvn.h"
+#include "frontend/audio/db_norm.h"
\ No newline at end of file
diff --git a/speechx/speechx/frontend/normalizer.h b/speechx/speechx/frontend/normalizer.h
deleted file mode 100644
index 89599c2a624113b26b709f4a63158139e9c0691b..0000000000000000000000000000000000000000
--- a/speechx/speechx/frontend/normalizer.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#pragma once
-
-#include "frontend/cmvn.h"
-#include "frontend/db_norm.h"
\ No newline at end of file
diff --git a/speechx/speechx/frontend/window.h b/speechx/speechx/frontend/window.h
deleted file mode 100644
index 70d6307ec0c5d0bbe3a0e8847247ce7f26bdc5db..0000000000000000000000000000000000000000
--- a/speechx/speechx/frontend/window.h
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// extract the window of kaldi feat.
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/nnet/decodable.h
index ef17601fac992554fae25428d9a2d27063c3da10..c75a0f4de911eb236ea9f0b9a2b9079daac12f3f 100644
--- a/speechx/speechx/nnet/decodable.h
+++ b/speechx/speechx/nnet/decodable.h
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #include "base/common.h"
-#include "frontend/frontend_itf.h"
+#include "frontend/audio/frontend_itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "nnet/decodable-itf.h"
 #include "nnet/nnet_interface.h"