Unverified commit 1aa7495d authored by 小湉湉, committed by GitHub

[TTS]Add license and reformat for TTSCppFrontend (#3030)

Parent 259f4936
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <chrono>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
@@ -10,24 +23,28 @@
using namespace paddle::lite_api;

class PredictorInterface {
  public:
    virtual ~PredictorInterface() = 0;
    virtual bool Init(const std::string &AcousticModelPath,
                      const std::string &VocoderPath,
                      PowerMode cpuPowerMode,
                      int cpuThreadNum,
                      // WAV sample rate (must match the model output)
                      // If the playback speed or pitch is wrong, adjust it
                      // Common rates: 16000, 24000, 32000, 44100, 48000, 96000
                      uint32_t wavSampleRate) = 0;
    virtual std::shared_ptr<PaddlePredictor> LoadModel(
        const std::string &modelPath,
        int cpuThreadNum,
        PowerMode cpuPowerMode) = 0;
    virtual void ReleaseModel() = 0;
    virtual bool RunModel(const std::vector<int64_t> &phones) = 0;
    virtual std::unique_ptr<const Tensor> GetAcousticModelOutput(
        const std::vector<int64_t> &phones) = 0;
    virtual std::unique_ptr<const Tensor> GetVocoderOutput(
        std::unique_ptr<const Tensor> &&amOutput) = 0;
    virtual void VocoderOutputToWav(
        std::unique_ptr<const Tensor> &&vocOutput) = 0;
    virtual void SaveFloatWav(float *floatWav, int64_t size) = 0;
    virtual bool IsLoaded() = 0;
    virtual float GetInferenceTime() = 0;
@@ -45,23 +62,22 @@ PredictorInterface::~PredictorInterface() {}
// WavDataType: the WAV sample data type;
// switch between int16_t and float to generate
// 16-bit PCM or 32-bit IEEE float WAV output.
template <typename WavDataType>
class Predictor : public PredictorInterface {
  public:
    bool Init(const std::string &AcousticModelPath,
              const std::string &VocoderPath,
              PowerMode cpuPowerMode,
              int cpuThreadNum,
              // WAV sample rate (must match the model output)
              // If the playback speed or pitch is wrong, adjust it
              // Common rates: 16000, 24000, 32000, 44100, 48000, 96000
              uint32_t wavSampleRate) override {
        // Release model if exists
        ReleaseModel();

        acoustic_model_predictor_ =
            LoadModel(AcousticModelPath, cpuThreadNum, cpuPowerMode);
        if (acoustic_model_predictor_ == nullptr) {
            return false;
        }
@@ -80,7 +96,10 @@ public:
        ReleaseWav();
    }

    std::shared_ptr<PaddlePredictor> LoadModel(
        const std::string &modelPath,
        int cpuThreadNum,
        PowerMode cpuPowerMode) override {
        if (modelPath.empty()) {
            return nullptr;
        }
@@ -94,12 +113,12 @@ public:
        return CreatePaddlePredictor<MobileConfig>(config);
    }

    void ReleaseModel() override {
        acoustic_model_predictor_ = nullptr;
        vocoder_predictor_ = nullptr;
    }

    bool RunModel(const std::vector<int64_t> &phones) override {
        if (!IsLoaded()) {
            return false;
        }
@@ -115,12 +134,13 @@ public:
        // Compute the elapsed time
        std::chrono::duration<float> duration = end - start;
        inference_time_ = duration.count() * 1000;  // in milliseconds
        return true;
    }

    std::unique_ptr<const Tensor> GetAcousticModelOutput(
        const std::vector<int64_t> &phones) override {
        auto phones_handle = acoustic_model_predictor_->GetInput(0);
        phones_handle->Resize({static_cast<int64_t>(phones.size())});
        phones_handle->CopyFromCpu(phones.data());
@@ -139,7 +159,8 @@ public:
        return am_output_handle;
    }

    std::unique_ptr<const Tensor> GetVocoderOutput(
        std::unique_ptr<const Tensor> &&amOutput) override {
        auto mel_handle = vocoder_predictor_->GetInput(0);
        // [?, 80]
        auto dims = amOutput->shape();
@@ -161,7 +182,8 @@ public:
        return voc_output_handle;
    }

    void VocoderOutputToWav(
        std::unique_ptr<const Tensor> &&vocOutput) override {
        // Get the output tensor data
        int64_t output_size = 1;
        for (auto dim : vocOutput->shape()) {
@@ -172,39 +194,31 @@ public:
        SaveFloatWav(output_data, output_size);
    }
    void SaveFloatWav(float *floatWav, int64_t size) override;

    bool IsLoaded() override {
        return acoustic_model_predictor_ != nullptr &&
               vocoder_predictor_ != nullptr;
    }

    float GetInferenceTime() override { return inference_time_; }

    const std::vector<WavDataType> &GetWav() { return wav_; }

    int GetWavSize() override { return wav_.size() * sizeof(WavDataType); }

    // Get the WAV duration in milliseconds
    float GetWavDuration() override {
        return static_cast<float>(GetWavSize()) / sizeof(WavDataType) /
               static_cast<float>(wav_sample_rate_) * 1000;
    }

    // Get the RTF (synthesis time / audio duration)
    float GetRTF() override { return GetInferenceTime() / GetWavDuration(); }

    void ReleaseWav() override { wav_.clear(); }
    bool WriteWavToFile(const std::string &wavPath) override {
        std::ofstream fout(wavPath, std::ios::binary);
        if (!fout.is_open()) {
            return false;
@@ -216,18 +230,20 @@ public:
        header.data_size = GetWavSize();
        header.size = sizeof(header) - 8 + header.data_size;
        header.sample_rate = wav_sample_rate_;
        header.byte_rate = header.sample_rate * header.num_channels *
                           header.bits_per_sample / 8;
        header.block_align = header.num_channels * header.bits_per_sample / 8;
        fout.write(reinterpret_cast<const char *>(&header), sizeof(header));

        // Write the WAV samples
        fout.write(reinterpret_cast<const char *>(wav_.data()),
                   header.data_size);

        fout.close();
        return true;
    }
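    // Worked example of the header math in WriteWavToFile above, assuming a
    // mono stream (num_channels == 1) at 24000 Hz with 16-bit samples:
    //   byte_rate   = 24000 * 1 * 16 / 8 = 48000 bytes per second
    //   block_align = 1 * 16 / 8 = 2 bytes per sample frame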
  protected:
    struct WavHeader {
        // RIFF header
        char riff[4] = {'R', 'I', 'F', 'F'};
@@ -250,19 +266,17 @@ protected:
    };

    enum WavAudioFormat {
        WAV_FORMAT_16BIT_PCM = 1,   // 16-bit PCM format
        WAV_FORMAT_32BIT_FLOAT = 3  // 32-bit IEEE float format
    };

  protected:
    // The return value is determined by WavDataType via template specialization
    inline uint16_t GetWavAudioFormat();

    inline float Abs(float number) { return (number < 0) ? -number : number; }

  protected:
    float inference_time_ = 0;
    uint32_t wav_sample_rate_ = 0;
    std::vector<WavDataType> wav_;
@@ -270,36 +284,36 @@ protected:
    std::shared_ptr<PaddlePredictor> vocoder_predictor_ = nullptr;
};
template <>
uint16_t Predictor<int16_t>::GetWavAudioFormat() {
    return Predictor::WAV_FORMAT_16BIT_PCM;
}

template <>
uint16_t Predictor<float>::GetWavAudioFormat() {
    return Predictor::WAV_FORMAT_32BIT_FLOAT;
}
// Save the WAV as 16-bit PCM
template <>
void Predictor<int16_t>::SaveFloatWav(float *floatWav, int64_t size) {
    wav_.resize(size);
    float maxSample = 0.01;
    // Find the maximum sample magnitude
    for (int64_t i = 0; i < size; i++) {
        float sample = Abs(floatWav[i]);
        if (sample > maxSample) {
            maxSample = sample;
        }
    }
    // Scale the samples to the int16_t range
    for (int64_t i = 0; i < size; i++) {
        wav_[i] = floatWav[i] * 32767.0f / maxSample;
    }
}
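// Note on the scaling above: dividing by the peak magnitude peak-normalizes
// the clip before the cast to int16_t, so a waveform whose largest sample is
// 0.5 is stretched to the full [-32767, 32767] range; the 0.01 floor on
// maxSample keeps near-silent audio from being amplified all the way up to
// full scale.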
// Save the WAV as 32-bit IEEE float
template <>
void Predictor<float>::SaveFloatWav(float *floatWav, int64_t size) {
    wav_.resize(size);
    std::copy_n(floatWav, size, wav_.data());
......
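// A minimal usage sketch of the Predictor template above (paths, thread count
// and sample rate are illustrative placeholders; main.cc below is the real,
// flag-driven entry point):
//
//     Predictor<int16_t> predictor;  // 16-bit PCM output
//     std::vector<int64_t> phones = {/* phone ids from the text frontend */};
//     if (predictor.Init("path/to/acoustic_model.nb",
//                        "path/to/vocoder.nb",
//                        PowerMode::LITE_POWER_HIGH,
//                        /*cpuThreadNum=*/1,
//                        /*wavSampleRate=*/24000) &&
//         predictor.RunModel(phones)) {
//         predictor.WriteWavToFile("path/to/output.wav");
//     }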
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <front/front_interface.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <paddle_api.h>
#include <cstdlib>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include "Predictor.hpp"
using namespace paddle::lite_api;

DEFINE_string(
    sentence,
    "你好,欢迎使用语音合成服务",
    "Text to be synthesized (Chinese only. English will crash the program.)");
DEFINE_string(front_conf, "./front.conf", "Front configuration file");
DEFINE_string(acoustic_model,
              "./models/cpu/fastspeech2_csmsc_arm.nb",
              "Acoustic model .nb file");
DEFINE_string(vocoder,
              "./models/cpu/fastspeech2_csmsc_arm.nb",
              "vocoder .nb file");
DEFINE_string(output_wav, "./output/tts.wav", "Output WAV file");
DEFINE_string(wav_bit_depth,
              "16",
              "WAV bit depth, 16 (16-bit PCM) or 32 (32-bit IEEE float)");
DEFINE_string(wav_sample_rate,
              "24000",
              "WAV sample rate, should match the output of the vocoder");
DEFINE_string(cpu_thread, "1", "CPU thread numbers");

int main(int argc, char *argv[]) {
@@ -53,7 +78,7 @@ int main(int argc, char *argv[]) {
    // Convert traditional Chinese to simplified Chinese
    std::wstring sentence_simp;
    front_inst->Trand2Simp(ws_sentence, &sentence_simp);
    ws_sentence = sentence_simp;

    std::string s_sentence;
@@ -63,28 +88,30 @@ int main(int argc, char *argv[]) {
    // Split the sentence by punctuation
    LOG(INFO) << "Start to segment sentences by punctuation";
    front_inst->SplitByPunc(ws_sentence, &sentence_part);
    LOG(INFO) << "Segment sentences through punctuation successfully";

    // Get the phoneme and tone ids of each sub-sentence
    LOG(INFO)
        << "Start to get the phoneme and tone id sequence of each sentence";
    for (int i = 0; i < sentence_part.size(); i++) {
        LOG(INFO) << "Raw sentence is: "
                  << ppspeech::wstring2utf8string(sentence_part[i]);
        front_inst->SentenceNormalize(&sentence_part[i]);
        s_sentence = ppspeech::wstring2utf8string(sentence_part[i]);
        LOG(INFO) << "After normalization sentence is: " << s_sentence;

        if (0 != front_inst->GetSentenceIds(s_sentence, &phoneids, &toneids)) {
            LOG(ERROR) << "TTS inst get sentence phoneids and toneids failed";
            return -1;
        }
    }
    LOG(INFO) << "The phoneids of the sentence is: "
              << limonp::Join(phoneids.begin(), phoneids.end(), " ");
    LOG(INFO) << "The toneids of the sentence is: "
              << limonp::Join(toneids.begin(), toneids.end(), " ");
    LOG(INFO) << "Get the phoneme id sequence of each sentence successfully";

    /////////////////// Backend: phonemes to audio ///////////////////
@@ -99,13 +126,19 @@ int main(int argc, char *argv[]) {
    // CPU power mode
    const PowerMode cpuPowerMode = PowerMode::LITE_POWER_HIGH;

    if (!predictor->Init(FLAGS_acoustic_model,
                         FLAGS_vocoder,
                         cpuPowerMode,
                         cpuThreadNum,
                         wavSampleRate)) {
        LOG(ERROR) << "predictor init failed" << std::endl;
        return -1;
    }

    std::vector<int64_t> phones(phoneids.size());
    std::transform(phoneids.begin(), phoneids.end(), phones.begin(), [](int x) {
        return static_cast<int64_t>(x);
    });

    if (!predictor->RunModel(phones)) {
        LOG(ERROR) << "predictor run model failed" << std::endl;
@@ -113,7 +146,8 @@ int main(int argc, char *argv[]) {
    }

    LOG(INFO) << "Inference time: " << predictor->GetInferenceTime() << " ms, "
              << "WAV size (without header): " << predictor->GetWavSize()
              << " bytes, "
              << "WAV duration: " << predictor->GetWavDuration() << " ms, "
              << "RTF: " << predictor->GetRTF() << std::endl;
......
@@ -38,6 +38,7 @@ If the download speed is too slow, you can open [third-party/CMakeLists.txt](thi
```
## Run
You can change `--phone2id_path` in `./front_demo/front.conf` to the `phone_id_map.txt` of your own acoustic model.
```
./run_front_demo.sh
......
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>
#include <string>
#include "front/front_interface.h"
DEFINE_string(sentence, "你好,欢迎使用语音合成服务", "Text to be synthesized");
DEFINE_string(front_conf, "./front_demo/front.conf", "Front conf file");
// DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid");

int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);
    // Instantiate the text frontend engine
    ppspeech::FrontEngineInterface* front_inst = nullptr;
    front_inst = new ppspeech::FrontEngineInterface(FLAGS_front_conf);
    if ((!front_inst) || (front_inst->init())) {
        LOG(ERROR) << "Creater tts engine failed!";
@@ -28,7 +41,7 @@ int main(int argc, char** argv) {
    // Convert traditional Chinese to simplified Chinese
    std::wstring sentence_simp;
    front_inst->Trand2Simp(ws_sentence, &sentence_simp);
    ws_sentence = sentence_simp;

    std::string s_sentence;
@@ -38,28 +51,29 @@ int main(int argc, char** argv) {
    // Split the sentence by punctuation
    LOG(INFO) << "Start to segment sentences by punctuation";
    front_inst->SplitByPunc(ws_sentence, &sentence_part);
    LOG(INFO) << "Segment sentences through punctuation successfully";

    // Get the phoneme and tone ids of each sub-sentence
    LOG(INFO)
        << "Start to get the phoneme and tone id sequence of each sentence";
    for (int i = 0; i < sentence_part.size(); i++) {
        LOG(INFO) << "Raw sentence is: "
                  << ppspeech::wstring2utf8string(sentence_part[i]);
        front_inst->SentenceNormalize(&sentence_part[i]);
        s_sentence = ppspeech::wstring2utf8string(sentence_part[i]);
        LOG(INFO) << "After normalization sentence is: " << s_sentence;

        if (0 != front_inst->GetSentenceIds(s_sentence, &phoneids, &toneids)) {
            LOG(ERROR) << "TTS inst get sentence phoneids and toneids failed";
            return -1;
        }
    }

    LOG(INFO) << "The phoneids of the sentence is: "
              << limonp::Join(phoneids.begin(), phoneids.end(), " ");
    LOG(INFO) << "The toneids of the sentence is: "
              << limonp::Join(toneids.begin(), toneids.end(), " ");
    LOG(INFO) << "Get the phoneme id sequence of each sentence successfully";

    return EXIT_SUCCESS;
}
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import configparser

from paddlespeech.t2s.frontend.zh_frontend import Frontend


def get_phone(frontend,
              word,
              merge_sentences=True,
              print_info=False,
              robot=False,
              get_tone_ids=False):
    phonemes = frontend.get_phonemes(word, merge_sentences, print_info, robot)
    # Some optimizations
    phones, tones = frontend._get_phone_tone(phonemes[0], get_tone_ids)
@@ -22,7 +31,10 @@ def get_phone(frontend, word, merge_sentences=True, print_info=False, robot=Fals
    return phones, tones


def gen_word2phone_dict(frontend,
                        jieba_words_dict,
                        word2phone_dict,
                        get_tone=False):
    with open(jieba_words_dict, "r") as f1, open(word2phone_dict, "w+") as f2:
        for line in f1.readlines():
            word = line.split(" ")[0]
@@ -30,9 +42,9 @@ def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=Fa
            phone_str = ""
            if tone:
                assert (len(phone) == len(tone))
                for i in range(len(tone)):
                    phone_tone = phone[i] + tone[i]
                    phone_str += (" " + phone_tone)
                phone_str = phone_str.strip("sp0").strip(" ")
            else:
@@ -45,43 +57,55 @@ def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=Fa
def main():
    parser = argparse.ArgumentParser(description="Generate dictionary")
    parser.add_argument(
        "--config", type=str, default="./config.ini", help="config file.")
    parser.add_argument(
        "--am_type",
        type=str,
        default="fastspeech2",
        help="fastspeech2 or speedyspeech")
    args = parser.parse_args()

    # Read config
    cf = configparser.ConfigParser()
    cf.read(args.config)
    jieba_words_dict_file = cf.get("jieba",
                                   "jieba_words_dict")  # get words dict

    am_type = args.am_type
    if (am_type == "fastspeech2"):
        phone2id_dict_file = cf.get(am_type, "phone2id_dict")
        word2phone_dict_file = cf.get(am_type, "word2phone_dict")

        frontend = Frontend(phone_vocab_path=phone2id_dict_file)
        print("frontend done!")

        gen_word2phone_dict(
            frontend,
            jieba_words_dict_file,
            word2phone_dict_file,
            get_tone=False)

    elif (am_type == "speedyspeech"):
        phone2id_dict_file = cf.get(am_type, "phone2id_dict")
        tone2id_dict_file = cf.get(am_type, "tone2id_dict")
        word2phone_dict_file = cf.get(am_type, "word2phone_dict")

        frontend = Frontend(
            phone_vocab_path=phone2id_dict_file,
            tone_vocab_path=tone2id_dict_file)
        print("frontend done!")

        gen_word2phone_dict(
            frontend,
            jieba_words_dict_file,
            word2phone_dict_file,
            get_tone=True)

    else:
        print("Please set correct am type, fastspeech2 or speedyspeech.")


if __name__ == "__main__":
    main()
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PHONESFILE = "./dict/phones.txt"
PHONES_ID_FILE = "./dict/phonesid.dict"
TONESFILE = "./dict/tones.txt"
TONES_ID_FILE = "./dict/tonesid.dict"


def GenIdFile(file, idfile):
    id = 2
    with open(file, 'r') as f1, open(idfile, "w+") as f2:
@@ -16,7 +29,7 @@ def GenIdFile(file, idfile):
            f2.write(phone + " " + str(id) + "\n")
            id += 1


if __name__ == "__main__":
    GenIdFile(PHONESFILE, PHONES_ID_FILE)
    GenIdFile(TONESFILE, TONES_ID_FILE)
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from pypinyin import lazy_pinyin
from pypinyin import Style
worddict = "./dict/jieba_part.dict.utf8"
newdict = "./dict/word_phones.dict"


def GenPhones(initials, finals, seperate=True):
    phones = []
@@ -14,9 +30,9 @@ def GenPhones(initials, finals, seperate=True):
        elif c in ['zh', 'ch', 'sh', 'r']:
            v = re.sub('i', 'iii', v)
        if c:
            if seperate is True:
                phones.append(c + '0')
            elif seperate is False:
                phones.append(c)
            else:
                print("Not sure whether phone and tone need to be separated")
@@ -28,8 +44,10 @@ def GenPhones(initials, finals, seperate=True):
with open(worddict, "r") as f1, open(newdict, "w+") as f2:
    for line in f1.readlines():
        word = line.split(" ")[0]
        initials = lazy_pinyin(
            word, neutral_tone_with_five=True, style=Style.INITIALS)
        finals = lazy_pinyin(
            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
        phones = GenPhones(initials, finals, True)
......
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/type_conv.h" #include "base/type_conv.h"
namespace ppspeech { namespace ppspeech {
// wstring to string // wstring to string
std::string wstring2utf8string(const std::wstring& str) std::string wstring2utf8string(const std::wstring& str) {
{ static std::wstring_convert<std::codecvt_utf8<wchar_t>> strCnv;
static std::wstring_convert<std::codecvt_utf8<wchar_t> > strCnv;
return strCnv.to_bytes(str); return strCnv.to_bytes(str);
} }
// string to wstring
std::wstring utf8string2wstring(const std::string& str)
{
static std::wstring_convert< std::codecvt_utf8<wchar_t> > strCnv;
return strCnv.from_bytes(str);
}
// string to wstring
std::wstring utf8string2wstring(const std::string& str) {
static std::wstring_convert<std::codecvt_utf8<wchar_t>> strCnv;
return strCnv.from_bytes(str);
} }
} // namespace ppspeech
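// A minimal round-trip sketch for the two helpers above (the literal is just
// an example string):
//
//     std::wstring ws = ppspeech::utf8string2wstring("你好");
//     std::string utf8 = ppspeech::wstring2utf8string(ws);  // back to UTF-8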
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BASE_TYPE_CONVC_H
#define BASE_TYPE_CONVC_H

#include <codecvt>
#include <locale>
#include <string>

namespace ppspeech {
// wstring to string
std::string wstring2utf8string(const std::wstring& str);

// string to wstring
std::wstring utf8string2wstring(const std::string& str);
}

#endif  // BASE_TYPE_CONVC_H
\ No newline at end of file
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H
#define PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H

#include <glog/logging.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
//#include "utils/dir_utils.h"
#include <cppjieba/Jieba.hpp>
#include "absl/strings/str_split.h"
#include "front/text_normalize.h"

namespace ppspeech {
class FrontEngineInterface : public TextNormalizer {
  public:
    explicit FrontEngineInterface(std::string conf) : _conf_file(conf) {
        TextNormalizer();
        _jieba = nullptr;
        _initialed = false;
        init();
    }

    int init();
    ~FrontEngineInterface() {}

    // Read the configuration file
    int ReadConfFile();

    // Convert traditional Chinese to simplified Chinese
    int Trand2Simp(const std::wstring &sentence, std::wstring *sentence_simp);

    // Build a dictionary from file
    int GenDict(const std::string &file,
                std::map<std::string, std::string> *map);

    // Convert word + POS segmentation results into word-only results
    int GetSegResult(std::vector<std::pair<std::string, std::string>> *seg,
                     std::vector<std::string> *seg_words);

    // Generate the phoneme and tone ids of a sentence. If phonemes and tones
    // are not separated, toneids is empty (fastspeech2); otherwise it is not
    // (speedyspeech)
    int GetSentenceIds(const std::string &sentence,
                       std::vector<int> *phoneids,
                       std::vector<int> *toneids);

    // Get the phoneme and tone ids of each word from the segmentation result
    // and adjust the pronunciation where needed (ModifyTone). If phonemes and
    // tones are not separated, toneids is empty (fastspeech2); otherwise it is
    // not (speedyspeech)
    int GetWordsIds(
        const std::vector<std::pair<std::string, std::string>> &cut_result,
        std::vector<int> *phoneids,
        std::vector<int> *toneids);

    // Run Jieba segmentation to get word + POS pairs, then post-process the
    // result (MergeforModify)
    int Cut(const std::string &sentence,
            std::vector<std::pair<std::string, std::string>> *cut_result);

    // Map a word to its phonemes via dictionary lookup
    int GetPhone(const std::string &word, std::string *phone);

    // Map phonemes to phoneme ids
    int Phone2Phoneid(const std::string &phone,
                      std::vector<int> *phoneid,
                      std::vector<int> *toneids);

    // Use the finals to check whether every character of the word is third
    // tone; returns true if so
    bool AllToneThree(const std::vector<std::string> &finals);

    // Check whether the word is a reduplicated word
    bool IsReduplication(const std::string &word);

    // Get the initials and finals of each character in the word
    int GetInitialsFinals(const std::string &word,
                          std::vector<std::string> *word_initials,
                          std::vector<std::string> *word_finals);

    // Get the finals of each character in the word
    int GetFinals(const std::string &word,
                  std::vector<std::string> *word_finals);

    // Convert the word into a vector whose elements are its characters
    int Word2WordVec(const std::string &word,
                     std::vector<std::wstring> *wordvec);

    // Re-segment the word with a full cut, so that each resulting word is in
    // the dictionary
    int SplitWord(const std::string &word,
                  std::vector<std::string> *fullcut_word);

    // Post-process segmentation results containing "不"
    std::vector<std::pair<std::string, std::string>> MergeBu(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // Post-process segmentation results containing "一"
    std::vector<std::pair<std::string, std::string>> Mergeyi(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // Merge two adjacent identical characters in the segmentation result
    std::vector<std::pair<std::string, std::string>> MergeReduplication(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // Merge a word with the following word when both are read entirely in the
    // third tone
    std::vector<std::pair<std::string, std::string>> MergeThreeTones(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // Merge a word with the following word when the last syllable of the
    // first and the first syllable of the second are both third tone
    std::vector<std::pair<std::string, std::string>> MergeThreeTones2(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // Post-process segmentation results containing "儿"
    std::vector<std::pair<std::string, std::string>> MergeEr(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // Post-process and adjust the segmentation result
    int MergeforModify(
        std::vector<std::pair<std::string, std::string>> *seg_result,
        std::vector<std::pair<std::string, std::string>> *merge_seg_result);

    // Tone sandhi for words containing "不"
    int BuSandi(const std::string &word, std::vector<std::string> *finals);

    // Tone sandhi for words containing "一"
    int YiSandhi(const std::string &word, std::vector<std::string> *finals);

    // Neutral-tone sandhi for special words (measure words, particles, etc.)
    int NeuralSandhi(const std::string &word,
                     const std::string &pos,
                     std::vector<std::string> *finals);

    // Third-tone sandhi
    int ThreeSandhi(const std::string &word, std::vector<std::string> *finals);

    // Apply tone modifications to a word
    int ModifyTone(const std::string &word,
                   const std::string &pos,
                   std::vector<std::string> *finals);

    // Handle erhua (rhotacization)
    std::vector<std::vector<std::string>> MergeErhua(
        const std::vector<std::string> &initials,
        const std::vector<std::string> &finals,
        const std::string &word,
        const std::string &pos);

  private:
    bool _initialed;
    cppjieba::Jieba *_jieba;
    std::vector<std::string> _punc;
    std::vector<std::string> _punc_omit;

    std::string _conf_file;
    std::map<std::string, std::string> conf_map;
    std::map<std::string, std::string> word_phone_map;
    std::map<std::string, std::string> phone_id_map;
    std::map<std::string, std::string> tone_id_map;
    std::map<std::string, std::string> trand_simp_map;

    std::string _jieba_dict_path;
    std::string _jieba_hmm_path;
    std::string _jieba_user_dict_path;
    std::string _jieba_idf_path;
    std::string _jieba_stop_word_path;

    std::string _seperate_tone;
    std::string _word2phone_path;
    std::string _phone2id_path;
    std::string _tone2id_path;
    std::string _trand2simp_path;

    std::vector<std::string> must_erhua;
    std::vector<std::string> not_erhua;

    std::vector<std::string> must_not_neural_tone_words;
    std::vector<std::string> must_neural_tone_words;
};
}  // namespace ppspeech
#endif
\ No newline at end of file
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H
#define PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H

#include <glog/logging.h>
#include <codecvt>
#include <map>
#include <regex>
#include <string>
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include "base/type_conv.h"
@@ -13,50 +26,52 @@
namespace ppspeech {

class TextNormalizer {
  public:
    TextNormalizer() { InitMap(); }
    ~TextNormalizer() {}

    int InitMap();
    int Replace(std::wstring *sentence,
                const int &pos,
                const int &len,
                const std::wstring &repstr);
    int SplitByPunc(const std::wstring &sentence,
                    std::vector<std::wstring> *sentence_part);

    std::string CreateTextValue(const std::string &num, bool use_zero = true);
    std::string SingleDigit2Text(const std::string &num_str,
                                 bool alt_one = false);
    std::string SingleDigit2Text(const std::wstring &num, bool alt_one = false);
    std::string MultiDigit2Text(const std::string &num_str,
                                bool alt_one = false,
                                bool use_zero = true);
    std::string MultiDigit2Text(const std::wstring &num,
                                bool alt_one = false,
                                bool use_zero = true);
    std::string Digits2Text(const std::string &num_str);
    std::string Digits2Text(const std::wstring &num);

    int ReData(std::wstring *sentence);
    int ReData2(std::wstring *sentence);
    int ReTime(std::wstring *sentence);
    int ReTemperature(std::wstring *sentence);
    int ReFrac(std::wstring *sentence);
    int RePercentage(std::wstring *sentence);
    int ReMobilePhone(std::wstring *sentence);
    int RePhone(std::wstring *sentence);
    int ReRange(std::wstring *sentence);
    int ReInterger(std::wstring *sentence);
    int ReDecimalNum(std::wstring *sentence);
    int RePositiveQuantifiers(std::wstring *sentence);
    int ReDefalutNum(std::wstring *sentence);
    int ReNumber(std::wstring *sentence);
    int SentenceNormalize(std::wstring *sentence);

  private:
    std::map<std::string, std::string> digits_map;
    std::map<int, std::string> units_map;
};
}  // namespace ppspeech

#endif
\ No newline at end of file
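// A minimal usage sketch of the pointer-based TextNormalizer API above (the
// sample sentence is an illustrative placeholder):
//
//     ppspeech::TextNormalizer normalizer;
//     std::wstring ws = ppspeech::utf8string2wstring("今天是2023年5月1日");
//     normalizer.SentenceNormalize(&ws);  // normalization applied in place
//     std::vector<std::wstring> parts;
//     normalizer.SplitByPunc(ws, &parts);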