Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
1aa7495d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1aa7495d
编写于
3月 14, 2023
作者:
小湉湉
提交者:
GitHub
3月 14, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[TTS]Add license and reformat for TTSCppFrontend (#3030)
上级
259f4936
变更
13
展开全部
显示空白变更内容
内联
并排
Showing
13 changed files
with
1323 additions
and
848 deletions
+1323
-848
demos/TTSArmLinux/src/Predictor.hpp
demos/TTSArmLinux/src/Predictor.hpp
+87
-73
demos/TTSArmLinux/src/main.cc
demos/TTSArmLinux/src/main.cc
+60
-26
demos/TTSCppFrontend/README.md
demos/TTSCppFrontend/README.md
+1
-0
demos/TTSCppFrontend/front_demo/front_demo.cpp
demos/TTSCppFrontend/front_demo/front_demo.cpp
+34
-20
demos/TTSCppFrontend/front_demo/gentools/gen_dict_paddlespeech.py
...SCppFrontend/front_demo/gentools/gen_dict_paddlespeech.py
+50
-26
demos/TTSCppFrontend/front_demo/gentools/genid.py
demos/TTSCppFrontend/front_demo/gentools/genid.py
+15
-2
demos/TTSCppFrontend/front_demo/gentools/word2phones.py
demos/TTSCppFrontend/front_demo/gentools/word2phones.py
+23
-5
demos/TTSCppFrontend/src/base/type_conv.cpp
demos/TTSCppFrontend/src/base/type_conv.cpp
+20
-10
demos/TTSCppFrontend/src/base/type_conv.h
demos/TTSCppFrontend/src/base/type_conv.h
+18
-5
demos/TTSCppFrontend/src/front/front_interface.cpp
demos/TTSCppFrontend/src/front/front_interface.cpp
+586
-389
demos/TTSCppFrontend/src/front/front_interface.h
demos/TTSCppFrontend/src/front/front_interface.h
+159
-117
demos/TTSCppFrontend/src/front/text_normalize.cpp
demos/TTSCppFrontend/src/front/text_normalize.cpp
+218
-138
demos/TTSCppFrontend/src/front/text_normalize.h
demos/TTSCppFrontend/src/front/text_normalize.h
+52
-37
未找到文件。
demos/TTSArmLinux/src/Predictor.hpp
浏览文件 @
1aa7495d
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <chrono>
#include <iostream>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
...
...
@@ -10,24 +23,28 @@
using
namespace
paddle
::
lite_api
;
class
PredictorInterface
{
public:
public:
virtual
~
PredictorInterface
()
=
0
;
virtual
bool
Init
(
const
std
::
string
&
AcousticModelPath
,
virtual
bool
Init
(
const
std
::
string
&
AcousticModelPath
,
const
std
::
string
&
VocoderPath
,
PowerMode
cpuPowerMode
,
int
cpuThreadNum
,
// WAV采样率(必须与模型输出匹配)
// 如果播放速度和音调异常,请修改采样率
// 常见采样率:16000, 24000, 32000, 44100, 48000, 96000
uint32_t
wavSampleRate
)
=
0
;
virtual
std
::
shared_ptr
<
PaddlePredictor
>
LoadModel
(
const
std
::
string
&
modelPath
,
int
cpuThreadNum
,
PowerMode
cpuPowerMode
)
=
0
;
uint32_t
wavSampleRate
)
=
0
;
virtual
std
::
shared_ptr
<
PaddlePredictor
>
LoadModel
(
const
std
::
string
&
modelPath
,
int
cpuThreadNum
,
PowerMode
cpuPowerMode
)
=
0
;
virtual
void
ReleaseModel
()
=
0
;
virtual
bool
RunModel
(
const
std
::
vector
<
int64_t
>
&
phones
)
=
0
;
virtual
std
::
unique_ptr
<
const
Tensor
>
GetAcousticModelOutput
(
const
std
::
vector
<
int64_t
>
&
phones
)
=
0
;
virtual
std
::
unique_ptr
<
const
Tensor
>
GetVocoderOutput
(
std
::
unique_ptr
<
const
Tensor
>
&&
amOutput
)
=
0
;
virtual
void
VocoderOutputToWav
(
std
::
unique_ptr
<
const
Tensor
>
&&
vocOutput
)
=
0
;
virtual
std
::
unique_ptr
<
const
Tensor
>
GetAcousticModelOutput
(
const
std
::
vector
<
int64_t
>
&
phones
)
=
0
;
virtual
std
::
unique_ptr
<
const
Tensor
>
GetVocoderOutput
(
std
::
unique_ptr
<
const
Tensor
>
&&
amOutput
)
=
0
;
virtual
void
VocoderOutputToWav
(
std
::
unique_ptr
<
const
Tensor
>
&&
vocOutput
)
=
0
;
virtual
void
SaveFloatWav
(
float
*
floatWav
,
int64_t
size
)
=
0
;
virtual
bool
IsLoaded
()
=
0
;
virtual
float
GetInferenceTime
()
=
0
;
...
...
@@ -45,23 +62,22 @@ PredictorInterface::~PredictorInterface() {}
// WavDataType: WAV数据类型
// 可在 int16_t 和 float 之间切换,
// 用于生成 16-bit PCM 或 32-bit IEEE float 格式的 WAV
template
<
typename
WavDataType
>
template
<
typename
WavDataType
>
class
Predictor
:
public
PredictorInterface
{
public:
virtual
bool
Init
(
const
std
::
string
&
AcousticModelPath
,
public:
bool
Init
(
const
std
::
string
&
AcousticModelPath
,
const
std
::
string
&
VocoderPath
,
PowerMode
cpuPowerMode
,
int
cpuThreadNum
,
// WAV采样率(必须与模型输出匹配)
// 如果播放速度和音调异常,请修改采样率
// 常见采样率:16000, 24000, 32000, 44100, 48000, 96000
uint32_t
wavSampleRate
)
override
{
uint32_t
wavSampleRate
)
override
{
// Release model if exists
ReleaseModel
();
acoustic_model_predictor_
=
LoadModel
(
AcousticModelPath
,
cpuThreadNum
,
cpuPowerMode
);
acoustic_model_predictor_
=
LoadModel
(
AcousticModelPath
,
cpuThreadNum
,
cpuPowerMode
);
if
(
acoustic_model_predictor_
==
nullptr
)
{
return
false
;
}
...
...
@@ -80,7 +96,10 @@ public:
ReleaseWav
();
}
virtual
std
::
shared_ptr
<
PaddlePredictor
>
LoadModel
(
const
std
::
string
&
modelPath
,
int
cpuThreadNum
,
PowerMode
cpuPowerMode
)
override
{
std
::
shared_ptr
<
PaddlePredictor
>
LoadModel
(
const
std
::
string
&
modelPath
,
int
cpuThreadNum
,
PowerMode
cpuPowerMode
)
override
{
if
(
modelPath
.
empty
())
{
return
nullptr
;
}
...
...
@@ -94,12 +113,12 @@ public:
return
CreatePaddlePredictor
<
MobileConfig
>
(
config
);
}
v
irtual
v
oid
ReleaseModel
()
override
{
void
ReleaseModel
()
override
{
acoustic_model_predictor_
=
nullptr
;
vocoder_predictor_
=
nullptr
;
}
virtual
bool
RunModel
(
const
std
::
vector
<
int64_t
>
&
phones
)
override
{
bool
RunModel
(
const
std
::
vector
<
int64_t
>
&
phones
)
override
{
if
(
!
IsLoaded
())
{
return
false
;
}
...
...
@@ -120,7 +139,8 @@ public:
return
true
;
}
virtual
std
::
unique_ptr
<
const
Tensor
>
GetAcousticModelOutput
(
const
std
::
vector
<
int64_t
>
&
phones
)
override
{
std
::
unique_ptr
<
const
Tensor
>
GetAcousticModelOutput
(
const
std
::
vector
<
int64_t
>
&
phones
)
override
{
auto
phones_handle
=
acoustic_model_predictor_
->
GetInput
(
0
);
phones_handle
->
Resize
({
static_cast
<
int64_t
>
(
phones
.
size
())});
phones_handle
->
CopyFromCpu
(
phones
.
data
());
...
...
@@ -139,7 +159,8 @@ public:
return
am_output_handle
;
}
virtual
std
::
unique_ptr
<
const
Tensor
>
GetVocoderOutput
(
std
::
unique_ptr
<
const
Tensor
>
&&
amOutput
)
override
{
std
::
unique_ptr
<
const
Tensor
>
GetVocoderOutput
(
std
::
unique_ptr
<
const
Tensor
>
&&
amOutput
)
override
{
auto
mel_handle
=
vocoder_predictor_
->
GetInput
(
0
);
// [?, 80]
auto
dims
=
amOutput
->
shape
();
...
...
@@ -161,7 +182,8 @@ public:
return
voc_output_handle
;
}
virtual
void
VocoderOutputToWav
(
std
::
unique_ptr
<
const
Tensor
>
&&
vocOutput
)
override
{
void
VocoderOutputToWav
(
std
::
unique_ptr
<
const
Tensor
>
&&
vocOutput
)
override
{
// 获取输出Tensor的数据
int64_t
output_size
=
1
;
for
(
auto
dim
:
vocOutput
->
shape
())
{
...
...
@@ -172,39 +194,31 @@ public:
SaveFloatWav
(
output_data
,
output_size
);
}
v
irtual
v
oid
SaveFloatWav
(
float
*
floatWav
,
int64_t
size
)
override
;
void
SaveFloatWav
(
float
*
floatWav
,
int64_t
size
)
override
;
virtual
bool
IsLoaded
()
override
{
return
acoustic_model_predictor_
!=
nullptr
&&
vocoder_predictor_
!=
nullptr
;
bool
IsLoaded
()
override
{
return
acoustic_model_predictor_
!=
nullptr
&&
vocoder_predictor_
!=
nullptr
;
}
virtual
float
GetInferenceTime
()
override
{
return
inference_time_
;
}
float
GetInferenceTime
()
override
{
return
inference_time_
;
}
const
std
::
vector
<
WavDataType
>
&
GetWav
()
{
return
wav_
;
}
const
std
::
vector
<
WavDataType
>
&
GetWav
()
{
return
wav_
;
}
virtual
int
GetWavSize
()
override
{
return
wav_
.
size
()
*
sizeof
(
WavDataType
);
}
int
GetWavSize
()
override
{
return
wav_
.
size
()
*
sizeof
(
WavDataType
);
}
// 获取WAV持续时间(单位:毫秒)
virtual
float
GetWavDuration
()
override
{
return
static_cast
<
float
>
(
GetWavSize
())
/
sizeof
(
WavDataType
)
/
static_cast
<
float
>
(
wav_sample_rate_
)
*
1000
;
float
GetWavDuration
()
override
{
return
static_cast
<
float
>
(
GetWavSize
())
/
sizeof
(
WavDataType
)
/
static_cast
<
float
>
(
wav_sample_rate_
)
*
1000
;
}
// 获取RTF(合成时间 / 音频时长)
virtual
float
GetRTF
()
override
{
return
GetInferenceTime
()
/
GetWavDuration
();
}
float
GetRTF
()
override
{
return
GetInferenceTime
()
/
GetWavDuration
();
}
virtual
void
ReleaseWav
()
override
{
wav_
.
clear
();
}
void
ReleaseWav
()
override
{
wav_
.
clear
();
}
virtual
bool
WriteWavToFile
(
const
std
::
string
&
wavPath
)
override
{
bool
WriteWavToFile
(
const
std
::
string
&
wavPath
)
override
{
std
::
ofstream
fout
(
wavPath
,
std
::
ios
::
binary
);
if
(
!
fout
.
is_open
())
{
return
false
;
...
...
@@ -216,18 +230,20 @@ public:
header
.
data_size
=
GetWavSize
();
header
.
size
=
sizeof
(
header
)
-
8
+
header
.
data_size
;
header
.
sample_rate
=
wav_sample_rate_
;
header
.
byte_rate
=
header
.
sample_rate
*
header
.
num_channels
*
header
.
bits_per_sample
/
8
;
header
.
byte_rate
=
header
.
sample_rate
*
header
.
num_channels
*
header
.
bits_per_sample
/
8
;
header
.
block_align
=
header
.
num_channels
*
header
.
bits_per_sample
/
8
;
fout
.
write
(
reinterpret_cast
<
const
char
*>
(
&
header
),
sizeof
(
header
));
fout
.
write
(
reinterpret_cast
<
const
char
*>
(
&
header
),
sizeof
(
header
));
// 写入wav数据
fout
.
write
(
reinterpret_cast
<
const
char
*>
(
wav_
.
data
()),
header
.
data_size
);
fout
.
write
(
reinterpret_cast
<
const
char
*>
(
wav_
.
data
()),
header
.
data_size
);
fout
.
close
();
return
true
;
}
protected:
protected:
struct
WavHeader
{
// RIFF 头
char
riff
[
4
]
=
{
'R'
,
'I'
,
'F'
,
'F'
};
...
...
@@ -254,15 +270,13 @@ protected:
WAV_FORMAT_32BIT_FLOAT
=
3
// 32-bit IEEE float 格式
};
protected:
protected:
// 返回值通过模板特化由 WavDataType 决定
inline
uint16_t
GetWavAudioFormat
();
inline
float
Abs
(
float
number
)
{
return
(
number
<
0
)
?
-
number
:
number
;
}
inline
float
Abs
(
float
number
)
{
return
(
number
<
0
)
?
-
number
:
number
;
}
protected:
protected:
float
inference_time_
=
0
;
uint32_t
wav_sample_rate_
=
0
;
std
::
vector
<
WavDataType
>
wav_
;
...
...
@@ -270,36 +284,36 @@ protected:
std
::
shared_ptr
<
PaddlePredictor
>
vocoder_predictor_
=
nullptr
;
};
template
<
>
template
<
>
uint16_t
Predictor
<
int16_t
>::
GetWavAudioFormat
()
{
return
Predictor
::
WAV_FORMAT_16BIT_PCM
;
}
template
<
>
template
<
>
uint16_t
Predictor
<
float
>::
GetWavAudioFormat
()
{
return
Predictor
::
WAV_FORMAT_32BIT_FLOAT
;
}
// 保存 16-bit PCM 格式 WAV
template
<
>
template
<
>
void
Predictor
<
int16_t
>::
SaveFloatWav
(
float
*
floatWav
,
int64_t
size
)
{
wav_
.
resize
(
size
);
float
maxSample
=
0.01
;
// 寻找最大采样值
for
(
int64_t
i
=
0
;
i
<
size
;
i
++
)
{
for
(
int64_t
i
=
0
;
i
<
size
;
i
++
)
{
float
sample
=
Abs
(
floatWav
[
i
]);
if
(
sample
>
maxSample
)
{
maxSample
=
sample
;
}
}
// 把采样值缩放到 int_16 范围
for
(
int64_t
i
=
0
;
i
<
size
;
i
++
)
{
for
(
int64_t
i
=
0
;
i
<
size
;
i
++
)
{
wav_
[
i
]
=
floatWav
[
i
]
*
32767.0
f
/
maxSample
;
}
}
// 保存 32-bit IEEE float 格式 WAV
template
<
>
template
<
>
void
Predictor
<
float
>::
SaveFloatWav
(
float
*
floatWav
,
int64_t
size
)
{
wav_
.
resize
(
size
);
std
::
copy_n
(
floatWav
,
size
,
wav_
.
data
());
...
...
demos/TTSArmLinux/src/main.cc
浏览文件 @
1aa7495d
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <front/front_interface.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <paddle_api.h>
#include <cstdlib>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <map>
#include <glog/logging.h>
#include <gflags/gflags.h>
#include <paddle_api.h>
#include <front/front_interface.h>
#include "Predictor.hpp"
using
namespace
paddle
::
lite_api
;
DEFINE_string
(
sentence
,
"你好,欢迎使用语音合成服务"
,
"Text to be synthesized (Chinese only. English will crash the program.)"
);
DEFINE_string
(
sentence
,
"你好,欢迎使用语音合成服务"
,
"Text to be synthesized (Chinese only. English will crash the program.)"
);
DEFINE_string
(
front_conf
,
"./front.conf"
,
"Front configuration file"
);
DEFINE_string
(
acoustic_model
,
"./models/cpu/fastspeech2_csmsc_arm.nb"
,
"Acoustic model .nb file"
);
DEFINE_string
(
vocoder
,
"./models/cpu/fastspeech2_csmsc_arm.nb"
,
"vocoder .nb file"
);
DEFINE_string
(
acoustic_model
,
"./models/cpu/fastspeech2_csmsc_arm.nb"
,
"Acoustic model .nb file"
);
DEFINE_string
(
vocoder
,
"./models/cpu/fastspeech2_csmsc_arm.nb"
,
"vocoder .nb file"
);
DEFINE_string
(
output_wav
,
"./output/tts.wav"
,
"Output WAV file"
);
DEFINE_string
(
wav_bit_depth
,
"16"
,
"WAV bit depth, 16 (16-bit PCM) or 32 (32-bit IEEE float)"
);
DEFINE_string
(
wav_sample_rate
,
"24000"
,
"WAV sample rate, should match the output of the vocoder"
);
DEFINE_string
(
wav_bit_depth
,
"16"
,
"WAV bit depth, 16 (16-bit PCM) or 32 (32-bit IEEE float)"
);
DEFINE_string
(
wav_sample_rate
,
"24000"
,
"WAV sample rate, should match the output of the vocoder"
);
DEFINE_string
(
cpu_thread
,
"1"
,
"CPU thread numbers"
);
int
main
(
int
argc
,
char
*
argv
[])
{
...
...
@@ -53,7 +78,7 @@ int main(int argc, char *argv[]) {
// 繁体转简体
std
::
wstring
sentence_simp
;
front_inst
->
Trand2Simp
(
ws_sentence
,
sentence_simp
);
front_inst
->
Trand2Simp
(
ws_sentence
,
&
sentence_simp
);
ws_sentence
=
sentence_simp
;
std
::
string
s_sentence
;
...
...
@@ -63,26 +88,28 @@ int main(int argc, char *argv[]) {
// 根据标点进行分句
LOG
(
INFO
)
<<
"Start to segment sentences by punctuation"
;
front_inst
->
SplitByPunc
(
ws_sentence
,
sentence_part
);
front_inst
->
SplitByPunc
(
ws_sentence
,
&
sentence_part
);
LOG
(
INFO
)
<<
"Segment sentences through punctuation successfully"
;
// 分句后获取音素id
LOG
(
INFO
)
<<
"Start to get the phoneme and tone id sequence of each sentence"
;
for
(
int
i
=
0
;
i
<
sentence_part
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"Raw sentence is: "
<<
ppspeech
::
wstring2utf8string
(
sentence_part
[
i
]);
front_inst
->
SentenceNormalize
(
sentence_part
[
i
]);
LOG
(
INFO
)
<<
"Start to get the phoneme and tone id sequence of each sentence"
;
for
(
int
i
=
0
;
i
<
sentence_part
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"Raw sentence is: "
<<
ppspeech
::
wstring2utf8string
(
sentence_part
[
i
]);
front_inst
->
SentenceNormalize
(
&
sentence_part
[
i
]);
s_sentence
=
ppspeech
::
wstring2utf8string
(
sentence_part
[
i
]);
LOG
(
INFO
)
<<
"After normalization sentence is: "
<<
s_sentence
;
if
(
0
!=
front_inst
->
GetSentenceIds
(
s_sentence
,
phoneids
,
toneids
))
{
if
(
0
!=
front_inst
->
GetSentenceIds
(
s_sentence
,
&
phoneids
,
&
toneids
))
{
LOG
(
ERROR
)
<<
"TTS inst get sentence phoneids and toneids failed"
;
return
-
1
;
}
}
LOG
(
INFO
)
<<
"The phoneids of the sentence is: "
<<
limonp
::
Join
(
phoneids
.
begin
(),
phoneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"The toneids of the sentence is: "
<<
limonp
::
Join
(
toneids
.
begin
(),
toneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"The phoneids of the sentence is: "
<<
limonp
::
Join
(
phoneids
.
begin
(),
phoneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"The toneids of the sentence is: "
<<
limonp
::
Join
(
toneids
.
begin
(),
toneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"Get the phoneme id sequence of each sentence successfully"
;
...
...
@@ -99,13 +126,19 @@ int main(int argc, char *argv[]) {
// CPU电源模式
const
PowerMode
cpuPowerMode
=
PowerMode
::
LITE_POWER_HIGH
;
if
(
!
predictor
->
Init
(
FLAGS_acoustic_model
,
FLAGS_vocoder
,
cpuPowerMode
,
cpuThreadNum
,
wavSampleRate
))
{
if
(
!
predictor
->
Init
(
FLAGS_acoustic_model
,
FLAGS_vocoder
,
cpuPowerMode
,
cpuThreadNum
,
wavSampleRate
))
{
LOG
(
ERROR
)
<<
"predictor init failed"
<<
std
::
endl
;
return
-
1
;
}
std
::
vector
<
int64_t
>
phones
(
phoneids
.
size
());
std
::
transform
(
phoneids
.
begin
(),
phoneids
.
end
(),
phones
.
begin
(),
[](
int
x
)
{
return
static_cast
<
int64_t
>
(
x
);
});
std
::
transform
(
phoneids
.
begin
(),
phoneids
.
end
(),
phones
.
begin
(),
[](
int
x
)
{
return
static_cast
<
int64_t
>
(
x
);
});
if
(
!
predictor
->
RunModel
(
phones
))
{
LOG
(
ERROR
)
<<
"predictor run model failed"
<<
std
::
endl
;
...
...
@@ -113,7 +146,8 @@ int main(int argc, char *argv[]) {
}
LOG
(
INFO
)
<<
"Inference time: "
<<
predictor
->
GetInferenceTime
()
<<
" ms, "
<<
"WAV size (without header): "
<<
predictor
->
GetWavSize
()
<<
" bytes, "
<<
"WAV size (without header): "
<<
predictor
->
GetWavSize
()
<<
" bytes, "
<<
"WAV duration: "
<<
predictor
->
GetWavDuration
()
<<
" ms, "
<<
"RTF: "
<<
predictor
->
GetRTF
()
<<
std
::
endl
;
...
...
demos/TTSCppFrontend/README.md
浏览文件 @
1aa7495d
...
...
@@ -38,6 +38,7 @@ If the download speed is too slow, you can open [third-party/CMakeLists.txt](thi
```
## Run
You can change
`--phone2id_path`
in
`./front_demo/front.conf`
to the
`phone_id_map.txt`
of your own acoustic model.
```
./run_front_demo.sh
...
...
demos/TTSCppFrontend/front_demo/front_demo.cpp
浏览文件 @
1aa7495d
#include <string>
//#include "utils/dir_utils.h"
#include "front/front_interface.h"
#include <glog/logging.h>
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>
#include <string>
#include "front/front_interface.h"
DEFINE_string
(
sentence
,
"你好,欢迎使用语音合成服务"
,
"Text to be synthesized"
);
DEFINE_string
(
front_conf
,
"./front_demo/front.conf"
,
"Front conf file"
);
//DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid");
//
DEFINE_string(seperate_tone, "true", "If true, get phoneids and tonesid");
int
main
(
int
argc
,
char
**
argv
)
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
// 实例化文本前端引擎
ppspeech
::
FrontEngineInterface
*
front_inst
=
nullptr
;
ppspeech
::
FrontEngineInterface
*
front_inst
=
nullptr
;
front_inst
=
new
ppspeech
::
FrontEngineInterface
(
FLAGS_front_conf
);
if
((
!
front_inst
)
||
(
front_inst
->
init
()))
{
LOG
(
ERROR
)
<<
"Creater tts engine failed!"
;
...
...
@@ -28,7 +41,7 @@ int main(int argc, char** argv) {
// 繁体转简体
std
::
wstring
sentence_simp
;
front_inst
->
Trand2Simp
(
ws_sentence
,
sentence_simp
);
front_inst
->
Trand2Simp
(
ws_sentence
,
&
sentence_simp
);
ws_sentence
=
sentence_simp
;
std
::
string
s_sentence
;
...
...
@@ -38,28 +51,29 @@ int main(int argc, char** argv) {
// 根据标点进行分句
LOG
(
INFO
)
<<
"Start to segment sentences by punctuation"
;
front_inst
->
SplitByPunc
(
ws_sentence
,
sentence_part
);
front_inst
->
SplitByPunc
(
ws_sentence
,
&
sentence_part
);
LOG
(
INFO
)
<<
"Segment sentences through punctuation successfully"
;
// 分句后获取音素id
LOG
(
INFO
)
<<
"Start to get the phoneme and tone id sequence of each sentence"
;
for
(
int
i
=
0
;
i
<
sentence_part
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"Raw sentence is: "
<<
ppspeech
::
wstring2utf8string
(
sentence_part
[
i
]);
front_inst
->
SentenceNormalize
(
sentence_part
[
i
]);
LOG
(
INFO
)
<<
"Start to get the phoneme and tone id sequence of each sentence"
;
for
(
int
i
=
0
;
i
<
sentence_part
.
size
();
i
++
)
{
LOG
(
INFO
)
<<
"Raw sentence is: "
<<
ppspeech
::
wstring2utf8string
(
sentence_part
[
i
]);
front_inst
->
SentenceNormalize
(
&
sentence_part
[
i
]);
s_sentence
=
ppspeech
::
wstring2utf8string
(
sentence_part
[
i
]);
LOG
(
INFO
)
<<
"After normalization sentence is: "
<<
s_sentence
;
if
(
0
!=
front_inst
->
GetSentenceIds
(
s_sentence
,
phoneids
,
toneids
))
{
if
(
0
!=
front_inst
->
GetSentenceIds
(
s_sentence
,
&
phoneids
,
&
toneids
))
{
LOG
(
ERROR
)
<<
"TTS inst get sentence phoneids and toneids failed"
;
return
-
1
;
}
}
LOG
(
INFO
)
<<
"The phoneids of the sentence is: "
<<
limonp
::
Join
(
phoneids
.
begin
(),
phoneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"The toneids of the sentence is: "
<<
limonp
::
Join
(
toneids
.
begin
(),
toneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"The phoneids of the sentence is: "
<<
limonp
::
Join
(
phoneids
.
begin
(),
phoneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"The toneids of the sentence is: "
<<
limonp
::
Join
(
toneids
.
begin
(),
toneids
.
end
(),
" "
);
LOG
(
INFO
)
<<
"Get the phoneme id sequence of each sentence successfully"
;
return
EXIT_SUCCESS
;
}
demos/TTSCppFrontend/front_demo/gentools/gen_dict_paddlespeech.py
浏览文件 @
1aa7495d
# !/usr/bin/env python3
# -*- coding: utf-8 -*-
########################################################################
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Copyright 2021 liangyunming(liangyunming@baidu.com)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# Execute the script when PaddleSpeech has been installed
# PaddleSpeech: https://github.com/PaddlePaddle/PaddleSpeech
########################################################################
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
configparser
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
def
get_phone
(
frontend
,
word
,
merge_sentences
=
True
,
print_info
=
False
,
robot
=
False
,
get_tone_ids
=
False
):
def
get_phone
(
frontend
,
word
,
merge_sentences
=
True
,
print_info
=
False
,
robot
=
False
,
get_tone_ids
=
False
):
phonemes
=
frontend
.
get_phonemes
(
word
,
merge_sentences
,
print_info
,
robot
)
# Some optimizations
phones
,
tones
=
frontend
.
_get_phone_tone
(
phonemes
[
0
],
get_tone_ids
)
...
...
@@ -22,7 +31,10 @@ def get_phone(frontend, word, merge_sentences=True, print_info=False, robot=Fals
return
phones
,
tones
def
gen_word2phone_dict
(
frontend
,
jieba_words_dict
,
word2phone_dict
,
get_tone
=
False
):
def
gen_word2phone_dict
(
frontend
,
jieba_words_dict
,
word2phone_dict
,
get_tone
=
False
):
with
open
(
jieba_words_dict
,
"r"
)
as
f1
,
open
(
word2phone_dict
,
"w+"
)
as
f2
:
for
line
in
f1
.
readlines
():
word
=
line
.
split
(
" "
)[
0
]
...
...
@@ -30,7 +42,7 @@ def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=Fa
phone_str
=
""
if
tone
:
assert
(
len
(
phone
)
==
len
(
tone
))
assert
(
len
(
phone
)
==
len
(
tone
))
for
i
in
range
(
len
(
tone
)):
phone_tone
=
phone
[
i
]
+
tone
[
i
]
phone_str
+=
(
" "
+
phone_tone
)
...
...
@@ -45,39 +57,51 @@ def gen_word2phone_dict(frontend, jieba_words_dict, word2phone_dict, get_tone=Fa
def
main
():
parser
=
argparse
.
ArgumentParser
(
description
=
"Generate dictionary"
)
parser
=
argparse
.
ArgumentParser
(
description
=
"Generate dictionary"
)
parser
.
add_argument
(
"--config"
,
type
=
str
,
default
=
"./config.ini"
,
help
=
"config file."
)
parser
.
add_argument
(
"--am_type"
,
type
=
str
,
default
=
"fastspeech2"
,
help
=
"fastspeech2 or speedyspeech"
)
"--am_type"
,
type
=
str
,
default
=
"fastspeech2"
,
help
=
"fastspeech2 or speedyspeech"
)
args
=
parser
.
parse_args
()
# Read config
cf
=
configparser
.
ConfigParser
()
cf
.
read
(
args
.
config
)
jieba_words_dict_file
=
cf
.
get
(
"jieba"
,
"jieba_words_dict"
)
# get words dict
jieba_words_dict_file
=
cf
.
get
(
"jieba"
,
"jieba_words_dict"
)
# get words dict
am_type
=
args
.
am_type
if
(
am_type
==
"fastspeech2"
):
if
(
am_type
==
"fastspeech2"
):
phone2id_dict_file
=
cf
.
get
(
am_type
,
"phone2id_dict"
)
word2phone_dict_file
=
cf
.
get
(
am_type
,
"word2phone_dict"
)
frontend
=
Frontend
(
phone_vocab_path
=
phone2id_dict_file
)
print
(
"frontend done!"
)
gen_word2phone_dict
(
frontend
,
jieba_words_dict_file
,
word2phone_dict_file
,
get_tone
=
False
)
gen_word2phone_dict
(
frontend
,
jieba_words_dict_file
,
word2phone_dict_file
,
get_tone
=
False
)
elif
(
am_type
==
"speedyspeech"
):
elif
(
am_type
==
"speedyspeech"
):
phone2id_dict_file
=
cf
.
get
(
am_type
,
"phone2id_dict"
)
tone2id_dict_file
=
cf
.
get
(
am_type
,
"tone2id_dict"
)
word2phone_dict_file
=
cf
.
get
(
am_type
,
"word2phone_dict"
)
frontend
=
Frontend
(
phone_vocab_path
=
phone2id_dict_file
,
tone_vocab_path
=
tone2id_dict_file
)
frontend
=
Frontend
(
phone_vocab_path
=
phone2id_dict_file
,
tone_vocab_path
=
tone2id_dict_file
)
print
(
"frontend done!"
)
gen_word2phone_dict
(
frontend
,
jieba_words_dict_file
,
word2phone_dict_file
,
get_tone
=
True
)
gen_word2phone_dict
(
frontend
,
jieba_words_dict_file
,
word2phone_dict_file
,
get_tone
=
True
)
else
:
print
(
"Please set correct am type, fastspeech2 or speedyspeech."
)
...
...
demos/TTSCppFrontend/front_demo/gentools/genid.py
浏览文件 @
1aa7495d
#from parakeet.frontend.vocab import Vocab
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PHONESFILE
=
"./dict/phones.txt"
PHONES_ID_FILE
=
"./dict/phonesid.dict"
TONESFILE
=
"./dict/tones.txt"
TONES_ID_FILE
=
"./dict/tonesid.dict"
def
GenIdFile
(
file
,
idfile
):
id
=
2
with
open
(
file
,
'r'
)
as
f1
,
open
(
idfile
,
"w+"
)
as
f2
:
...
...
@@ -16,7 +29,7 @@ def GenIdFile(file, idfile):
f2
.
write
(
phone
+
" "
+
str
(
id
)
+
"
\n
"
)
id
+=
1
if
__name__
==
"__main__"
:
GenIdFile
(
PHONESFILE
,
PHONES_ID_FILE
)
GenIdFile
(
TONESFILE
,
TONES_ID_FILE
)
demos/TTSCppFrontend/front_demo/gentools/word2phones.py
浏览文件 @
1aa7495d
from
pypinyin
import
lazy_pinyin
,
Style
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
re
from
pypinyin
import
lazy_pinyin
from
pypinyin
import
Style
worddict
=
"./dict/jieba_part.dict.utf8"
newdict
=
"./dict/word_phones.dict"
def
GenPhones
(
initials
,
finals
,
seperate
=
True
):
phones
=
[]
...
...
@@ -14,9 +30,9 @@ def GenPhones(initials, finals, seperate=True):
elif
c
in
[
'zh'
,
'ch'
,
'sh'
,
'r'
]:
v
=
re
.
sub
(
'i'
,
'iii'
,
v
)
if
c
:
if
seperate
==
True
:
if
seperate
is
True
:
phones
.
append
(
c
+
'0'
)
elif
seperate
==
False
:
elif
seperate
is
False
:
phones
.
append
(
c
)
else
:
print
(
"Not sure whether phone and tone need to be separated"
)
...
...
@@ -28,8 +44,10 @@ def GenPhones(initials, finals, seperate=True):
with
open
(
worddict
,
"r"
)
as
f1
,
open
(
newdict
,
"w+"
)
as
f2
:
for
line
in
f1
.
readlines
():
word
=
line
.
split
(
" "
)[
0
]
initials
=
lazy_pinyin
(
word
,
neutral_tone_with_five
=
True
,
style
=
Style
.
INITIALS
)
finals
=
lazy_pinyin
(
word
,
neutral_tone_with_five
=
True
,
style
=
Style
.
FINALS_TONE3
)
initials
=
lazy_pinyin
(
word
,
neutral_tone_with_five
=
True
,
style
=
Style
.
INITIALS
)
finals
=
lazy_pinyin
(
word
,
neutral_tone_with_five
=
True
,
style
=
Style
.
FINALS_TONE3
)
phones
=
GenPhones
(
initials
,
finals
,
True
)
...
...
demos/TTSCppFrontend/src/base/type_conv.cpp
浏览文件 @
1aa7495d
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/type_conv.h"
namespace
ppspeech
{
// wstring to string
std
::
string
wstring2utf8string
(
const
std
::
wstring
&
str
)
{
static
std
::
wstring_convert
<
std
::
codecvt_utf8
<
wchar_t
>
>
strCnv
;
std
::
string
wstring2utf8string
(
const
std
::
wstring
&
str
)
{
static
std
::
wstring_convert
<
std
::
codecvt_utf8
<
wchar_t
>>
strCnv
;
return
strCnv
.
to_bytes
(
str
);
}
// string to wstring
std
::
wstring
utf8string2wstring
(
const
std
::
string
&
str
)
{
static
std
::
wstring_convert
<
std
::
codecvt_utf8
<
wchar_t
>
>
strCnv
;
std
::
wstring
utf8string2wstring
(
const
std
::
string
&
str
)
{
static
std
::
wstring_convert
<
std
::
codecvt_utf8
<
wchar_t
>>
strCnv
;
return
strCnv
.
from_bytes
(
str
);
}
}
}
// namespace ppspeech
demos/TTSCppFrontend/src/base/type_conv.h
浏览文件 @
1aa7495d
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BASE_TYPE_CONVC_H
#define BASE_TYPE_CONVC_H
#include <string>
#include <locale>
#include <codecvt>
#include <locale>
#include <string>
namespace
ppspeech
{
...
...
@@ -12,7 +26,6 @@ std::string wstring2utf8string(const std::wstring& str);
// string to wstring
std
::
wstring
utf8string2wstring
(
const
std
::
string
&
str
);
}
#endif // BASE_TYPE_CONVC_H
\ No newline at end of file
demos/TTSCppFrontend/src/front/front_interface.cpp
浏览文件 @
1aa7495d
此差异已折叠。
点击以展开。
demos/TTSCppFrontend/src/front/front_interface.h
浏览文件 @
1aa7495d
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H
#define PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H
#include <glog/logging.h>
#include <fstream>
#include <map>
#include <string>
#include <memory>
#include <fstream>
#include <glog/logging.h>
#include <string>
//#include "utils/dir_utils.h"
#include <cppjieba/Jieba.hpp>
#include "front/text_normalize.h"
#include "absl/strings/str_split.h"
#include "front/text_normalize.h"
namespace
ppspeech
{
class
FrontEngineInterface
:
public
TextNormalizer
{
class
FrontEngineInterface
:
public
TextNormalizer
{
public:
FrontEngineInterface
(
std
::
string
conf
)
:
_conf_file
(
conf
)
{
explicit
FrontEngineInterface
(
std
::
string
conf
)
:
_conf_file
(
conf
)
{
TextNormalizer
();
_jieba
=
nullptr
;
_initialed
=
false
;
...
...
@@ -24,36 +37,48 @@ namespace ppspeech {
}
int
init
();
~
FrontEngineInterface
()
{
}
~
FrontEngineInterface
()
{}
// 读取配置文件
int
ReadConfFile
();
// 简体转繁体
int
Trand2Simp
(
const
std
::
wstring
&
sentence
,
std
::
wstring
&
sentence_simp
);
int
Trand2Simp
(
const
std
::
wstring
&
sentence
,
std
::
wstring
*
sentence_simp
);
// 生成字典
int
GenDict
(
const
std
::
string
&
file
,
std
::
map
<
std
::
string
,
std
::
string
>
&
map
);
int
GenDict
(
const
std
::
string
&
file
,
std
::
map
<
std
::
string
,
std
::
string
>
*
map
);
// 由 词+词性的分词结果转为仅包含词的结果
int
GetSegResult
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg
,
std
::
vector
<
std
::
string
>
&
seg_words
);
// 生成句子的音素,音调id。如果音素和音调未分开,则 toneids 为空(fastspeech2),反之则不为空(speedyspeech)
int
GetSentenceIds
(
const
std
::
string
&
sentence
,
std
::
vector
<
int
>
&
phoneids
,
std
::
vector
<
int
>
&
toneids
);
// 根据分词结果获取词的音素,音调id,并对读音进行适当修改 (ModifyTone)。如果音素和音调未分开,则 toneids 为空(fastspeech2),反之则不为空(speedyspeech)
int
GetWordsIds
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
cut_result
,
std
::
vector
<
int
>
&
phoneids
,
std
::
vector
<
int
>
&
toneids
);
// 结巴分词生成包含词和词性的分词结果,再对分词结果进行适当修改 (MergeforModify)
int
Cut
(
const
std
::
string
&
sentence
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
cut_result
);
int
GetSegResult
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg
,
std
::
vector
<
std
::
string
>
*
seg_words
);
// 生成句子的音素,音调id。如果音素和音调未分开,则 toneids
// 为空(fastspeech2),反之则不为空(speedyspeech)
int
GetSentenceIds
(
const
std
::
string
&
sentence
,
std
::
vector
<
int
>
*
phoneids
,
std
::
vector
<
int
>
*
toneids
);
// 根据分词结果获取词的音素,音调id,并对读音进行适当修改
// (ModifyTone)。如果音素和音调未分开,则 toneids
// 为空(fastspeech2),反之则不为空(speedyspeech)
int
GetWordsIds
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
cut_result
,
std
::
vector
<
int
>
*
phoneids
,
std
::
vector
<
int
>
*
toneids
);
// 结巴分词生成包含词和词性的分词结果,再对分词结果进行适当修改
// (MergeforModify)
int
Cut
(
const
std
::
string
&
sentence
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
cut_result
);
// 字词到音素的映射,查找字典
int
GetPhone
(
const
std
::
string
&
word
,
std
::
string
&
phone
);
int
GetPhone
(
const
std
::
string
&
word
,
std
::
string
*
phone
);
// 音素到音素id
int
Phone2Phoneid
(
const
std
::
string
&
phone
,
std
::
vector
<
int
>
&
phoneid
,
std
::
vector
<
int
>
&
toneids
);
int
Phone2Phoneid
(
const
std
::
string
&
phone
,
std
::
vector
<
int
>
*
phoneid
,
std
::
vector
<
int
>
*
toneids
);
// 根据韵母判断该词中每个字的读音都为第三声。true表示词中每个字都是第三声
...
...
@@ -63,58 +88,78 @@ namespace ppspeech {
bool
IsReduplication
(
const
std
::
string
&
word
);
// 获取每个字词的声母韵母列表
int
GetInitialsFinals
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
&
word_initials
,
std
::
vector
<
std
::
string
>
&
word_finals
);
int
GetInitialsFinals
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
*
word_initials
,
std
::
vector
<
std
::
string
>
*
word_finals
);
// 获取每个字词的韵母列表
int
GetFinals
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
&
word_finals
);
int
GetFinals
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
*
word_finals
);
// 整个词转成向量形式,向量的每个元素对应词的一个字
int
Word2WordVec
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
wstring
>
&
wordvec
);
int
Word2WordVec
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
wstring
>
*
wordvec
);
// 将整个词重新进行 full cut,分词后,各个词会在词典中
int
SplitWord
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
&
fullcut_word
);
int
SplitWord
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
*
fullcut_word
);
// 对分词结果进行处理:对包含“不”字的分词结果进行整理
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeBu
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
);
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeBu
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
);
// 对分词结果进行处理:对包含“一”字的分词结果进行整理
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
Mergeyi
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
);
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
Mergeyi
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
);
// 对分词结果进行处理:对前后相同的两个字进行合并
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeReduplication
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
);
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeReduplication
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
);
// 对一个词和后一个词他们的读音均为第三声的两个词进行合并
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeThreeTones
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
);
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeThreeTones
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
);
// 对一个词的最后一个读音和后一个词的第一个读音为第三声的两个词进行合并
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeThreeTones2
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
);
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeThreeTones2
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
);
// 对分词结果进行处理:对包含“儿”字的分词结果进行整理
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeEr
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
);
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
MergeEr
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
);
// 对分词结果进行处理、修改
int
MergeforModify
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
seg_result
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
merge_seg_result
);
int
MergeforModify
(
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
seg_result
,
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
*
merge_seg_result
);
// 对包含“不”字的相关词音调进行修改
int
BuSandi
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
&
finals
);
int
BuSandi
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
*
finals
);
// 对包含“一”字的相关词音调进行修改
int
YiSandhi
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
&
finals
);
int
YiSandhi
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
*
finals
);
// 对一些特殊词(包括量词,语助词等)的相关词音调进行修改
int
NeuralSandhi
(
const
std
::
string
&
word
,
const
std
::
string
&
pos
,
std
::
vector
<
std
::
string
>
&
finals
);
int
NeuralSandhi
(
const
std
::
string
&
word
,
const
std
::
string
&
pos
,
std
::
vector
<
std
::
string
>
*
finals
);
// 对包含第三声的相关词音调进行修改
int
ThreeSandhi
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
&
finals
);
int
ThreeSandhi
(
const
std
::
string
&
word
,
std
::
vector
<
std
::
string
>
*
finals
);
// 对字词音调进行处理、修改
int
ModifyTone
(
const
std
::
string
&
word
,
const
std
::
string
&
pos
,
std
::
vector
<
std
::
string
>
&
finals
);
int
ModifyTone
(
const
std
::
string
&
word
,
const
std
::
string
&
pos
,
std
::
vector
<
std
::
string
>
*
finals
);
// 对儿化音进行处理
std
::
vector
<
std
::
vector
<
std
::
string
>>
MergeErhua
(
const
std
::
vector
<
std
::
string
>
&
initials
,
const
std
::
vector
<
std
::
string
>
&
finals
,
const
std
::
string
&
word
,
const
std
::
string
&
pos
);
std
::
vector
<
std
::
vector
<
std
::
string
>>
MergeErhua
(
const
std
::
vector
<
std
::
string
>
&
initials
,
const
std
::
vector
<
std
::
string
>
&
finals
,
const
std
::
string
&
word
,
const
std
::
string
&
pos
);
private:
...
...
@@ -148,9 +193,6 @@ namespace ppspeech {
std
::
vector
<
std
::
string
>
must_not_neural_tone_words
;
std
::
vector
<
std
::
string
>
must_neural_tone_words
;
};
}
};
}
// namespace ppspeech
#endif
\ No newline at end of file
demos/TTSCppFrontend/src/front/text_normalize.cpp
浏览文件 @
1aa7495d
此差异已折叠。
点击以展开。
demos/TTSCppFrontend/src/front/text_normalize.h
浏览文件 @
1aa7495d
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H
#define PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H
#include <glog/logging.h>
#include <codecvt>
#include <map>
#include <regex>
#include <string>
#include <codecvt>
#include <glog/logging.h>
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include "base/type_conv.h"
...
...
@@ -13,50 +26,52 @@
namespace
ppspeech
{
class
TextNormalizer
{
public:
TextNormalizer
()
{
InitMap
();
}
~
TextNormalizer
()
{
}
public:
TextNormalizer
()
{
InitMap
();
}
~
TextNormalizer
()
{}
int
InitMap
();
int
Replace
(
std
::
wstring
&
sentence
,
const
int
&
pos
,
const
int
&
len
,
const
std
::
wstring
&
repstr
);
int
SplitByPunc
(
const
std
::
wstring
&
sentence
,
std
::
vector
<
std
::
wstring
>
&
sentence_part
);
int
Replace
(
std
::
wstring
*
sentence
,
const
int
&
pos
,
const
int
&
len
,
const
std
::
wstring
&
repstr
);
int
SplitByPunc
(
const
std
::
wstring
&
sentence
,
std
::
vector
<
std
::
wstring
>
*
sentence_part
);
std
::
string
CreateTextValue
(
const
std
::
string
&
num
,
bool
use_zero
=
true
);
std
::
string
SingleDigit2Text
(
const
std
::
string
&
num_str
,
bool
alt_one
=
false
);
std
::
string
CreateTextValue
(
const
std
::
string
&
num
,
bool
use_zero
=
true
);
std
::
string
SingleDigit2Text
(
const
std
::
string
&
num_str
,
bool
alt_one
=
false
);
std
::
string
SingleDigit2Text
(
const
std
::
wstring
&
num
,
bool
alt_one
=
false
);
std
::
string
MultiDigit2Text
(
const
std
::
string
&
num_str
,
bool
alt_one
=
false
,
bool
use_zero
=
true
);
std
::
string
MultiDigit2Text
(
const
std
::
wstring
&
num
,
bool
alt_one
=
false
,
bool
use_zero
=
true
);
std
::
string
MultiDigit2Text
(
const
std
::
string
&
num_str
,
bool
alt_one
=
false
,
bool
use_zero
=
true
);
std
::
string
MultiDigit2Text
(
const
std
::
wstring
&
num
,
bool
alt_one
=
false
,
bool
use_zero
=
true
);
std
::
string
Digits2Text
(
const
std
::
string
&
num_str
);
std
::
string
Digits2Text
(
const
std
::
wstring
&
num
);
int
ReData
(
std
::
wstring
&
sentence
);
int
ReData2
(
std
::
wstring
&
sentence
);
int
ReTime
(
std
::
wstring
&
sentence
);
int
ReTemperature
(
std
::
wstring
&
sentence
);
int
ReFrac
(
std
::
wstring
&
sentence
);
int
RePercentage
(
std
::
wstring
&
sentence
);
int
ReMobilePhone
(
std
::
wstring
&
sentence
);
int
RePhone
(
std
::
wstring
&
sentence
);
int
ReRange
(
std
::
wstring
&
sentence
);
int
ReInterger
(
std
::
wstring
&
sentence
);
int
ReDecimalNum
(
std
::
wstring
&
sentence
);
int
RePositiveQuantifiers
(
std
::
wstring
&
sentence
);
int
ReDefalutNum
(
std
::
wstring
&
sentence
);
int
ReNumber
(
std
::
wstring
&
sentence
);
int
SentenceNormalize
(
std
::
wstring
&
sentence
);
private:
std
::
map
<
std
::
string
,
std
::
string
>
digits_map
;
std
::
map
<
int
,
std
::
string
>
units_map
;
int
ReData
(
std
::
wstring
*
sentence
);
int
ReData2
(
std
::
wstring
*
sentence
);
int
ReTime
(
std
::
wstring
*
sentence
);
int
ReTemperature
(
std
::
wstring
*
sentence
);
int
ReFrac
(
std
::
wstring
*
sentence
);
int
RePercentage
(
std
::
wstring
*
sentence
);
int
ReMobilePhone
(
std
::
wstring
*
sentence
);
int
RePhone
(
std
::
wstring
*
sentence
);
int
ReRange
(
std
::
wstring
*
sentence
);
int
ReInterger
(
std
::
wstring
*
sentence
);
int
ReDecimalNum
(
std
::
wstring
*
sentence
);
int
RePositiveQuantifiers
(
std
::
wstring
*
sentence
);
int
ReDefalutNum
(
std
::
wstring
*
sentence
);
int
ReNumber
(
std
::
wstring
*
sentence
);
int
SentenceNormalize
(
std
::
wstring
*
sentence
);
private:
std
::
map
<
std
::
string
,
std
::
string
>
digits_map
;
std
::
map
<
int
,
std
::
string
>
units_map
;
};
}
}
// namespace ppspeech
#endif
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录