Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ab4217c2
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ab4217c2
编写于
3月 22, 2023
作者:
J
jlqian98
提交者:
GitHub
3月 22, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Engine] add TN/ITN functions (#3047)
* add AddBlk, ReverseFrac function * rename text processing functions
上级
704e363a
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
134 addition
and
0 deletion
+134
-0
runtime/engine/common/utils/text_process.cc
runtime/engine/common/utils/text_process.cc
+74
-0
runtime/engine/common/utils/text_process.h
runtime/engine/common/utils/text_process.h
+13
-0
runtime/engine/common/utils/text_process_test.cc
runtime/engine/common/utils/text_process_test.cc
+47
-0
未找到文件。
runtime/engine/common/utils/text_process.cc
0 → 100644
浏览文件 @
ab4217c2
#include "utils/text_process.h"
namespace
ppspeech
{
// Removes blanks (' ') from `str`.
//
// Every space character is dropped, except that a single space is kept
// between two alphabetic characters (as classified by std::isalpha) that were
// separated by one or more spaces in the input, so English words stay apart
// while spaces around other text (e.g. CJK bytes, digits) are deleted.
//
// Returns the filtered copy; `str` itself is not modified.
std::string DelBlank(const std::string& str) {
    // std::isalpha has undefined behaviour for negative values other than
    // EOF, and bytes of multi-byte UTF-8 characters are negative where `char`
    // is signed, so always classify through unsigned char.
    const auto is_alpha = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };

    std::string out;
    out.reserve(str.size());

    // True when at least one space was skipped since the last copied char.
    bool skipped_space = false;
    for (const char ch : str) {
        if (ch == ' ') {
            skipped_space = true;
            continue;
        }
        // Re-insert one separator only between two alphabetic characters
        // that had space(s) between them in the input.
        if (skipped_space && !out.empty() && is_alpha(ch) &&
            is_alpha(out.back())) {
            out += ' ';
        }
        out += ch;
        skipped_space = false;
    }
    return out;
}
// Surrounds every run of alphabetic characters in `str` with blanks.
//
// For each maximal run of characters classified as alphabetic by
// std::isalpha:
//   * a space is emitted before the run unless the preceding input character
//     is already a space (a run at the very start always gets one);
//   * a space is always emitted after the run.
// All other characters, including existing spaces, are copied unchanged, so
// "how are you" becomes " how  are  you " (the original separator plus the
// added post-space).
//
// Returns the expanded copy; `str` itself is not modified.
std::string AddBlank(const std::string& str) {
    // std::isalpha has undefined behaviour for negative values other than
    // EOF, and bytes of multi-byte UTF-8 characters are negative where `char`
    // is signed, so always classify through unsigned char.
    const auto is_alpha = [](char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    };

    std::string out;
    out.reserve(str.size() + 2);

    const std::string::size_type end = str.size();
    std::string::size_type pos = 0;
    while (pos < end) {
        if (!is_alpha(str[pos])) {
            out += str[pos];
            ++pos;
            continue;
        }

        // Pre-space for an English word, unless the input already has one.
        if (pos == 0 || str[pos - 1] != ' ') {
            out += ' ';
        }
        // Copy the whole word; the explicit bound keeps the scan inside the
        // string instead of relying on the terminator being non-alphabetic.
        while (pos < end && is_alpha(str[pos])) {
            out += str[pos];
            ++pos;
        }
        // Post-space for an English word.
        out += ' ';
    }
    return out;
}
// Swaps the two sides of every fraction wrapped in "<tag>" markers.
//
// The input is scanned for spans of the form  <tag>num1/num2<tag>  (the
// closing marker is the same literal "<tag>" as the opening one).  Each such
// span is replaced by "num2/num1" with both markers removed; the split is
// made at the first '/' inside the span.  Text outside the markers is copied
// unchanged.
//
// Malformed input is handled without reading outside the string:
//   * a "<tag>" with no closing "<tag>" is left in place together with
//     everything after it;
//   * a marker pair that contains no '/' has its markers removed and its
//     content copied as is.
//
// Returns the rewritten copy; `str` itself is not modified.
std::string ReverseFraction(const std::string& str) {
    const char kTag[] = "<tag>";
    const std::string::size_type kTagLen = sizeof(kTag) - 1;

    std::string out;
    out.reserve(str.size());

    std::string::size_type pos = 0;  // first input char not yet emitted
    while (pos < str.size()) {
        const std::string::size_type left = str.find(kTag, pos);
        if (left == std::string::npos) break;

        const std::string::size_type body = left + kTagLen;
        const std::string::size_type right = str.find(kTag, body);
        // Unterminated marker: stop here so the tail is copied verbatim.
        if (right == std::string::npos) break;

        // Content before the left marker (xxx).
        out.append(str, pos, left - pos);

        const std::string::size_type frac = str.find('/', body);
        if (frac == std::string::npos || frac >= right) {
            // No '/' between the markers: nothing to swap.
            out.append(str, body, right - body);
        } else {
            // num2/num1
            out.append(str, frac + 1, right - frac - 1);
            out += '/';
            out.append(str, body, frac - body);
        }

        // Always advances past the closing marker, so the loop terminates.
        pos = right + kTagLen;
    }

    // Remaining text after the last processed marker pair.
    out.append(str, pos, std::string::npos);
    return out;
}
}
// namespace ppspeech
\ No newline at end of file
runtime/engine/common/utils/text_process.h
0 → 100644
浏览文件 @
ab4217c2
#pragma once

#include <string>
#include <vector>
#include <cctype>

namespace ppspeech {

// Returns a copy of `str` with space characters removed. A single space is
// kept between two alphabetic characters that were separated by one or more
// spaces in the input.
std::string DelBlank(const std::string& str);

// Returns a copy of `str` in which every run of alphabetic characters is
// followed by a space, and preceded by one unless the input already has a
// space directly in front of the run. Other characters are copied unchanged.
std::string AddBlank(const std::string& str);

// Returns a copy of `str` in which each "<tag>num1/num2<tag>" span is
// replaced by "num2/num1" (markers removed). Text outside the markers is
// copied unchanged.
std::string ReverseFraction(const std::string& str);

}  // namespace ppspeech
\ No newline at end of file
runtime/engine/common/utils/text_process_test.cc
0 → 100644
浏览文件 @
ab4217c2
#include "utils/text_process.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
// DelBlank must drop spaces next to non-alphabetic text and keep exactly one
// separator between English words.
TEST(TextProcess, DelBlankTest) {
    // Comparing the strings directly (instead of compare() == 0) makes a
    // failure print both the actual and the expected text.

    // Chinese text with digits: every space is removed.
    EXPECT_EQ(ppspeech::DelBlank("我 今天 去 了 超市 花了 120 元。"),
              "我今天去了超市花了120元。");

    // Pure English: the word separators survive.
    EXPECT_EQ(ppspeech::DelBlank("how are you today"), "how are you today");

    // Mixed text: spaces between English and Chinese are removed.
    EXPECT_EQ(ppspeech::DelBlank("我 的 paper 在 哪里?"),
              "我的paper在哪里?");
}
// AddBlank must wrap every English word in spaces and leave other text as is.
TEST(TextProcess, AddBlankTest) {
    // Each word gets a post-space in addition to the separator already in the
    // input, and a word that follows a space gets no extra pre-space, so the
    // words end up two spaces apart.
    EXPECT_EQ(ppspeech::AddBlank("how are you"), " how  are  you ");

    // An English word embedded in Chinese text is padded on both sides.
    EXPECT_EQ(ppspeech::AddBlank("欢迎来到China。"), "欢迎来到 China 。");
}
// ReverseFraction must swap both sides of each tagged fraction and strip the
// "<tag>" markers.
TEST(TextProcess, ReverseFractionTest) {
    // Comparing the strings directly makes a failure print the actual
    // output, so no separate debug printing is needed.

    // Single tagged fraction.
    EXPECT_EQ(ppspeech::ReverseFraction("<tag>3/1<tag>"), "1/3");

    // Two tagged fractions; the text between them is preserved.
    EXPECT_EQ(ppspeech::ReverseFraction("<tag>3/1<tag> <tag>100/10000<tag>"),
              "1/3 10000/100");
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录