Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
eef364d1
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
eef364d1
编写于
8月 23, 2017
作者:
Y
Yibing Liu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adapt to the last three commits
上级
8dc0b2b0
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
95 addition
and
2 deletion
+95
-2
deploy/README.md
deploy/README.md
+1
-1
deploy/scorer.cpp
deploy/scorer.cpp
+85
-0
deploy/scorer.h
deploy/scorer.h
+9
-1
未找到文件。
deploy/README.md
浏览文件 @
eef364d1
...
...
@@ -14,7 +14,7 @@ wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz
tar
-xzvf
openfst-1.6.3.tar.gz
```
-
[
**
swig**
]
: Compiling for python interface requires swig, please make sure swig being installed.
-
[
**
SWIG**
](
http://www.swig.org
)
: Compiling the Python interface requires SWIG; please make sure SWIG is installed.
-
[
**ThreadPool**
](
http://progsch.net/wordpress/
)
: A library for C++ thread pool
...
...
deploy/scorer.cpp
浏览文件 @
eef364d1
...
...
@@ -3,9 +3,13 @@
#include "lm/config.hh"
#include "lm/state.hh"
#include "lm/model.hh"
#include "util/tokenize_piece.hh"
#include "util/string_piece.hh"
#include "scorer.h"
#include "decoder_utils.h"
using
namespace
lm
::
ngram
;
Scorer
::
Scorer
(
double
alpha
,
double
beta
,
const
std
::
string
&
lm_path
)
{
this
->
alpha
=
alpha
;
this
->
beta
=
beta
;
...
...
@@ -90,3 +94,84 @@ double Scorer::get_log_prob(const std::vector<std::string>& words) {
}
return
score
;
}
/* Strip a leading and trailing character from a string, in place.
 * Parameters:
 *     str: The string to trim; it is modified in place.
 *     ch:  The character to remove from both ends (a space by default).
 * Return:
 *     void
 */
inline void strip(std::string& str, char ch = ' ') {
  // Index of the first character that survives trimming.
  const std::string::size_type first = str.find_first_not_of(ch);
  if (first == std::string::npos) {
    // Empty input, or every character equals ch: nothing survives.
    str.clear();
    return;
  }
  // A surviving character exists, so this search cannot fail.
  const std::string::size_type last = str.find_last_not_of(ch);
  // Drop the tail first so that `first` stays a valid index.
  str.erase(last + 1);
  str.erase(0, first);
}
/* Count the words of a sentence (used as the word insertion term).
 *
 * The sentence is first stripped of leading and trailing spaces. The count
 * starts at one and grows by one for every space that directly follows a
 * non-space character, so a run of consecutive spaces counts as a single
 * separator. An empty (or all-space) sentence therefore yields 1.
 */
int Scorer::word_count(std::string sentence) {
  strip(sentence);
  int words = 1;
  // After strip() the first character is never a space, so the initial
  // value of `previous` can never trigger an increment.
  char previous = ' ';
  for (std::string::const_iterator pos = sentence.begin();
       pos != sentence.end(); ++pos) {
    if (*pos == ' ' && previous != ' ') {
      ++words;
    }
    previous = *pos;
  }
  return words;
}
/* Return the log10 conditional probability of the last word of a sentence.
 *
 * The sentence is split on single spaces and every token is fed to the
 * language model in order, starting from the begin-of-sentence state. The
 * score reported for the final token is returned.
 *
 * If the sentence yields no token at all, 0.0 is returned: the score record
 * is value-initialized so the result is always defined.
 */
double Scorer::get_log_cond_prob(std::string sentence) {
  lm::base::Model* model = (lm::base::Model*)this->_language_model;
  State state, out_state;
  // Value-initialize: the loop below may run zero times, and reading an
  // uninitialized `prob` would be undefined behavior.
  lm::FullScoreReturn ret = lm::FullScoreReturn();
  model->BeginSentenceWrite(&state);

  // NOTE(review): the `true` template argument presumably makes the iterator
  // skip empty tokens produced by consecutive spaces -- confirm against KenLM.
  for (util::TokenIter<util::SingleCharacter, true> it(sentence, ' '); it; ++it) {
    lm::WordIndex wid = model->BaseVocabulary().Index(*it);
    ret = model->BaseFullScore(&state, wid, &out_state);
    // The output state becomes the context for the next word.
    state = out_state;
  }

  // log10 prob of the last scored word
  double log_prob = ret.prob;
  return log_prob;
}
/* Overwrite the scorer's alpha and beta members with new values.
 *
 * The arguments arrive as float and are widened to the double members.
 */
void Scorer::reset_params(float alpha, float beta) {
  // `this->` is required: the parameters shadow the members of the same name.
  this->beta = static_cast<double>(beta);
  this->alpha = static_cast<double>(alpha);
}
/* Combine the language model score and the word count into a final score.
 *
 * With lm_score = get_log_cond_prob(sentence) (a log10 value) and
 * word_cnt = word_count(sentence):
 *   log == false:  10^(alpha * lm_score) * word_cnt^beta
 *   log == true :  alpha * lm_score * ln(10) + beta * ln(word_cnt)
 * i.e. the second form is the natural logarithm of the first.
 */
double Scorer::get_score(std::string sentence, bool log) {
  const double lm_score = get_log_cond_prob(sentence);
  const int word_cnt = word_count(sentence);

  if (log) {
    // Multiplying by ln(10) converts the log10 score to a natural log.
    return alpha * lm_score * std::log(10) + beta * std::log(word_cnt);
  }
  return pow(10, alpha * lm_score) * pow(word_cnt, beta);
}
deploy/scorer.h
浏览文件 @
eef364d1
...
...
@@ -30,6 +30,7 @@ public:
// Example:
// Scorer scorer(alpha, beta, "path_of_language_model");
// scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" });
// scorer.get_log_cond_prob("this a sentence");
// scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" });
class
Scorer
{
public:
...
...
@@ -40,7 +41,14 @@ public:
size_t
get_max_order
()
{
return
_max_order
;
}
bool
is_character_based
()
{
return
_is_character_based
;
}
std
::
vector
<
std
::
string
>
get_vocab
()
{
return
_vocabulary
;
}
// word insertion term
int
word_count
(
std
::
string
);
// get the log cond prob of the last word
double
get_log_cond_prob
(
std
::
string
);
// reset params alpha & beta
void
reset_params
(
float
alpha
,
float
beta
);
// get the final score
double
get_score
(
std
::
string
,
bool
log
=
false
);
// expose to decoder
double
alpha
;
double
beta
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录