Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
a7660331a
tesseract
提交
da1254dd
T
tesseract
项目概览
a7660331a
/
tesseract
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
tesseract
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
da1254dd
编写于
5月 19, 2017
作者:
Z
zdenop
提交者:
GitHub
5月 19, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #936 from stweil/opt
Reduce number of new / delete operations
上级
95bf30de
e6d68392
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
36 addition
and
40 deletion
+36
-40
classify/kdtree.cpp
classify/kdtree.cpp
+11
-10
wordrec/language_model.cpp
wordrec/language_model.cpp
+22
-27
wordrec/language_model.h
wordrec/language_model.h
+3
-3
未找到文件。
classify/kdtree.cpp
浏览文件 @
da1254dd
...
...
@@ -132,21 +132,20 @@ class KDTreeSearch {
KDTREE
*
tree_
;
FLOAT32
*
query_point_
;
MinK
<
FLOAT32
,
void
*>*
results_
;
FLOAT32
*
sb_min_
;
//< search box minimum
FLOAT32
*
sb_max_
;
//< search box maximum
MinK
<
FLOAT32
,
void
*>
results_
;
};
KDTreeSearch
::
KDTreeSearch
(
KDTREE
*
tree
,
FLOAT32
*
query_point
,
int
k_closest
)
:
tree_
(
tree
),
query_point_
(
query_point
)
{
results_
=
new
MinK
<
FLOAT32
,
void
*>
(
MAXSEARCH
,
k_closest
);
query_point_
(
query_point
)
,
results_
(
MAXSEARCH
,
k_closest
)
{
sb_min_
=
new
FLOAT32
[
tree
->
KeySize
];
sb_max_
=
new
FLOAT32
[
tree
->
KeySize
];
}
KDTreeSearch
::~
KDTreeSearch
()
{
delete
results_
;
delete
[]
sb_min_
;
delete
[]
sb_max_
;
}
...
...
@@ -164,11 +163,12 @@ void KDTreeSearch::Search(int *result_count,
sb_max_
[
i
]
=
tree_
->
KeyDesc
[
i
].
Max
;
}
SearchRec
(
0
,
tree_
->
Root
.
Left
);
int
count
=
results_
->
elements_count
();
int
count
=
results_
.
elements_count
();
*
result_count
=
count
;
for
(
int
j
=
0
;
j
<
count
;
j
++
)
{
distances
[
j
]
=
(
FLOAT32
)
sqrt
((
FLOAT64
)
results_
->
elements
()[
j
].
key
);
results
[
j
]
=
results_
->
elements
()[
j
].
value
;
// TODO: why FLOAT64 here?
distances
[
j
]
=
(
FLOAT32
)
sqrt
((
FLOAT64
)
results_
.
elements
()[
j
].
key
);
results
[
j
]
=
results_
.
elements
()[
j
].
value
;
}
}
}
...
...
@@ -405,9 +405,9 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
if
(
!
BoxIntersectsSearch
(
sb_min_
,
sb_max_
))
return
;
results_
->
insert
(
DistanceSquared
(
tree_
->
KeySize
,
tree_
->
KeyDesc
,
results_
.
insert
(
DistanceSquared
(
tree_
->
KeySize
,
tree_
->
KeyDesc
,
query_point_
,
sub_tree
->
Key
),
sub_tree
->
Data
);
sub_tree
->
Data
);
if
(
query_point_
[
level
]
<
sub_tree
->
BranchPoint
)
{
if
(
sub_tree
->
Left
!=
NULL
)
{
...
...
@@ -479,9 +479,10 @@ FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]) {
/// one wrap distance away from the query.
bool
KDTreeSearch
::
BoxIntersectsSearch
(
FLOAT32
*
lower
,
FLOAT32
*
upper
)
{
FLOAT32
*
query
=
query_point_
;
// Why FLOAT64?
FLOAT64
total_distance
=
0.0
;
FLOAT64
radius_squared
=
results_
->
max_insertable_key
()
*
results_
->
max_insertable_key
();
results_
.
max_insertable_key
()
*
results_
.
max_insertable_key
();
PARAM_DESC
*
dim
=
tree_
->
KeyDesc
;
for
(
int
i
=
tree_
->
KeySize
;
i
>
0
;
i
--
,
dim
++
,
query
++
,
lower
++
,
upper
++
)
{
...
...
wordrec/language_model.cpp
浏览文件 @
da1254dd
...
...
@@ -118,20 +118,15 @@ LanguageModel::LanguageModel(const UnicityTable<FontInfo> *fontinfo_table,
BOOL_INIT_MEMBER
(
language_model_use_sigmoidal_certainty
,
false
,
"Use sigmoidal score for certainty"
,
dict
->
getCCUtil
()
->
params
()),
dawg_args_
(
nullptr
,
new
DawgPositionVector
(),
NO_PERM
),
fontinfo_table_
(
fontinfo_table
),
dict_
(
dict
),
fixed_pitch_
(
false
),
max_char_wh_ratio_
(
0.0
),
acceptable_choice_found_
(
false
)
{
ASSERT_HOST
(
dict_
!=
NULL
);
dawg_args_
=
new
DawgArgs
(
NULL
,
new
DawgPositionVector
(),
NO_PERM
);
very_beginning_active_dawgs_
=
new
DawgPositionVector
();
beginning_active_dawgs_
=
new
DawgPositionVector
();
}
LanguageModel
::~
LanguageModel
()
{
delete
very_beginning_active_dawgs_
;
delete
beginning_active_dawgs_
;
delete
dawg_args_
->
updated_dawgs
;
delete
dawg_args_
;
delete
dawg_args_
.
updated_dawgs
;
}
void
LanguageModel
::
InitForWord
(
const
WERD_CHOICE
*
prev_word
,
...
...
@@ -144,10 +139,10 @@ void LanguageModel::InitForWord(const WERD_CHOICE *prev_word,
correct_segmentation_explored_
=
false
;
// Initialize vectors with beginning DawgInfos.
very_beginning_active_dawgs_
->
clear
();
dict_
->
init_active_dawgs
(
very_beginning_active_dawgs_
,
false
);
beginning_active_dawgs_
->
clear
();
dict_
->
default_dawgs
(
beginning_active_dawgs_
,
false
);
very_beginning_active_dawgs_
.
clear
();
dict_
->
init_active_dawgs
(
&
very_beginning_active_dawgs_
,
false
);
beginning_active_dawgs_
.
clear
();
dict_
->
default_dawgs
(
&
beginning_active_dawgs_
,
false
);
// Fill prev_word_str_ with the last language_model_ngram_order
// unichars from prev_word.
...
...
@@ -791,18 +786,18 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
// Initialize active_dawgs from parent_vse if it is not NULL.
// Otherwise use very_beginning_active_dawgs_.
if
(
parent_vse
==
NULL
)
{
dawg_args_
->
active_dawgs
=
very_beginning_active_dawgs_
;
dawg_args_
->
permuter
=
NO_PERM
;
dawg_args_
.
active_dawgs
=
&
very_beginning_active_dawgs_
;
dawg_args_
.
permuter
=
NO_PERM
;
}
else
{
if
(
parent_vse
->
dawg_info
==
NULL
)
return
NULL
;
// not a dict word path
dawg_args_
->
active_dawgs
=
&
parent_vse
->
dawg_info
->
active_dawgs
;
dawg_args_
->
permuter
=
parent_vse
->
dawg_info
->
permuter
;
dawg_args_
.
active_dawgs
=
&
parent_vse
->
dawg_info
->
active_dawgs
;
dawg_args_
.
permuter
=
parent_vse
->
dawg_info
->
permuter
;
}
// Deal with hyphenated words.
if
(
word_end
&&
dict_
->
has_hyphen_end
(
b
.
unichar_id
(),
curr_col
==
0
))
{
if
(
language_model_debug_level
>
0
)
tprintf
(
"Hyphenated word found
\n
"
);
return
new
LanguageModelDawgInfo
(
dawg_args_
->
active_dawgs
,
return
new
LanguageModelDawgInfo
(
dawg_args_
.
active_dawgs
,
COMPOUND_PERM
);
}
...
...
@@ -815,7 +810,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
// Do not allow compounding of words with lengths shorter than
// language_model_min_compound_length
if
(
parent_vse
==
NULL
||
word_end
||
dawg_args_
->
permuter
==
COMPOUND_PERM
||
dawg_args_
.
permuter
==
COMPOUND_PERM
||
parent_vse
->
length
<
language_model_min_compound_length
)
return
NULL
;
int
i
;
...
...
@@ -835,7 +830,7 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
if
(
!
has_word_ending
)
return
NULL
;
if
(
language_model_debug_level
>
0
)
tprintf
(
"Compound word found
\n
"
);
return
new
LanguageModelDawgInfo
(
beginning_active_dawgs_
,
COMPOUND_PERM
);
return
new
LanguageModelDawgInfo
(
&
beginning_active_dawgs_
,
COMPOUND_PERM
);
}
// done dealing with compound words
LanguageModelDawgInfo
*
dawg_info
=
NULL
;
...
...
@@ -850,22 +845,22 @@ LanguageModelDawgInfo *LanguageModel::GenerateDawgInfo(
if
(
language_model_debug_level
>
2
)
tprintf
(
"Test Letter OK for unichar %d, normed %d
\n
"
,
b
.
unichar_id
(),
normed_ids
[
i
]);
dict_
->
LetterIsOkay
(
dawg_args_
,
normed_ids
[
i
],
dict_
->
LetterIsOkay
(
&
dawg_args_
,
normed_ids
[
i
],
word_end
&&
i
==
normed_ids
.
size
()
-
1
);
if
(
dawg_args_
->
permuter
==
NO_PERM
)
{
if
(
dawg_args_
.
permuter
==
NO_PERM
)
{
break
;
}
else
if
(
i
<
normed_ids
.
size
()
-
1
)
{
tmp_active_dawgs
=
*
dawg_args_
->
updated_dawgs
;
dawg_args_
->
active_dawgs
=
&
tmp_active_dawgs
;
tmp_active_dawgs
=
*
dawg_args_
.
updated_dawgs
;
dawg_args_
.
active_dawgs
=
&
tmp_active_dawgs
;
}
if
(
language_model_debug_level
>
2
)
tprintf
(
"Letter was OK for unichar %d, normed %d
\n
"
,
b
.
unichar_id
(),
normed_ids
[
i
]);
}
dawg_args_
->
active_dawgs
=
NULL
;
if
(
dawg_args_
->
permuter
!=
NO_PERM
)
{
dawg_info
=
new
LanguageModelDawgInfo
(
dawg_args_
->
updated_dawgs
,
dawg_args_
->
permuter
);
dawg_args_
.
active_dawgs
=
nullptr
;
if
(
dawg_args_
.
permuter
!=
NO_PERM
)
{
dawg_info
=
new
LanguageModelDawgInfo
(
dawg_args_
.
updated_dawgs
,
dawg_args_
.
permuter
);
}
else
if
(
language_model_debug_level
>
3
)
{
tprintf
(
"Letter %s not OK!
\n
"
,
dict_
->
getUnicharset
().
id_to_unichar
(
b
.
unichar_id
()));
...
...
@@ -1320,7 +1315,7 @@ void LanguageModel::UpdateBestChoice(
// Update hyphen state if we are dealing with a dictionary word.
if
(
vse
->
dawg_info
!=
NULL
)
{
if
(
dict_
->
has_hyphen_end
(
*
word
))
{
dict_
->
set_hyphen_word
(
*
word
,
*
(
dawg_args_
->
active_dawgs
));
dict_
->
set_hyphen_word
(
*
word
,
*
(
dawg_args_
.
active_dawgs
));
}
else
{
dict_
->
reset_hyphen_vars
(
true
);
}
...
...
wordrec/language_model.h
浏览文件 @
da1254dd
...
...
@@ -361,7 +361,7 @@ class LanguageModel {
// Temporary DawgArgs struct that is re-used across different words to
// avoid dynamic memory re-allocation (should be cleared before each use).
DawgArgs
*
dawg_args_
;
DawgArgs
dawg_args_
;
// Scaling for recovering blob outline length from rating and certainty.
float
rating_cert_scale_
;
...
...
@@ -392,8 +392,8 @@ class LanguageModel {
STRING
prev_word_str_
;
int
prev_word_unichar_step_len_
;
// Active dawg vector.
DawgPositionVector
*
very_beginning_active_dawgs_
;
// includes continuation
DawgPositionVector
*
beginning_active_dawgs_
;
DawgPositionVector
very_beginning_active_dawgs_
;
// includes continuation
DawgPositionVector
beginning_active_dawgs_
;
// Set to true if acceptable choice was discovered.
// Note: it would be nice to use this to terminate the search once an
// acceptable choices is found. However we do not do that and once an
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录