Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
55ec0e2a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
55ec0e2a
编写于
1月 19, 2018
作者:
W
whs
提交者:
GitHub
1月 19, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7649 from wanghaoshuang/fix_edit
Change input data type to int64_t
上级
0071b5f7
6e04e580
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
23 addition
and
23 deletion
+23
-23
paddle/operators/edit_distance_op.cc
paddle/operators/edit_distance_op.cc
+13
-13
paddle/operators/edit_distance_op.cu
paddle/operators/edit_distance_op.cu
+4
-4
paddle/operators/edit_distance_op.h
paddle/operators/edit_distance_op.h
+2
-2
python/paddle/v2/fluid/tests/test_edit_distance_op.py
python/paddle/v2/fluid/tests/test_edit_distance_op.py
+4
-4
未找到文件。
paddle/operators/edit_distance_op.cc
浏览文件 @
55ec0e2a
...
...
@@ -49,10 +49,10 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
EditDistanceOpMaker
(
OpProto
*
proto
,
OpAttrChecker
*
op_checker
)
:
OpProtoAndCheckerMaker
(
proto
,
op_checker
)
{
AddInput
(
"Hyps"
,
"(2-D LoDTensor<int>, 2nd dim. equal to 1) "
"(2-D LoDTensor<int
64_t
>, 2nd dim. equal to 1) "
"The indices for hypothesis strings."
);
AddInput
(
"Refs"
,
"(2-D LoDTensor<int>, 2nd dim. equal to 1) "
"(2-D LoDTensor<int
64_t
>, 2nd dim. equal to 1) "
"The indices for reference strings."
);
AddAttr
<
bool
>
(
"normalized"
,
"(bool, default false) Indicated whether to normalize "
...
...
@@ -66,22 +66,22 @@ class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
EditDistance operator computes the edit distances between a batch of hypothesis
strings and their references.
Edit distance, also called Levenshtein distance, measures how dissimilar two strings
are by counting the minimum number of operations to transform one string into another.
Here the operations include insertion, deletion, and substitution. For example,
given hypothesis string A = "kitten" and reference B = "sitting", the edit distance
is 3, because transforming A into B requires at least two substitutions and one
Edit distance, also called Levenshtein distance, measures how dissimilar two strings
are by counting the minimum number of operations to transform one string into another.
Here the operations include insertion, deletion, and substitution. For example,
given hypothesis string A = "kitten" and reference B = "sitting", the edit distance
is 3, because transforming A into B requires at least two substitutions and one
insertion:
"kitten" -> "sitten" -> "sittin" -> "sitting"
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total
number denoted by `batch_size`, and the separation is specified by the LoD information.
And the `batch_size` reference strings are arranged in order in the same way in the
Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total
number denoted by `batch_size`, and the separation is specified by the LoD information.
And the `batch_size` reference strings are arranged in order in the same way in the
LoDTensor Input(Refs).
Output(Out) contains the `batch_size` results and each stands for the edit distance
for a pair of strings respectively. If Attr(normalized) is true, the edit distance
Output(Out) contains the `batch_size` results and each stands for the edit distance
for a pair of strings respectively. If Attr(normalized) is true, the edit distance
will be divided by the length of reference string.
)DOC"
);
}
...
...
paddle/operators/edit_distance_op.cu
浏览文件 @
55ec0e2a
...
...
@@ -39,8 +39,8 @@ __global__ void FillFirstColumn(T* dist, const int M, const int N) {
}
template
<
typename
T
>
__global__
void
Levenshtein
(
T
*
dist
,
const
int
*
x1
,
const
int
*
x2
,
const
int
M
,
const
int
N
,
const
int
start
)
{
__global__
void
Levenshtein
(
T
*
dist
,
const
int
64_t
*
x1
,
const
int64_t
*
x2
,
const
int
M
,
const
int
N
,
const
int
start
)
{
int
idx
=
blockDim
.
x
*
blockIdx
.
x
+
threadIdx
.
x
;
int
offset
=
N
;
int
index
=
start
+
idx
*
offset
;
...
...
@@ -113,8 +113,8 @@ class EditDistanceGPUKernel : public framework::OpKernel<T> {
dist_t
.
Resize
({
m
+
1
,
n
+
1
});
dist_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dist
=
dist_t
.
data
<
T
>
();
auto
x1
=
x1_t
->
data
<
int
>
()
+
hyp_lod
[
num
];
auto
x2
=
x2_t
->
data
<
int
>
()
+
ref_lod
[
num
];
auto
x1
=
x1_t
->
data
<
int
64_t
>
()
+
hyp_lod
[
num
];
auto
x2
=
x2_t
->
data
<
int
64_t
>
()
+
ref_lod
[
num
];
FillFirstColumn
<
T
><<<
1
+
m
/
PADDLE_CUDA_NUM_THREADS
,
PADDLE_CUDA_NUM_THREADS
,
0
,
stream
>>>
(
dist
,
m
,
n
);
...
...
paddle/operators/edit_distance_op.h
浏览文件 @
55ec0e2a
...
...
@@ -60,8 +60,8 @@ class EditDistanceKernel : public framework::OpKernel<T> {
dist_t
.
Resize
({
m
+
1
,
n
+
1
});
dist_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
dist
=
dist_t
.
data
<
T
>
();
auto
x1
=
x1_t
->
data
<
int
>
()
+
hyp_lod
[
num
];
auto
x2
=
x2_t
->
data
<
int
>
()
+
ref_lod
[
num
];
auto
x1
=
x1_t
->
data
<
int
64_t
>
()
+
hyp_lod
[
num
];
auto
x2
=
x2_t
->
data
<
int
64_t
>
()
+
ref_lod
[
num
];
for
(
int64_t
i
=
0
;
i
<
m
+
1
;
++
i
)
{
dist
[
i
*
(
n
+
1
)]
=
i
;
}
...
...
python/paddle/v2/fluid/tests/test_edit_distance_op.py
浏览文件 @
55ec0e2a
...
...
@@ -51,8 +51,8 @@ class TestEditDistanceOp(OpTest):
def
setUp
(
self
):
self
.
op_type
=
"edit_distance"
normalized
=
False
x1
=
np
.
array
([[
0
,
12
,
3
,
5
,
8
,
2
]]).
astype
(
"int
32
"
)
x2
=
np
.
array
([[
0
,
12
,
4
,
7
,
8
]]).
astype
(
"int
32
"
)
x1
=
np
.
array
([[
0
,
12
,
3
,
5
,
8
,
2
]]).
astype
(
"int
64
"
)
x2
=
np
.
array
([[
0
,
12
,
4
,
7
,
8
]]).
astype
(
"int
64
"
)
x1
=
np
.
transpose
(
x1
)
x2
=
np
.
transpose
(
x2
)
x1_lod
=
[
0
,
1
,
5
]
...
...
@@ -79,8 +79,8 @@ class TestEditDistanceOpNormalized(OpTest):
def
setUp
(
self
):
self
.
op_type
=
"edit_distance"
normalized
=
True
x1
=
np
.
array
([[
0
,
10
,
3
,
6
,
5
,
8
,
2
]]).
astype
(
"int
32
"
)
x2
=
np
.
array
([[
0
,
10
,
4
,
6
,
7
,
8
]]).
astype
(
"int
32
"
)
x1
=
np
.
array
([[
0
,
10
,
3
,
6
,
5
,
8
,
2
]]).
astype
(
"int
64
"
)
x2
=
np
.
array
([[
0
,
10
,
4
,
6
,
7
,
8
]]).
astype
(
"int
64
"
)
x1
=
np
.
transpose
(
x1
)
x2
=
np
.
transpose
(
x2
)
x1_lod
=
[
0
,
1
,
3
,
6
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录