Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
f83254d6
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f83254d6
编写于
12月 06, 2019
作者:
A
Aurelius84
提交者:
GitHub
12月 06, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry-pick pyramid_hash op test=develop (#20779)(#18525) (#21562)
上级
e228e707
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
819 addition
and
39 deletion
+819
-39
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+8
-5
paddle/fluid/operators/match_matrix_tensor_op.cc
paddle/fluid/operators/match_matrix_tensor_op.cc
+2
-2
paddle/fluid/operators/math/bloomfilter.h
paddle/fluid/operators/math/bloomfilter.h
+179
-0
paddle/fluid/operators/pyramid_hash_op.cc
paddle/fluid/operators/pyramid_hash_op.cc
+453
-0
paddle/fluid/operators/search_compute.h
paddle/fluid/operators/search_compute.h
+16
-32
python/paddle/fluid/contrib/layers/nn.py
python/paddle/fluid/contrib/layers/nn.py
+96
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+4
-0
python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py
python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py
+61
-0
未找到文件。
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
f83254d6
...
...
@@ -48,14 +48,17 @@ if (WITH_DISTRIBUTE)
SET
(
OP_PREFETCH_DEPS
${
OP_PREFETCH_DEPS
}
parameter_prefetch
)
endif
()
SET
(
OP_ONLY_MKL
""
)
if
(
NOT WITH_MKL
)
SET
(
OP_ONLY_MKL
${
OP_ONLY_MKL
}
match_matrix_tensor_op
)
SET
(
OP_ONLY_MKL
${
OP_ONLY_MKL
}
var_conv_2d_op
)
SET
(
OP_MKL_DEPS
""
)
if
(
NOT WITH_MKL OR NOT WITH_AVX
)
SET
(
OP_MKL_DEPS
${
OP_MKL_DEPS
}
match_matrix_tensor_op
)
SET
(
OP_MKL_DEPS
${
OP_MKL_DEPS
}
var_conv_2d_op
)
endif
()
if
(
WITH_COVERAGE OR NOT WITH_AVX OR WIN32
)
SET
(
OP_MKL_DEPS
${
OP_MKL_DEPS
}
pyramid_hash_op
)
endif
()
register_operators
(
EXCLUDES py_func_op warpctc_op dgc_op conv_fusion_op
sync_batch_norm_op multihead_matmul_op
${
OP_
ONLY_MKL
}
DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
sync_batch_norm_op multihead_matmul_op
${
OP_
MKL_DEPS
}
DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
if
(
WITH_GPU
)
# warpctc_op needs cudnn 7 above
...
...
paddle/fluid/operators/match_matrix_tensor_op.cc
浏览文件 @
f83254d6
...
...
@@ -286,8 +286,8 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel<T> {
auto
*
r_data
=
bottom_r_data
+
(
offset_r
[
b
]
+
j
)
*
dim_in
;
auto
*
r_diff
=
bottom_r_diff
+
(
offset_r
[
b
]
+
j
)
*
dim_in
;
if
(
diff
!=
0.0
)
{
sse
_axpy
(
r_data
,
l_trans_diff
,
dim_in
,
diff
);
sse
_axpy
(
l_trans_data
,
r_diff
,
dim_in
,
diff
);
avx
_axpy
(
r_data
,
l_trans_diff
,
dim_in
,
diff
);
avx
_axpy
(
l_trans_data
,
r_diff
,
dim_in
,
diff
);
}
}
}
...
...
paddle/fluid/operators/math/bloomfilter.h
0 → 100644
浏览文件 @
f83254d6
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#define BLOOMFILTER_MAGIC_NUM_NEW 17070416

#include <inttypes.h>
#include <stdlib.h>

#include <stdio.h>
#include <string.h>
#include <unistd.h>

namespace paddle {
namespace operators {
namespace math {

// NOTE(review): every function below is defined (not just declared) in this
// header without `inline`; including it from more than one translation unit
// would presumably produce duplicate symbols -- confirm before reusing it.

// Header of a bloom filter blob. The packing is limited to this struct with
// push/pop so that it no longer leaks into code compiled after the include.
#pragma pack(push, 4)
struct bloomfilter {
  uint64_t magic_num;  // must equal BLOOMFILTER_MAGIC_NUM_NEW
  uint64_t m;          // number of addressable bits (modulus for bit indexes)
  uint64_t k;          // number of hash rounds performed per lookup
  uint64_t count;      // not read by the code in this header
  // First byte of the bit array. bloomfilter_get() indexes bits up to m - 1,
  // so the real storage is expected to extend past the end of the struct.
  unsigned char bit_vector[1];
};
#pragma pack(pop)

int bloomfilter_get(const struct bloomfilter *bloomfilter, const void *key,
                    size_t len);
int bloomfilter_check(struct bloomfilter *filter);

// Tests bit `n` of byte array `v`; bit 0 is the most significant bit of v[0].
#define bit_get(v, n) ((v)[(n) >> 3] & (0x1 << (0x7 - ((n)&0x7))))
#define ROTL64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
#define BIG_CONSTANT(x) (x##LLU)

// 64-bit finalization mix used by murmurhash3_x64_128().
uint64_t fmix64(uint64_t k) {
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;
  return k;
}

// Hashes `len` bytes starting at `key` with `seed` and writes two uint64_t
// values (16 bytes) to `out`.
//
// Only the `len` bytes of the key are read: full 16-byte blocks are loaded
// with memcpy (no alignment requirement on `key`), and the 0..15 trailing
// bytes are first copied into a zero-padded scratch buffer. The previous code
// always loaded 16 bytes at the tail and masked the surplus away, which read
// past the end of the key. On little-endian hosts the zero padding yields the
// same words as that masking, so hash values are unchanged.
void murmurhash3_x64_128(const void *key, const int len, const uint32_t seed,
                         void *out) {
  const uint8_t *data = (const uint8_t *)key;
  const int nblocks = len / 16;
  uint64_t h1 = seed;
  uint64_t h2 = seed;
  int i = 0;
  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body
  uint64_t k1;
  uint64_t k2;
  for (i = 0; i < nblocks; i++) {
    memcpy(&k1, data + i * 16, sizeof(k1));
    memcpy(&k2, data + i * 16 + 8, sizeof(k2));

    k1 *= c1;
    k1 = ROTL64(k1, 31);
    k1 *= c2;
    h1 ^= k1;
    h1 = ROTL64(h1, 27);
    h1 += h2;
    h1 = h1 * 5 + 0x52dce729;

    k2 *= c2;
    k2 = ROTL64(k2, 33);
    k2 *= c1;
    h2 ^= k2;
    h2 = ROTL64(h2, 31);
    h2 += h1;
    h2 = h2 * 5 + 0x38495ab5;
  }

  //----------
  // tail
  const uint8_t *tail = data + nblocks * 16;
  const int flag = len & 15;  // number of trailing bytes, 0..15

  uint8_t padded[16] = {0};
  if (flag) {
    memcpy(padded, tail, flag);
  }
  uint64_t tail0_64 = 0;  // trailing bytes 0..7
  uint64_t tail_64 = 0;   // trailing bytes 8..14
  memcpy(&tail0_64, padded, sizeof(tail0_64));
  memcpy(&tail_64, padded + 8, sizeof(tail_64));

  uint64_t nk1 = 0;
  uint64_t nk2 = 0;
  if (flag > 8) {
    nk2 ^= tail_64;
    nk2 *= c2;
    nk2 = ROTL64(nk2, 33);
    nk2 *= c1;
    h2 ^= nk2;
  }
  if (flag) {
    nk1 ^= tail0_64;
    nk1 *= c1;
    nk1 = ROTL64(nk1, 31);
    nk1 *= c2;
    h1 ^= nk1;
  }

  //----------
  // finalization
  h1 ^= len;
  h2 ^= len;
  h1 += h2;
  h2 += h1;
  h1 = fmix64(h1);
  h2 = fmix64(h2);
  h1 += h2;
  h2 += h1;

  const uint64_t digest[2] = {h1, h2};
  memcpy(out, digest, sizeof(digest));
}

// Returns 1 when the blob starts with the expected magic number; otherwise
// reports the value found on stderr and returns 0.
int bloomfilter_check(struct bloomfilter *filter) {
  if (filter->magic_num == BLOOMFILTER_MAGIC_NUM_NEW) {
    return 1;
  } else {
    // magic_num is uint64_t, so it needs PRIu64 rather than %ld.
    fprintf(stderr, "error magic_num %" PRIu64 "\n", filter->magic_num);
    return 0;
  }
}

// Looks `key` (`len` bytes) up in the filter. Returns 1 when every probed bit
// is set and 0 as soon as one probed bit is clear. Each of the k rounds hashes
// the key with the round index as seed and probes two bits.
int bloomfilter_get(const struct bloomfilter *bloomfilter, const void *key,
                    size_t len) {
  // A filter without bits cannot contain anything; this also avoids the
  // modulo-by-zero below.
  if (bloomfilter->m == 0) {
    return 0;
  }

  uint32_t i;
  uint64_t result[2];
  for (i = 0; i < bloomfilter->k; i++) {
    murmurhash3_x64_128(key, static_cast<int>(len), i, result);
    result[0] %= bloomfilter->m;
    result[1] %= bloomfilter->m;
    if (!bit_get(bloomfilter->bit_vector, result[0])) {
      return 0;
    }
    if (!bit_get(bloomfilter->bit_vector, result[1])) {
      return 0;
    }
  }
  return 1;
}

}  // namespace math
}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/pyramid_hash_op.cc
0 → 100644
浏览文件 @
f83254d6
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <xxhash.h>
#include <algorithm>
#include <cmath>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/search_compute.h"
extern
"C"
{
#include "math/bloomfilter.h"
}
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
LoD
=
framework
::
LoD
;
// Declares the inputs, outputs and attributes of the `pyramid_hash` operator.
class PyramidHashOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "X (Tensor, MUST be Tensor<!!!_int32_!!!>) Input variable which "
             "should contain lod information.");
    AddInput("W", "W (Tensor)");
    // PyramidHashOP::InferShape only demands the filters when the matching
    // *_list_len attribute is positive, and the kernel only reads them in that
    // case, so they are optional inputs rather than required ones.
    AddInput("WhiteList", "WhiteList (Tensor)").AsDispensable();
    AddInput("BlackList", "BlackList (Tensor)").AsDispensable();

    // Width of one output row.
    AddAttr<int>("num_emb", "num_emb").SetDefault(0).EqualGreaterThan(0);
    // Modulus applied to the hash when choosing a position inside W.
    AddAttr<int>("space_len", "space_len").SetDefault(0).EqualGreaterThan(0);
    AddAttr<int>("pyramid_layer", "pyramid_layer (must be >= 2)")
        .SetDefault(2)
        .EqualGreaterThan(2);
    // Number of consecutive W values copied per hash lookup.
    AddAttr<int>("rand_len", "rand_len").SetDefault(0).EqualGreaterThan(0);
    AddAttr<float>("drop_out_percent", "drop_out_percent")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddAttr<int>("is_training", "is_training")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddAttr<bool>("use_filter", "use_filter").SetDefault(true);
    AddAttr<int>("white_list_len", "white_list_len")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddAttr<int>("black_list_len", "black_list_len")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddAttr<int>("seed", "seed").SetDefault(0).EqualGreaterThan(0);
    AddAttr<float>("lr", "learning rate").SetDefault(0.0).EqualGreaterThan(0.0);

    AddOutput("Out", "Out (Tensor, default Tensor<float>) Output variable");
    AddOutput("DropPos", "Out (Tensor, Tensor<int>) Output variable");
    AddOutput("X_Temp_Out", "Out (Tensor, Tensor<int>) Output variable")
        .AsIntermediate();
    AddComment(R"DOC(
      PyramidHash
      NOTE: only support 'float32' data type now.
    )DOC");
  }
};
// Forward operator of `pyramid_hash`: validates inputs and attributes and
// declares the compile-time output shapes.
class PyramidHashOP : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that X and W are rank-2, that W has space_len + rand_len rows and
  // one column, that num_emb is a multiple of rand_len, and that each filter
  // is present with shape [*_list_len, 1] when its length attribute is
  // positive. Output dims are only set at compile time.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "X(Input) should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, "W(Input) should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
                      "Out(Output) should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasOutput("DropPos"), true,
                      "DropPos(TMP Output) should not be null.");

    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE_EQ(x_dims.size(), 2, "The rank of X(Input) should be 2.");

    auto w_dims = ctx->GetInputDim("W");
    PADDLE_ENFORCE_EQ(w_dims.size(), 2, "W should be 2-D tensor");

    int space_len = ctx->Attrs().Get<int>("space_len");
    int rand_len = ctx->Attrs().Get<int>("rand_len");

    PADDLE_ENFORCE_EQ(w_dims[0], space_len + rand_len,
                      "w_dims[0] should be equal to (space_len + rand_len)");
    PADDLE_ENFORCE_EQ(w_dims[1], 1, "w_dims[1] should be equal to 1");

    int num_emb = ctx->Attrs().Get<int>("num_emb");
    // rand_len defaults to 0 and is used as a divisor right below, so reject
    // it with a readable error instead of dividing by zero.
    PADDLE_ENFORCE_GT(rand_len, 0, "rand_len should be greater than 0");
    PADDLE_ENFORCE_EQ(num_emb % rand_len, 0,
                      "random length should mod embedding size");

    int white_list_len = ctx->Attrs().Get<int>("white_list_len");
    if (white_list_len > 0) {
      PADDLE_ENFORCE_EQ(
          ctx->HasInput("WhiteList"), true,
          "WhiteList(Input) should not be null when white_list_len > 0");
      auto wl_dims = ctx->GetInputDim("WhiteList");
      PADDLE_ENFORCE_EQ(wl_dims.size(), 2, "WhiteList should be 2-D tensor");
      PADDLE_ENFORCE_EQ(wl_dims[0], white_list_len,
                        "wl_dims[0] should be equal to white_list_len");
      PADDLE_ENFORCE_EQ(wl_dims[1], 1, "wl_dims[1] should be equal to 1");
    }

    int black_list_len = ctx->Attrs().Get<int>("black_list_len");
    if (black_list_len > 0) {
      PADDLE_ENFORCE_EQ(
          ctx->HasInput("BlackList"), true,
          "BlackList(Input) should not be null when black_list_len > 0");
      auto bl_dims = ctx->GetInputDim("BlackList");
      PADDLE_ENFORCE_EQ(bl_dims.size(), 2, "BlackList should be 2-D tensor");
      PADDLE_ENFORCE_EQ(bl_dims[0], black_list_len,
                        "bl_dims[0] should be equal to black_list_len");
      PADDLE_ENFORCE_EQ(bl_dims[1], 1, "bl_dims[1] should be equal to 1");
    }

    if (ctx->IsRuntime()) {
      // something to do in runtime.
    } else {
      // compile time: the number of output rows is data dependent, hence -1.
      ctx->SetOutputDim("Out", framework::make_ddim({-1, num_emb}));
      ctx->SetOutputDim("X_Temp_Out", x_dims);
      ctx->ShareLoD("X", /*->*/ "Out");
    }
  }

 protected:
  // The kernel is selected by the data type of W.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "W"), ctx.GetPlace());
  }
};
// CPU forward kernel of `pyramid_hash`.
//
// For every sequence in X it enumerates the windows of 2 .. pyramid_layer
// consecutive items, decides per window whether it is kept (filters and, when
// training, random drop), records that decision in DropPos, and writes one
// num_emb-wide row of Out per kept window by copying slices of W chosen by
// XXH32 hashes of the window. A sequence with no kept window contributes a
// single all-zero row.
template <typename DeviceContext, typename T>
class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
 public:
  // Returns true when the window passes both filters: it is present in the
  // white filter (if one is given) and absent from the black filter (if one
  // is given). `len` is the window length in elements of T.
  bool should_use_term(math::bloomfilter* _filter,
                       math::bloomfilter* _black_filter, const T* word_repr,
                       int len) const {
    return (!_filter ||
            1 == math::bloomfilter_get(_filter, word_repr,
                                       len * sizeof(T))) &&
           (!_black_filter ||
            0 == math::bloomfilter_get(_black_filter, word_repr,
                                       len * sizeof(T)));
  }

  // Fills top_pos[0, _num_emb) in chunks of _rand_len values. The chunk that
  // starts at j is copied from weights at XXH32(window, seed = j) %
  // _space_len; positions are computed two chunks ahead so the next source
  // can be prefetched.
  void hash_embedding_ff(const T* hash_id, int len, T* top_pos,
                         const T* weights, int _num_emb, int _rand_len,
                         int _space_len) const {
    unsigned int pos1 = XXH32(hash_id, len * sizeof(T), 0) % _space_len;
    unsigned int pos2 = XXH32(hash_id, len * sizeof(T), _rand_len) % _space_len;

    for (int j = 0; j != _num_emb; j += _rand_len) {
      if (j + _rand_len < _num_emb) {
        __builtin_prefetch(weights + pos2);
        __builtin_prefetch(top_pos + j + _rand_len);
      }

      unsigned int pos3 =
          XXH32(hash_id, len * sizeof(T), j + 2 * _rand_len) % _space_len;
      // memcpy takes a const source, so no cast is needed; the former
      // const_cast<float*> only compiled for T = float.
      memcpy(top_pos + j, weights + pos1, _rand_len * sizeof(T));
      pos1 = pos2;
      pos2 = pos3;
    }
  }

  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* bottom = ctx.Input<LoDTensor>("X");
    auto* _blobs_0 = ctx.Input<Tensor>("W");
    auto* _blobs_1 = ctx.Input<Tensor>("WhiteList");
    auto* _blobs_2 = ctx.Input<Tensor>("BlackList");
    auto* top = ctx.Output<LoDTensor>("Out");
    auto* drop_pos = ctx.Output<LoDTensor>("DropPos");

    int _num_emb = ctx.Attr<int>("num_emb");
    bool use_filter = ctx.Attr<bool>("use_filter");
    int white_list_len = ctx.Attr<int>("white_list_len");
    int black_list_len = ctx.Attr<int>("black_list_len");
    int _pyramid_layer = ctx.Attr<int>("pyramid_layer");
    int _is_training = ctx.Attr<int>("is_training");
    int seed = ctx.Attr<int>("seed");
    unsigned int _seed = static_cast<unsigned int>(seed);
    int _rand_len = ctx.Attr<int>("rand_len");
    int _space_len = ctx.Attr<int>("space_len");
    float _drop_out_percent = ctx.Attr<float>("drop_out_percent");

    // The level-0 LoD is indexed below (offset[i + 1], top_offset[0]), so it
    // has to exist and hold at least one entry.
    PADDLE_ENFORCE_EQ(bottom->lod().empty() || bottom->lod()[0].empty(), false,
                      "X(Input) should contain non-empty lod information.");
    const auto& offset = bottom->lod()[0];
    const auto* bottom_data_ori = bottom->data<int32_t>();

    // X_Temp_Out keeps the ids converted to T; the windows are hashed on this
    // converted copy and the grad kernel reads it again.
    auto* buff = ctx.Output<LoDTensor>("X_Temp_Out");
    buff->Resize(framework::make_ddim({bottom->dims()[0], bottom->dims()[1]}));
    T* bottom_data = buff->mutable_data<T>(ctx.GetPlace());
    for (int i = 0; i < bottom->dims()[0]; i++) {
      bottom_data[i] = bottom_data_ori[i];
    }

    const auto* weights = _blobs_0->data<T>();

    std::vector<size_t> top_offset;
    top_offset.resize(offset.size());
    top_offset[0] = 0;

    math::bloomfilter* _filter = nullptr;
    math::bloomfilter* _black_filter = nullptr;
    if (use_filter) {
      if (white_list_len != 0) {
        // The tensor payload is interpreted as a serialized bloom filter.
        _filter = reinterpret_cast<math::bloomfilter*>(
            const_cast<T*>(_blobs_1->data<T>()));
        PADDLE_ENFORCE_EQ(math::bloomfilter_check(_filter), 1,
                          "white filter not load");
      }
      if (black_list_len != 0) {
        _black_filter = reinterpret_cast<math::bloomfilter*>(
            const_cast<T*>(_blobs_2->data<T>()));
        PADDLE_ENFORCE_EQ(math::bloomfilter_check(_black_filter), 1,
                          "black filter not load");
      }
    }

    drop_pos->Resize(framework::make_ddim(
        {bottom->dims()[0] * bottom->dims()[1] * _pyramid_layer, 1}));
    std::vector<size_t> drop_pos_offset;
    drop_pos_offset.resize(offset.size());
    drop_pos_offset[0] = 0;
    int* iter = drop_pos->mutable_data<int>(ctx.GetPlace());
    int* iter_end = iter;

    // Pass 1: write one keep (1) / drop (0) flag per window and count the
    // kept windows of every sequence.
    for (size_t i = 0; i < top_offset.size() - 1; ++i) {
      int w = offset[i + 1] - offset[i];
      int nsentense_with_pyramid = 0;
      if (w < 2) {
        nsentense_with_pyramid = 0;
      } else {
        for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w; ++ilayer) {
          for (int l = 0; l < w - ilayer; ++l) {
            if (should_use_term(_filter, _black_filter,
                                bottom_data + offset[i] + l, ilayer + 1)) {
              if (_is_training != 0) {
                unsigned int rand_val = rand_r(&_seed);
                T rate = static_cast<T>(rand_val) / (RAND_MAX);
                *(iter_end++) = (rate < _drop_out_percent ? 0 : 1);
              } else {
                *(iter_end++) = 1;
              }
            } else {
              *(iter_end++) = 0;
            }
          }
        }
        nsentense_with_pyramid = std::count(iter, iter_end, 1);
        iter = iter_end;
      }
      drop_pos_offset[i + 1] = drop_pos_offset[i] + nsentense_with_pyramid;
      // A sequence without kept windows still gets one (zero) output row.
      top_offset[i + 1] =
          top_offset[i] +
          (nsentense_with_pyramid == 0 ? 1 : nsentense_with_pyramid);
    }

    int top_l = top_offset[top_offset.size() - 1];

    framework::LoD top_lod;
    top_lod.push_back(top_offset);
    top->set_lod(top_lod);
    top->Resize(framework::make_ddim({top_l, _num_emb}));
    auto* top_data = top->mutable_data<T>(ctx.GetPlace());

    framework::LoD drop_pos_lod;
    drop_pos_lod.push_back(drop_pos_offset);
    drop_pos->set_lod(drop_pos_lod);

    // Pass 2: walk the flags again in the same order and emit the rows.
    iter = drop_pos->mutable_data<int>(ctx.GetPlace());
    int top_counter = 0;
    for (size_t i = 0; i < offset.size() - 1; ++i) {
      int w_drop = drop_pos_offset[i + 1] - drop_pos_offset[i];
      int w = offset[i + 1] - offset[i];
      if (w_drop == 0) {
        if (w >= 2) {
          // Skip this sequence's flags (all of them are 0).
          for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w;
               ++ilayer) {
            for (int l = 0; l < w - ilayer; ++l) {
              iter++;
            }
          }
        }
        auto* top_pos = top_data + top_counter++ * _num_emb;
        memset(top_pos, 0, _num_emb * sizeof(T));
        continue;
      }
      if (w >= 2) {
        for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w; ++ilayer) {
          for (int l = 0; l < w - ilayer; ++l) {
            if (*(iter++) == 0) {
              // do nothing
            } else {
              auto* top_pos = top_data + top_counter++ * _num_emb;
              hash_embedding_ff(bottom_data + offset[i] + l, ilayer + 1,
                                top_pos, weights, _num_emb, _rand_len,
                                _space_len);
            }
          }
        }
      }
    }

    // Both passes must have consumed the same number of flags. Report a
    // mismatch through the framework instead of terminating the process.
    PADDLE_ENFORCE_EQ(iter == iter_end, true,
                      "The number of consumed DropPos flags does not match "
                      "the number of produced flags.");

    if (_is_training == 0) {
      // NOTE(review): avx_axpy_noadd (search_compute.h) appears to compute
      // y = alpha * x, i.e. the whole output is scaled by drop_out_percent
      // when not training -- confirm this is the intended factor.
      avx_axpy_noadd(top_data, top_data, top->dims()[0] * top->dims()[1],
                     _drop_out_percent);
    }
  }
};
// Gradient operator of `pyramid_hash`. It only verifies that everything the
// grad kernel reads has been wired up; no output shape is inferred here.
class PyramidHashOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    // Forward inputs.
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, "Input(W) should not be null.");

    // Forward outputs that are fed back in.
    PADDLE_ENFORCE_EQ(ctx->HasInput("DropPos"), true,
                      "Input(DropPos) should not be null.");
    PADDLE_ENFORCE_EQ(ctx->HasInput("X_Temp_Out"), true,
                      "Input(X_Temp_Out) should not be null.");

    // Upstream gradient.
    PADDLE_ENFORCE_EQ(
        ctx->HasInput(framework::GradVarName("Out")), true,
        "Input(Out@GRAD) of PyramidHashGradOp should not be null.");
  }

 protected:
  // As in the forward operator, the kernel type follows the data type of W.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto w_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "W");
    return framework::OpKernelType(w_dtype, ctx.GetPlace());
  }
};
// Builds the description of `pyramid_hash_grad` from the forward op: it
// forwards X and W, the forward outputs DropPos and X_Temp_Out, the gradient
// of Out, and a copy of all forward attributes.
class PyramidHashGradOpMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    // Owned by a unique_ptr from the start so the OpDesc is released if any
    // of the calls below throws (it used to be a raw `new` wrapped only at
    // the return statement).
    std::unique_ptr<framework::OpDesc> op_desc_ptr(new framework::OpDesc());
    op_desc_ptr->SetType("pyramid_hash_grad");
    op_desc_ptr->SetInput("X", Input("X"));
    op_desc_ptr->SetInput("W", Input("W"));
    op_desc_ptr->SetInput("DropPos", Output("DropPos"));
    op_desc_ptr->SetInput("X_Temp_Out", Output("X_Temp_Out"));

    op_desc_ptr->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    op_desc_ptr->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op_desc_ptr->SetAttrMap(Attrs());
    return op_desc_ptr;
  }
};
// CPU backward kernel of `pyramid_hash`.
//
// It replays the window enumeration of the forward kernel, using the
// keep/drop flags stored in DropPos, and for every kept window adds
// (-lr) * Out@GRAD row into the W slices that the forward pass copied from.
// W is modified in place through a const_cast; nothing is written to X@GRAD.
template <typename DeviceContext, typename T>
class CPUPyramidHashOPGradKernel : public framework::OpKernel<T> {
 public:
  // Applies weights[pos .. pos + _rand_len) += mlr * top_pos[j .. j +
  // _rand_len) for every chunk start j, where pos is XXH32(window, seed = j)
  // % _space_len.
  void hash_embedding_bp(const T* hash_id, int len, const T* top_pos,
                         T* weights, T mlr, int _num_emb, int _rand_len,
                         int _space_len) const {
    int j = 0;
    while (j != _num_emb) {
      unsigned int pos = XXH32(hash_id, len * sizeof(T), j) % _space_len;
      avx_axpy(top_pos + j, weights + pos, _rand_len, mlr);
      j += _rand_len;
    }
  }

  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* bottom = ctx.Input<LoDTensor>("X");
    auto* _blobs = ctx.Input<Tensor>("W");
    auto* drop_pos = ctx.Input<LoDTensor>("DropPos");
    auto* top = ctx.Input<LoDTensor>(framework::GradVarName("Out"));

    int _num_emb = ctx.Attr<int>("num_emb");
    float _lr = ctx.Attr<float>("lr");
    int _rand_len = ctx.Attr<int>("rand_len");
    int _space_len = ctx.Attr<int>("space_len");
    int _pyramid_layer = ctx.Attr<int>("pyramid_layer");

    // Ids as converted to T by the forward kernel.
    auto* buff = ctx.Input<LoDTensor>("X_Temp_Out");
    auto* bottom_data = buff->data<T>();

    // Nothing to update when every sequence holds exactly one item and all
    // of those items equal -1.
    int _slot_len = bottom->dims()[0];
    const bool one_item_per_sequence =
        static_cast<size_t>(_slot_len) == bottom->lod()[0].size() - 1;
    if (one_item_per_sequence &&
        std::count(bottom_data, bottom_data + _slot_len, -1) == _slot_len) {
      return;
    }

    auto& offset = bottom->lod()[0];
    auto& drop_pos_offset = drop_pos->lod()[0];

    const auto* top_diff = top->data<T>();
    T* weights = const_cast<T*>(_blobs->data<T>());
    T mlr = -1.0 * _lr;

    const int* iter = drop_pos->data<int>();
    int top_counter = 0;  // index of the next Out@GRAD row
    for (size_t i = 0; i < offset.size() - 1; ++i) {
      int w = offset[i + 1] - offset[i];
      int w_drop = drop_pos_offset[i + 1] - drop_pos_offset[i];

      // A sequence without kept windows produced one zero row; step over it.
      if (w_drop == 0) {
        top_counter++;
      }

      // Sequences shorter than two items have no windows and no flags.
      if (w <= 1) {
        continue;
      }

      for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w; ++ilayer) {
        for (int l = 0; l < w - ilayer; ++l) {
          const int kept = *iter;
          ++iter;
          if (kept == 0) {
            continue;
          }
          const T* top_pos = top_diff + top_counter * _num_emb;
          ++top_counter;
          hash_embedding_bp((const T*)(bottom_data + offset[i] + l),
                            ilayer + 1, top_pos, weights, mlr, _num_emb,
                            _rand_len, _space_len);
        }
      }
    }
  }
};
}
// namespace operators
}
// namespace paddle
// Short aliases used by the registration macros below.
namespace ops = paddle::operators;
namespace plt = paddle::platform;
namespace frm = paddle::framework;

// Forward operator together with its proto maker and grad-desc maker.
REGISTER_OPERATOR(pyramid_hash,
                  ops::PyramidHashOP,
                  ops::PyramidHashOpMaker,
                  ops::PyramidHashGradOpMaker);
// Gradient operator.
REGISTER_OPERATOR(pyramid_hash_grad, ops::PyramidHashOpGrad);

// Only float CPU kernels are registered.
REGISTER_OP_CPU_KERNEL(
    pyramid_hash,
    ops::CPUPyramidHashOPKernel<plt::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    pyramid_hash_grad,
    ops::CPUPyramidHashOPGradKernel<plt::CPUDeviceContext, float>);
paddle/fluid/operators/search_compute.h
浏览文件 @
f83254d6
...
...
@@ -21,7 +21,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/dynload/mklml.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -73,22 +72,10 @@ void call_gemm_batched(const framework::ExecutionContext& ctx,
}
}
#ifndef TYPE_USE_FLOAT
#define TYPE_USE_FLOAT
#endif
#ifndef USE_SSE
#define USE_SSE
#endif
#if defined(TYPE_USE_FLOAT)
#define __m256x __m256
#define __m128x __m128
static
const
unsigned
int
AVX_STEP_SIZE
=
8
;
static
const
unsigned
int
SSE_STEP_SIZE
=
4
;
static
const
unsigned
int
AVX_CUT_LEN_MASK
=
7U
;
static
const
unsigned
int
SSE_CUT_LEN_MASK
=
3U
;
#define _mm256_mul_px _mm256_mul_ps
#define _mm256_add_px _mm256_add_ps
...
...
@@ -96,20 +83,11 @@ static const unsigned int SSE_CUT_LEN_MASK = 3U;
#define _mm256_store_px _mm256_storeu_ps
#define _mm256_broadcast_sx _mm256_broadcast_ss
#define _mm_add_px _mm_add_ps
#define _mm_mul_px _mm_mul_ps
#define _mm_load_px _mm_loadu_ps
#define _mm_store_px _mm_storeu_ps
#define _mm_load1_px _mm_load1_ps
#endif
template
<
typename
T
>
inline
void
sse
_axpy
(
const
T
*
x
,
T
*
y
,
size_t
len
,
const
T
alpha
)
{
inline
void
avx
_axpy
(
const
T
*
x
,
T
*
y
,
size_t
len
,
const
T
alpha
)
{
unsigned
int
jjj
,
lll
;
jjj
=
lll
=
0
;
#if defined(USE_AVX)
lll
=
len
&
~
AVX_CUT_LEN_MASK
;
__m256x
mm_alpha
=
_mm256_broadcast_sx
(
&
alpha
);
for
(
jjj
=
0
;
jjj
<
lll
;
jjj
+=
AVX_STEP_SIZE
)
{
...
...
@@ -119,18 +97,24 @@ inline void sse_axpy(const T* x, T* y, size_t len, const T alpha) {
_mm256_mul_px
(
mm_alpha
,
_mm256_load_px
(
x
+
jjj
))));
}
#elif defined(USE_SSE)
lll
=
len
&
~
SSE_CUT_LEN_MASK
;
__m128x
mm_alpha
=
_mm_load1_px
(
&
alpha
);
for
(
jjj
=
0
;
jjj
<
lll
;
jjj
+=
SSE_STEP_SIZE
)
{
_mm_store_px
(
y
+
jjj
,
_mm_add_px
(
_mm_load_px
(
y
+
jjj
),
_mm_mul_px
(
mm_alpha
,
_mm_load_px
(
x
+
jjj
))));
for
(;
jjj
<
len
;
jjj
++
)
{
y
[
jjj
]
+=
alpha
*
x
[
jjj
];
}
}
template
<
typename
T
>
inline
void
avx_axpy_noadd
(
const
T
*
x
,
T
*
y
,
size_t
len
,
const
T
alpha
)
{
unsigned
int
jjj
,
lll
;
jjj
=
lll
=
0
;
lll
=
len
&
~
AVX_CUT_LEN_MASK
;
__m256x
mm_alpha
=
_mm256_broadcast_sx
(
&
alpha
);
for
(
jjj
=
0
;
jjj
<
lll
;
jjj
+=
AVX_STEP_SIZE
)
{
_mm256_store_px
(
y
+
jjj
,
_mm256_mul_px
(
mm_alpha
,
_mm256_load_px
(
x
+
jjj
)));
}
#endif
for
(;
jjj
<
len
;
jjj
++
)
{
y
[
jjj
]
+
=
alpha
*
x
[
jjj
];
y
[
jjj
]
=
alpha
*
x
[
jjj
];
}
}
...
...
python/paddle/fluid/contrib/layers/nn.py
浏览文件 @
f83254d6
...
...
@@ -31,6 +31,7 @@ __all__ = [
'match_matrix_tensor'
,
'tree_conv'
,
'multiclass_nms2'
,
'search_pyramid_hash'
,
]
...
...
@@ -563,3 +564,98 @@ def multiclass_nms2(bboxes,
if
return_index
:
return
output
,
index
return
output
def search_pyramid_hash(input,
                        num_emb,
                        space_len,
                        pyramid_layer,
                        rand_len,
                        drop_out_percent,
                        is_training,
                        use_filter,
                        white_list_len,
                        black_list_len,
                        seed,
                        lr,
                        param_attr=None,
                        param_attr_wl=None,
                        param_attr_bl=None,
                        name=None,
                        dtype='float32'):
    """
    **Pyramid hash embedding**

    Appends a ``pyramid_hash`` op to the current program and returns its
    ``Out`` variable.

    Args:
        input (Variable): LoDTensor<int32> Variable holding the ids.
        num_emb (int): The embedding size of the output.
        space_len (int): The length of the pyramid hash embedding space.
        pyramid_layer (int): The number of pyramid layers. The op requires a
            value of at least 2.
        rand_len (int): The minimum length of a pyramid hash cell.
        drop_out_percent (float): The probability of dropping an input token
            at random. It should be in [0., 1.].
        is_training (bool): Whether the op runs in the training or the
            testing phase.
        use_filter (bool): If True, the white filter and the black filter
            should be given by :attr:`param_attr_wl` and :attr:`param_attr_bl` .
        white_list_len (int): If :math:`white_list_len>0` , a white filter of
            shape [white_list_len, 1] should be provided by param_attr_wl.
        black_list_len (int): If :math:`black_list_len>0` , a black filter of
            shape [black_list_len, 1] should be provided by param_attr_bl.
        seed (int): The random seed.
        lr (float): The learning rate of the weight of shape
            [space_len+rand_len, 1] created from :attr:`param_attr` .
        param_attr (ParamAttr): Property of the weight parameter. Default:
            None, which means the default weight parameter property is used.
            See :ref:`api_fluid_ParamAttr` for details.
        param_attr_wl (ParamAttr): Property of the white filter parameter.
        param_attr_bl (ParamAttr): Property of the black filter parameter.
        name (str, optional): The default value is None. Normally there is no
            need for the user to set this property. For more information,
            please refer to :ref:`api_guide_Name` .
        dtype (str): The data type of the output variable, float32.

    Returns:
        Variable: LoDTensor of the pyramid hash embedding.
    """
    # Must stay the first statement: locals() is expected to contain only the
    # call arguments at this point.
    helper = LayerHelper('search_pyramid_hash', **locals())

    def _create_frozen_parameter(attr, num_rows):
        # Column parameter of shape [num_rows, 1] that receives no gradient.
        param = helper.create_parameter(
            attr=attr, shape=[num_rows, 1], dtype=dtype, is_bias=False)
        param.stop_gradient = True
        return param

    input_vars = {
        'X': input,
        'W': _create_frozen_parameter(param_attr, space_len + rand_len),
    }
    if white_list_len > 0:
        input_vars['WhiteList'] = _create_frozen_parameter(param_attr_wl,
                                                           white_list_len)

    # NOTE(review): unlike the white list above, this branch is also taken
    # when black_list_len == 0 and then creates a [0, 1] parameter -- confirm
    # whether `>=` is intentional before changing it.
    if black_list_len >= 0:
        input_vars['BlackList'] = _create_frozen_parameter(param_attr_bl,
                                                           black_list_len)

    res = helper.create_variable_for_type_inference(dtype)
    drop_pos = helper.create_variable_for_type_inference(dtype)
    x_temp_out = helper.create_variable_for_type_inference(dtype)

    output_vars = {"Out": res, "X_Temp_Out": x_temp_out, 'DropPos': drop_pos}
    op_attrs = {
        'num_emb': num_emb,
        'space_len': space_len,
        'pyramid_layer': pyramid_layer,
        'rand_len': rand_len,
        'drop_out_percent': drop_out_percent,
        'is_training': is_training,
        'use_filter': use_filter,
        'white_list_len': white_list_len,
        'black_list_len': black_list_len,
        'seed': seed,
        'lr': lr,
    }
    helper.append_op(
        type='pyramid_hash',
        inputs=input_vars,
        outputs=output_vars,
        attrs=op_attrs)

    return res
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
f83254d6
...
...
@@ -74,6 +74,10 @@ if(NOT WITH_MKL)
list
(
REMOVE_ITEM TEST_OPS test_var_conv_2d
)
endif
(
NOT WITH_MKL
)
if
(
WITH_COVERAGE OR NOT WITH_AVX OR WIN32
)
list
(
REMOVE_ITEM TEST_OPS test_pyramid_hash_op
)
endif
()
if
(
WITH_GPU OR NOT WITH_MKLML
)
# matmul with multiple heads need MKL support
LIST
(
REMOVE_ITEM TEST_OPS test_matmul_op_with_head
)
...
...
python/paddle/fluid/tests/unittests/test_pyramid_hash_op.py
0 → 100644
浏览文件 @
f83254d6
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
class TestPyramidHashOpApi(unittest.TestCase):
    """Smoke test: build a program with search_pyramid_hash and run it once on
    CPU with random int32 ids. No value of the result is checked."""

    def test_api(self):
        num_voc = 128
        embed_dim = 64
        x_shape = [16, 10]
        x_lod = [[3, 5, 2, 6]]  # sequence lengths

        x = fluid.data(name='x', shape=x_shape, dtype='int32', lod_level=1)

        emb_attr = fluid.ParamAttr(
            name="PyramidHash_emb_0",
            learning_rate=0, )
        white_filter_attr = fluid.ParamAttr(
            name="Filter",
            learning_rate=0, )
        hash_embd = fluid.contrib.search_pyramid_hash(
            input=x,
            num_emb=embed_dim,
            space_len=num_voc * embed_dim,
            pyramid_layer=4,
            rand_len=16,
            drop_out_percent=0.5,
            is_training=True,
            use_filter=False,
            white_list_len=6400,
            black_list_len=2800,
            seed=3,
            lr=0.002,
            param_attr=emb_attr,
            param_attr_wl=white_filter_attr,
            param_attr_bl=None,
            name=None, )

        place = fluid.CPUPlace()
        ids = np.random.randint(0, num_voc, x_shape).astype('int32')
        x_tensor = fluid.create_lod_tensor(ids, x_lod, place)

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        ret = exe.run(feed={'x': x_tensor},
                      fetch_list=[hash_embd],
                      return_numpy=False)


if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录