Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OPTHREE
Paddle
提交
e5f9d3a4
P
Paddle
项目概览
OPTHREE
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
e5f9d3a4
编写于
2月 27, 2019
作者:
T
tensor-tang
提交者:
GitHub
2月 27, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15892 from tensor-tang/jit/sgd
refine sgd op
上级
e6bab55f
8bc63815
变更
18
显示空白变更内容
内联
并排
Showing
18 changed file
with
615 addition
and
191 deletion
+615
-191
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+42
-0
paddle/fluid/operators/jit/gen/CMakeLists.txt
paddle/fluid/operators/jit/gen/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/gen/jitcode.h
paddle/fluid/operators/jit/gen/jitcode.h
+2
-1
paddle/fluid/operators/jit/gen/sgd.cc
paddle/fluid/operators/jit/gen/sgd.cc
+130
-0
paddle/fluid/operators/jit/gen/sgd.h
paddle/fluid/operators/jit/gen/sgd.h
+60
-0
paddle/fluid/operators/jit/helper.cc
paddle/fluid/operators/jit/helper.cc
+1
-0
paddle/fluid/operators/jit/helper.h
paddle/fluid/operators/jit/helper.h
+8
-0
paddle/fluid/operators/jit/kernel_base.h
paddle/fluid/operators/jit/kernel_base.h
+23
-0
paddle/fluid/operators/jit/kernel_key.cc
paddle/fluid/operators/jit/kernel_key.cc
+27
-5
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/more/mkl/mkl.cc
paddle/fluid/operators/jit/more/mkl/mkl.cc
+11
-0
paddle/fluid/operators/jit/more/mkl/mkl.h
paddle/fluid/operators/jit/more/mkl/mkl.h
+28
-0
paddle/fluid/operators/jit/refer/CMakeLists.txt
paddle/fluid/operators/jit/refer/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/refer/refer.cc
paddle/fluid/operators/jit/refer/refer.cc
+2
-0
paddle/fluid/operators/jit/refer/refer.h
paddle/fluid/operators/jit/refer/refer.h
+32
-0
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+187
-150
paddle/fluid/operators/optimizers/sgd_op.h
paddle/fluid/operators/optimizers/sgd_op.h
+35
-30
python/paddle/fluid/tests/unittests/test_sgd_op.py
python/paddle/fluid/tests/unittests/test_sgd_op.py
+24
-5
未找到文件。
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
e5f9d3a4
...
...
@@ -332,6 +332,45 @@ void BenchEmbSeqPoolKernel() {
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchSgdKernel
()
{
const
T
lr
=
0.1
;
auto
UnDuplicatedRandomVec
=
[](
int
n
,
const
int64_t
lower
,
const
int64_t
upper
)
->
std
::
vector
<
int64_t
>
{
PADDLE_ENFORCE_LE
(
static_cast
<
size_t
>
(
upper
-
lower
),
n
-
1
);
PADDLE_ENFORCE_GT
(
n
,
0
);
std
::
vector
<
int64_t
>
all
,
out
;
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
all
.
push_back
(
i
);
}
std
::
random_shuffle
(
all
.
begin
(),
all
.
end
());
out
.
insert
(
out
.
begin
(),
all
.
begin
(),
all
.
begin
()
+
n
);
return
out
;
};
for
(
int
param_h
:
{
1
,
1000
})
{
for
(
int
grad_w
:
{
1
,
2
,
8
,
16
,
30
,
256
})
{
// only benchmark inplace
Tensor
param
;
param
.
Resize
({
param_h
,
grad_w
});
T
*
param_data
=
param
.
mutable_data
<
T
>
(
PlaceType
());
RandomVec
<
T
>
(
param_h
*
grad_w
,
param_data
,
-
2.
f
,
2.
f
);
for
(
int
rows_size
=
1
;
rows_size
<=
std
::
min
(
param_h
,
10
);
++
rows_size
)
{
Tensor
grad
;
grad
.
Resize
({
rows_size
,
grad_w
});
std
::
vector
<
int64_t
>
rows
=
UnDuplicatedRandomVec
(
rows_size
,
0
,
rows_size
-
1
);
RandomVec
<
T
>
(
rows_size
*
grad_w
,
grad
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
grad_data
=
grad
.
data
<
T
>
();
const
int64_t
*
rows_data
=
rows
.
data
();
jit
::
sgd_attr_t
attr
(
param_h
,
grad_w
,
rows_size
,
grad_w
,
rows_size
);
BenchAllImpls
<
KT
,
jit
::
SgdTuples
<
T
>
,
PlaceType
>
(
attr
,
&
lr
,
param_data
,
grad_data
,
rows_data
,
param_data
,
&
attr
);
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchMatMulKernel
()
{
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
...
...
@@ -477,6 +516,9 @@ BENCH_FP32_CPU(kEmbSeqPool) {
BenchEmbSeqPoolKernel
<
jit
::
kEmbSeqPool
,
T
,
CPUPlace
>
();
}
// sgd function
BENCH_FP32_CPU
(
kSgd
)
{
BenchSgdKernel
<
jit
::
kSgd
,
T
,
CPUPlace
>
();
}
// matmul
BENCH_FP32_CPU
(
kMatMul
)
{
BenchMatMulKernel
<
jit
::
kMatMul
,
T
,
CPUPlace
>
();
}
...
...
paddle/fluid/operators/jit/gen/CMakeLists.txt
浏览文件 @
e5f9d3a4
...
...
@@ -32,3 +32,4 @@ USE_JITKERNEL_GEN(kSeqPool)
USE_JITKERNEL_GEN
(
kHMax
)
USE_JITKERNEL_GEN
(
kHSum
)
USE_JITKERNEL_GEN
(
kEmbSeqPool
)
USE_JITKERNEL_GEN
(
kSgd
)
paddle/fluid/operators/jit/gen/jitcode.h
浏览文件 @
e5f9d3a4
...
...
@@ -31,7 +31,8 @@ namespace gen {
// Application Binary Interface
constexpr
Xbyak
::
Operand
::
Code
abi_param1
(
Xbyak
::
Operand
::
RDI
),
abi_param2
(
Xbyak
::
Operand
::
RSI
),
abi_param3
(
Xbyak
::
Operand
::
RDX
),
abi_param4
(
Xbyak
::
Operand
::
RCX
);
abi_param4
(
Xbyak
::
Operand
::
RCX
),
abi_param5
(
Xbyak
::
Operand
::
R8
),
abi_param6
(
Xbyak
::
Operand
::
R9
);
constexpr
Xbyak
::
Operand
::
Code
g_abi_regs
[]
=
{
Xbyak
::
Operand
::
RBX
,
Xbyak
::
Operand
::
RBP
,
Xbyak
::
Operand
::
R12
,
...
...
paddle/fluid/operators/jit/gen/sgd.cc
0 → 100644
浏览文件 @
e5f9d3a4
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/jit/gen/sgd.h"
#include <stddef.h> // offsetof
#include <vector>
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace
paddle
{
namespace
operators
{
namespace
jit
{
namespace
gen
{
void
SgdJitCode
::
genCode
()
{
preCode
();
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
constexpr
int
max_num_regs
=
7
;
const
int
num_block
=
w_
/
block
;
const
int
num_groups
=
num_block
/
max_num_regs
;
const
size_t
block_size
=
sizeof
(
float
)
*
block
;
const
size_t
width_size
=
w_
*
sizeof
(
float
);
std
::
vector
<
int
>
groups
(
num_groups
,
max_num_regs
);
int
rest_num_regs
=
num_block
%
max_num_regs
;
if
(
rest_num_regs
>
0
)
{
groups
.
push_back
(
rest_num_regs
);
}
vbroadcastss
(
ymm_lr
,
ptr
[
param_lr
]);
// protect rdx
mov
(
reg_ptr_grad_i
,
param_grad
);
mov
(
reg_ptr_rows_i
,
param_rows
);
mov
(
reg_rows_size_in_byte
,
qword
[
param_attr
+
offsetof
(
sgd_attr_t
,
selected_rows_size
)]);
mov
(
rax
,
sizeof
(
int64_t
));
mul
(
reg_rows_size_in_byte
);
mov
(
reg_rows_size_in_byte
,
rax
);
add
(
reg_rows_size_in_byte
,
reg_ptr_rows_i
);
Label
l_next_row
;
L
(
l_next_row
);
{
mov
(
reg_row
,
qword
[
reg_ptr_rows_i
]);
mov
(
rax
,
width_size
);
mul
(
reg_row
);
mov
(
reg_row
,
rax
);
mov
(
reg_ptr_param_i
,
param_param
);
mov
(
reg_ptr_out_i
,
param_out
);
add
(
reg_ptr_param_i
,
reg_row
);
add
(
reg_ptr_out_i
,
reg_row
);
size_t
w_offset
=
0
;
for
(
int
num_regs
:
groups
)
{
// load grad
size_t
inner_offfset
=
w_offset
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ymm_t
(
reg_i
),
ptr
[
reg_ptr_grad_i
+
inner_offfset
]);
inner_offfset
+=
block_size
;
}
// load param
inner_offfset
=
w_offset
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ymm_t
(
reg_i
+
num_regs
),
ptr
[
reg_ptr_param_i
+
inner_offfset
]);
inner_offfset
+=
block_size
;
}
// compute out
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmulps
(
ymm_t
(
reg_i
),
ymm_t
(
reg_i
),
ymm_lr
);
vsubps
(
ymm_t
(
reg_i
+
num_regs
),
ymm_t
(
reg_i
+
num_regs
),
ymm_t
(
reg_i
));
}
// save out
inner_offfset
=
w_offset
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ptr
[
reg_ptr_out_i
+
inner_offfset
],
ymm_t
(
reg_i
+
num_regs
));
inner_offfset
+=
block_size
;
}
w_offset
+=
(
block_size
*
num_regs
);
}
add
(
reg_ptr_grad_i
,
width_size
);
add
(
reg_ptr_rows_i
,
sizeof
(
int64_t
));
cmp
(
reg_ptr_rows_i
,
reg_rows_size_in_byte
);
jl
(
l_next_row
,
T_NEAR
);
}
postCode
();
}
class
SgdCreator
:
public
JitCodeCreator
<
sgd_attr_t
>
{
public:
bool
UseMe
(
const
sgd_attr_t
&
attr
)
const
override
{
return
platform
::
MayIUse
(
platform
::
avx
)
&&
attr
.
grad_width
%
YMM_FLOAT_BLOCK
==
0
;
}
size_t
CodeSize
(
const
sgd_attr_t
&
attr
)
const
override
{
return
96
+
(
attr
.
grad_width
/
YMM_FLOAT_BLOCK
)
*
32
*
8
;
}
std
::
unique_ptr
<
GenBase
>
CreateJitCode
(
const
sgd_attr_t
&
attr
)
const
override
{
PADDLE_ENFORCE_EQ
(
attr
.
param_width
,
attr
.
grad_width
);
PADDLE_ENFORCE_LE
(
attr
.
selected_rows_size
,
attr
.
grad_height
);
PADDLE_ENFORCE_GE
(
attr
.
selected_rows_size
,
0
);
return
make_unique
<
SgdJitCode
>
(
attr
,
CodeSize
(
attr
));
}
};
}
// namespace gen
}
// namespace jit
}
// namespace operators
}
// namespace paddle
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
kSgd
,
gen
::
SgdCreator
);
paddle/fluid/operators/jit/gen/sgd.h
0 → 100644
浏览文件 @
e5f9d3a4
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
namespace
jit
{
namespace
gen
{
class
SgdJitCode
:
public
JitCode
{
public:
explicit
SgdJitCode
(
const
sgd_attr_t
&
attr
,
size_t
code_size
=
256
*
1024
,
void
*
code_ptr
=
nullptr
)
:
JitCode
(
code_size
,
code_ptr
),
w_
(
attr
.
grad_width
)
{
this
->
genCode
();
}
DECLARE_JIT_CODE
(
SgdJitCode
);
void
genCode
()
override
;
private:
int
w_
;
reg64_t
param_lr
{
abi_param1
};
reg64_t
param_param
{
abi_param2
};
reg64_t
param_grad
{
abi_param3
};
reg64_t
param_rows
{
abi_param4
};
reg64_t
param_out
{
abi_param5
};
reg64_t
param_attr
{
abi_param6
};
ymm_t
ymm_lr
=
ymm_t
(
15
);
reg64_t
reg_ptr_grad_i
{
r10
};
reg64_t
reg_ptr_rows_i
{
r11
};
reg64_t
reg_rows_size_in_byte
{
r12
};
reg64_t
reg_row
{
r13
};
reg64_t
reg_ptr_param_i
{
r14
};
reg64_t
reg_ptr_out_i
{
r15
};
};
}
// namespace gen
}
// namespace jit
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/jit/helper.cc
浏览文件 @
e5f9d3a4
...
...
@@ -55,6 +55,7 @@ const char* to_string(KernelType kt) {
ONE_CASE
(
kHSum
);
ONE_CASE
(
kSoftmax
);
ONE_CASE
(
kEmbSeqPool
);
ONE_CASE
(
kSgd
);
default:
PADDLE_THROW
(
"Not support type: %d, or forget to add it."
,
kt
);
return
"NOT JITKernel"
;
...
...
paddle/fluid/operators/jit/helper.h
浏览文件 @
e5f9d3a4
...
...
@@ -181,6 +181,14 @@ inline std::ostream& operator<<(std::ostream& os,
return
os
;
}
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
sgd_attr_t
&
attr
)
{
os
<<
"param_height["
<<
attr
.
param_height
<<
"],param_width["
<<
attr
.
param_width
<<
"],grad_height["
<<
attr
.
grad_height
<<
"],grad_width["
<<
attr
.
grad_width
<<
"],selected_rows_size["
<<
attr
.
selected_rows_size
<<
"]"
;
return
os
;
}
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
matmul_attr_t
&
attr
)
{
os
<<
"M["
<<
attr
.
m
<<
"],N["
<<
attr
.
n
<<
"],K["
<<
attr
.
k
<<
"]"
;
return
os
;
...
...
paddle/fluid/operators/jit/kernel_base.h
浏览文件 @
e5f9d3a4
...
...
@@ -46,6 +46,7 @@ typedef enum {
kVMul
,
kVRelu
,
kVScal
,
kSgd
,
kVSigmoid
,
kVSquare
,
kVSub
,
...
...
@@ -173,6 +174,28 @@ struct EmbSeqPoolTuples {
const
emb_seq_pool_attr_t
*
);
};
typedef
struct
sgd_attr_s
{
int64_t
param_height
,
param_width
;
int64_t
grad_height
,
grad_width
;
int64_t
selected_rows_size
;
sgd_attr_s
()
=
default
;
explicit
sgd_attr_s
(
int64_t
param_h
,
int64_t
param_w
,
int64_t
grad_h
,
int64_t
grad_w
,
int64_t
selected_rows_sz
)
:
param_height
(
param_h
),
param_width
(
param_w
),
grad_height
(
grad_h
),
grad_width
(
grad_w
),
selected_rows_size
(
selected_rows_sz
)
{}
}
sgd_attr_t
;
template
<
typename
T
>
struct
SgdTuples
{
typedef
T
data_type
;
typedef
sgd_attr_t
attr_type
;
typedef
void
(
*
func_type
)(
const
T
*
,
const
T
*
,
const
T
*
,
const
int64_t
*
,
T
*
,
const
sgd_attr_t
*
);
};
typedef
struct
matmul_attr_s
{
int
m
,
n
,
k
;
void
*
packed_weight
{
nullptr
};
...
...
paddle/fluid/operators/jit/kernel_key.cc
浏览文件 @
e5f9d3a4
...
...
@@ -13,6 +13,7 @@
* limitations under the License. */
#include "paddle/fluid/operators/jit/kernel_key.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -23,14 +24,30 @@ size_t JitCodeKey<int>(const int& d) {
return
d
;
}
// TODO(TJ): refine and benchmark JitCodeKey generatation
constexpr
int
act_type_shift
=
3
;
// suppot 2^3 act types
static
inline
int
act_type_convert
(
KernelType
type
)
{
if
(
type
==
kVIdentity
)
{
return
0
;
}
else
if
(
type
==
kVExp
)
{
return
1
;
}
else
if
(
type
==
kVRelu
)
{
return
2
;
}
else
if
(
type
==
kVSigmoid
)
{
return
3
;
}
else
if
(
type
==
kVTanh
)
{
return
4
;
}
PADDLE_THROW
(
"Unsupported act type %d"
,
type
);
return
0
;
}
template
<
>
size_t
JitCodeKey
<
lstm_attr_t
>
(
const
lstm_attr_t
&
attr
)
{
size_t
key
=
attr
.
d
;
int
gate_key
=
static_cast
<
int
>
(
attr
.
act_gate
)
<<
1
;
int
cand_key
=
static_cast
<
int
>
(
attr
.
act_cand
)
<<
(
1
+
act_type_shift
);
int
cell_key
=
static_cast
<
int
>
(
attr
.
act_cell
)
<<
(
1
+
act_type_shift
*
2
);
int
gate_key
=
act_type_convert
(
attr
.
act_gate
)
<<
1
;
int
cand_key
=
act_type_convert
(
attr
.
act_cand
)
<<
(
1
+
act_type_shift
);
int
cell_key
=
act_type_convert
(
attr
.
act_cell
)
<<
(
1
+
act_type_shift
*
2
);
return
(
key
<<
(
1
+
act_type_shift
*
3
))
+
gate_key
+
cand_key
+
cell_key
+
attr
.
use_peephole
;
}
...
...
@@ -38,8 +55,8 @@ size_t JitCodeKey<lstm_attr_t>(const lstm_attr_t& attr) {
template
<
>
size_t
JitCodeKey
<
gru_attr_t
>
(
const
gru_attr_t
&
attr
)
{
size_t
key
=
attr
.
d
;
return
(
key
<<
(
act_type_shift
*
2
))
+
static_cast
<
int
>
(
attr
.
act_gate
)
+
(
static_cast
<
int
>
(
attr
.
act_cand
)
<<
act_type_shift
);
return
(
key
<<
(
act_type_shift
*
2
))
+
act_type_convert
(
attr
.
act_gate
)
+
(
act_type_convert
(
attr
.
act_cand
)
<<
act_type_shift
);
}
template
<
>
...
...
@@ -61,6 +78,11 @@ size_t JitCodeKey<emb_seq_pool_attr_t>(const emb_seq_pool_attr_t& attr) {
return
attr
.
table_width
;
}
template
<
>
size_t
JitCodeKey
<
sgd_attr_t
>
(
const
sgd_attr_t
&
attr
)
{
return
attr
.
grad_width
;
}
}
// namespace jit
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
浏览文件 @
e5f9d3a4
...
...
@@ -14,3 +14,4 @@ USE_JITKERNEL_MORE(kVTanh, mkl)
USE_JITKERNEL_MORE
(
kSeqPool, mkl
)
USE_JITKERNEL_MORE
(
kSoftmax, mkl
)
USE_JITKERNEL_MORE
(
kEmbSeqPool, mkl
)
USE_JITKERNEL_MORE
(
kSgd, mkl
)
paddle/fluid/operators/jit/more/mkl/mkl.cc
浏览文件 @
e5f9d3a4
...
...
@@ -184,6 +184,16 @@ bool EmbSeqPoolKernel<double>::UseMe(const emb_seq_pool_attr_t& attr) const {
return
true
;
}
template
<
>
bool
SgdKernel
<
float
>::
UseMe
(
const
sgd_attr_t
&
attr
)
const
{
return
true
;
}
template
<
>
bool
SgdKernel
<
double
>::
UseMe
(
const
sgd_attr_t
&
attr
)
const
{
return
true
;
}
template
<
>
bool
MatMulKernel
<
float
>::
UseMe
(
const
matmul_attr_t
&
attr
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
...
...
@@ -239,5 +249,6 @@ REGISTER_MKL_KERNEL(kVTanh, VTanh);
REGISTER_MKL_KERNEL
(
kSeqPool
,
SeqPool
);
REGISTER_MKL_KERNEL
(
kEmbSeqPool
,
EmbSeqPool
);
REGISTER_MKL_KERNEL
(
kSoftmax
,
Softmax
);
REGISTER_MKL_KERNEL
(
kSgd
,
Sgd
);
#undef REGISTER_MKL_KERNEL
paddle/fluid/operators/jit/more/mkl/mkl.h
浏览文件 @
e5f9d3a4
...
...
@@ -142,6 +142,32 @@ void Softmax(const T* x, T* y, int n, int bs) {
}
}
template
<
typename
T
>
void
Sgd
(
const
T
*
lr
,
const
T
*
param
,
const
T
*
grad
,
const
int64_t
*
rows
,
T
*
out
,
const
sgd_attr_t
*
attr
)
{
PADDLE_ENFORCE_EQ
(
attr
->
param_width
,
attr
->
grad_width
);
PADDLE_ENFORCE_LE
(
attr
->
selected_rows_size
,
attr
->
grad_height
);
T
scalar
=
-
lr
[
0
];
int
width
=
attr
->
grad_width
;
if
(
out
==
param
)
{
for
(
int64_t
i
=
0
;
i
<
attr
->
selected_rows_size
;
++
i
)
{
auto
h_idx
=
rows
[
i
];
PADDLE_ENFORCE_LT
(
h_idx
,
attr
->
param_height
);
PADDLE_ENFORCE_GE
(
h_idx
,
0
);
VAXPY
(
scalar
,
grad
+
i
*
width
,
out
+
h_idx
*
width
,
width
);
}
}
else
{
for
(
int64_t
i
=
0
;
i
<
attr
->
selected_rows_size
;
++
i
)
{
auto
h_idx
=
rows
[
i
];
PADDLE_ENFORCE_LT
(
h_idx
,
attr
->
param_height
);
PADDLE_ENFORCE_GE
(
h_idx
,
0
);
VScal
(
&
scalar
,
grad
+
i
*
width
,
out
+
h_idx
*
width
,
width
);
VAdd
(
param
+
h_idx
*
width
,
out
+
h_idx
*
width
,
out
+
h_idx
*
width
,
width
);
}
}
}
#define DECLARE_MKL_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public KernelMore<tuples<T>> { \
...
...
@@ -173,6 +199,8 @@ DECLARE_MKL_KERNEL(EmbSeqPool, EmbSeqPoolTuples);
DECLARE_MKL_KERNEL
(
Softmax
,
SoftmaxTuples
);
DECLARE_MKL_KERNEL
(
Sgd
,
SgdTuples
);
#undef DECLARE_MKL_KERNEL
}
// namespace mkl
...
...
paddle/fluid/operators/jit/refer/CMakeLists.txt
浏览文件 @
e5f9d3a4
...
...
@@ -33,3 +33,4 @@ USE_JITKERNEL_REFER(kHSum)
USE_JITKERNEL_REFER
(
kHMax
)
USE_JITKERNEL_REFER
(
kSoftmax
)
USE_JITKERNEL_REFER
(
kEmbSeqPool
)
USE_JITKERNEL_REFER
(
kSgd
)
paddle/fluid/operators/jit/refer/refer.cc
浏览文件 @
e5f9d3a4
...
...
@@ -59,4 +59,6 @@ REGISTER_REFER_KERNEL(kSoftmax, Softmax);
REGISTER_REFER_KERNEL
(
kEmbSeqPool
,
EmbSeqPool
);
REGISTER_REFER_KERNEL
(
kSgd
,
Sgd
);
#undef REGISTER_REFER_KERNEL
paddle/fluid/operators/jit/refer/refer.h
浏览文件 @
e5f9d3a4
...
...
@@ -446,6 +446,36 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out,
}
}
// SGD algorithm:
// lr is pointor of learning rate scalar
// param is an input matrix with (param_h, param_w)
// grad is an input matrix with (grad_h, grad_w), here grad_w == param_w
// selected_rows is a vectot<int64_t> with size selected_rows_size( <= grad_h )
// out is an output matrix with (param_h, param_w)
//
// support both regular and sparse grad
// regular SGD: out[:] = param[:] - lr[0] * grad[:];
// sparse SGD: out[rows[i]][:] = param[rows[i]][:] - lr[0] * grad[i][:]
//
// Note: when use sparse SGD, and if out != param,
// the out rows which are not selected have not beed changed, which maybe empty
template
<
typename
T
>
void
Sgd
(
const
T
*
lr
,
const
T
*
param
,
const
T
*
grad
,
const
int64_t
*
rows
,
T
*
out
,
const
sgd_attr_t
*
attr
)
{
PADDLE_ENFORCE_EQ
(
attr
->
param_width
,
attr
->
grad_width
);
PADDLE_ENFORCE_LE
(
attr
->
selected_rows_size
,
attr
->
grad_height
);
for
(
int64_t
i
=
0
;
i
<
attr
->
selected_rows_size
;
++
i
)
{
auto
h_idx
=
rows
[
i
];
PADDLE_ENFORCE_LT
(
h_idx
,
attr
->
param_height
);
PADDLE_ENFORCE_GE
(
h_idx
,
0
);
for
(
int64_t
j
=
0
;
j
<
attr
->
grad_width
;
++
j
)
{
out
[
h_idx
*
attr
->
grad_width
+
j
]
=
param
[
h_idx
*
attr
->
grad_width
+
j
]
-
lr
[
0
]
*
grad
[
i
*
attr
->
grad_width
+
j
];
}
}
}
#define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \
...
...
@@ -496,6 +526,8 @@ DECLARE_REFER_KERNEL(Softmax, SoftmaxTuples);
DECLARE_REFER_KERNEL
(
EmbSeqPool
,
EmbSeqPoolTuples
);
DECLARE_REFER_KERNEL
(
Sgd
,
SgdTuples
);
#undef DECLARE_REFER_KERNEL
}
// namespace refer
...
...
paddle/fluid/operators/jit/test.cc
浏览文件 @
e5f9d3a4
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <random>
#include <string>
#include <vector>
...
...
@@ -36,14 +37,14 @@ void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
}
template
<
typename
T
>
void
ExpectEQ
(
const
T
*
target
,
const
T
*
refer
,
in
t
n
)
{
void
ExpectEQ
(
const
T
*
target
,
const
T
*
refer
,
size_
t
n
)
{
if
(
std
::
is_floating_point
<
T
>::
value
)
{
for
(
in
t
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_NEAR
(
target
[
i
],
refer
[
i
],
FLAGS_acc
);
for
(
size_
t
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_NEAR
(
target
[
i
],
refer
[
i
],
FLAGS_acc
)
<<
" at index : "
<<
i
;
}
}
else
{
for
(
in
t
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_EQ
(
target
[
i
],
refer
[
i
]);
for
(
size_
t
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_EQ
(
target
[
i
],
refer
[
i
])
<<
" at index : "
<<
i
;
}
}
}
...
...
@@ -296,6 +297,45 @@ struct TestFuncWithRefer<jit::EmbSeqPoolTuples<T>, std::vector<T>,
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
SgdTuples
<
T
>
,
T
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int64_t
>
,
std
::
vector
<
T
>
,
typename
jit
::
SgdTuples
<
T
>::
attr_type
>
{
void
operator
()(
const
typename
jit
::
SgdTuples
<
T
>::
func_type
tgt
,
const
T
lr
,
const
std
::
vector
<
T
>&
param
,
const
std
::
vector
<
T
>&
grad
,
const
std
::
vector
<
int64_t
>&
rows
,
const
std
::
vector
<
T
>&
oref
,
const
typename
jit
::
SgdTuples
<
T
>::
attr_type
&
attr
)
{
EXPECT_TRUE
(
tgt
!=
nullptr
);
EXPECT_EQ
(
param
.
size
(),
static_cast
<
size_t
>
(
attr
.
param_height
*
attr
.
param_width
));
EXPECT_EQ
(
grad
.
size
(),
static_cast
<
size_t
>
(
attr
.
grad_height
*
attr
.
grad_width
));
EXPECT_EQ
(
rows
.
size
(),
static_cast
<
size_t
>
(
attr
.
selected_rows_size
));
EXPECT_EQ
(
param
.
size
(),
oref
.
size
());
const
T
*
param_data
=
param
.
data
();
const
T
*
grad_data
=
grad
.
data
();
const
int64_t
*
rows_data
=
rows
.
data
();
const
T
*
oref_data
=
oref
.
data
();
std
::
vector
<
T
>
out
(
oref
.
size
());
T
*
o_data
=
out
.
data
();
tgt
(
&
lr
,
param_data
,
grad_data
,
rows_data
,
o_data
,
&
attr
);
// only the selected rows should be equal
for
(
size_t
i
=
0
;
i
<
rows
.
size
();
++
i
)
{
ExpectEQ
<
T
>
(
o_data
+
rows
[
i
]
*
attr
.
grad_width
,
oref_data
+
rows
[
i
]
*
attr
.
grad_width
,
attr
.
grad_width
);
}
// inplace
std
::
copy
(
param
.
begin
(),
param
.
end
(),
out
.
begin
());
tgt
(
&
lr
,
o_data
,
grad_data
,
rows_data
,
o_data
,
&
attr
);
for
(
size_t
i
=
0
;
i
<
rows
.
size
();
++
i
)
{
ExpectEQ
<
T
>
(
o_data
+
rows
[
i
]
*
attr
.
grad_width
,
oref_data
+
rows
[
i
]
*
attr
.
grad_width
,
attr
.
grad_width
);
}
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
MatMulTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
...
...
@@ -407,7 +447,7 @@ void TestAllImpls(const typename KernelTuples::attr_type& attr, Args... args) {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
XYZNKernel
()
{
void
Test
KernelXYZNTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
for
(
int
d
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
XYZNTuples
<
T
>>
();
...
...
@@ -440,7 +480,7 @@ void TestXYZNKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
AXYNKernel
()
{
void
Test
KernelAXYNTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
for
(
int
d
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
AXYNTuples
<
T
>>
();
...
...
@@ -466,7 +506,7 @@ void TestAXYNKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
XRNKernel
()
{
void
Test
KernelXRNTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
auto
last_acc
=
FLAGS_acc
;
FLAGS_acc
=
1e-4
;
...
...
@@ -484,7 +524,7 @@ void TestXRNKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
XYNKernel
()
{
void
Test
KernelXYNTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
for
(
int
d
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
XYNTuples
<
T
>>
();
...
...
@@ -509,10 +549,12 @@ void TestXYNKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
LSTMKernel
()
{
void
Test
KernelLSTMTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
std
::
vector
<
std
::
string
>
all_acts
=
{
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
};
for
(
int
d
:
TestSizes
())
{
auto
test_sizes
=
TestSizes
();
test_sizes
.
erase
(
std
::
remove
(
test_sizes
.
begin
(),
test_sizes
.
end
(),
1000
));
for
(
int
d
:
test_sizes
)
{
for
(
bool
use_peephole
:
{
true
,
false
})
{
for
(
auto
&
act_gate
:
all_acts
)
{
for
(
auto
&
act_cand
:
all_acts
)
{
...
...
@@ -559,10 +601,12 @@ void TestLSTMKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
GRUKernel
()
{
void
Test
KernelGRUTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
std
::
vector
<
std
::
string
>
all_acts
=
{
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
};
for
(
int
d
:
TestSizes
())
{
auto
test_sizes
=
TestSizes
();
test_sizes
.
erase
(
std
::
remove
(
test_sizes
.
begin
(),
test_sizes
.
end
(),
1000
));
for
(
int
d
:
test_sizes
)
{
for
(
auto
&
act_gate
:
all_acts
)
{
for
(
auto
&
act_cand
:
all_acts
)
{
const
jit
::
gru_attr_t
attr
(
d
,
jit
::
to_kerneltype
(
act_gate
),
...
...
@@ -593,14 +637,16 @@ void TestGRUKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
SeqPoolKernel
()
{
void
Test
KernelSeqPoolTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
std
::
vector
<
jit
::
SeqPoolType
>
pool_types
=
{
jit
::
SeqPoolType
::
kSum
,
jit
::
SeqPoolType
::
kAvg
,
jit
::
SeqPoolType
::
kSqrt
};
auto
test_sizes
=
TestSizes
();
test_sizes
.
erase
(
std
::
remove
(
test_sizes
.
begin
(),
test_sizes
.
end
(),
1000
));
for
(
auto
type
:
pool_types
)
{
for
(
int
w
:
TestSizes
()
)
{
for
(
int
w
:
test_sizes
)
{
jit
::
seq_pool_attr_t
attr
(
w
,
type
);
for
(
int
h
:
TestSizes
()
)
{
for
(
int
h
:
test_sizes
)
{
attr
.
h
=
h
;
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
SeqPoolTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
...
...
@@ -618,11 +664,11 @@ void TestSeqPoolKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
MatMulKernel
()
{
void
Test
KernelMatMulTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
auto
last_acc
=
FLAGS_acc
;
//
TODO(intel): fix MKL acc issue
//
https://github.com/PaddlePaddle/Paddle/issues/15447
//
export MKL_CBWR=AVX would make MKL force to use AVX
//
export KMP_DETERMINISTIC_REDUCTION=yes would make the result deterministic
FLAGS_acc
=
1e-3
;
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
for
(
int
n
:
{
1
,
2
,
3
,
4
})
{
...
...
@@ -646,7 +692,7 @@ void TestMatMulKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
SoftmaxKernel
()
{
void
Test
KernelSoftmaxTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
for
(
int
bs
:
{
1
,
2
,
10
})
{
for
(
int
n
:
TestSizes
())
{
...
...
@@ -671,12 +717,14 @@ void TestSoftmaxKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
EmbSeqPoolKernel
()
{
void
Test
KernelEmbSeqPoolTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
int64_t
tbl_h
=
1e4
;
std
::
vector
<
jit
::
SeqPoolType
>
pool_types
=
{
jit
::
SeqPoolType
::
kSum
};
// only support sum yet
for
(
int
tbl_w
:
TestSizes
())
{
auto
test_sizes
=
TestSizes
();
test_sizes
.
erase
(
std
::
remove
(
test_sizes
.
begin
(),
test_sizes
.
end
(),
1000
));
for
(
int
tbl_w
:
test_sizes
)
{
std
::
vector
<
T
>
table
(
tbl_h
*
tbl_w
);
RandomVec
<
T
>
(
tbl_h
*
tbl_w
,
table
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
table_data
=
table
.
data
();
...
...
@@ -705,7 +753,61 @@ void TestEmbSeqPoolKernel() {
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestNCHW16CMulNCKernel
()
{
void
TestKernelSgdTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
const
T
lr
=
0.1
;
auto
UnDuplicatedRandomVec
=
[](
int
n
,
const
int64_t
lower
,
const
int64_t
upper
)
->
std
::
vector
<
int64_t
>
{
PADDLE_ENFORCE_LE
(
static_cast
<
size_t
>
(
upper
-
lower
),
n
-
1
);
PADDLE_ENFORCE_GT
(
n
,
0
);
std
::
vector
<
int64_t
>
all
,
out
;
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
all
.
push_back
(
i
);
}
std
::
random_shuffle
(
all
.
begin
(),
all
.
end
());
out
.
insert
(
out
.
begin
(),
all
.
begin
(),
all
.
begin
()
+
n
);
return
out
;
};
for
(
int
param_h
:
{
1
,
10
})
{
for
(
int
grad_w
:
TestSizes
())
{
std
::
vector
<
T
>
param
(
param_h
*
grad_w
);
std
::
vector
<
T
>
param_out
(
param_h
*
grad_w
);
RandomVec
<
T
>
(
param_h
*
grad_w
,
param
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
param_data
=
param
.
data
();
T
*
out_data
=
param_out
.
data
();
for
(
int
rows_size
=
1
;
rows_size
<=
param_h
;
++
rows_size
)
{
std
::
vector
<
T
>
grad
(
rows_size
*
grad_w
);
std
::
vector
<
int64_t
>
rows
=
UnDuplicatedRandomVec
(
rows_size
,
0
,
rows_size
-
1
);
RandomVec
<
T
>
(
rows_size
*
grad_w
,
grad
.
data
(),
-
2.
f
,
2.
f
);
const
int64_t
*
rows_data
=
rows
.
data
();
const
T
*
grad_data
=
grad
.
data
();
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
SgdTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
jit
::
sgd_attr_t
attr
(
param_h
,
grad_w
,
rows_size
,
grad_w
,
rows_size
);
ref
(
&
lr
,
param_data
,
grad_data
,
rows_data
,
out_data
,
&
attr
);
// inplace test
std
::
vector
<
T
>
inp
(
param
.
size
());
std
::
copy
(
param
.
begin
(),
param
.
end
(),
inp
.
begin
());
T
*
inp_data
=
inp
.
data
();
ref
(
&
lr
,
inp_data
,
grad_data
,
rows_data
,
inp_data
,
&
attr
);
// only the selected rows should be equal
for
(
int
i
=
0
;
i
<
rows_size
;
++
i
)
{
ExpectEQ
<
T
>
(
inp_data
+
rows
[
i
]
*
grad_w
,
out_data
+
rows
[
i
]
*
grad_w
,
grad_w
);
}
TestAllImpls
<
KT
,
jit
::
SgdTuples
<
T
>
,
PlaceType
,
T
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int64_t
>
,
std
::
vector
<
T
>>
(
attr
,
lr
,
param
,
grad
,
rows
,
param_out
,
attr
);
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestKernelNCHW16CMulNCTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
const
int
n
=
3
,
c
=
16
*
4
,
h
=
10
,
w
=
10
;
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
NCHW16CMulNCTuples
<
T
>>
();
...
...
@@ -758,7 +860,7 @@ void TestNCHW16CMulNCKernel() {
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
LayerNormKernel
()
{
void
Test
KernelLayerNormTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
const
T
epsilon
=
9.99999975e-06
;
for
(
int
n
:
{
1
,
2
,
10
})
{
...
...
@@ -797,11 +899,13 @@ void TestLayerNormKernel() {
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
Test
CRFDecodingKernel
()
{
void
Test
KernelCRFDecodingTuples
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
constexpr
int
state_trans_base_idx
=
2
;
auto
test_sizes
=
TestSizes
();
test_sizes
.
erase
(
std
::
remove
(
test_sizes
.
begin
(),
test_sizes
.
end
(),
1000
));
for
(
int
seq_len
:
{
1
,
11
,
17
,
50
})
{
for
(
int
tag_num
:
TestSizes
()
)
{
for
(
int
tag_num
:
test_sizes
)
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
CRFDecodingTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
int
x_sz
=
seq_len
*
tag_num
;
...
...
@@ -822,143 +926,76 @@ void TestCRFDecodingKernel() {
}
}
// XYZNTuple
TEST
(
JITKernel
,
kVMul
)
{
TestXYZNKernel
<
jit
::
kVMul
,
float
,
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVMul
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVAdd
)
{
TestXYZNKernel
<
jit
::
kVAdd
,
float
,
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVAdd
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVAddRelu
)
{
TestXYZNKernel
<
jit
::
kVAddRelu
,
float
,
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVAddRelu
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVSub
)
{
TestXYZNKernel
<
jit
::
kVSub
,
float
,
CPUPlace
>
();
TestXYZNKernel
<
jit
::
kVSub
,
double
,
CPUPlace
>
();
}
// AXYNTuples
TEST
(
JITKernel
,
kVScal
)
{
TestAXYNKernel
<
jit
::
kVScal
,
float
,
CPUPlace
>
();
TestAXYNKernel
<
jit
::
kVScal
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVAddBias
)
{
TestAXYNKernel
<
jit
::
kVAddBias
,
float
,
CPUPlace
>
();
TestAXYNKernel
<
jit
::
kVAddBias
,
double
,
CPUPlace
>
();
}
// XRNTuples
TEST
(
JITKernel
,
kHMax
)
{
TestXRNKernel
<
jit
::
kHMax
,
float
,
CPUPlace
>
();
TestXRNKernel
<
jit
::
kHMax
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kHSum
)
{
TestXRNKernel
<
jit
::
kHSum
,
float
,
CPUPlace
>
();
TestXRNKernel
<
jit
::
kHSum
,
double
,
CPUPlace
>
();
}
// XYNTuples
TEST
(
JITKernel
,
kVRelu
)
{
TestXYNKernel
<
jit
::
kVRelu
,
float
,
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVRelu
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVIdentity
)
{
TestXYNKernel
<
jit
::
kVIdentity
,
float
,
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVIdentity
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVSquare
)
{
TestXYNKernel
<
jit
::
kVSquare
,
float
,
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVSquare
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVExp
)
{
TestXYNKernel
<
jit
::
kVExp
,
float
,
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVExp
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kVSigmoid
)
{
TestXYNKernel
<
jit
::
kVSigmoid
,
float
,
CPUPlace
>
();
TestXYNKernel
<
jit
::
kVSigmoid
,
double
,
CPUPlace
>
();
}
#define TEST_CPU_KERNEL(test_tuple, kernel_type) \
TEST(JITKernel, kernel_type) { \
TestKernel##test_tuple<jit::kernel_type, float, CPUPlace>(); \
TestKernel##test_tuple<jit::kernel_type, float, CPUPlace>(); \
}
TEST
(
JITKernel
,
kVTanh
)
{
TestXYNKernel
<
jit
::
kVTanh
,
float
,
CPUPlace
>
(
);
TestXYNKernel
<
jit
::
kVTanh
,
double
,
CPUPlace
>
(
);
}
TEST
_CPU_KERNEL
(
XYZNTuples
,
kVMul
);
TEST_CPU_KERNEL
(
XYZNTuples
,
kVAdd
);
TEST_CPU_KERNEL
(
XYZNTuples
,
kVAddRelu
);
TEST_CPU_KERNEL
(
XYZNTuples
,
kVSub
);
// LSTM
TEST
(
JITKernel
,
kLSTMCtHt
)
{
TestLSTMKernel
<
jit
::
kLSTMCtHt
,
float
,
CPUPlace
>
();
TestLSTMKernel
<
jit
::
kLSTMCtHt
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
AXYNTuples
,
kVScal
);
TEST_CPU_KERNEL
(
AXYNTuples
,
kVAddBias
);
TEST
(
JITKernel
,
kLSTMC1H1
)
{
TestLSTMKernel
<
jit
::
kLSTMC1H1
,
float
,
CPUPlace
>
();
TestLSTMKernel
<
jit
::
kLSTMC1H1
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
XRNTuples
,
kHMax
);
TEST_CPU_KERNEL
(
XRNTuples
,
kHSum
);
// GRU
TEST
(
JITKernel
,
kGRUH1
)
{
TestGRUKernel
<
jit
::
kGRUH1
,
float
,
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUH1
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
XYNTuples
,
kVRelu
);
TEST_CPU_KERNEL
(
XYNTuples
,
kVIdentity
);
TEST_CPU_KERNEL
(
XYNTuples
,
kVSquare
);
TEST_CPU_KERNEL
(
XYNTuples
,
kVExp
);
TEST_CPU_KERNEL
(
XYNTuples
,
kVSigmoid
);
TEST_CPU_KERNEL
(
XYNTuples
,
kVTanh
);
TEST
(
JITKernel
,
kGRUHtPart1
)
{
TestGRUKernel
<
jit
::
kGRUHtPart1
,
float
,
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUHtPart1
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
LSTMTuples
,
kLSTMCtHt
);
TEST_CPU_KERNEL
(
LSTMTuples
,
kLSTMC1H1
);
TEST
(
JITKernel
,
kGRUHtPart2
)
{
TestGRUKernel
<
jit
::
kGRUHtPart2
,
float
,
CPUPlace
>
();
TestGRUKernel
<
jit
::
kGRUHtPart2
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
GRUTuples
,
kGRUH1
);
TEST_CPU_KERNEL
(
GRUTuples
,
kGRUHtPart1
);
TEST_CPU_KERNEL
(
GRUTuples
,
kGRUHtPart2
);
TEST
(
JITKernel
,
kSeqPool
)
{
TestSeqPoolKernel
<
jit
::
kSeqPool
,
float
,
CPUPlace
>
();
TestSeqPoolKernel
<
jit
::
kSeqPool
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
NCHW16CMulNCTuples
,
kNCHW16CMulNC
);
TEST
(
JITKernel
,
kMatMul
)
{
TestMatMulKernel
<
jit
::
kMatMul
,
float
,
CPUPlace
>
();
TestMatMulKernel
<
jit
::
kMatMul
,
double
,
CPUPlace
>
();
}
TEST_CPU_KERNEL
(
SeqPoolTuples
,
kSeqPool
);
TEST_CPU_KERNEL
(
MatMulTuples
,
kMatMul
);
TEST_CPU_KERNEL
(
SoftmaxTuples
,
kSoftmax
);
TEST_CPU_KERNEL
(
EmbSeqPoolTuples
,
kEmbSeqPool
);
TEST_CPU_KERNEL
(
SgdTuples
,
kSgd
);
TEST_CPU_KERNEL
(
LayerNormTuples
,
kLayerNorm
);
TEST_CPU_KERNEL
(
CRFDecodingTuples
,
kCRFDecoding
);
TEST
(
JITKernel
,
kSoftmax
)
{
TestSoftmaxKernel
<
jit
::
kSoftmax
,
float
,
CPUPlace
>
();
TestSoftmaxKernel
<
jit
::
kSoftmax
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel_key
,
lstm
)
{
jit
::
lstm_attr_t
attr1
(
8
,
jit
::
kVIdentity
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
jit
::
lstm_attr_t
attr2
(
9
,
jit
::
kVIdentity
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
jit
::
lstm_attr_t
attr3
(
9
,
jit
::
kVIdentity
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
jit
::
lstm_attr_t
attr4
(
9
,
jit
::
kVRelu
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
TEST
(
JITKernel
,
kEmbSeqPool
)
{
TestEmbSeqPoolKernel
<
jit
::
kEmbSeqPool
,
float
,
CPUPlace
>
(
);
TestEmbSeqPoolKernel
<
jit
::
kEmbSeqPool
,
double
,
CPUPlace
>
(
);
}
auto
key1
=
jit
::
JitCodeKey
<
jit
::
lstm_attr_t
>
(
attr1
);
auto
key2
=
jit
::
JitCodeKey
<
jit
::
lstm_attr_t
>
(
attr2
);
auto
key3
=
jit
::
JitCodeKey
<
jit
::
lstm_attr_t
>
(
attr3
);
auto
key4
=
jit
::
JitCodeKey
<
jit
::
lstm_attr_t
>
(
attr4
);
TEST
(
JITKernel
,
kNCHW16CMulNC
)
{
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
float
,
CPUPlace
>
(
);
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
double
,
CPUPlace
>
(
);
EXPECT_TRUE
(
key1
!=
key2
);
EXPECT_TRUE
(
key2
==
key3
);
EXPECT_TRUE
(
key3
!=
key4
);
}
TEST
(
JITKernel
,
kLayerNorm
)
{
TestLayerNormKernel
<
jit
::
kLayerNorm
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestLayerNormKernel
<
jit
::
kLayerNorm
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
TEST
(
JITKernel_key
,
gru
)
{
jit
::
gru_attr_t
attr1
(
8
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
jit
::
gru_attr_t
attr2
(
9
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
jit
::
gru_attr_t
attr3
(
9
,
jit
::
kVSigmoid
,
jit
::
kVTanh
);
jit
::
gru_attr_t
attr4
(
9
,
jit
::
kVSigmoid
,
jit
::
kVIdentity
);
TEST
(
JITKernel
,
kCRFDecoding
)
{
TestCRFDecodingKernel
<
jit
::
kCRFDecoding
,
float
,
paddle
::
platform
::
CPUPlace
>
();
TestCRFDecodingKernel
<
jit
::
kCRFDecoding
,
double
,
paddle
::
platform
::
CPUPlace
>
();
}
auto
key1
=
jit
::
JitCodeKey
<
jit
::
gru_attr_t
>
(
attr1
);
auto
key2
=
jit
::
JitCodeKey
<
jit
::
gru_attr_t
>
(
attr2
);
auto
key3
=
jit
::
JitCodeKey
<
jit
::
gru_attr_t
>
(
attr3
);
auto
key4
=
jit
::
JitCodeKey
<
jit
::
gru_attr_t
>
(
attr4
);
TEST
(
JITKernel
,
pool
)
{
// TODO(TJ): add some test
EXPECT_TRUE
(
key1
!=
key2
);
EXPECT_TRUE
(
key2
==
key3
);
EXPECT_TRUE
(
key3
!=
key4
);
}
// TODO(TJ): add more test about key and pool
paddle/fluid/operators/optimizers/sgd_op.h
浏览文件 @
e5f9d3a4
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/jit/kernels.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -32,53 +33,57 @@ class SGDOpKernel : public framework::OpKernel<T> {
if
(
param_var
->
IsType
<
framework
::
LoDTensor
>
())
{
const
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
// Actually, all tensors are LoDTensor except SelectedRows.
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
auto
*
grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
);
auto
p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param
);
auto
g
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
grad
);
auto
o
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out
);
auto
*
lr
=
learning_rate
->
data
<
T
>
();
o
=
p
-
lr
[
0
]
*
g
;
auto
sz
=
param_out
->
numel
();
PADDLE_ENFORCE_EQ
(
param
->
numel
(),
sz
);
PADDLE_ENFORCE_EQ
(
grad
->
numel
(),
sz
);
jit
::
sgd_attr_t
attr
(
1
,
sz
,
1
,
sz
,
1
);
const
T
*
lr
=
learning_rate
->
data
<
T
>
();
const
T
*
param_data
=
param
->
data
<
T
>
();
const
T
*
grad_data
=
grad
->
data
<
T
>
();
int64_t
rows_idx
=
0
;
T
*
out_data
=
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
sgd
=
jit
::
Get
<
jit
::
kSgd
,
jit
::
SgdTuples
<
T
>
,
platform
::
CPUPlace
>
(
attr
);
sgd
(
lr
,
param_data
,
grad_data
,
&
rows_idx
,
out_data
,
&
attr
);
}
else
if
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
())
{
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ
(
param
,
param_out
);
const
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
auto
&
grad_rows
=
grad
->
rows
();
// for distributed training, a sparse var may be empty,
// just skip updating.
if
(
grad
->
rows
()
.
size
()
==
0
)
{
if
(
grad
_rows
.
size
()
==
0
)
{
return
;
}
auto
grad_height
=
grad
->
height
();
auto
out_dims
=
param_out
->
dims
();
PADDLE_ENFORCE_EQ
(
grad_height
,
out_dims
[
0
]);
PADDLE_ENFORCE_EQ
(
grad
->
height
(),
out_dims
[
0
]);
auto
&
grad_value
=
grad
->
value
();
auto
&
grad_rows
=
grad
->
rows
();
size_t
grad_row_numel
=
grad_value
.
numel
()
/
grad_rows
.
size
();
PADDLE_ENFORCE_EQ
(
static_cast
<
int64_t
>
(
grad_row_numel
),
param_out
->
numel
()
/
grad_height
);
auto
*
grad_data
=
grad_value
.
data
<
T
>
()
;
a
uto
*
out_data
=
param_out
->
data
<
T
>
()
;
a
uto
*
lr
=
learning_rate
->
data
<
T
>
()
;
for
(
size_t
i
=
0
;
i
<
grad_rows
.
size
();
i
++
)
{
PADDLE_ENFORCE
(
grad_rows
[
i
]
<
grad_height
,
"Input rows index should less than height"
);
for
(
size_t
j
=
0
;
j
<
grad_row_numel
;
j
++
)
{
out_data
[
grad_rows
[
i
]
*
grad_row_numel
+
j
]
-=
lr
[
0
]
*
grad_data
[
i
*
grad_row_numel
+
j
];
}
}
const
T
*
param_data
=
param
->
data
<
T
>
();
const
T
*
grad_data
=
grad_value
.
data
<
T
>
();
const
T
*
lr
=
learning_rate
->
data
<
T
>
();
const
int64_t
*
rows_data
=
grad_rows
.
data
();
T
*
out_data
=
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()
);
jit
::
sgd_attr_t
attr
;
a
ttr
.
param_height
=
out_dims
[
0
]
;
a
ttr
.
param_width
=
param_out
->
numel
()
/
attr
.
param_height
;
attr
.
grad_height
=
grad_rows
.
size
();
// note: it is not grad->height()
attr
.
grad_width
=
grad_value
.
numel
()
/
attr
.
grad_height
;
attr
.
selected_rows_size
=
grad_rows
.
size
(
);
PADDLE_ENFORCE_EQ
(
attr
.
grad_width
,
attr
.
param_width
);
auto
sgd
=
jit
::
Get
<
jit
::
kSgd
,
jit
::
SgdTuples
<
T
>
,
platform
::
CPUPlace
>
(
attr
);
sgd
(
lr
,
param_data
,
grad_data
,
rows_data
,
out_data
,
&
attr
);
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
}
...
...
python/paddle/fluid/tests/unittests/test_sgd_op.py
浏览文件 @
e5f9d3a4
...
...
@@ -24,17 +24,28 @@ from op_test import OpTest
class
TestSGDOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"sgd"
w
=
np
.
random
.
random
((
102
,
105
)).
astype
(
"float32"
)
g
=
np
.
random
.
random
((
102
,
105
)).
astype
(
"float32"
)
self
.
conf
()
w
=
np
.
random
.
random
((
self
.
h
,
self
.
w
)).
astype
(
"float32"
)
g
=
np
.
random
.
random
((
self
.
h
,
self
.
w
)).
astype
(
"float32"
)
lr
=
np
.
array
([
0.1
]).
astype
(
"float32"
)
self
.
inputs
=
{
'Param'
:
w
,
'Grad'
:
g
,
'LearningRate'
:
lr
}
self
.
outputs
=
{
'ParamOut'
:
w
-
lr
*
g
}
def
conf
(
self
):
self
.
h
=
102
self
.
w
=
105
def
test_check_output
(
self
):
self
.
check_output
()
class
TestSGDOpCase8X
(
TestSGDOp
):
def
conf
(
self
):
self
.
h
=
10
self
.
w
=
64
class
TestSparseSGDOp
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
...
...
@@ -42,12 +53,12 @@ class TestSparseSGDOp(unittest.TestCase):
# create and initialize Grad Variable
height
=
10
rows
=
[
0
,
4
,
7
]
row_numel
=
12
self
.
conf
()
grad_selected_rows
=
scope
.
var
(
'Grad'
).
get_selected_rows
()
grad_selected_rows
.
set_height
(
height
)
grad_selected_rows
.
set_rows
(
rows
)
np_array
=
np
.
ones
((
len
(
rows
),
row_numel
)).
astype
(
"float32"
)
np_array
=
np
.
ones
((
len
(
rows
),
self
.
row_numel
)).
astype
(
"float32"
)
np_array
[
0
,
0
]
=
2.0
np_array
[
2
,
8
]
=
4.0
...
...
@@ -56,7 +67,7 @@ class TestSparseSGDOp(unittest.TestCase):
# create and initialize Param Variable
param
=
scope
.
var
(
'Param'
).
get_tensor
()
param_array
=
np
.
full
((
height
,
row_numel
),
5.0
).
astype
(
"float32"
)
param_array
=
np
.
full
((
height
,
self
.
row_numel
),
5.0
).
astype
(
"float32"
)
param
.
set
(
param_array
,
place
)
# create and initialize LeraningRate Variable
...
...
@@ -98,6 +109,14 @@ class TestSparseSGDOp(unittest.TestCase):
for
place
in
places
:
self
.
check_with_place
(
place
)
def
conf
(
self
):
self
.
row_numel
=
12
class
TestSparseSGDOpCase8X
(
TestSparseSGDOp
):
def
conf
(
self
):
self
.
row_numel
=
16
class
TestSGDOpOptimizeSelectedRows
(
unittest
.
TestCase
):
def
check_with_place
(
self
,
place
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录