Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
6648995f
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6648995f
编写于
12月 17, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix build
上级
74292f41
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
61 addition
and
61 deletion
+61
-61
paddle/fluid/operators/crf_decoding_op.h
paddle/fluid/operators/crf_decoding_op.h
+2
-2
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
.../fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+1
-1
paddle/fluid/operators/fused/fusion_gru_op.cc
paddle/fluid/operators/fused/fusion_gru_op.cc
+23
-23
paddle/fluid/operators/fused/fusion_lstm_op.cc
paddle/fluid/operators/fused/fusion_lstm_op.cc
+27
-28
paddle/fluid/operators/jit/helper.h
paddle/fluid/operators/jit/helper.h
+4
-4
paddle/fluid/operators/layer_norm_op.h
paddle/fluid/operators/layer_norm_op.h
+1
-1
paddle/fluid/operators/math/fc_compute.h
paddle/fluid/operators/math/fc_compute.h
+3
-2
未找到文件。
paddle/fluid/operators/crf_decoding_op.h
浏览文件 @
6648995f
...
...
@@ -82,8 +82,8 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
Tensor
track
;
int
*
track_value
=
track
.
mutable_data
<
int
>
(
emission_dims
,
platform
::
CPUPlace
());
auto
ker
=
jit
::
Get
<
jit
::
crfdecoding
,
jit
::
CRFDecoding
,
platform
::
CPUPlace
>
(
tag_num
);
auto
ker
=
jit
::
Get
<
jit
::
crfdecoding
,
jit
::
CRFDecoding
Tuples
<
T
>
,
platform
::
CPUPlace
>
(
tag_num
);
ker
(
static_cast
<
int
>
(
seq_len
),
x
,
w
,
alpha_value
,
track_value
,
tag_num
);
T
max_score
=
-
std
::
numeric_limits
<
T
>::
max
();
int
max_i
=
0
;
...
...
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
浏览文件 @
6648995f
...
...
@@ -108,7 +108,7 @@ class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
constexpr
int
simd_width
=
16
;
int
C
=
c
/
simd_width
;
auto
multiply
=
jit
::
Get
<
jit
::
nchw16cmulnc
,
jit
::
NCHW16CMulNCTuples
,
auto
multiply
=
jit
::
Get
<
jit
::
nchw16cmulnc
,
jit
::
NCHW16CMulNCTuples
<
T
>
,
platform
::
CPUPlace
>
(
0
);
#pragma omp parallel for collapse(2)
for
(
int
ni
=
0
;
ni
<
n
;
ni
++
)
{
...
...
paddle/fluid/operators/fused/fusion_gru_op.cc
浏览文件 @
6648995f
...
...
@@ -183,29 +183,29 @@ class FusionGRUKernel : public framework::OpKernel<T> {
const int total_T = x_dims[0]; \
const int D3 = wh_dims[1]
#define INIT_OTHER_DEFINES \
auto* h0 = ctx.Input<Tensor>("H0"); \
auto* wx = ctx.Input<Tensor>("WeightX"); \
auto* bias = ctx.Input<Tensor>("Bias"); \
auto* hidden_out = ctx.Output<LoDTensor>("Hidden"); \
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
const int M = x_dims[1]; \
const int D = wh_dims[0]; \
const int D2 = D * 2; \
const jit::gru_attr_t attr( \
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("activation"))); \
jit::gru_t one_step; \
auto ComputeH1 = \
jit::Get<jit::gruh1, jit::GRUTuples, platform::CPUPlace>(attr); \
auto ComputeHtPart1 = \
jit::Get<jit::gruhtpart1, jit::GRUTuples, platform::CPUPlace>(attr); \
auto ComputeHtPart2 = \
jit::Get<jit::gruhtpart2, jit::GRUTuples, platform::CPUPlace>(attr); \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
auto place = ctx.GetPlace(); \
#define INIT_OTHER_DEFINES
\
auto* h0 = ctx.Input<Tensor>("H0");
\
auto* wx = ctx.Input<Tensor>("WeightX");
\
auto* bias = ctx.Input<Tensor>("Bias");
\
auto* hidden_out = ctx.Output<LoDTensor>("Hidden");
\
bool is_reverse = ctx.Attr<bool>("is_reverse");
\
const int M = x_dims[1];
\
const int D = wh_dims[0];
\
const int D2 = D * 2;
\
const jit::gru_attr_t attr(
\
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")),
\
jit::to_kerneltype(ctx.Attr<std::string>("activation")));
\
jit::gru_t one_step;
\
auto ComputeH1 =
\
jit::Get<jit::gruh1, jit::GRUTuples
<T>
, platform::CPUPlace>(attr); \
auto ComputeHtPart1 =
\
jit::Get<jit::gruhtpart1, jit::GRUTuples
<T>
, platform::CPUPlace>(attr); \
auto ComputeHtPart2 =
\
jit::Get<jit::gruhtpart2, jit::GRUTuples
<T>
, platform::CPUPlace>(attr); \
const T* x_data = x->data<T>();
\
const T* wx_data = wx->data<T>();
\
const T* wh_data = wh->data<T>();
\
auto place = ctx.GetPlace();
\
T* xx_data = xx->mutable_data<T>(place)
void
SeqCompute
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
...
...
paddle/fluid/operators/fused/fusion_lstm_op.cc
浏览文件 @
6648995f
...
...
@@ -236,33 +236,32 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
const int D = wh_dims[0]; \
const int D4 = wh_dims[1]
#define INIT_OTHER_DEFINES \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
/* diagonal weight*/
\
const T* wp_data = bias->data<T>() + D4; \
/* for peephole only*/
\
T* checked_cell_data = nullptr; \
auto place = ctx.GetPlace(); \
if (use_peepholes) { \
/* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/
\
auto* checked_cell = ctx.Output<Tensor>("CheckedCell"); \
checked_cell_data = checked_cell->mutable_data<T>(place); \
} \
const jit \
: lstm_attr_t attr( \
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")), \
use_peepholes); \
math::jitkernel::lstm_t one_step; \
one_step.wp = wp_data; \
one_step.checked = checked_cell_data; \
auto ComputeC1H1 = \
jit::Get<jit::lstmc1h1, jit::LSTMTuples, platform::CPUPlace>(attr); \
auto ComputeCtHt = \
jit::Get<jit::lstmctht, jit::LSTMTuples, platform::CPUPlace>(attr)
#define INIT_OTHER_DEFINES \
const T* x_data = x->data<T>(); \
const T* wx_data = wx->data<T>(); \
const T* wh_data = wh->data<T>(); \
/* diagonal weight*/
\
const T* wp_data = bias->data<T>() + D4; \
/* for peephole only*/
\
T* checked_cell_data = nullptr; \
auto place = ctx.GetPlace(); \
if (use_peepholes) { \
/* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/
\
auto* checked_cell = ctx.Output<Tensor>("CheckedCell"); \
checked_cell_data = checked_cell->mutable_data<T>(place); \
} \
const jit::lstm_attr_t attr( \
D, jit::to_kerneltype(ctx.Attr<std::string>("gate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("candidate_activation")), \
jit::to_kerneltype(ctx.Attr<std::string>("cell_activation")), \
use_peepholes); \
jit::lstm_t one_step; \
one_step.wp = wp_data; \
one_step.checked = checked_cell_data; \
auto ComputeC1H1 = \
jit::Get<jit::lstmc1h1, jit::LSTMTuples<T>, platform::CPUPlace>(attr); \
auto ComputeCtHt = \
jit::Get<jit::lstmctht, jit::LSTMTuples<T>, platform::CPUPlace>(attr)
// Wh GEMM
#define GEMM_WH_ADDON(bs, prev, out) \
...
...
@@ -434,7 +433,7 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
one_step
.
ct_1
=
cur_prev_c_data
;
one_step
.
ct
=
cur_c_out_data
;
one_step
.
ht
=
cur_h_out_data
;
ComputeC
1H1
(
&
one_step
,
&
attr
);
ComputeC
tHt
(
&
one_step
,
&
attr
);
// move one batch
cur_in_data
+=
D4
;
...
...
paddle/fluid/operators/jit/helper.h
浏览文件 @
6648995f
...
...
@@ -32,7 +32,7 @@ inline typename std::enable_if<
std
::
is_same
<
typename
KernelTuples
::
data_type
,
float
>::
value
&&
std
::
is_same
<
PlaceType
,
platform
::
CPUPlace
>::
value
,
typename
KernelTuples
::
func_type
>::
type
GetJitCode
(
typename
KernelTuples
::
attr_type
attr
)
{
GetJitCode
(
const
typename
KernelTuples
::
attr_type
&
attr
)
{
using
Func
=
typename
KernelTuples
::
func_type
;
using
Attr
=
typename
KernelTuples
::
attr_type
;
size_t
key
=
JitCodeKey
<
Attr
>
(
attr
);
...
...
@@ -68,7 +68,7 @@ inline typename std::enable_if<
!
std
::
is_same
<
typename
KernelTuples
::
data_type
,
float
>::
value
||
!
std
::
is_same
<
PlaceType
,
platform
::
CPUPlace
>::
value
,
typename
KernelTuples
::
func_type
>::
type
GetJitCode
(
typename
KernelTuples
::
attr_type
attr
)
{
GetJitCode
(
const
typename
KernelTuples
::
attr_type
&
attr
)
{
return
nullptr
;
}
...
...
@@ -93,8 +93,8 @@ inline typename KernelTuples::func_type GetRefer() {
template
<
KernelType
KT
,
typename
KernelTuples
,
typename
PlaceType
=
platform
::
CPUPlace
>
// TODO(TJ): const & attr
typename
KernelTuples
::
func_type
Get
(
typename
KernelTuples
::
attr_type
attr
)
{
typename
KernelTuples
::
func_type
Get
(
const
typename
KernelTuples
::
attr_type
&
attr
)
{
auto
jitfunc
=
GetJitCode
<
KT
,
KernelTuples
,
PlaceType
>
(
attr
);
if
(
jitfunc
)
{
return
jitfunc
;
...
...
paddle/fluid/operators/layer_norm_op.h
浏览文件 @
6648995f
...
...
@@ -230,7 +230,7 @@ class LayerNormKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
bias
->
numel
(),
right
);
auto
ker
=
jit
::
Get
<
jit
::
layernorm
,
jit
::
LayerNormTuples
,
platform
::
CPUPlace
>
(
jit
::
Get
<
jit
::
layernorm
,
jit
::
LayerNormTuples
<
T
>
,
platform
::
CPUPlace
>
(
right
);
ker
(
x
.
data
<
T
>
(),
out
.
data
<
T
>
(),
mean
->
data
<
T
>
(),
var
->
data
<
T
>
(),
scale
->
data
<
T
>
(),
bias
->
data
<
T
>
(),
static_cast
<
int
>
(
left
),
...
...
paddle/fluid/operators/math/fc_compute.h
浏览文件 @
6648995f
...
...
@@ -31,13 +31,14 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
}
if
(
relu
)
{
auto
compute
=
jit
::
Get
<
jit
::
vaddrelu
,
jit
::
XYZNTuples
,
platform
::
CPUPlc
ace
>
(
N
);
jit
::
Get
<
jit
::
vaddrelu
,
jit
::
XYZNTuples
<
T
>
,
platform
::
CPUPl
ace
>
(
N
);
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
T
*
dst
=
Y
+
i
*
N
;
compute
(
B
,
dst
,
dst
,
N
);
}
}
else
{
auto
compute
=
jit
::
Get
<
jit
::
vadd
,
jit
::
XYZNTuples
,
platform
::
CPUPlcace
>
(
N
);
auto
compute
=
jit
::
Get
<
jit
::
vadd
,
jit
::
XYZNTuples
<
T
>
,
platform
::
CPUPlace
>
(
N
);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录