Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
35e820dc
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
35e820dc
编写于
6月 07, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into infer_api
上级
aad8f4d1
3ff9ba0e
变更
29
隐藏空白更改
内联
并排
Showing
29 changed file
with
476 addition
and
149 deletion
+476
-149
cmake/configure.cmake
cmake/configure.cmake
+3
-0
paddle/fluid/framework/data_layout.h
paddle/fluid/framework/data_layout.h
+6
-1
paddle/fluid/framework/data_layout_transform.cc
paddle/fluid/framework/data_layout_transform.cc
+83
-0
paddle/fluid/framework/data_layout_transform.h
paddle/fluid/framework/data_layout_transform.h
+45
-0
paddle/fluid/framework/data_transform.cc
paddle/fluid/framework/data_transform.cc
+26
-3
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+2
-1
paddle/fluid/framework/details/fuse_vars_op_handle.cc
paddle/fluid/framework/details/fuse_vars_op_handle.cc
+51
-0
paddle/fluid/framework/details/fuse_vars_op_handle.h
paddle/fluid/framework/details/fuse_vars_op_handle.h
+63
-0
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+19
-10
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+10
-0
paddle/fluid/framework/details/op_handle_base.h
paddle/fluid/framework/details/op_handle_base.h
+2
-0
paddle/fluid/framework/details/ssa_graph_printer.h
paddle/fluid/framework/details/ssa_graph_printer.h
+1
-1
paddle/fluid/framework/op_kernel_type.h
paddle/fluid/framework/op_kernel_type.h
+8
-1
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+9
-2
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+21
-5
paddle/fluid/framework/tensor.h
paddle/fluid/framework/tensor.h
+26
-2
paddle/fluid/framework/tensor_test.cc
paddle/fluid/framework/tensor_test.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+2
-5
paddle/fluid/inference/tensorrt/convert/activation_op.cc
paddle/fluid/inference/tensorrt/convert/activation_op.cc
+8
-2
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+28
-87
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+3
-1
paddle/fluid/operators/batch_norm_op.cc
paddle/fluid/operators/batch_norm_op.cc
+8
-5
paddle/fluid/operators/conv_op.cc
paddle/fluid/operators/conv_op.cc
+11
-6
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+2
-2
paddle/fluid/operators/lrn_op.cc
paddle/fluid/operators/lrn_op.cc
+4
-3
paddle/fluid/operators/pool_mkldnn_op.cc
paddle/fluid/operators/pool_mkldnn_op.cc
+7
-4
paddle/fluid/operators/pool_op.cc
paddle/fluid/operators/pool_op.cc
+8
-4
paddle/fluid/operators/softmax_op.cc
paddle/fluid/operators/softmax_op.cc
+6
-3
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+13
-0
未找到文件。
cmake/configure.cmake
浏览文件 @
35e820dc
...
...
@@ -92,6 +92,9 @@ if(WITH_GPU)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
message
(
FATAL_ERROR
"TensorRT needs CUDNN >= 7.0 to compile"
)
endif
()
if
(
${
TENSORRT_MAJOR_VERSION
}
VERSION_LESS 4
)
message
(
FATAL_ERROR
"Paddle needs TensorRT >= 4.0 to compile"
)
endif
()
include_directories
(
${
TENSORRT_INCLUDE_DIR
}
)
endif
()
elseif
(
WITH_AMD_GPU
)
...
...
paddle/fluid/framework/data_layout.h
浏览文件 @
35e820dc
...
...
@@ -27,6 +27,7 @@ enum class DataLayout {
kNHWC
=
0
,
kNCHW
=
1
,
kAnyLayout
=
2
,
kMKLDNN
=
3
,
// all layouts supported by MKLDNN internally
};
inline
DataLayout
StringToDataLayout
(
const
std
::
string
&
str
)
{
...
...
@@ -41,6 +42,8 @@ inline DataLayout StringToDataLayout(const std::string& str) {
return
DataLayout
::
kNCHW
;
}
else
if
(
s
==
"ANYLAYOUT"
)
{
return
DataLayout
::
kAnyLayout
;
}
else
if
(
s
==
"MKLDNNLAYOUT"
)
{
return
DataLayout
::
kMKLDNN
;
}
else
{
PADDLE_THROW
(
"Unknown storage order string: %s"
,
s
);
}
...
...
@@ -54,8 +57,10 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
return
"NCHW"
;
case
DataLayout
::
kAnyLayout
:
return
"ANY_LAYOUT"
;
case
DataLayout
::
kMKLDNN
:
return
"MKLDNNLAYOUT"
;
default:
PADDLE_THROW
(
"unknown DataLayou %d"
,
data_layout
);
PADDLE_THROW
(
"unknown DataLayou
t
%d"
,
data_layout
);
}
}
...
...
paddle/fluid/framework/data_layout_transform.cc
浏览文件 @
35e820dc
...
...
@@ -16,6 +16,9 @@
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
framework
{
...
...
@@ -88,5 +91,85 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
out
->
set_layout
(
expected_kernel_type
.
data_layout_
);
}
#ifdef PADDLE_WITH_MKLDNN
using
mkldnn
::
memory
;
using
mkldnn
::
primitive
;
using
mkldnn
::
reorder
;
void
*
GetDataFromTensor
(
const
Tensor
&
tensor
,
mkldnn
::
memory
::
data_type
type
)
{
switch
(
type
)
{
case
mkldnn
::
memory
::
data_type
::
f32
:
return
platform
::
to_void_cast
(
tensor
.
data
<
float
>
());
case
mkldnn
::
memory
::
data_type
::
s8
:
return
platform
::
to_void_cast
(
tensor
.
data
<
char
>
());
case
mkldnn
::
memory
::
data_type
::
u8
:
return
platform
::
to_void_cast
(
tensor
.
data
<
unsigned
char
>
());
case
mkldnn
::
memory
::
data_type
::
s16
:
return
platform
::
to_void_cast
(
tensor
.
data
<
int16_t
>
());
case
mkldnn
::
memory
::
data_type
::
s32
:
return
platform
::
to_void_cast
(
tensor
.
data
<
int32_t
>
());
default:
PADDLE_THROW
(
"wrong mkldnn type provided"
);
}
}
#endif
void
TransDataLayoutFromMKLDNN
(
const
OpKernelType
&
kernel_type_for_var
,
const
OpKernelType
&
expected_kernel_type
,
const
Tensor
&
in
,
Tensor
*
out
)
{
auto
in_layout
=
kernel_type_for_var
.
data_layout_
;
auto
out_layout
=
expected_kernel_type
.
data_layout_
;
PADDLE_ENFORCE
(
in_layout
==
DataLayout
::
kMKLDNN
&&
out_layout
!=
DataLayout
::
kMKLDNN
,
"TransDataLayoutFromMKLDNN only supports transform from MKLDNN to "
"non-MKLDNN"
);
#ifdef PADDLE_WITH_MKLDNN
PADDLE_ENFORCE
(
in
.
format
()
!=
memory
::
format
::
format_undef
&&
in
.
format
()
!=
memory
::
format
::
any
,
"Input tensor should have specified memory format"
);
// Set default as NCHW in case not specified
out_layout
=
out_layout
==
DataLayout
::
kAnyLayout
?
DataLayout
::
kNCHW
:
out_layout
;
auto
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
dynamic_cast
<
platform
::
MKLDNNDeviceContext
*>
(
pool
.
Get
(
expected_kernel_type
.
place_
));
auto
&
cpu_engine
=
dev_ctx
->
GetEngine
();
std
::
vector
<
int
>
in_tz
=
paddle
::
framework
::
vectorize2int
(
in
.
dims
());
std
::
vector
<
int
>
out_tz
=
in_tz
;
memory
::
data_type
in_type
=
ToMKLDNNDataType
(
in
.
type
());
PADDLE_ENFORCE
(
in_type
!=
memory
::
data_type
::
data_undef
,
"Input tensor type is not supported: "
,
in
.
type
().
name
());
memory
::
data_type
out_type
=
in_type
;
memory
::
format
in_format
=
in_tz
.
size
()
==
2
?
memory
::
format
::
nc
:
in
.
format
();
memory
::
format
out_format
=
out_tz
.
size
()
==
2
?
memory
::
format
::
nc
:
ToMKLDNNFormat
(
out_layout
);
void
*
in_data
=
GetDataFromTensor
(
in
,
in_type
);
// output tensor has the same dims as input. Reorder don't change dims
out
->
Resize
(
in
.
dims
());
auto
out_data
=
out
->
mutable_data
(
expected_kernel_type
.
place_
,
in
.
type
());
auto
in_memory
=
memory
({{{
in_tz
},
in_type
,
in_format
},
cpu_engine
},
in_data
);
auto
out_memory
=
memory
({{{
out_tz
},
out_type
,
out_format
},
cpu_engine
},
out_data
);
platform
::
Reorder
(
in_memory
,
out_memory
);
out
->
set_layout
(
out_layout
);
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
out
->
set_format
(
memory
::
format
::
format_undef
);
#endif
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/data_layout_transform.h
浏览文件 @
35e820dc
...
...
@@ -14,6 +14,7 @@
#pragma once
#include <map>
#include <vector>
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"
...
...
@@ -22,6 +23,50 @@
namespace
paddle
{
namespace
framework
{
#ifdef PADDLE_WITH_MKLDNN
using
MKLDNNFormat
=
mkldnn
::
memory
::
format
;
using
MKLDNNDataType
=
mkldnn
::
memory
::
data_type
;
inline
MKLDNNFormat
ToMKLDNNFormat
(
const
DataLayout
&
layout
)
{
switch
(
layout
)
{
case
DataLayout
::
kNHWC
:
return
MKLDNNFormat
::
nhwc
;
case
DataLayout
::
kNCHW
:
return
MKLDNNFormat
::
nchw
;
default:
PADDLE_THROW
(
"Fail to convert layout %s to MKLDNN format"
,
DataLayoutToString
(
layout
));
}
}
inline
DataLayout
ToPaddleLayout
(
const
MKLDNNFormat
&
format
)
{
switch
(
format
)
{
case
MKLDNNFormat
::
nhwc
:
return
DataLayout
::
kNHWC
;
case
MKLDNNFormat
::
nchw
:
return
DataLayout
::
kNCHW
;
default:
PADDLE_THROW
(
"Fail to convert MKLDNN format to paddle layout"
);
}
}
inline
MKLDNNDataType
ToMKLDNNDataType
(
const
std
::
type_index
type
)
{
static
const
std
::
map
<
std
::
type_index
,
MKLDNNDataType
>
dict
{
{
std
::
type_index
(
typeid
(
float
)),
MKLDNNDataType
::
f32
},
// NOLINT
{
std
::
type_index
(
typeid
(
char
)),
MKLDNNDataType
::
s8
},
// NOLINT
{
std
::
type_index
(
typeid
(
unsigned
char
)),
MKLDNNDataType
::
u8
},
{
std
::
type_index
(
typeid
(
int16_t
)),
MKLDNNDataType
::
s16
},
{
std
::
type_index
(
typeid
(
int32_t
)),
MKLDNNDataType
::
s32
}};
auto
iter
=
dict
.
find
(
type
);
if
(
iter
!=
dict
.
end
())
return
iter
->
second
;
return
MKLDNNDataType
::
data_undef
;
}
#endif
void
TransDataLayoutFromMKLDNN
(
const
OpKernelType
&
kernel_type_for_var
,
const
OpKernelType
&
expected_kernel_type
,
const
Tensor
&
in
,
Tensor
*
out
);
std
::
vector
<
int
>
GetAxis
(
const
DataLayout
&
from
,
const
DataLayout
&
to
);
void
TransDataLayout
(
const
OpKernelType
&
kernel_type_for_var
,
...
...
paddle/fluid/framework/data_transform.cc
浏览文件 @
35e820dc
...
...
@@ -33,11 +33,34 @@ void DataTransform(const OpKernelType& expected_kernel_type,
Tensor
in
;
in
.
ShareDataWith
(
input_tensor
);
Tensor
out
;
DataLayout
lin
=
kernel_type_for_var
.
data_layout_
;
DataLayout
lout
=
expected_kernel_type
.
data_layout_
;
// do layout transform
if
(
NeedTransformLayout
(
expected_kernel_type
.
data_layout_
,
kernel_type_for_var
.
data_layout_
))
{
TransDataLayout
(
kernel_type_for_var
,
expected_kernel_type
,
in
,
&
out
);
if
(
NeedTransformLayout
(
lout
,
lin
))
{
if
(
lin
==
DataLayout
::
kMKLDNN
||
lout
==
DataLayout
::
kMKLDNN
)
{
PADDLE_ENFORCE
(
!
(
lin
==
DataLayout
::
kMKLDNN
&&
lout
==
DataLayout
::
kMKLDNN
),
"No layout transform needed between two MKLDNN OPKernels"
);
if
(
lin
!=
DataLayout
::
kMKLDNN
&&
lout
==
DataLayout
::
kMKLDNN
)
{
#ifdef PADDLE_WITH_MKLDNN
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Just set layout/format. No real transform occur
out
.
ShareDataWith
(
input_tensor
);
out
.
set_layout
(
DataLayout
::
kMKLDNN
);
out
.
set_format
(
ToMKLDNNFormat
(
lin
));
#endif
}
else
{
// Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel
// Do transform via MKLDNN lib
TransDataLayoutFromMKLDNN
(
kernel_type_for_var
,
expected_kernel_type
,
in
,
&
out
);
}
}
else
{
// Case3 - transfrom between Non-MKLDNN OPKernels
TransDataLayout
(
kernel_type_for_var
,
expected_kernel_type
,
in
,
&
out
);
}
transformed
=
true
;
PassTensorData
(
&
out
,
&
in
);
}
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
35e820dc
...
...
@@ -13,7 +13,7 @@ cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_ro
if
(
WITH_GPU
)
nv_library
(
nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda
)
dynload_cuda
variable_visitor
)
set
(
multi_devices_graph_builder_deps nccl_all_reduce_op_handle
)
nv_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim dynload_cuda
)
nv_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda
)
...
...
@@ -25,6 +25,7 @@ else()
endif
()
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle rpc_op_handle
${
multi_devices_graph_builder_deps
}
reduce_op_handle broadcast_op_handle
)
...
...
paddle/fluid/framework/details/fuse_vars_op_handle.cc
0 → 100644
浏览文件 @
35e820dc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fuse_vars_op_handle.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
void
FuseVarsOpHandle
::
RunImpl
()
{
WaitInputVarGenerated
(
place_
);
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Inputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
()
-
1
,
inputs_numel_
.
size
(),
""
);
auto
scope
=
local_scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
out_var_handle
=
out_var_handles
[
0
];
auto
out_var
=
scope
->
Var
(
out_var_handle
->
name_
);
auto
out_tensor
=
out_var
->
GetMutable
<
LoDTensor
>
();
out_tensor
->
Resize
({
total_numel_
}).
mutable_data
(
this
->
place_
,
type_
);
int64_t
s
=
0
;
for
(
size_t
i
=
1
;
i
<
out_var_handles
.
size
();
++
i
)
{
auto
out_name
=
out_var_handles
[
i
]
->
name_
;
auto
out_t
=
scope
->
Var
(
out_name
)
->
GetMutable
<
LoDTensor
>
();
auto
numel
=
this
->
inputs_numel_
.
at
(
out_name
);
out_t
->
ShareDataWith
(
out_tensor
->
Slice
(
s
,
s
+
numel
));
s
+=
numel
;
}
this
->
RunAndRecordEvent
([
this
]
{});
}
std
::
string
FuseVarsOpHandle
::
Name
()
const
{
return
"fuse vars"
;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fuse_vars_op_handle.h
0 → 100644
浏览文件 @
35e820dc
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
struct
FuseVarsOpHandle
:
public
OpHandleBase
{
public:
FuseVarsOpHandle
(
Scope
*
local_scope
,
const
platform
::
Place
&
place
,
const
std
::
unordered_map
<
std
::
string
,
int64_t
>
&
inputs_numel
,
const
std
::
type_index
&
var_type
)
:
local_scope_
(
local_scope
),
place_
(
place
),
inputs_numel_
(
inputs_numel
),
type_
(
var_type
)
{
total_numel_
=
0
;
for
(
auto
in_numel
:
inputs_numel
)
{
PADDLE_ENFORCE_GT
(
in_numel
.
second
,
0
);
total_numel_
+=
in_numel
.
second
;
}
}
std
::
string
Name
()
const
override
;
bool
IsMultiDeviceTransfer
()
override
{
return
false
;
};
protected:
void
RunImpl
()
override
;
private:
Scope
*
local_scope_
;
const
platform
::
Place
place_
;
const
std
::
unordered_map
<
std
::
string
,
int64_t
>
inputs_numel_
;
const
std
::
type_index
type_
;
int64_t
total_numel_
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
浏览文件 @
35e820dc
...
...
@@ -11,10 +11,12 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h"
#include <algorithm>
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -30,27 +32,34 @@ NCCLAllReduceOpHandle::NCCLAllReduceOpHandle(
}
void
NCCLAllReduceOpHandle
::
RunImpl
()
{
if
(
inputs_
.
s
ize
()
==
1
)
{
if
(
NoDummyInputS
ize
()
==
1
)
{
return
;
// No need to all reduce when GPU count = 1;
}
else
{
// Wait input done
WaitInputVarGenerated
();
auto
&
var_name
=
static_cast
<
VarHandle
*>
(
this
->
inputs_
[
0
])
->
name_
;
int
dtype
=
-
1
;
size_t
numel
=
0
;
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Inputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
places_
.
size
(),
"The NoDummyInputSize should be equal to the number of places."
);
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
out_var_handles
.
size
(),
"The NoDummyInputSize and NoDummyOutputSize should be equal."
);
std
::
vector
<
const
LoDTensor
*>
lod_tensors
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
*
s
=
local_scopes_
[
i
];
auto
&
local_scope
=
*
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
lod_tensor
=
local_scope
.
FindVar
(
var_name
)
->
Get
<
LoDTensor
>
();
auto
&
lod_tensor
=
local_scope
.
FindVar
(
in_var_handles
[
i
]
->
name_
)
->
Get
<
LoDTensor
>
();
lod_tensors
.
emplace_back
(
&
lod_tensor
);
PADDLE_ENFORCE_EQ
(
in_var_handles
[
i
]
->
name_
,
out_var_handles
[
i
]
->
name_
,
"The name of input and output should be equal."
);
}
if
(
platform
::
is_gpu_place
(
lod_tensors
[
0
]
->
place
()))
{
int
dtype
=
-
1
;
size_t
numel
=
0
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
&
p
=
places_
[
i
];
...
...
@@ -96,7 +105,7 @@ void NCCLAllReduceOpHandle::RunImpl() {
auto
&
scope
=
*
local_scopes_
[
i
]
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
p
=
places_
[
i
];
auto
*
var
=
scope
.
FindVar
(
var_name
);
auto
*
var
=
scope
.
FindVar
(
in_var_handles
[
i
]
->
name_
);
auto
*
dev_ctx
=
dev_ctxes_
[
p
];
RunAndRecordEvent
(
p
,
[
&
trg
,
var
,
dev_ctx
,
p
]
{
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
35e820dc
...
...
@@ -104,6 +104,16 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) {
}
}
size_t
OpHandleBase
::
NoDummyInputSize
()
const
{
size_t
cnt
=
0
;
for
(
auto
*
in
:
inputs_
)
{
if
(
dynamic_cast
<
DummyVarHandle
*>
(
in
)
==
nullptr
)
{
++
cnt
;
}
}
return
cnt
;
}
bool
OpHandleBase
::
NeedWait
(
VarHandleBase
*
in_var
)
{
return
in_var
&&
in_var
->
generated_op_
;
}
...
...
paddle/fluid/framework/details/op_handle_base.h
浏览文件 @
35e820dc
...
...
@@ -80,6 +80,8 @@ class OpHandleBase {
const
std
::
vector
<
VarHandleBase
*>
&
Outputs
()
const
{
return
outputs_
;
}
size_t
NoDummyInputSize
()
const
;
protected:
void
RunAndRecordEvent
(
const
std
::
function
<
void
()
>
&
callback
);
...
...
paddle/fluid/framework/details/ssa_graph_printer.h
浏览文件 @
35e820dc
...
...
@@ -20,7 +20,7 @@
namespace
paddle
{
namespace
framework
{
namespace
details
{
class
SSAGraph
;
struct
SSAGraph
;
class
SSAGraphPrinter
{
public:
virtual
~
SSAGraphPrinter
()
{}
...
...
paddle/fluid/framework/op_kernel_type.h
浏览文件 @
35e820dc
...
...
@@ -87,7 +87,14 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
}
inline
bool
NeedTransformLayout
(
const
DataLayout
&
l
,
const
DataLayout
&
r
)
{
return
l
!=
DataLayout
::
kAnyLayout
&&
r
!=
DataLayout
::
kAnyLayout
&&
l
!=
r
;
bool
ret
=
(
l
!=
DataLayout
::
kAnyLayout
&&
r
!=
DataLayout
::
kAnyLayout
&&
l
!=
r
);
#ifdef PADDLE_WITH_MKLDNN
// Layout transform needed for either non-MKLDNN to MKLDNN or vice versa
ret
|=
(
l
!=
DataLayout
::
kMKLDNN
&&
r
==
DataLayout
::
kMKLDNN
);
ret
|=
(
l
==
DataLayout
::
kMKLDNN
&&
r
!=
DataLayout
::
kMKLDNN
);
#endif
return
ret
;
}
inline
bool
TransFromNeeded
(
const
OpKernelType
&
l
,
const
OpKernelType
&
r
)
{
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
35e820dc
...
...
@@ -83,8 +83,14 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
void
operator
()(
const
char
*
op_type
,
const
char
*
library_type
)
const
{
using
T
=
typename
KERNEL_TYPE
::
ELEMENT_TYPE
;
std
::
string
library
(
library_type
);
std
::
string
data_layout
=
"ANYLAYOUT"
;
if
(
library
==
"MKLDNN"
)
{
data_layout
=
"MKLDNNLAYOUT"
;
}
OpKernelType
key
(
ToDataType
(
std
::
type_index
(
typeid
(
T
))),
PlaceType
(),
DataLayout
::
kAnyLayout
,
StringToLibraryType
(
library_type
));
StringToDataLayout
(
data_layout
),
StringToLibraryType
(
library_type
));
OperatorWithKernel
::
AllOpKernels
()[
op_type
][
key
].
reset
(
new
KERNEL_TYPE
);
constexpr
auto
size
=
std
::
tuple_size
<
std
::
tuple
<
KernelTypes
...
>>::
value
;
...
...
@@ -99,7 +105,8 @@ struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> {
void
operator
()(
const
char
*
op_type
,
const
char
*
library_type
)
const
{}
};
// User can register many kernel in one place. The data type could be different.
// User can register many kernel in one place. The data type could be
// different.
template
<
typename
PlaceType
,
typename
...
KernelType
>
class
OpKernelRegistrar
:
public
Registrar
{
public:
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
35e820dc
...
...
@@ -444,10 +444,25 @@ class RuntimeInferShapeContext : public InferShapeContext {
auto
*
out_tensor
=
out_var
->
GetMutable
<
LoDTensor
>
();
out_tensor
->
set_lod
(
in_tensor
.
lod
());
// TODO(dzhwinter) : reuse ShareLoD in most operators.
// Need to call ShareLayout explicitly in sequence related ops.
// Shall we have a better method to shared info between in/out Tensor?
out_tensor
->
set_layout
(
in_tensor
.
layout
());
// TODO(dzhwinter) : reuse ShareLoD in most operators.
// Need to call ShareLayout explicitly in sequence related ops.
// Shall we have a better method to shared info between in/out Tensor?
#ifdef PADDLE_WITH_MKLDNN
// Fix me: ugly workaround below
// Correct solution:
// set_layout() should NOT be called here (i.e. ShareLoD). Instead,
// layout of output tensor should be set "manually" in Compute()
// of each OPKernel. The reason layout should NOT be shared between
// input and output "automatically" (now by InferShape()->ShareLoD())
// is that layout transform may occur after InferShape().
// Workaround:
// Skip set_layout() when input layout is kMKLDNN
// This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN
// OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called
// in Compute()
if
(
in_tensor
.
layout
()
!=
DataLayout
::
kMKLDNN
)
#endif
out_tensor
->
set_layout
(
in_tensor
.
layout
());
}
void
ShareLayout
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
...
...
@@ -665,7 +680,8 @@ OpKernelType OperatorWithKernel::GetExpectedKernelType(
OpKernelType
OperatorWithKernel
::
GetKernelTypeForVar
(
const
std
::
string
&
var_name
,
const
Tensor
&
tensor
,
const
OpKernelType
&
expected_kernel_type
)
const
{
return
OpKernelType
(
expected_kernel_type
.
data_type_
,
tensor
.
place
());
return
OpKernelType
(
expected_kernel_type
.
data_type_
,
tensor
.
place
(),
tensor
.
layout
());
}
}
// namespace framework
...
...
paddle/fluid/framework/tensor.h
浏览文件 @
35e820dc
...
...
@@ -34,6 +34,28 @@ namespace framework {
class
LoDTensor
;
class
Tensor
{
#ifdef PADDLE_WITH_MKLDNN
public:
inline
mkldnn
::
memory
::
format
format
()
const
{
return
format_
;
}
inline
void
set_format
(
const
mkldnn
::
memory
::
format
format
)
{
format_
=
format
;
}
protected:
/**
* @brief the detail format of memory block which have layout as kMKLDNN
*
* @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C,
* nChw16c, etc. For a MKLDNN memory block, layout will be set as
* DataLayout::kMKLDNN meanwhile detail memory format will be kept in
* this field.
*/
mkldnn
::
memory
::
format
format_
=
mkldnn
::
memory
::
format
::
format_undef
;
#endif
public:
template
<
typename
T
,
size_t
D
,
int
MajorType
,
typename
IndexType
>
friend
struct
EigenTensor
;
...
...
@@ -195,8 +217,10 @@ class Tensor {
* N,C,H,W for respectively the batch size, the number of
* feature maps, the height.
*/
DataLayout
layout_
=
DataLayout
::
kNHWC
;
// Fix me: here just change the default layout to kNCHW
// it doesn't fix the real issue, i.e. feeder should set up tensor layout
// according to actual input data
DataLayout
layout_
=
DataLayout
::
kNCHW
;
/**
* @brief A PlaceHolder may be shared by more than one tensor.
...
...
paddle/fluid/framework/tensor_test.cc
浏览文件 @
35e820dc
...
...
@@ -209,7 +209,7 @@ TEST(Tensor, ReshapeToMatrix) {
TEST
(
Tensor
,
Layout
)
{
framework
::
Tensor
src
;
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kN
HWC
);
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kN
CHW
);
src
.
set_layout
(
framework
::
DataLayout
::
kAnyLayout
);
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kAnyLayout
);
}
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
浏览文件 @
35e820dc
# Add TRT tests
# This test is not stable
# See https://paddleci.ngrok.io/viewLog.html?tab=buildLog&buildTypeId=Paddle_PrCi2&buildId=36834&_focus=8828
#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc io_converter.cc
# DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine
# SERIAL)
nv_library
(
tensorrt_converter
SRCS mul_op.cc conv2d_op.cc fc_op.cc
DEPS tensorrt_engine mul_op
)
...
...
@@ -16,3 +11,5 @@ nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine mul_op SERIAL
)
nv_test
(
test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine mul_op SERIAL
)
nv_test
(
test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
DEPS
${
FLUID_CORE_MODULES
}
tensorrt_engine activation_op SERIAL
)
paddle/fluid/inference/tensorrt/convert/activation_op.cc
浏览文件 @
35e820dc
...
...
@@ -22,7 +22,8 @@ namespace tensorrt {
class
ReluOpConverter
:
public
OpConverter
{
public:
ReluOpConverter
()
{}
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
)
override
{
void
operator
()(
const
framework
::
proto
::
OpDesc
&
op
,
const
framework
::
Scope
&
scope
,
bool
test_mode
)
override
{
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
framework
::
OpDesc
op_desc
(
op
,
nullptr
);
...
...
@@ -33,7 +34,12 @@ class ReluOpConverter : public OpConverter {
nvinfer1
::
IActivationLayer
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Activation
,
*
const_cast
<
nvinfer1
::
ITensor
*>
(
input_tensor
),
nvinfer1
::
ActivationType
::
kRELU
);
engine_
->
SetITensor
(
op_desc
.
Output
(
"Out"
)[
0
],
layer
->
getOutput
(
0
));
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
if
(
test_mode
)
{
// the test framework can not determine which is the
// output, so place the declaration inside.
engine_
->
DeclareOutput
(
output_name
);
}
}
};
...
...
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
35e820dc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
USE_OP
(
relu
);
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
namespace
paddle
{
namespace
inference
{
namespace
tensorrt
{
void
Compare
(
const
std
::
string
op_type
,
float
input
,
float
expect
)
{
TEST
(
ReluOpConverter
,
main
)
{
framework
::
Scope
scope
;
platform
::
CUDAPlace
place
;
platform
::
CUDADeviceContext
ctx
(
place
);
// init fluid op and variable
auto
x_var
=
scope
.
Var
(
"X"
);
auto
x_tensor
=
x_var
->
GetMutable
<
framework
::
LoDTensor
>
();
x_tensor
->
Resize
({
1
,
1
});
x_tensor
->
mutable_data
<
float
>
(
place
);
std
::
vector
<
float
>
init
;
init
.
push_back
(
input
);
framework
::
TensorFromVector
(
init
,
ctx
,
x_tensor
);
auto
out_var
=
scope
.
Var
(
"Out"
);
auto
out_tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
out_tensor
->
Resize
({
1
,
1
});
out_tensor
->
mutable_data
<
float
>
(
place
);
framework
::
OpDesc
op_desc
;
op_desc
.
SetType
(
op_type
);
op_desc
.
SetInput
(
"X"
,
{
"X"
});
op_desc
.
SetOutput
(
"Out"
,
{
"Out"
});
auto
op
=
framework
::
OpRegistry
::
CreateOp
(
*
op_desc
.
Proto
());
// run fluid op
op
->
Run
(
scope
,
place
);
// get fluid output
std
::
vector
<
float
>
out1
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out1
);
// init tensorrt op
cudaStream_t
stream
;
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream
));
TensorRTEngine
*
engine
=
new
TensorRTEngine
(
1
,
1
<<
10
,
&
stream
);
engine
->
InitNetwork
();
engine
->
DeclareInput
(
"X"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
DimsCHW
{
1
,
1
,
1
});
// convert op
OpConverter
op_converter
;
op_converter
.
ConvertOp
(
*
op_desc
.
Proto
(),
engine
);
engine
->
DeclareOutput
(
"Out"
);
engine
->
FreezeNetwork
();
// convert LoDTensor to ITensor
size_t
size
=
x_tensor
->
memory_size
();
EngineIOConverter
::
ConvertInput
(
op_type
,
*
x_tensor
,
engine
->
buffer
(
"X"
).
buffer
,
size
,
&
stream
);
// run tensorrt Outp
engine
->
Execute
(
1
);
// convert ITensor to LoDTensor
EngineIOConverter
::
ConvertOutput
(
op_type
,
engine
->
buffer
(
"Out"
).
buffer
,
out_tensor
,
size
,
&
stream
);
// get tensorrt output
std
::
vector
<
float
>
out2
;
framework
::
TensorToVector
(
*
out_tensor
,
ctx
,
&
out2
);
// compare
ASSERT_EQ
(
out1
[
0
],
out2
[
0
]);
ASSERT_EQ
(
out1
[
0
],
expect
);
delete
engine
;
cudaStreamDestroy
(
stream
);
}
TEST
(
OpConverter
,
ConvertRelu
)
{
Compare
(
"relu"
,
1
,
1
);
// relu(1) = 1
Compare
(
"relu"
,
-
5
,
0
);
// relu(-5) = 0
std
::
unordered_set
<
std
::
string
>
parameters
;
TRTConvertValidation
validator
(
10
,
parameters
,
scope
,
1000
);
validator
.
DeclInputVar
(
"relu-X"
,
nvinfer1
::
Dims2
(
10
,
6
));
validator
.
DeclOutputVar
(
"relu-Out"
,
nvinfer1
::
Dims2
(
10
,
6
));
// Prepare Op description
framework
::
OpDesc
desc
;
desc
.
SetType
(
"relu"
);
desc
.
SetInput
(
"X"
,
{
"relu-X"
});
desc
.
SetOutput
(
"Out"
,
{
"relu-Out"
});
LOG
(
INFO
)
<<
"set OP"
;
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
10
);
}
}
// namespace tensorrt
}
// namespace inference
}
// namespace paddle
USE_OP
(
activation
);
USE_OP
(
relu
);
paddle/fluid/operators/activation_op.cc
浏览文件 @
35e820dc
...
...
@@ -58,14 +58,16 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const
framework
::
OperatorWithKernel
&
oper
,
const
std
::
string
&
name
)
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
auto
it
=
oper
.
Attrs
().
find
(
"use_mkldnn"
);
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
it
!=
oper
.
Attrs
().
end
()
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
name
)
->
type
()),
ctx
.
GetPlace
(),
layout
,
library
);
...
...
paddle/fluid/operators/batch_norm_op.cc
浏览文件 @
35e820dc
...
...
@@ -111,14 +111,16 @@ class BatchNormOp : public framework::OperatorWithKernel {
"Variance input should be of float type"
);
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout
,
library_
);
}
...
...
@@ -367,17 +369,18 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
}
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
DataLayout
::
kAnyLayout
;
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout
,
library_
);
layout
_
,
library_
);
}
};
...
...
paddle/fluid/operators/conv_op.cc
浏览文件 @
35e820dc
...
...
@@ -75,6 +75,11 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
ConvOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -84,6 +89,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
...
...
@@ -99,9 +105,6 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
"float16 can only be used when CUDNN is used"
);
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout
,
library
);
}
...
...
@@ -309,6 +312,10 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
ConvOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -318,12 +325,10 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
...
...
paddle/fluid/operators/fc_op.cc
浏览文件 @
35e820dc
...
...
@@ -43,7 +43,7 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
FCOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kMKLDNN
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
AnyLayout
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
MKLDNN
};
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
...
...
@@ -65,7 +65,7 @@ void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
FCOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kMKLDNN
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
AnyLayout
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
k
MKLDNN
};
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
...
...
paddle/fluid/operators/lrn_op.cc
浏览文件 @
35e820dc
...
...
@@ -124,16 +124,17 @@ namespace {
framework
::
OpKernelType
GetExpectedLRNKernel
(
const
framework
::
ExecutionContext
&
ctx
)
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
...
...
paddle/fluid/operators/pool_mkldnn_op.cc
浏览文件 @
35e820dc
...
...
@@ -24,10 +24,13 @@ using mkldnn::pooling_backward;
// Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Make hashing function more optimial
static
std
::
string
gethash
(
memory
::
dims
&
input_dims
,
std
::
string
&
pooling_type
,
std
::
vector
<
int
>&
ksize
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
string
suffix
)
{
auto
dims2str
=
[](
memory
::
dims
&
operand_dims
)
{
static
std
::
string
gethash
(
const
memory
::
dims
&
input_dims
,
const
std
::
string
&
pooling_type
,
const
std
::
vector
<
int
>&
ksize
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
string
&
suffix
)
{
auto
dims2str
=
[](
const
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
...
...
paddle/fluid/operators/pool_op.cc
浏览文件 @
35e820dc
...
...
@@ -83,6 +83,9 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
framework
::
OpKernelType
PoolOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -92,11 +95,10 @@ framework::OpKernelType PoolOp::GetExpectedKernelType(
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
ctx
.
GetPlace
(),
layout_
,
library_
);
...
...
@@ -112,6 +114,9 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
framework
::
OpKernelType
PoolOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -121,6 +126,7 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
...
...
@@ -129,8 +135,6 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
PADDLE_ENFORCE_EQ
(
library_
,
framework
::
LibraryType
::
kCUDNN
,
"float16 can only be used when CUDNN is used"
);
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout_
,
library_
);
}
...
...
paddle/fluid/operators/softmax_op.cc
浏览文件 @
35e820dc
...
...
@@ -49,6 +49,9 @@ class SoftmaxOp : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
// choose cudnn kernel if the runtime supported.
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
@@ -58,6 +61,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
...
...
@@ -68,9 +72,7 @@ class SoftmaxOp : public framework::OperatorWithKernel {
"float16 can only be used on GPU place"
);
}
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
framework
::
StringToDataLayout
(
data_format
),
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
(),
layout_
,
library_
);
}
};
...
...
@@ -142,6 +144,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
// choose cudnn kernel if the runtime supported.
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
CanCUDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kCUDNN
;
...
...
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
35e820dc
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <mkldnn.h>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
platform
{
...
...
@@ -86,5 +87,17 @@ inline mkldnn::memory::data_type MKLDNNGetDataType<float>() {
return
mkldnn
::
memory
::
f32
;
}
inline
void
Reorder
(
const
mkldnn
::
memory
&
src
,
const
mkldnn
::
memory
&
dst
)
{
auto
reorder_prim
=
mkldnn
::
reorder
(
src
,
dst
);
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
pipeline
.
push_back
(
reorder_prim
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
}
inline
mkldnn
::
memory
::
format
GetMKLDNNFormat
(
const
mkldnn
::
memory
memory
)
{
return
static_cast
<
mkldnn
::
memory
::
format
>
(
memory
.
get_primitive_desc
().
desc
().
data
.
format
);
}
}
// namespace platform
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录