Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
20392be0
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
20392be0
编写于
12月 19, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into refine/jit
fix conflicts test=develop
上级
f332f589
9035bb81
变更
35
显示空白变更内容
内联
并排
Showing
35 changed file
with
924 addition
and
168 deletion
+924
-168
CMakeLists.txt
CMakeLists.txt
+2
-2
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+25
-5
cmake/operators.cmake
cmake/operators.cmake
+1
-1
paddle/fluid/framework/ngraph_bridge.cc
paddle/fluid/framework/ngraph_bridge.cc
+8
-83
paddle/fluid/framework/ngraph_operator.cc
paddle/fluid/framework/ngraph_operator.cc
+2
-1
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+89
-25
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+46
-16
paddle/fluid/framework/type_defs.h
paddle/fluid/framework/type_defs.h
+3
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-3
paddle/fluid/operators/beam_search_decode_op.cc
paddle/fluid/operators/beam_search_decode_op.cc
+2
-1
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
+17
-6
paddle/fluid/operators/distributed/grpc_serde.cc
paddle/fluid/operators/distributed/grpc_serde.cc
+8
-0
paddle/fluid/operators/distributed/sendrecvop_utils.h
paddle/fluid/operators/distributed/sendrecvop_utils.h
+7
-2
paddle/fluid/operators/distributed/variable_response.cc
paddle/fluid/operators/distributed/variable_response.cc
+1
-1
paddle/fluid/operators/ngraph/ngraph_ops.h
paddle/fluid/operators/ngraph/ngraph_ops.h
+25
-0
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h
+52
-0
paddle/fluid/operators/ngraph/ops/mul_op.h
paddle/fluid/operators/ngraph/ops/mul_op.h
+134
-0
paddle/fluid/operators/transpose_mkldnn_op.cc
paddle/fluid/operators/transpose_mkldnn_op.cc
+124
-0
paddle/fluid/operators/transpose_op.cc
paddle/fluid/operators/transpose_op.cc
+47
-2
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+0
-2
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+1
-1
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+2
-0
paddle/fluid/platform/dynload/dynamic_loader.h
paddle/fluid/platform/dynload/dynamic_loader.h
+6
-0
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+1
-1
paddle/fluid/platform/dynload/tensorrt.h
paddle/fluid/platform/dynload/tensorrt.h
+1
-1
paddle/fluid/platform/dynload/warpctc.h
paddle/fluid/platform/dynload/warpctc.h
+1
-1
paddle/fluid/platform/ngraph_helper.h
paddle/fluid/platform/ngraph_helper.h
+105
-0
paddle/fluid/platform/port.h
paddle/fluid/platform/port.h
+0
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+8
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+13
-5
python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py
...fluid/tests/unittests/ngraph/test_activation_ngraph_op.py
+58
-0
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
...paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
+42
-0
python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py
.../paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py
+76
-0
python/paddle/fluid/tests/unittests/test_transpose_op.py
python/paddle/fluid/tests/unittests/test_transpose_op.py
+11
-2
python/setup.py.in
python/setup.py.in
+5
-4
未找到文件。
CMakeLists.txt
浏览文件 @
20392be0
...
...
@@ -208,10 +208,10 @@ include(external/xxhash) # download xxhash
include
(
external/dlpack
)
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
include
(
external/warpctc
)
# download, build, install warpctc
if
(
NOT WIN32
)
# there is no official support of warpctc, nccl, cupti in windows
include
(
external/warpctc
)
# download, build, install warpctc
# there is no official support of nccl, cupti in windows
include
(
cupti
)
include
(
external/gzstream
)
endif
(
NOT WIN32
)
...
...
cmake/external/warpctc.cmake
浏览文件 @
20392be0
...
...
@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
# Used in unit test test_WarpCTCLayer
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
OR WIN32
)
SET
(
USE_OMP OFF
)
ELSE
()
SET
(
USE_OMP ON
)
ENDIF
()
IF
(
WIN32
)
SET
(
WARPCTC_REPOSITORY
"https://github.com/wopeizl/warp-ctc.git"
)
ELSE
()
SET
(
WARPCTC_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
)
ENDIF
()
ExternalProject_Add
(
extern_warpctc
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
GIT_REPOSITORY
${
WARPCTC_REPOSITORY
}
PREFIX
${
WARPCTC_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_OMP=
${
USE_OMP
}
...
...
@@ -59,6 +67,18 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
)
IF
(
WIN32
)
IF
(
NOT EXISTS
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
)
add_custom_command
(
TARGET extern_warpctc POST_BUILD
COMMAND cmake -E copy
${
WARPCTC_INSTALL_DIR
}
/bin/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
ENDIF
()
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
else
(
WIN32
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
ENDIF
(
WIN32
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
...
...
cmake/operators.cmake
浏览文件 @
20392be0
...
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
endif
()
if
(
WIN32
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
endif
()
...
...
paddle/fluid/framework/ngraph_bridge.cc
浏览文件 @
20392be0
...
...
@@ -16,100 +16,25 @@ limitations under the License. */
#include <functional>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/ngraph/ngraph_ops.h"
#include "paddle/fluid/platform/enforce.h"
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
framework
{
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
const
VariableNameMap
&
var_map
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
var_map
.
at
(
name
);
PADDLE_ENFORCE_EQ
(
var_names
.
size
(),
1
,
"op %s name %s expects one associated var"
,
op
->
Type
(),
name
);
if
(
ngb_node_map
->
find
(
var_names
[
0
])
!=
ngb_node_map
->
end
())
{
return
(
*
ngb_node_map
)[
var_names
[
0
]];
}
else
{
return
nullptr
;
}
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetInputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
name
,
op
->
Inputs
(),
ngb_node_map
);
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetOutputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
name
,
op
->
Outputs
(),
ngb_node_map
);
}
static
void
SetOutputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
ngraph
::
Node
>
node
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
op
->
Outputs
().
at
(
name
);
if
(
var_names
.
size
()
==
1
)
{
(
*
ngb_node_map
)[
var_names
[
0
]]
=
node
;
}
else
if
(
var_names
.
size
()
==
0
)
{
(
*
ngb_node_map
)[
""
]
=
node
;
}
else
{
PADDLE_THROW
(
"name %s has more than 1 var_names."
,
name
);
}
}
static
bool
HasOutput
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
)
{
auto
&
outputs
=
op
->
Outputs
();
if
(
outputs
.
find
(
name
)
==
outputs
.
end
())
return
false
;
return
outputs
.
at
(
name
).
size
()
>
0
;
}
template
<
typename
T
>
static
void
BuildBinaryNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
x
=
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
y
=
GetInputNode
(
op
,
"Y"
,
ngb_node_map
);
auto
out
=
std
::
make_shared
<
T
>
(
x
,
y
);
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
template
<
typename
T
>
static
void
BuildUnaryNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
input
=
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
out
=
std
::
make_shared
<
T
>
(
input
);
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
std
::
map
<
std
::
string
,
std
::
function
<
void
(
const
std
::
shared_ptr
<
OperatorBase
>&
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
)
>>
NgraphBridge
::
NG_NODE_MAP
=
{{
"relu"
,
BuildUnaryNode
<
ngraph
::
op
::
Relu
>
},
{
"tanh"
,
BuildUnaryNode
<
ngraph
::
op
::
Tanh
>
}};
NgraphBridge
::
NG_NODE_MAP
=
{
{
"mul"
,
paddle
::
operators
::
ngraphs
::
BuildMulNode
},
{
"mul_grad"
,
paddle
::
operators
::
ngraphs
::
BuildMulGradNode
},
{
"relu"
,
paddle
::
operators
::
ngraphs
::
BuildUnaryNode
<
ngraph
::
op
::
Relu
>
},
{
"tanh"
,
paddle
::
operators
::
ngraphs
::
BuildUnaryNode
<
ngraph
::
op
::
Tanh
>
}};
void
NgraphBridge
::
BuildNgNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
)
{
auto
&
op_type
=
op
->
Type
();
...
...
paddle/fluid/framework/ngraph_operator.cc
浏览文件 @
20392be0
...
...
@@ -278,7 +278,8 @@ std::shared_ptr<ngraph::runtime::Backend> NgraphEngine::backend_ =
ngraph
::
runtime
::
Backend
::
create
(
"CPU"
);
void
NgraphEngine
::
GetNgInputShape
(
std
::
shared_ptr
<
OperatorBase
>
op
)
{
op
->
RuntimeInferShape
(
scope_
,
place_
);
RuntimeContext
ctx
(
op
->
Inputs
(),
op
->
Outputs
(),
scope_
);
op
->
RuntimeInferShape
(
scope_
,
place_
,
ctx
);
for
(
auto
&
var_name_item
:
op
->
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
auto
*
var
=
scope_
.
FindVar
(
var_name
);
...
...
paddle/fluid/framework/operator.cc
浏览文件 @
20392be0
...
...
@@ -137,6 +137,23 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
}
RuntimeContext
::
RuntimeContext
(
const
VariableNameMap
&
innames
,
const
VariableNameMap
&
outnames
,
const
Scope
&
scope
)
{
for
(
auto
&
var_name_item
:
innames
)
{
std
::
vector
<
Variable
*>&
input_vars
=
inputs
[
var_name_item
.
first
];
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
input_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
for
(
auto
&
var_name_item
:
outnames
)
{
std
::
vector
<
Variable
*>&
output_vars
=
outputs
[
var_name_item
.
first
];
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
output_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
if
(
platform
::
is_gpu_place
(
place
))
{
...
...
@@ -412,11 +429,48 @@ bool ExecutionContext::HasOutput(const std::string& name) const {
return
var
!=
nullptr
;
}
const
Variable
*
ExecutionContext
::
InputVar
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
inputs
.
find
(
name
);
if
(
it
==
ctx_
.
inputs
.
end
())
return
nullptr
;
PADDLE_ENFORCE_LE
(
it
->
second
.
size
(),
1UL
,
"Operator %s's input %s should contain only one variable."
,
op_
.
Type
(),
name
);
return
it
->
second
.
empty
()
?
nullptr
:
it
->
second
[
0
];
}
const
Variable
*
ExecutionContext
::
LegacyInputVar
(
const
std
::
string
&
name
)
const
{
auto
ipt
=
op_
.
Input
(
name
);
return
ipt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
ipt
);
}
Variable
*
ExecutionContext
::
OutputVar
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
outputs
.
find
(
name
);
if
(
it
==
ctx_
.
outputs
.
end
())
return
nullptr
;
PADDLE_ENFORCE_LE
(
it
->
second
.
size
(),
1UL
,
"Operator %s's output %s should contain only one variable."
,
op_
.
Type
(),
name
);
return
it
->
second
.
empty
()
?
nullptr
:
it
->
second
[
0
];
}
Variable
*
ExecutionContext
::
LegacyOutputVar
(
const
std
::
string
&
name
)
const
{
auto
opt
=
op_
.
Output
(
name
);
return
opt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
opt
);
}
template
<
>
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
return
Input
<
LoDTensor
>
(
name
);
}
template
<
>
const
Tensor
*
ExecutionContext
::
LegacyInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
return
LegacyInput
<
LoDTensor
>
(
name
);
}
template
<
>
const
std
::
vector
<
const
Tensor
*>
ExecutionContext
::
MultiInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
...
...
@@ -441,6 +495,11 @@ Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
return
Output
<
LoDTensor
>
(
name
);
}
template
<
>
Tensor
*
ExecutionContext
::
LegacyOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
return
LegacyOutput
<
LoDTensor
>
(
name
);
}
template
<
>
std
::
vector
<
Tensor
*>
ExecutionContext
::
MultiOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
...
...
@@ -477,23 +536,22 @@ bool OpSupportGPU(const std::string& op_type) {
class
RuntimeInferShapeContext
:
public
InferShapeContext
{
public:
RuntimeInferShapeContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
)
:
op_
(
op
),
scope_
(
scope
)
{}
RuntimeInferShapeContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
const
RuntimeContext
&
ctx
)
:
op_
(
op
),
scope_
(
scope
),
ctx_
(
ctx
)
{}
bool
HasInput
(
const
std
::
string
&
name
)
const
override
{
// has only one input
const
auto
&
ins
=
op_
.
Inputs
()
;
const
auto
&
ins
=
ctx_
.
inputs
;
auto
it
=
ins
.
find
(
name
);
if
(
it
==
ins
.
end
())
{
return
false
;
}
const
auto
&
in
=
it
->
second
;
if
(
in
.
size
()
==
0
||
in
[
0
]
==
kEmptyVarName
)
{
return
false
;
}
if
(
in
.
size
()
==
0
)
return
false
;
PADDLE_ENFORCE_EQ
(
in
.
size
(),
1UL
,
"Input %s should not have more than one inputs"
,
name
);
return
scope_
.
FindVar
(
in
[
0
])
!=
nullptr
;
return
in
[
0
]
!=
nullptr
;
}
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
...
...
@@ -678,6 +736,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
private:
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
RuntimeContext
&
ctx_
;
};
static
void
CheckTensorNANOrInf
(
const
std
::
string
&
name
,
...
...
@@ -696,15 +755,15 @@ static void CheckTensorNANOrInf(const std::string& name,
}
void
OperatorWithKernel
::
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
const
platform
::
Place
&
place
,
const
RuntimeContext
&
ctx
)
const
{
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
,
ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
}
void
OperatorWithKernel
::
RunImpl
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
this
->
InferShape
(
&
infer_shape_ctx
);
RuntimeContext
ctx
(
Inputs
(),
Outputs
(),
scope
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
place
);
...
...
@@ -718,15 +777,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the
// transform functions are ready.
// for (auto& candidate : kKernelPriority) {
// Do selection
// }
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ExecutionContext
(
*
this
,
scope
,
*
dev_ctx
));
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ExecutionContext
(
*
this
,
scope
,
*
dev_ctx
,
ctx
));
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
auto
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
...
...
@@ -748,7 +800,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// do data transformScope &transfer_scope;
std
::
vector
<
std
::
string
>
transfered_inplace_vars
;
auto
*
transfer_scope
=
TryTransferData
(
scope
,
expected_kernel_key
,
&
transfered_inplace_vars
);
PrepareData
(
scope
,
expected_kernel_key
,
&
transfered_inplace_vars
,
&
ctx
);
// exec scope is the scope that kernel actually executed on.
const
Scope
&
exec_scope
=
...
...
@@ -758,7 +810,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
dev_ctx
=
pool
.
Get
(
expected_kernel_key
.
place_
);
}
kernel_iter
->
second
(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
));
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
exec_scope
,
ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
// TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
// not Scope. Imperative mode only pass inputs and get outputs.
kernel_iter
->
second
(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
,
ctx
));
if
(
!
transfered_inplace_vars
.
empty
())
{
// there is inplace variable has been transfered.
...
...
@@ -782,6 +838,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
}
}
void
OperatorWithKernel
::
TransferInplaceVarsBack
(
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
inplace_vars
,
const
Scope
&
transfer_scope
)
const
{
...
...
@@ -797,13 +854,19 @@ void OperatorWithKernel::TransferInplaceVarsBack(
}
}
Scope
*
OperatorWithKernel
::
TryTransfer
Data
(
Scope
*
OperatorWithKernel
::
Prepare
Data
(
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
)
const
{
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
,
RuntimeContext
*
ctx
)
const
{
Scope
*
new_scope
=
nullptr
;
for
(
auto
&
var_name_item
:
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
std
::
vector
<
Variable
*>&
input_vars
=
ctx
->
inputs
[
var_name_item
.
first
];
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
auto
&
var_name
=
var_name_item
.
second
[
i
];
auto
*
var
=
scope
.
FindVar
(
var_name
);
input_vars
[
i
]
=
var
;
// Only tensor can be tranfer to another device.
if
(
var
==
nullptr
||
!
VarIsTensor
(
*
var
))
{
continue
;
...
...
@@ -851,6 +914,7 @@ Scope* OperatorWithKernel::TryTransferData(
}
auto
*
trans_var
=
new_scope
->
Var
(
var_name
);
input_vars
[
i
]
=
trans_var
;
Tensor
out
;
TransformData
(
expected_kernel_key
,
kernel_type_for_var
,
*
tensor_in
,
&
out
);
...
...
paddle/fluid/framework/operator.h
浏览文件 @
20392be0
...
...
@@ -70,6 +70,15 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);
class
OperatorBase
;
class
ExecutionContext
;
class
RuntimeContext
{
public:
RuntimeContext
(
const
VariableNameMap
&
innames
,
const
VariableNameMap
&
outnames
,
const
Scope
&
scope
);
VariableValueMap
inputs
;
VariableValueMap
outputs
;
};
/**
* OperatorBase has the basic elements that Net will call to do computation.
* Only CreateOperator from OpRegistry will new Operator directly. User
...
...
@@ -129,7 +138,8 @@ class OperatorBase {
void
SetIsCalledByExecutor
(
bool
x
)
{
run_by_executor_
=
x
;
}
virtual
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{}
const
platform
::
Place
&
place
,
const
RuntimeContext
&
ctx
)
const
{}
protected:
std
::
string
type_
;
...
...
@@ -156,8 +166,9 @@ class OperatorBase {
class
ExecutionContext
{
public:
ExecutionContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
device_context
)
:
op_
(
op
),
scope_
(
scope
),
device_context_
(
device_context
)
{}
const
platform
::
DeviceContext
&
device_context
,
const
RuntimeContext
&
ctx
)
:
op_
(
op
),
scope_
(
scope
),
device_context_
(
device_context
),
ctx_
(
ctx
)
{}
const
OperatorBase
&
op
()
const
{
return
op_
;
}
...
...
@@ -180,15 +191,9 @@ class ExecutionContext {
return
op_
.
Outputs
(
name
).
size
();
}
const
Variable
*
InputVar
(
const
std
::
string
&
name
)
const
{
auto
ipt
=
op_
.
Input
(
name
);
return
ipt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
ipt
);
}
const
Variable
*
InputVar
(
const
std
::
string
&
name
)
const
;
Variable
*
OutputVar
(
const
std
::
string
&
name
)
const
{
auto
opt
=
op_
.
Output
(
name
);
return
opt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
opt
);
}
Variable
*
OutputVar
(
const
std
::
string
&
name
)
const
;
const
std
::
vector
<
const
Variable
*>
MultiInputVar
(
const
std
::
string
&
name
)
const
{
...
...
@@ -227,6 +232,22 @@ class ExecutionContext {
return
var
==
nullptr
?
nullptr
:
var
->
GetMutable
<
T
>
();
}
template
<
typename
T
>
const
T
*
LegacyInput
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
LegacyInputVar
(
name
);
return
var
==
nullptr
?
nullptr
:
&
var
->
Get
<
T
>
();
}
template
<
typename
T
>
T
*
LegacyOutput
(
const
std
::
string
&
name
)
const
{
auto
var
=
LegacyOutputVar
(
name
);
return
var
==
nullptr
?
nullptr
:
var
->
GetMutable
<
T
>
();
}
const
Variable
*
LegacyInputVar
(
const
std
::
string
&
name
)
const
;
Variable
*
LegacyOutputVar
(
const
std
::
string
&
name
)
const
;
template
<
typename
T
>
const
std
::
vector
<
const
T
*>
MultiInput
(
const
std
::
string
&
name
)
const
{
auto
names
=
op_
.
Inputs
(
name
);
...
...
@@ -286,11 +307,16 @@ class ExecutionContext {
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
platform
::
DeviceContext
&
device_context_
;
const
RuntimeContext
&
ctx_
;
};
template
<>
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
const
Tensor
*
ExecutionContext
::
LegacyInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
const
std
::
vector
<
const
Tensor
*>
ExecutionContext
::
MultiInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
...
...
@@ -298,6 +324,9 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
template
<>
Tensor
*
ExecutionContext
::
Output
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
Tensor
*
ExecutionContext
::
LegacyOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
std
::
vector
<
Tensor
*>
ExecutionContext
::
MultiOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
...
...
@@ -350,8 +379,8 @@ class OperatorWithKernel : public OperatorBase {
OpInfoMap
::
Instance
().
Get
(
Type
()).
infer_shape_
(
ctx
);
}
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
;
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
const
RuntimeContext
&
ctx
)
const
override
;
protected:
virtual
OpKernelType
GetExpectedKernelType
(
const
ExecutionContext
&
ctx
)
const
;
...
...
@@ -371,9 +400,10 @@ class OperatorWithKernel : public OperatorBase {
*
* * transfered_inplace_vars is a output vector.
*/
Scope
*
TryTransferData
(
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
)
const
;
Scope
*
PrepareData
(
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
,
RuntimeContext
*
ctx
)
const
;
void
TransferInplaceVarsBack
(
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
inplace_vars
,
...
...
paddle/fluid/framework/type_defs.h
浏览文件 @
20392be0
...
...
@@ -28,8 +28,11 @@ class OperatorBase;
class
OpDesc
;
class
InferShapeContext
;
class
BlockDesc
;
class
Variable
;
using
VariableNameMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
;
// TODO(panyx0718): Replace vector with something like gtl::Vector.
using
VariableValueMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
Variable
*>>
;
// The order should be as same as framework.proto
using
Attribute
=
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
20392be0
...
...
@@ -65,9 +65,7 @@ endif()
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
if
(
NOT WIN32
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
endif
()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
if
(
WITH_GPU
)
...
...
paddle/fluid/operators/beam_search_decode_op.cc
浏览文件 @
20392be0
...
...
@@ -122,7 +122,8 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
dev_place
);
framework
::
ExecutionContext
ctx
(
*
this
,
scope
,
dev_ctx
);
framework
::
RuntimeContext
run_ctx
(
Inputs
(),
Outputs
(),
scope
);
framework
::
ExecutionContext
ctx
(
*
this
,
scope
,
dev_ctx
,
run_ctx
);
const
LoDTensorArray
*
ids
=
ctx
.
Input
<
LoDTensorArray
>
(
"Ids"
);
const
LoDTensorArray
*
scores
=
ctx
.
Input
<
LoDTensorArray
>
(
"Scores"
);
...
...
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
浏览文件 @
20392be0
...
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <nccl.h>
#endif
#include <sys/time.h>
#include <limits>
#include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h"
...
...
@@ -31,7 +32,12 @@ namespace distributed {
class
IOBufWriter
{
public:
static
void
Append
(
butil
::
IOBuf
*
iobuf
,
int
k
,
const
char
*
v
,
int64_t
vlen
)
{
static
void
Append
(
const
std
::
string
&
varname
,
butil
::
IOBuf
*
iobuf
,
int
k
,
const
char
*
v
,
int64_t
vlen
)
{
if
(
vlen
>=
std
::
numeric_limits
<
int
>::
max
()
||
vlen
<
0
)
{
LOG
(
FATAL
)
<<
"AppendZeroCopy varname:"
<<
varname
<<
", vlen:"
<<
vlen
;
}
iobuf
->
append
(
reinterpret_cast
<
char
*>
(
&
k
),
4
);
iobuf
->
append
(
reinterpret_cast
<
char
*>
(
&
vlen
),
8
);
iobuf
->
append
(
v
,
vlen
);
...
...
@@ -87,6 +93,10 @@ class IOBufWriter {
int
k
,
const
char
*
v
,
int64_t
vlen
,
bool
in_cuda_pinned
,
void
(
*
destroy
)(
void
*
),
void
*
user_data
)
{
if
(
vlen
>=
std
::
numeric_limits
<
int
>::
max
()
||
vlen
<
0
)
{
LOG
(
FATAL
)
<<
"AppendZeroCopy varname:"
<<
varname
<<
", vlen:"
<<
vlen
;
}
#ifdef PADDLE_WITH_BRPC_RDMA
IOBufWriter
::
AppendRdmaZeroCopy
(
varname
,
iobuf
,
k
,
v
,
vlen
,
in_cuda_pinned
,
destroy
,
user_data
);
...
...
@@ -134,7 +144,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
request
->
set_type
(
::
sendrecv
::
NCCL_ID
);
const
ncclUniqueId
&
uid
=
var
->
Get
<
ncclUniqueId
>
();
// TODO(gongwb): use append_zero to avoid data copy.
IOBufWriter
::
Append
(
iobuf
,
IOBufWriter
::
Append
(
name
,
iobuf
,
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
uid
.
internal
,
NCCL_UNIQUE_ID_BYTES
);
return
;
...
...
@@ -149,7 +159,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
// FIXME(gongwb): it seems that can use zero copy.
if
(
var_is_not_stable
)
{
IOBufWriter
::
Append
(
iobuf
,
::
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
name
,
iobuf
,
::
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
static_cast
<
const
char
*>
(
payload
->
ptr
()),
payload
->
memory_size
());
}
else
{
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
...
...
@@ -171,10 +181,11 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
framework
::
SizeOfType
(
typeid
(
int64_t
)
);
PADDLE_ENFORCE
(
VectorElemName
(
slr
->
rows
())
==
typeid
(
int64_t
).
name
());
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
sizeof
(
int64_t
);
IOBufWriter
::
Append
(
iobuf
,
::
sendrecv
::
VariableMessage
::
kRowsFieldNumber
,
IOBufWriter
::
Append
(
name
,
iobuf
,
::
sendrecv
::
VariableMessage
::
kRowsFieldNumber
,
reinterpret_cast
<
const
char
*>
(
slr
->
rows
().
data
()),
static_cast
<
int64_t
>
(
rows_memory_size
));
}
...
...
paddle/fluid/operators/distributed/grpc_serde.cc
浏览文件 @
20392be0
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#endif
#include <limits>
#include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h"
...
...
@@ -102,6 +103,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
e
.
WriteVarlengthBeginning
(
VarMsg
::
kSerializedFieldNumber
,
payload
->
memory_size
());
if
(
payload
->
memory_size
()
>=
std
::
numeric_limits
<
int
>::
max
())
{
LOG
(
FATAL
)
<<
"AppendZeroCopy varname:"
<<
name
<<
", vlen:"
<<
payload
->
memory_size
();
}
// steal reference of tensor data
::
grpc
::
Slice
slices
[
4
];
// metadata, tensor, rows meta, rows
int
num_slices
=
2
;
// only SelectedRows have rows buffer
...
...
@@ -115,7 +120,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
ProtoEncodeHelper
e2
(
static_cast
<
char
*>
(
buf
),
128
);
PADDLE_ENFORCE
(
VectorElemName
(
slr
->
rows
())
==
typeid
(
int64_t
).
name
());
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
sizeof
(
int64_t
);
e2
.
WriteVarlengthBeginning
(
VarMsg
::
kRowsFieldNumber
,
rows_memory_size
);
slices
[
2
]
=
::
grpc
::
Slice
(
e2
.
size
());
memcpy
(
const_cast
<
uint8_t
*>
(
slices
[
2
].
begin
()),
e2
.
data
(),
e2
.
size
());
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.h
浏览文件 @
20392be0
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <iostream>
#include <string>
#include <typeindex>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
...
...
@@ -23,9 +24,8 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
#include "paddle/fluid/platform/port.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -83,6 +83,11 @@ inline framework::proto::VarType::Type ToVarType(
}
}
// Returns the implementation-defined name (as produced by typeid(...).name())
// of the element type Elem of a single-parameter container template T<Elem>.
// The argument is used only for template argument deduction; it is never read.
template <template <typename> class T, typename Elem>
std::string VectorElemName(const T<Elem>& arg) {
  const std::type_info& elem_info = typeid(Elem);
  return std::string(elem_info.name());
}
}
// namespace distributed
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/distributed/variable_response.cc
浏览文件 @
20392be0
...
...
@@ -118,7 +118,7 @@ bool VariableResponse::CopyLodTensorData(
VLOG
(
6
)
<<
"Tensor.memory_size = "
<<
tensor
->
memory_size
()
<<
", Buffer Size = "
<<
length
;
PADDLE_ENFORCE_EQ
(
tensor
->
memory_size
(),
length
);
PADDLE_ENFORCE_EQ
(
tensor
->
memory_size
(),
static_cast
<
unsigned
int
>
(
length
)
);
return
ReadRaw
(
input
,
ctx
,
tensor
->
place
(),
tensor_data
,
length
);
}
...
...
paddle/fluid/operators/ngraph/ngraph_ops.h
0 → 100644
浏览文件 @
20392be0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains the list of the ngraph operators for Paddle.
*
 * ATTENTION: This header requires some C++11 features. For older C++ standards
 * or for C, we might release a separate API.
*/
#pragma once
#include "ops/binary_unnary_op.h"
#include "ops/mul_op.h"
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h
0 → 100644
浏览文件 @
20392be0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#pragma once
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
ngraphs
{
// Builds an ngraph node of type T from the operator's "X" and "Y" inputs and
// stores it in ngb_node_map under the operator's "Out" output.
// T must be constructible from the two input nodes.
template <typename T>
static void BuildBinaryNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  namespace plat = paddle::platform;

  auto lhs = plat::GetInputNode(op, "X", ngb_node_map);
  auto rhs = plat::GetInputNode(op, "Y", ngb_node_map);
  auto result = std::make_shared<T>(lhs, rhs);
  plat::SetOutputNode(op, "Out", result, ngb_node_map);
}
// Builds an ngraph node of type T from the operator's single "X" input and
// stores it in ngb_node_map under the operator's "Out" output.
// T must be constructible from the input node.
template <typename T>
static void BuildUnaryNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  namespace plat = paddle::platform;

  auto operand = plat::GetInputNode(op, "X", ngb_node_map);
  auto result = std::make_shared<T>(operand);
  plat::SetOutputNode(op, "Out", result, ngb_node_map);
}
}
// namespace ngraphs
}
// namespace operators
}
// namespace paddle
#endif
paddle/fluid/operators/ngraph/ops/mul_op.h
0 → 100644
浏览文件 @
20392be0
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#pragma once
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
ngraphs
{
// Translates a "mul" operator into an ngraph Dot node.
//
// Inputs "X" and "Y" whose rank is greater than 2 are first reshaped to 2-D
// using the operator's "x_num_col_dims" / "y_num_col_dims" attributes as the
// split points. If a node is already registered for "Out" and its shape
// differs from the Dot result, the result is reshaped to that shape before
// being stored under "Out".
static void BuildMulNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  namespace plat = paddle::platform;

  auto attrs = paddle::framework::AttrReader(op->Attrs());
  int x_split = attrs.Get<int>("x_num_col_dims");
  int y_split = attrs.Get<int>("y_num_col_dims");

  auto lhs = plat::GetInputNode(op, "X", ngb_node_map);
  auto rhs = plat::GetInputNode(op, "Y", ngb_node_map);

  // Collapse higher-rank operands into matrices; 1-D/2-D operands pass through.
  auto lhs_2d =
      lhs->get_shape().size() > 2
          ? plat::NgReshaper(lhs, plat::FlattenTo2d(lhs->get_shape(), x_split))
          : lhs;
  auto rhs_2d =
      rhs->get_shape().size() > 2
          ? plat::NgReshaper(rhs, plat::FlattenTo2d(rhs->get_shape(), y_split))
          : rhs;

  std::shared_ptr<ngraph::Node> product =
      std::make_shared<ngraph::op::Dot>(lhs_2d, rhs_2d);

  // Restore the shape of a previously registered "Out" node, if there is one.
  auto registered = plat::GetOutputNode(op, "Out", ngb_node_map);
  if (registered) {
    if (registered->get_shape() != product->get_shape()) {
      product = plat::NgReshaper(product, registered->get_shape());
    }
  }
  plat::SetOutputNode(op, "Out", product, ngb_node_map);
}
// Translates a "mul_grad" operator into ngraph nodes.
//
// Reads inputs "X", "Y" and "Out@GRAD" and, for each of the outputs "X@GRAD"
// and "Y@GRAD" that the operator actually declares, emits:
//   X@GRAD = Out@GRAD(2-D) . transpose(Y(2-D))
//   Y@GRAD = transpose(X(2-D)) . Out@GRAD(2-D)
// Each gradient is reshaped back to the shape of the corresponding input when
// the Dot result has a different shape.
//
// Operands of rank > 2 are flattened to matrices first. X and Y are split at
// "x_num_col_dims" / "y_num_col_dims". Out@GRAD is split at "x_num_col_dims"
// as well: the forward output keeps the leading x_num_col_dims dimensions of
// X, so that is where its row/column boundary lies (the previous hard-coded
// split of 2 was only correct when x_num_col_dims == 2).
static void BuildMulGradNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  namespace plat = paddle::platform;

  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  const int x_num_col_dims = op_attrs.Get<int>("x_num_col_dims");
  const int y_num_col_dims = op_attrs.Get<int>("y_num_col_dims");

  auto x = plat::GetInputNode(op, "X", ngb_node_map);
  auto y = plat::GetInputNode(op, "Y", ngb_node_map);
  auto dout = plat::GetInputNode(op, "Out@GRAD", ngb_node_map);

  const bool is_dx = plat::HasOutput(op, "X@GRAD");
  const bool is_dy = plat::HasOutput(op, "Y@GRAD");
  if (!is_dx && !is_dy) {
    return;  // No gradient requested: nothing to build.
  }

  const ngraph::Shape x_shape = x->get_shape();
  const ngraph::Shape y_shape = y->get_shape();

  // Flattens a rank > 2 node to 2-D at the given split; others pass through.
  auto to_matrix = [](const std::shared_ptr<ngraph::Node>& node,
                      int num_col_dims) -> std::shared_ptr<ngraph::Node> {
    if (node->get_shape().size() <= 2) {
      return node;
    }
    return plat::NgReshaper(
        node, plat::FlattenTo2d(node->get_shape(), num_col_dims));
  };

  // Transposes a 2-D node by reshaping with the axis order {1, 0}.
  auto transpose = [](const std::shared_ptr<ngraph::Node>& matrix)
      -> std::shared_ptr<ngraph::Node> {
    auto reversed_shape = matrix->get_shape();
    std::reverse(reversed_shape.begin(), reversed_shape.end());
    return std::make_shared<ngraph::op::Reshape>(
        matrix, ngraph::AxisVector{1, 0}, reversed_shape);
  };

  // Stores grad under the named output, reshaping to target if necessary.
  auto emit = [&](const std::string& name, std::shared_ptr<ngraph::Node> grad,
                  const ngraph::Shape& target) {
    if (grad->get_shape() != target) {
      grad = plat::NgReshaper(grad, target);
    }
    plat::SetOutputNode(op, name, grad, ngb_node_map);
  };

  dout = to_matrix(dout, x_num_col_dims);

  if (is_dx) {
    auto y_transpose = transpose(to_matrix(y, y_num_col_dims));
    std::shared_ptr<ngraph::Node> dx =
        std::make_shared<ngraph::op::Dot>(dout, y_transpose);
    emit("X@GRAD", dx, x_shape);
  }

  if (is_dy) {
    auto x_transpose = transpose(to_matrix(x, x_num_col_dims));
    std::shared_ptr<ngraph::Node> dy =
        std::make_shared<ngraph::op::Dot>(x_transpose, dout);
    emit("Y@GRAD", dy, y_shape);
  }
}
}
// namespace ngraphs
}
// namespace operators
}
// namespace paddle
#endif
paddle/fluid/operators/transpose_mkldnn_op.cc
0 → 100644
浏览文件 @
20392be0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
framework
::
DataLayout
;
// MKL-DNN kernel for the transpose operators.
//
// The transposition is expressed as an MKL-DNN reorder: the input is described
// with its current layout, the output with a blocked layout whose strides are
// permuted according to the "axis" attribute, and a reorder primitive copies
// the data from one to the other. The kernel refuses to run unless the
// "is_test" attribute is true.
template <typename T>
class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  // Transposes input "X" into output "Out" according to attribute "axis".
  // A 1-D transpose is the identity, so the output simply shares the input's
  // data in that case.
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");
    const bool is_test = ctx.Attr<bool>("is_test");
    PADDLE_ENFORCE(
        is_test == true,
        "TransposeMKLDNN works only for inference! Set is_test = True");
    auto& dev_ctx =
        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();
    std::vector<int> axis = ctx.Attr<std::vector<int>>("axis");
    int ndims = axis.size();
    auto* input = ctx.Input<Tensor>("X");
    auto* output = ctx.Output<Tensor>("Out");
    const T* input_data = input->data<T>();

    if (ndims == 1) {
      output->ShareDataWith(*input);
      return;
    }

    // Identity permutation: describes the input in its plain (untransposed)
    // dimension order.
    std::vector<int> nchw_axis(ndims, 0);
    for (size_t i = 0; i < nchw_axis.size(); ++i) {
      nchw_axis[i] = i;
    }

    std::vector<int> nchw_tz = paddle::framework::vectorize2int(input->dims());

    // Use the tensor's own MKL-DNN format when it is not plain nchw; otherwise
    // build a blocked descriptor with identity strides.
    auto src_md =
        input->format() != mkldnn::memory::format::nchw
            ? platform::MKLDNNMemDesc(nchw_tz, platform::MKLDNNGetDataType<T>(),
                                      input->format())
            : Axis2MemoryDesc(nchw_tz, nchw_axis);

    this->TransposeKernel(ctx.GetPlace(), Axis2MemoryDesc(nchw_tz, axis),
                          src_md, output, input_data, nchw_tz, mkldnn_engine);
  }

 protected:
  // Builds a blocked MKL-DNN memory descriptor with logical dimensions nchw_tz
  // and strides assigned by walking `axis` from last to first, so that
  // axis.back() becomes the fastest-varying dimension.
  // NOTE: the data type is fixed to mkldnn_f32 regardless of T.
  mkldnn::memory::desc Axis2MemoryDesc(std::vector<int>& nchw_tz,
                                       std::vector<int>& axis) const {
    // Zero-initialize so that fields and array slots not written below do not
    // hold indeterminate values.
    mkldnn_memory_desc_t mem_fmt = {};

    mem_fmt.primitive_kind = mkldnn_memory;
    mem_fmt.ndims = axis.size();
    for (unsigned int i = 0; i < nchw_tz.size(); ++i) {
      mem_fmt.dims[i] = nchw_tz[i];  // logical dimensions (nchw format,
                                     // regardless physical layout)
    }
    mem_fmt.data_type = mkldnn_f32;
    mem_fmt.format = mkldnn_blocked;

    unsigned int total_stride = 1;
    for (int i = nchw_tz.size() - 1; i >= 0; --i) {
      mem_fmt.layout_desc.blocking.padding_dims[i] =
          nchw_tz[i];  // logical dimensions (nchw format, regardless physical
                       // layout)
      mem_fmt.layout_desc.blocking.block_dims[i] = 1;
      mem_fmt.layout_desc.blocking.offset_padding_to_data[i] = 0;  // no offset
      mem_fmt.layout_desc.blocking.strides[0][axis[i]] = total_stride;
      mem_fmt.layout_desc.blocking.strides[1][axis[i]] = 1;
      total_stride *= nchw_tz[axis[i]];
    }
    mem_fmt.layout_desc.blocking.offset_padding = 0;  // no initial offset
    return mem_fmt;
  }

  // Allocates the output buffer (sized from the output descriptor) and runs an
  // eager MKL-DNN reorder from data_i (described by md_i) into it (described
  // by md_o). nchw_dims is currently unused.
  void TransposeKernel(platform::Place place, mkldnn::memory::desc md_o,
                       mkldnn::memory::desc md_i, Tensor* output,
                       const T* data_i, std::vector<int>& nchw_dims,
                       const mkldnn::engine& eng) const {
    // Make Memory primitive descriptors
    auto mpd_o = mkldnn::memory::primitive_desc(md_o, eng);
    auto mpd_i = mkldnn::memory::primitive_desc(md_i, eng);

    auto data_o = output->mutable_data<T>(
        place, paddle::memory::Allocator::kDefault, mpd_o.get_size());

    // The source buffer is handed over as a non-const pointer; the cast is
    // explicit about dropping constness.
    auto src = mkldnn::memory(mpd_i, const_cast<T*>(data_i));
    auto dst = mkldnn::memory(mpd_o, data_o);

    auto r = mkldnn::reorder(src, dst);
    mkldnn::stream(mkldnn::stream::kind::eager).submit({r}).wait();
  }
};
}
// namespace operators
}
// namespace paddle
// Short alias for the operators namespace used by the registrations below.
namespace ops = paddle::operators;

// Register the float MKL-DNN CPU kernel for both the "transpose2" and the
// "transpose" operators; both use the same kernel class.
REGISTER_OP_KERNEL(transpose2, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::TransposeMKLDNNOpKernel<float>);
REGISTER_OP_KERNEL(transpose, MKLDNN, ::paddle::platform::CPUPlace,
                   ops::TransposeMKLDNNOpKernel<float>);
paddle/fluid/operators/transpose_op.cc
浏览文件 @
20392be0
...
...
@@ -16,6 +16,10 @@ limitations under the License. */
#include <string>
#include <vector>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
operators
{
...
...
@@ -53,11 +57,32 @@ class TransposeOp : public framework::OperatorWithKernel {
}
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
}
protected:
// Chooses the kernel type for this operator.
// The data layout comes from the "data_format" attribute and the library
// defaults to kPlain. When built with PADDLE_WITH_MKLDNN and
// platform::CanMKLDNNBeUsed(ctx) holds, both library and layout switch to
// MKLDNN. The data type is taken from input "X".
framework::OpKernelType GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const override {
  framework::LibraryType library = framework::LibraryType::kPlain;
  const std::string format_name = ctx.Attr<std::string>("data_format");
  framework::DataLayout layout = framework::StringToDataLayout(format_name);
#ifdef PADDLE_WITH_MKLDNN
  if (platform::CanMKLDNNBeUsed(ctx)) {
    library = framework::LibraryType::kMKLDNN;
    layout = framework::DataLayout::kMKLDNN;
  }
#endif
  return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
                                 ctx.GetPlace(), layout, library);
}
};
class
TransposeOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddAttr
<
bool
>
(
"is_test"
,
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true."
)
.
SetDefault
(
false
);
AddInput
(
"X"
,
"(Tensor) The input tensor, tensors with rank up to 6 are supported."
);
...
...
@@ -67,6 +92,16 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int>) A list of values, and the size of the list should be "
"the same with the input tensor rank. This operator permutes the input "
"tensor's axes according to the values given."
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
AddComment
(
R"DOC(
Transpose Operator.
...
...
@@ -144,8 +179,18 @@ class Transpose2Op : public TransposeOp {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
(),
ctx
.
device_context
());
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
(),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
};
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
20392be0
...
...
@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
list
(
APPEND CUDA_SRCS cupti.cc
)
endif
(
CUPTI_FOUND
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
if
(
NOT WIN32
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
endif
(
NOT WIN32
)
if
(
WITH_MKLML
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
endif
()
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
20392be0
...
...
@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
20392be0
...
...
@@ -201,6 +201,8 @@ void* GetCurandDsoHandle() {
void
*
GetWarpCTCDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"warpctc.dll"
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
#endif
...
...
paddle/fluid/platform/dynload/dynamic_loader.h
浏览文件 @
20392be0
...
...
@@ -18,6 +18,12 @@ namespace paddle {
namespace
platform
{
namespace
dynload
{
// DECLARE_TYPE(__name, args...) spells the trailing return type of the
// dynamic-load wrapper operators. Outside Windows it is the exact type of the
// call expression __name(args...); when _WIN32 is defined it is
// decltype(auto), so the return type is deduced from the wrapper's body.
#ifndef _WIN32
#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
#else
#define DECLARE_TYPE(__name, ...) decltype(auto)
#endif
void
*
GetCublasDsoHandle
();
void
*
GetCUDNNDsoHandle
();
void
*
GetCUPTIDsoHandle
();
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
20392be0
...
...
@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using mklmlFunc = decltype(&::__name); \
std::call_once(mklml_dso_flag, []() { \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
...
...
paddle/fluid/platform/dynload/tensorrt.h
浏览文件 @
20392be0
...
...
@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
#define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(tensorrt_dso_flag, []() { \
tensorrt_dso_handle = \
...
...
paddle/fluid/platform/dynload/warpctc.h
浏览文件 @
20392be0
...
...
@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using warpctcFunc = decltype(&::__name); \
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
...
...
paddle/fluid/platform/ngraph_helper.h
0 → 100644
浏览文件 @
20392be0
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#pragma once
#include <functional>
#include <string>
#include <vector>
#include "ngraph/ngraph.hpp"
namespace
paddle
{
namespace
platform
{
// Collapses a shape into two dimensions, splitting at index `num`:
//   { product(sh[0 .. num)), product(sh[num .. end)) }
// An empty range contributes a factor of 1.
// Precondition: 0 <= num <= sh.size(); the function does not check this.
static ngraph::Shape FlattenTo2d(ngraph::Shape sh, int num) {
  const auto split = std::begin(sh) + num;
  // The initial value must be a size_t: std::accumulate keeps its running
  // result in the type of the initial value, so a plain `1` would accumulate
  // in int and truncate large dimension products.
  const size_t rows = std::accumulate(std::begin(sh), split, size_t{1},
                                      std::multiplies<size_t>());
  const size_t cols = std::accumulate(split, std::end(sh), size_t{1},
                                      std::multiplies<size_t>());
  return ngraph::Shape{rows, cols};
}
// Wraps `input` in an ngraph Reshape node that produces `shape` while reading
// the input axes in their natural order (0, 1, ..., rank - 1).
static std::shared_ptr<ngraph::Node> NgReshaper(
    std::shared_ptr<ngraph::Node> input, ngraph::Shape shape) {
  const size_t rank = input->get_shape().size();
  std::vector<size_t> natural_order;
  natural_order.reserve(rank);
  for (size_t axis = 0; axis < rank; ++axis) {
    natural_order.push_back(axis);
  }
  return std::make_shared<ngraph::op::Reshape>(
      input, ngraph::AxisVector(natural_order), shape);
}
// Looks up the ngraph node bound to parameter `prm` of `op`.
// `var_map` maps parameter names to variable-name lists; `prm` must be present
// (at() is used) and must map to exactly one variable name. Returns the node
// registered for that variable in ngb_node_map, or nullptr when there is none.
static std::shared_ptr<ngraph::Node> GetNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    const std::string prm, const paddle::framework::VariableNameMap& var_map,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto& var_names = var_map.at(prm);
  PADDLE_ENFORCE_EQ(var_names.size(), 1,
                    "op %s prm %s expects one associated var", op->Type(), prm);
  auto entry = ngb_node_map->find(var_names[0]);
  if (entry == ngb_node_map->end()) {
    return nullptr;
  }
  return entry->second;
}
// Returns the ngraph node bound to input parameter `prm` of `op`, by
// delegating to GetNode with the operator's input map.
static std::shared_ptr<ngraph::Node> GetInputNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    const std::string prm,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  return GetNode(op, prm, op->Inputs(), ngb_node_map);
}
// Returns the ngraph node bound to output parameter `prm` of `op`, by
// delegating to GetNode with the operator's output map.
static std::shared_ptr<ngraph::Node> GetOutputNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    const std::string prm,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  return GetNode(op, prm, op->Outputs(), ngb_node_map);
}
// Registers `node` in ngb_node_map for output parameter `prm` of `op`.
// With exactly one variable name the node is stored under that name; with no
// variable names it is stored under the empty-string key; more than one
// variable name is an error.
static void SetOutputNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    const std::string prm, std::shared_ptr<ngraph::Node> node,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto& var_names = op->Outputs().at(prm);
  switch (var_names.size()) {
    case 1:
      (*ngb_node_map)[var_names[0]] = node;
      break;
    case 0:
      (*ngb_node_map)[""] = node;
      break;
    default:
      PADDLE_THROW("prm %s has more than 1 var_names.", prm);
  }
}
// Reports whether `op` declares output parameter `prm` with at least one
// variable name attached to it.
static bool HasOutput(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    const std::string prm) {
  auto& outputs = op->Outputs();
  auto entry = outputs.find(prm);
  return entry != outputs.end() && !entry->second.empty();
}
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/port.h
浏览文件 @
20392be0
...
...
@@ -55,7 +55,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
static
void
*
dlopen
(
const
char
*
filename
,
int
flag
)
{
std
::
string
file_name
(
filename
);
file_name
.
replace
(
0
,
file_name
.
size
()
-
1
,
'/'
,
'\\'
);
HMODULE
hModule
=
LoadLibrary
(
file_name
.
c_str
());
if
(
!
hModule
)
{
throw
std
::
runtime_error
(
file_name
+
" not found."
);
...
...
python/paddle/fluid/__init__.py
浏览文件 @
20392be0
...
...
@@ -102,6 +102,13 @@ def __bootstrap__():
import
sys
import
os
import
platform
if
os
.
name
==
'nt'
:
third_lib_path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
+
os
.
sep
+
'..'
+
os
.
sep
+
'libs'
os
.
environ
[
'path'
]
+=
';'
+
third_lib_path
sys
.
path
.
append
(
third_lib_path
)
from
.
import
core
in_test
=
'unittest'
in
sys
.
modules
...
...
@@ -128,13 +135,12 @@ def __bootstrap__():
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
]
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
if
os
.
name
!=
'nt'
:
read_env_flags
.
append
(
'warpctc_dir'
)
read_env_flags
.
append
(
'cpu_deterministic'
)
if
core
.
is_compiled_with_dist
():
...
...
python/paddle/fluid/framework.py
浏览文件 @
20392be0
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
collections
import
contextlib
import
os
import
re
import
six
import
sys
...
...
@@ -27,6 +28,13 @@ from .proto import framework_pb2
try
:
from
.
import
core
except
ImportError
as
e
:
if
os
.
name
==
'nt'
:
raise
ImportError
(
"""NOTE: You may need to run
\"
set PATH=c:\python27\lib:%PATH%
\"
if you encounters
\"
mkldnn.dll not found
\"
errors. If you have python
installed in other directory, replace
\"
c:\python27\lib" with your own
directory. The original error is:
\n
"""
+
cpt
.
get_exception_message
(
e
))
else
:
raise
ImportError
(
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
...
...
python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py
0 → 100644
浏览文件 @
20392be0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.tests.unittests.op_test
import
OpTest
from
paddle.fluid.tests.unittests.test_activation_op
import
TestRelu
,
TestTanh
class TestNGRAPHReluDim2(TestRelu):
    """Re-runs the inherited TestRelu case with its setup unchanged."""

    def setUp(self):
        # No overrides: reuse the parent's inputs and expected outputs as-is.
        super(TestNGRAPHReluDim2, self).setUp()
class TestNGRAPHTanhDim2(TestTanh):
    """Re-runs the inherited TestTanh case with its setup unchanged."""

    def setUp(self):
        # No overrides: reuse the parent's inputs and expected outputs as-is.
        super(TestNGRAPHTanhDim2, self).setUp()
class TestNGRAPHReluDim4(TestRelu):
    """TestRelu variant that replaces the inputs with a 4-D float32 tensor."""

    def setUp(self):
        super(TestNGRAPHReluDim4, self).setUp()

        data = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype("float32")
        # The same reason with TestAbs
        near_zero = np.abs(data) < 0.005
        data[near_zero] = 0.02
        expected = np.maximum(data, 0)

        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(data)}
        self.outputs = {'Out': expected}
class TestNGRAPHTanhDim4(TestTanh):
    """TestTanh variant that replaces the inputs with a 4-D float32 tensor."""

    def setUp(self):
        super(TestNGRAPHTanhDim4, self).setUp()

        sample = np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype("float32")
        self.inputs = {'X': sample}
        self.outputs = {'Out': np.tanh(sample)}
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
0 → 100644
浏览文件 @
20392be0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
paddle.fluid.tests.unittests.test_mul_op
import
TestMulOp
,
TestMulOp2
,
TestFP16MulOp1
,
TestFP16MulOp2
class TestNGRAPHMulOp(TestMulOp):
    """Re-runs the inherited TestMulOp case with init_dtype_type as a no-op."""

    def init_dtype_type(self):
        # Intentionally empty: overrides the parent's hook without doing anything.
        pass
class TestNGRAPHMulOp2(TestMulOp2):
    """Re-runs the inherited TestMulOp2 case with init_dtype_type as a no-op."""

    def init_dtype_type(self):
        # Intentionally empty: overrides the parent's hook without doing anything.
        pass
class TestNGRAPHFP16MulOp1(TestFP16MulOp1):
    """Re-runs the inherited TestFP16MulOp1 case with init_dtype_type as a no-op."""

    def init_dtype_type(self):
        # Intentionally empty: overrides the parent's hook without doing anything.
        pass
class TestNGRAPHFP16MulOp2(TestFP16MulOp2):
    """Re-runs the inherited TestFP16MulOp2 case with init_dtype_type as a no-op."""

    def init_dtype_type(self):
        # Intentionally empty: overrides the parent's hook without doing anything.
        pass
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py
0 → 100644
浏览文件 @
20392be0
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
test_transpose_op
import
TestTransposeOp
class TestTransposeMKLDNN(TestTransposeOp):
    """Transpose test configured for the MKL-DNN kernel in inference mode.

    The gradient-check test methods are overridden as no-ops, so only the
    inherited non-gradient tests do any work.
    """

    def init_op_type(self):
        self.op_type, self.use_mkldnn, self.is_test = "transpose2", True, True

    def test_check_grad(self):
        pass

    def test_check_grad_no_input(self):
        pass

    def test_check_grad_no_filter(self):
        pass
class TestCase0MKLDNN(TestTransposeMKLDNN):
    """1-D case: shape (3,) with the identity permutation (0,)."""

    def initTestCase(self):
        self.shape = (3, )
        self.axis = (0, )
class TestCase1a(TestTransposeMKLDNN):
    """3-D case: shape (3, 4, 5), swapping the last two axes."""

    def initTestCase(self):
        self.shape = (3, 4, 5)
        self.axis = (0, 2, 1)
class TestCase1b(TestTransposeMKLDNN):
    """3-D case: shape (3, 4, 5) with the axis order fully reversed."""

    def initTestCase(self):
        self.shape = (3, 4, 5)
        self.axis = (2, 1, 0)
class TestCase2(TestTransposeMKLDNN):
    """4-D case: shape (2, 3, 4, 5) with permutation (0, 2, 3, 1)."""

    def initTestCase(self):
        self.shape = (2, 3, 4, 5)
        self.axis = (0, 2, 3, 1)
class TestCase3(TestTransposeMKLDNN):
    """5-D case: shape (2, 3, 4, 5, 6) with permutation (4, 2, 3, 1, 0)."""

    def initTestCase(self):
        self.shape = (2, 3, 4, 5, 6)
        self.axis = (4, 2, 3, 1, 0)
class TestCase4(TestTransposeMKLDNN):
    """6-D case: shape (2, 3, 4, 5, 6, 1) with permutation (4, 2, 3, 1, 0, 5)."""

    def initTestCase(self):
        self.shape = (2, 3, 4, 5, 6, 1)
        self.axis = (4, 2, 3, 1, 0, 5)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_transpose_op.py
浏览文件 @
20392be0
...
...
@@ -21,15 +21,24 @@ from op_test import OpTest
class
TestTransposeOp
(
OpTest
):
def
setUp
(
self
):
self
.
init_op_type
()
self
.
initTestCase
()
self
.
op_type
=
"transpose2"
self
.
inputs
=
{
'X'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
'axis'
:
list
(
self
.
axis
)}
self
.
attrs
=
{
'axis'
:
list
(
self
.
axis
),
'use_mkldnn'
:
self
.
use_mkldnn
,
'is_test'
:
self
.
is_test
,
}
self
.
outputs
=
{
'XShape'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
),
'Out'
:
self
.
inputs
[
'X'
].
transpose
(
self
.
axis
)
}
def
init_op_type
(
self
):
self
.
op_type
=
"transpose2"
self
.
use_mkldnn
=
False
self
.
is_test
=
False
def
test_check_output
(
self
):
self
.
check_output
(
no_check_set
=
[
'XShape'
])
...
...
python/setup.py.in
浏览文件 @
20392be0
...
...
@@ -160,10 +160,11 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
# put all thirdparty libraries in paddle.libs
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
if os.name != 'nt':
package_data['paddle.libs']= []
package_data['paddle.libs']=['libwarpctc' + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
package_data['paddle.libs']= []
package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if '${WITH_MKL}' == 'ON':
shutil.copy('${MKLML_LIB}', libs_path)
shutil.copy('${MKLML_IOMP_LIB}', libs_path)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录