Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
d12252e6
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d12252e6
编写于
2月 19, 2019
作者:
X
xuezhong
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_sample_logits_op
test=develop
上级
bf6eb60d
a661d0bd
变更
64
隐藏空白更改
内联
并排
Showing
64 changed file
with
1369 addition
and
439 deletion
+1369
-439
cmake/external/protobuf.cmake
cmake/external/protobuf.cmake
+2
-2
cmake/external/python.cmake
cmake/external/python.cmake
+2
-2
paddle/fluid/API.spec
paddle/fluid/API.spec
+4
-4
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+6
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-0
paddle/fluid/framework/details/inplace_op_pass.cc
paddle/fluid/framework/details/inplace_op_pass.cc
+1
-1
paddle/fluid/framework/details/memory_optimize_helper.cc
paddle/fluid/framework/details/memory_optimize_helper.cc
+66
-6
paddle/fluid/framework/details/memory_optimize_helper.h
paddle/fluid/framework/details/memory_optimize_helper.h
+2
-0
paddle/fluid/framework/details/memory_optimize_helper_test.cc
...le/fluid/framework/details/memory_optimize_helper_test.cc
+46
-0
paddle/fluid/framework/details/memory_optimize_pass.cc
paddle/fluid/framework/details/memory_optimize_pass.cc
+57
-51
paddle/fluid/framework/inplace_op_inference_test.cc
paddle/fluid/framework/inplace_op_inference_test.cc
+16
-16
paddle/fluid/operators/controlflow/compare_op.cc
paddle/fluid/operators/controlflow/compare_op.cc
+5
-5
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+14
-21
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+36
-0
paddle/fluid/operators/jit/gen/CMakeLists.txt
paddle/fluid/operators/jit/gen/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/gen/embseqpool.cc
paddle/fluid/operators/jit/gen/embseqpool.cc
+149
-0
paddle/fluid/operators/jit/gen/embseqpool.h
paddle/fluid/operators/jit/gen/embseqpool.h
+81
-0
paddle/fluid/operators/jit/gen/seqpool.h
paddle/fluid/operators/jit/gen/seqpool.h
+1
-1
paddle/fluid/operators/jit/helper.cc
paddle/fluid/operators/jit/helper.cc
+1
-0
paddle/fluid/operators/jit/helper.h
paddle/fluid/operators/jit/helper.h
+9
-0
paddle/fluid/operators/jit/kernel_base.h
paddle/fluid/operators/jit/kernel_base.h
+47
-19
paddle/fluid/operators/jit/kernel_key.cc
paddle/fluid/operators/jit/kernel_key.cc
+5
-0
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/more/mkl/mkl.cc
paddle/fluid/operators/jit/more/mkl/mkl.cc
+11
-0
paddle/fluid/operators/jit/more/mkl/mkl.h
paddle/fluid/operators/jit/more/mkl/mkl.h
+29
-0
paddle/fluid/operators/jit/refer/CMakeLists.txt
paddle/fluid/operators/jit/refer/CMakeLists.txt
+1
-0
paddle/fluid/operators/jit/refer/refer.cc
paddle/fluid/operators/jit/refer/refer.cc
+2
-0
paddle/fluid/operators/jit/refer/refer.h
paddle/fluid/operators/jit/refer/refer.h
+34
-0
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+65
-0
paddle/fluid/operators/ngraph/ngraph_bridge.cc
paddle/fluid/operators/ngraph/ngraph_bridge.cc
+2
-0
paddle/fluid/operators/ngraph/ngraph_ops.h
paddle/fluid/operators/ngraph/ngraph_ops.h
+1
-0
paddle/fluid/operators/ngraph/ops/batch_norm_op.h
paddle/fluid/operators/ngraph/ops/batch_norm_op.h
+7
-0
paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
+145
-0
paddle/fluid/operators/ngraph/ops/fill_constant_op.h
paddle/fluid/operators/ngraph/ops/fill_constant_op.h
+0
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+93
-16
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+16
-0
python/paddle/fluid/compiler.py
python/paddle/fluid/compiler.py
+4
-1
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+2
-1
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+8
-12
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+4
-1
python/paddle/fluid/layers/layer_function_generator.py
python/paddle/fluid/layers/layer_function_generator.py
+6
-2
python/paddle/fluid/layers/ops.py
python/paddle/fluid/layers/ops.py
+2
-2
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+3
-3
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+2
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+4
-0
python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py
...e/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py
+1
-33
python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py
...fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py
+0
-16
python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py
...dle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py
+0
-55
python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
...id/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
+275
-0
python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py
.../tests/unittests/ngraph/test_elementwise_add_ngraph_op.py
+2
-11
python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
...id/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
+17
-7
python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py
...addle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py
+0
-7
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
...paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
+1
-33
python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py
...dle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py
+10
-46
python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py
...ddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py
+0
-19
python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py
...le/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py
+0
-6
python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
...ddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
+0
-25
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+4
-0
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+1
-1
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
...e/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
+4
-0
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
...id/tests/unittests/test_ir_memory_optimize_transformer.py
+48
-0
python/requirements.txt
python/requirements.txt
+1
-1
tools/manylinux1/Dockerfile.x64
tools/manylinux1/Dockerfile.x64
+4
-4
tools/manylinux1/build_scripts/build.sh
tools/manylinux1/build_scripts/build.sh
+6
-6
未找到文件。
cmake/external/protobuf.cmake
浏览文件 @
d12252e6
...
...
@@ -203,7 +203,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
ENDIF
()
SET
(
PROTOBUF_REPO
"https://github.com/google/protobuf.git"
)
SET
(
PROTOBUF_TAG
"
9f75c5aa851cd877fb0d93ccc31b8567a6706546
"
)
SET
(
PROTOBUF_TAG
"
v3.6.1
"
)
ExternalProject_Add
(
${
TARGET_NAME
}
...
...
@@ -231,7 +231,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
)
ENDFUNCTION
()
SET
(
PROTOBUF_VERSION 3.1
)
SET
(
PROTOBUF_VERSION 3.
6.
1
)
IF
(
NOT PROTOBUF_FOUND
)
build_protobuf
(
extern_protobuf FALSE
)
...
...
cmake/external/python.cmake
浏览文件 @
d12252e6
...
...
@@ -74,8 +74,8 @@ IF(PYTHONINTERP_FOUND)
find_python_module
(
wheel REQUIRED
)
find_python_module
(
google.protobuf REQUIRED
)
FIND_PACKAGE
(
NumPy REQUIRED
)
IF
(
${
PY_GOOGLE.PROTOBUF_VERSION
}
AND
${
PY_GOOGLE.PROTOBUF_VERSION
}
VERSION_LESS
"3.
0.0
"
)
MESSAGE
(
FATAL_ERROR
"Found Python Protobuf
${
PY_GOOGLE.PROTOBUF_VERSION
}
< 3.
0.0
, "
IF
(
${
PY_GOOGLE.PROTOBUF_VERSION
}
AND
${
PY_GOOGLE.PROTOBUF_VERSION
}
VERSION_LESS
"3.
6.1
"
)
MESSAGE
(
FATAL_ERROR
"Found Python Protobuf
${
PY_GOOGLE.PROTOBUF_VERSION
}
< 3.
6.1
, "
"please use pip to upgrade protobuf. pip install -U protobuf"
)
ENDIF
()
ENDIF
(
PYTHONINTERP_FOUND
)
...
...
paddle/fluid/API.spec
浏览文件 @
d12252e6
...
...
@@ -262,7 +262,7 @@ paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=N
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=
'ignored'
, defaults=(None,))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords=
None
, defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
...
...
@@ -474,11 +474,11 @@ paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_
paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None
paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor,
arg0
: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor,
arg0
: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor,
lod
: List[List[int]]) -> None
paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor,
recursive_sequence_lengths
: List[List[int]]) -> None
paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int]
paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None
paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray,
arg0
: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray,
tensor
: paddle.fluid.core.LoDTensor) -> None
paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
d12252e6
...
...
@@ -50,7 +50,12 @@ cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_
cc_library
(
gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper
)
if
(
WITH_GPU
)
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info
)
else
()
cc_library
(
memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper cpu_info
)
endif
()
cc_library
(
memory_optimize_pass SRCS memory_optimize_pass.cc DEPS memory_optimize_helper pass
)
cc_library
(
inplace_op_pass SRCS inplace_op_pass.cc DEPS memory_optimize_pass op_info
)
cc_library
(
modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle op_graph_view multi_devices_helper
)
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
d12252e6
...
...
@@ -240,7 +240,9 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
continue
;
}
}
VLOG
(
3
)
<<
"Start Apply Pass "
<<
pass
->
Type
();
graph
=
pass
->
Apply
(
std
::
move
(
graph
));
VLOG
(
3
)
<<
"Finish Apply Pass "
<<
pass
->
Type
();
}
return
graph
;
}
...
...
paddle/fluid/framework/details/inplace_op_pass.cc
浏览文件 @
d12252e6
...
...
@@ -49,7 +49,7 @@ DEFINE_bool(
"If this option turns on, only these op in whitelist can be inplaced."
"If it turns off, all of the running op can be candidate of inplaced op."
"Such as scale, elementwise_add"
"By default, it's turned o
n
"
);
"By default, it's turned o
ff
"
);
DECLARE_string
(
memory_optimize_debug
);
...
...
paddle/fluid/framework/details/memory_optimize_helper.cc
浏览文件 @
d12252e6
...
...
@@ -13,13 +13,19 @@
// limitations under the License.
#include "paddle/fluid/framework/details/memory_optimize_helper.h"
#include <algorithm>
#include <deque>
#include <functional>
#include <i
ostream
>
#include <i
terator
>
#include <numeric>
#include <sstream>
#include <string>
#include "paddle/fluid/framework/var_desc.h"
#include "paddle/fluid/platform/cpu_info.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif // PADDLE_WITH_CUDA
namespace
paddle
{
namespace
framework
{
...
...
@@ -166,6 +172,11 @@ struct NodeComparator {
bool
operator
()(
ir
::
Node
*
lhs
,
ir
::
Node
*
rhs
)
const
{
auto
*
lhs_desc
=
FindVarDescInBlock
(
lhs
);
auto
*
rhs_desc
=
FindVarDescInBlock
(
rhs
);
// match data type
if
(
lhs_desc
->
GetDataType
()
!=
rhs_desc
->
GetDataType
())
{
return
false
;
}
// match shape
auto
lhs_shape
=
lhs_desc
->
GetShape
();
auto
rhs_shape
=
rhs_desc
->
GetShape
();
if
((
lhs_shape
[
0
]
==
-
1
&&
rhs_shape
[
0
]
==
-
1
)
||
...
...
@@ -230,6 +241,27 @@ ir::Node* OrderedSet::FindBestFitNode(ir::Node* var) const {
return
found_node
;
}
ir
::
Node
*
OrderedSet
::
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
{
ir
::
Node
*
found_node
=
nullptr
;
NodeComparator
functor
;
auto
it
=
std
::
find_if
(
nodes_
.
begin
(),
nodes_
.
end
(),
[
&
](
const
NodeVector
&
v
)
{
if
(
v
.
front
()
==
prev
)
return
true
;
else
return
false
;
});
PADDLE_ENFORCE
(
it
!=
nodes_
.
end
(),
"Not found previous in node list!"
);
for
(
it
=
std
::
next
(
it
);
it
!=
nodes_
.
end
();
++
it
)
{
auto
&
candidate
=
it
->
front
();
if
(
functor
(
var
,
candidate
))
{
found_node
=
candidate
;
break
;
}
}
return
found_node
;
}
bool
OrderedSet
::
Has
(
ir
::
Node
*
var
)
const
{
if
(
mark_table_
.
count
(
var
->
Name
()))
{
auto
&
node_in_samename
=
mark_table_
.
at
(
var
->
Name
());
...
...
@@ -241,10 +273,15 @@ bool OrderedSet::Has(ir::Node* var) const {
return
false
;
}
void
OrderedSet
::
Erase
(
const
std
::
string
&
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
));
nodes_
.
erase
(
mark_table_
[
var
]);
mark_table_
.
erase
(
var
);
}
void
OrderedSet
::
Erase
(
ir
::
Node
*
var
)
{
PADDLE_ENFORCE
(
mark_table_
.
count
(
var
->
Name
()));
nodes_
.
erase
(
mark_table_
[
var
->
Name
()]);
mark_table_
.
erase
(
var
->
Name
());
PADDLE_ENFORCE
(
var
!=
nullptr
);
Erase
(
var
->
Name
());
}
std
::
string
OrderedSet
::
ToString
()
const
{
...
...
@@ -274,14 +311,35 @@ bool NodeCanReused(ir::Node* node) {
return
flag
;
}
int
MinChunkSize
()
{
int
size
{
0
};
#ifdef PADDLE_WITH_CUDA
size
=
platform
::
GpuMinChunkSize
();
#else
size
=
platform
::
CpuMinChunkSize
();
#endif // PADDLE_WITH_CUDA
return
size
;
}
bool
NodeCanReused
(
const
VarDesc
&
node
)
{
auto
type
=
node
.
GetType
();
// only these types holds bulk of gpu memory
if
(
!
(
type
==
proto
::
VarType
::
LOD_TENSOR
||
type
==
proto
::
VarType
::
SELECTED_ROWS
||
type
==
proto
::
VarType
::
LOD_TENSOR_ARRAY
))
{
return
false
;
}
if
(
node
.
Persistable
()
||
node
.
GetShape
().
empty
())
{
// persistable variable is parameter
if
(
node
.
Persistable
())
{
return
false
;
}
// shape < min_chunk_size is meaningless.
// further more, fetched loss always has size = 1
// which should not be reused.
auto
shape
=
node
.
GetShape
();
int
size
=
std
::
abs
(
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
()));
if
(
shape
.
empty
()
||
size
<
MinChunkSize
())
{
return
false
;
}
// vars can be @EMPTY@, @LR_DECAY_REUSE_ID@. For example, while_grad
...
...
@@ -461,7 +519,9 @@ ir::Node* ControlFlowGraph::GetNodeByName(const std::string& name,
for
(
auto
*
node
:
ops_
)
{
if
(
node
==
op
)
break
;
for
(
auto
&
output
:
node
->
outputs
)
{
if
(
output
->
Name
()
==
name
)
{
PADDLE_ENFORCE
((
output
!=
nullptr
&&
output
->
IsVar
()),
"Output is empty!"
);
if
(
output
->
Var
()
&&
output
->
Name
()
==
name
)
{
found_node
=
output
;
}
}
...
...
paddle/fluid/framework/details/memory_optimize_helper.h
浏览文件 @
d12252e6
...
...
@@ -55,6 +55,7 @@ class OrderedSet {
void
Insert
(
ir
::
Node
*
var
);
void
Erase
(
ir
::
Node
*
var
);
void
Erase
(
const
std
::
string
&
var
);
bool
Has
(
ir
::
Node
*
var
)
const
;
void
Clear
()
{
mark_table_
.
clear
();
...
...
@@ -62,6 +63,7 @@ class OrderedSet {
}
// find the bestfit shape node block with var.
ir
::
Node
*
FindBestFitNode
(
ir
::
Node
*
var
)
const
;
ir
::
Node
*
FindNextBestFitNode
(
ir
::
Node
*
var
,
ir
::
Node
*
prev
)
const
;
// map store non-const iterator, can not promise const
int
GetNodeIndexInPool
(
ir
::
Node
*
var
);
// pool all node to string
...
...
paddle/fluid/framework/details/memory_optimize_helper_test.cc
浏览文件 @
d12252e6
...
...
@@ -107,6 +107,52 @@ TEST(OrderedSet, Normal) {
ASSERT_EQ
(
pool
.
GetNodeIndexInPool
(
cache
),
5
);
// match 4:[5,2]
}
}
TEST
(
OrderedSet
,
FindBestFitNode
)
{
OrderedSet
pool
;
std
::
vector
<
std
::
unique_ptr
<
ir
::
Node
>>
nodes
;
ProgramDesc
prog
;
BlockDesc
*
block_desc
=
prog
.
MutableBlock
(
0
);
auto
*
op_desc
=
block_desc
->
AppendOp
();
op_desc
->
SetType
(
"dummy"
);
std
::
unique_ptr
<
ir
::
Node
>
op
=
ir
::
CreateNodeForTest
(
op_desc
);
{
auto
desc
=
block_desc
->
Var
(
"a"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"b"
);
desc
->
SetShape
({
128
,
129
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
{
auto
desc
=
block_desc
->
Var
(
"c"
);
desc
->
SetShape
({
128
,
128
});
std
::
unique_ptr
<
ir
::
Node
>
node
=
ir
::
CreateNodeForTest
(
desc
);
node
->
inputs
.
emplace_back
(
op
.
get
());
nodes
.
emplace_back
(
std
::
move
(
node
));
}
for
(
auto
&
node
:
nodes
)
{
pool
.
Insert
(
node
.
get
());
}
// FindNextBestFitNode
auto
*
n
=
nodes
[
0
].
get
();
auto
*
cache
=
pool
.
FindBestFitNode
(
n
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"a"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"c"
);
cache
=
pool
.
FindNextBestFitNode
(
n
,
cache
);
PADDLE_ENFORCE
(
cache
->
Name
()
==
"b"
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/details/memory_optimize_pass.cc
浏览文件 @
d12252e6
...
...
@@ -69,55 +69,59 @@ std::unique_ptr<ir::Graph> MemoryOptimizePass::ApplyImpl(
}
for
(
auto
&
var
:
op
->
outputs
)
{
if
(
!
NodeCanReused
(
var
)
||
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
||
skip_set_
.
count
(
var
->
Name
()))
if
(
var
->
IsVar
()
&&
!
var
->
IsCtrlVar
()
&&
skip_set_
.
count
(
var
->
Name
()))
{
VLOG
(
3
)
<<
"Skip set contains variable of "
<<
var
->
Name
()
<<
"disable reuse on it. skipped"
;
continue
;
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
<<
op
->
Name
();
VLOG
(
3
)
<<
pool_
.
ToString
();
VLOG
(
3
)
<<
"matched in pool : "
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
}
if
(
NodeCanReused
(
var
)
&&
cfg_
->
Use
(
op
).
count
(
var
->
Name
())
==
0
)
{
ir
::
Node
*
cache
=
pool_
.
FindBestFitNode
(
var
);
while
(
cache
!=
nullptr
&&
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused. "
<<
cache
->
Name
()
<<
" is re-filled to the pool after "
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
cache
=
pool_
.
FindNextBestFitNode
(
var
,
cache
);
}
if
(
var
->
Name
()
==
FLAGS_memory_optimize_debug
)
{
VLOG
(
3
)
<<
"start match var "
<<
DebugString
(
var
)
<<
" of op "
<<
op
->
Name
();
VLOG
(
3
)
<<
pool_
.
ToString
();
VLOG
(
3
)
<<
"matched in pool : "
<<
((
cache
==
nullptr
)
?
"False"
:
"True"
);
}
if
(
cache
==
nullptr
)
continue
;
if
(
var
->
Name
()
==
cache
->
Name
())
{
VLOG
(
3
)
<<
"The same cache variable is cascade reused."
<<
var
->
Name
()
<<
" is re-filled to the pool after"
<<
"the reused op is finished. Current op can not "
<<
"replace it again. Skip this candidate."
;
continue
;
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
VLOG
(
3
)
<<
string
::
Sprintf
(
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
// update CFG Graph on the fly.
// reused var maybe re-fill into the pool
cfg_
->
RenameVarInCFGGraph
(
var
->
Name
(),
cache
->
Name
(),
idx
);
// NOTE(dzhwinter): we need to both update the ProgramDesc
// and IR Graph. because op_desc/var_desc is used in CreateOp,
// CreateVar when running happens. But IR Graph
// define the dependence relationship between nodes.
RenameVarInGraphDesc
(
var
->
Name
(),
cache
->
Name
(),
idx
);
RenameVarInGraphNode
(
var
->
Name
(),
cache
->
Name
(),
idx
,
graph
.
get
());
pool_
.
Erase
(
cache
);
}
if
(
cache
!=
nullptr
)
{
int
node_idx_in_pool
=
pool_
.
GetNodeIndexInPool
(
cache
);
VLOG
(
3
)
<<
string
::
Sprintf
(
"!!! %s, %s => %s, cache idx %d, pool size %d"
,
std
::
to_string
(
reuse_id
++
),
DebugString
(
var
),
DebugString
(
cache
),
node_idx_in_pool
,
static_cast
<
int
>
(
pool_
.
size
()));
// NOTE(dzhwinter): update the ProgramDesc/IR Graph
// and the CFG Graph on the fly.
//
// IR Graph define the dependence relationship between nodes.
//
// ProgramDesc defines the input/output vars. Its used in
// CreateOp, CreateVar when running happens.
//
// CFG Graph store the liveness information, when reuse happens
// we also need to update the variable liveness.
const
std
::
string
var_name
=
var
->
Name
();
const
std
::
string
cache_name
=
cache
->
Name
();
// fill the pool
std
::
unordered_set
<
std
::
string
>
unlived_vars
;
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
unlived_vars
.
emplace
(
var
);
cfg_
->
RenameVarInCFGGraph
(
var_name
,
cache_name
,
idx
);
RenameVarInGraphDesc
(
var_name
,
cache_name
,
idx
);
RenameVarInGraphNode
(
var_name
,
cache_name
,
idx
,
graph
.
get
());
pool_
.
Erase
(
cache_name
);
}
}
for
(
auto
var
:
unlived_vars
)
{
}
// fill the pool
for
(
auto
var
:
cfg_
->
LiveIn
(
op
))
{
if
(
cfg_
->
LiveOut
(
op
).
count
(
var
)
==
0
)
{
ir
::
Node
*
var_node
=
cfg_
->
GetNodeByName
(
var
,
op
);
if
(
var_node
==
nullptr
||
var_node
->
IsCtrlVar
())
continue
;
if
(
NodeCanReused
(
var_node
)
&&
!
pool_
.
Has
(
var_node
))
{
pool_
.
Insert
(
var_node
);
}
...
...
@@ -273,8 +277,7 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
// redirect the input to the latest version of cache_var
for
(
auto
*
node
:
op
->
inputs
)
{
if
(
node
->
Name
()
==
var
)
{
ir
::
Node
*
cache_node
=
graph
->
CreateVarNode
(
var_desc
.
get
());
var_nodes_
[
cache_var
].
emplace_back
(
cache_node
);
ir
::
Node
*
cache_node
=
var_nodes_
[
cache_var
].
back
();
// swap node to cache_node
cache_node
->
outputs
.
insert
(
cache_node
->
outputs
.
end
(),
...
...
@@ -283,11 +286,15 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
auto
*
prev_op
=
node
->
inputs
[
0
];
std
::
replace
(
prev_op
->
outputs
.
begin
(),
prev_op
->
outputs
.
end
(),
node
,
cache_node
);
cache_node
->
inputs
.
emplace_back
(
prev_op
);
for
(
auto
*
next_op
:
node
->
outputs
)
{
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
}
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
}
...
...
@@ -307,15 +314,14 @@ void MemoryOptimizePass::RenameVarInGraphNode(const std::string& var,
std
::
replace
(
next_op
->
inputs
.
begin
(),
next_op
->
inputs
.
end
(),
node
,
cache_node
);
}
// erase unused node
auto
&
nodes
=
var_nodes_
.
at
(
var
);
nodes
.
erase
(
std
::
remove
(
nodes
.
begin
(),
nodes
.
end
(),
node
),
nodes
.
end
());
graph
->
RemoveNode
(
node
);
}
}
}
// release node of unused var in graph
for
(
auto
*
node
:
var_nodes_
[
var
])
{
graph
->
RemoveNode
(
node
);
}
var_nodes_
.
at
(
var
).
clear
();
}
}
// namespace details
...
...
paddle/fluid/framework/inplace_op_inference_test.cc
浏览文件 @
d12252e6
...
...
@@ -179,11 +179,11 @@ TEST(InferInplace, SingleOpInplaceInToOut) {
op
->
SetOutput
(
"Out"
,
{
"test2_out"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
64
,
128
,
128
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
128
,
128
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -201,11 +201,11 @@ TEST(InferInplace, SingleGradOpInplaceInToOut) {
op
->
SetOutput
(
GradVarName
(
"X"
),
{
"test2_a"
,
"test2_b"
,
"test2_c"
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_a"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"test2_out"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -233,12 +233,12 @@ TEST(InferInplace, MultiOutInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
@@ -267,12 +267,12 @@ TEST(InferInplace, MultiGradInplaceInToOut) {
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
});
prog
.
MutableBlock
(
0
)
->
Var
(
"a0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"b0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"c0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"o0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"y0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
prog
.
MutableBlock
(
0
)
->
Var
(
"z0"
)
->
SetShape
({
32
,
16
,
1024
,
1024
});
auto
&
infer_inplace
=
OpInfoMap
::
Instance
().
Get
(
op
->
Type
()).
infer_inplace_
;
auto
in_to_outs
=
infer_inplace
(
*
op
,
op
->
Block
());
...
...
paddle/fluid/operators/controlflow/compare_op.cc
浏览文件 @
d12252e6
...
...
@@ -51,6 +51,11 @@ class CompareOpProtoMaker : public framework::OpProtoAndCheckerMaker {
comment
.
type
));
AddInput
(
"Y"
,
string
::
Sprintf
(
"the right hand operand of %s operator"
,
comment
.
type
));
AddAttr
<
int
>
(
"axis"
,
"The start dimension index for broadcasting Y onto X. [default -1]"
)
.
SetDefault
(
-
1
)
.
EqualGreaterThan
(
-
1
);
AddAttr
<
bool
>
(
"force_cpu"
,
"Force fill output variable to cpu "
"memory. Otherwise, fill output variable to the running "
...
...
@@ -64,11 +69,6 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is
calculated by $%s$
)DOC"
,
comment
.
equation
));
AddAttr
<
int
>
(
"axis"
,
"The start dimension index for broadcasting Y onto X. [default -1]"
)
.
SetDefault
(
-
1
)
.
EqualGreaterThan
(
-
1
);
}
};
...
...
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
浏览文件 @
d12252e6
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/jit/kernels.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
...
...
@@ -37,32 +38,24 @@ struct EmbeddingVSumFunctor {
const
LoDTensor
*
table_t
,
const
LoDTensor
*
ids_t
,
LoDTensor
*
output_t
)
{
auto
*
table
=
table_t
->
data
<
T
>
();
int64_t
row_number
=
table_t
->
dims
()[
0
];
int64_t
row
_width
=
table_t
->
dims
()[
1
];
int64_t
last_dim
=
output_t
->
dims
()[
1
];
int64_t
table_height
=
table_t
->
dims
()[
0
];
int64_t
table
_width
=
table_t
->
dims
()[
1
];
int64_t
out_width
=
output_t
->
dims
()[
1
];
const
int64_t
*
ids
=
ids_t
->
data
<
int64_t
>
();
auto
ids_lod
=
ids_t
->
lod
()[
0
];
int64_t
ids_count
=
ids_t
->
numel
()
/
ids_lod
.
back
();
int64_t
idx_width
=
ids_t
->
numel
()
/
ids_lod
.
back
();
auto
*
output
=
output_t
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
int64_t
i
=
0
;
i
!=
ids_lod
.
size
()
-
1
;
++
i
)
{
size_t
begin
=
ids_lod
[
i
]
*
ids_count
;
for
(
int64_t
j
=
0
;
j
!=
ids_count
;
++
j
)
{
PADDLE_ENFORCE_LT
(
ids
[
begin
],
row_number
);
PADDLE_ENFORCE_GE
(
ids
[
begin
],
0
,
"ids %d"
,
i
);
blas
.
VCOPY
(
row_width
,
table
+
ids
[
begin
+
j
]
*
row_width
,
output
+
i
*
last_dim
+
j
*
row_width
);
}
PADDLE_ENFORCE_LE
(
table_width
*
idx_width
,
out_width
);
for
(
int64_t
r
=
(
ids_lod
[
i
]
+
1
)
*
ids_count
;
r
<
ids_lod
[
i
+
1
]
*
ids_count
;
++
r
)
{
PADDLE_ENFORCE_LT
(
ids
[
r
],
row_number
);
PADDLE_ENFORCE_GE
(
ids
[
r
],
0
,
"ids %d"
,
i
);
blas
.
AXPY
(
row_width
,
1.
,
table
+
ids
[
r
]
*
row_width
,
output
+
i
*
last_dim
+
(
r
%
ids_count
)
*
row_width
);
}
jit
::
emb_seq_pool_attr_t
attr
(
table_height
,
table_width
,
0
,
idx_width
,
out_width
,
jit
::
SeqPoolType
::
kSum
);
for
(
int64_t
i
=
0
;
i
!=
ids_lod
.
size
()
-
1
;
++
i
)
{
attr
.
index_height
=
ids_lod
[
i
+
1
]
-
ids_lod
[
i
];
auto
emb_seqpool
=
jit
::
Get
<
jit
::
kEmbSeqPool
,
jit
::
EmbSeqPoolTuples
<
T
>
,
platform
::
CPUPlace
>
(
attr
);
emb_seqpool
(
table
,
ids
+
ids_lod
[
i
]
*
idx_width
,
output
+
i
*
out_width
,
&
attr
);
}
}
};
...
...
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
d12252e6
...
...
@@ -301,6 +301,37 @@ void BenchSeqPoolKernel() {
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchEmbSeqPoolKernel
()
{
std
::
vector
<
jit
::
SeqPoolType
>
pool_types
=
{
jit
::
SeqPoolType
::
kSum
};
int64_t
tbl_h
=
1e4
;
for
(
int
tbl_w
:
{
10
,
16
,
256
})
{
Tensor
table
;
table
.
Resize
({
tbl_h
,
tbl_w
});
RandomVec
<
T
>
(
tbl_h
*
tbl_w
,
table
.
mutable_data
<
T
>
(
PlaceType
()),
-
2.
f
,
2.
f
);
const
T
*
table_data
=
table
.
data
<
T
>
();
for
(
auto
type
:
pool_types
)
{
for
(
int
idx_w
:
{
1
,
2
,
10
,
16
})
{
for
(
int
idx_h
:
{
1
,
2
,
9
,
13
,
16
})
{
int64_t
out_w
=
tbl_w
*
idx_w
;
jit
::
emb_seq_pool_attr_t
attr
(
tbl_h
,
tbl_w
,
idx_h
,
idx_w
,
out_w
,
type
);
Tensor
idx
,
out
;
idx
.
Resize
({
idx_h
,
idx_w
});
out
.
Resize
({
out_w
});
RandomVec
<
int64_t
>
(
idx_h
*
idx_w
,
idx
.
mutable_data
<
int64_t
>
(
PlaceType
()),
0
,
tbl_h
-
1
);
const
int64_t
*
idx_data
=
idx
.
data
<
int64_t
>
();
T
*
o_data
=
out
.
mutable_data
<
T
>
(
PlaceType
());
BenchAllImpls
<
KT
,
jit
::
EmbSeqPoolTuples
<
T
>
,
PlaceType
>
(
attr
,
table_data
,
idx_data
,
o_data
,
&
attr
);
}
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
BenchMatMulKernel
()
{
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
...
...
@@ -441,6 +472,11 @@ BENCH_FP32_CPU(kGRUHtPart2) { BenchGRUKernel<jit::kGRUHtPart2, T, CPUPlace>(); }
// seq pool function
BENCH_FP32_CPU
(
kSeqPool
)
{
BenchSeqPoolKernel
<
jit
::
kSeqPool
,
T
,
CPUPlace
>
();
}
// embedding seq pool function
BENCH_FP32_CPU
(
kEmbSeqPool
)
{
BenchEmbSeqPoolKernel
<
jit
::
kEmbSeqPool
,
T
,
CPUPlace
>
();
}
// matmul
BENCH_FP32_CPU
(
kMatMul
)
{
BenchMatMulKernel
<
jit
::
kMatMul
,
T
,
CPUPlace
>
();
}
...
...
paddle/fluid/operators/jit/gen/CMakeLists.txt
浏览文件 @
d12252e6
...
...
@@ -31,3 +31,4 @@ USE_JITKERNEL_GEN(kNCHW16CMulNC)
USE_JITKERNEL_GEN
(
kSeqPool
)
USE_JITKERNEL_GEN
(
kHMax
)
USE_JITKERNEL_GEN
(
kHSum
)
USE_JITKERNEL_GEN
(
kEmbSeqPool
)
paddle/fluid/operators/jit/gen/embseqpool.cc
0 → 100644
浏览文件 @
d12252e6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include "paddle/fluid/operators/jit/gen/embseqpool.h"
#include <stddef.h> // offsetof
#include <vector>
#include "paddle/fluid/operators/jit/gen/act.h" // for exp_float_consts ones
#include "paddle/fluid/operators/jit/registry.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace
paddle
{
namespace
operators
{
namespace
jit
{
namespace
gen
{
void
EmbSeqPoolJitCode
::
genCode
()
{
preCode
();
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
constexpr
int
max_num_regs
=
8
;
const
int
num_block
=
tbl_w_
/
block
;
const
int
num_groups
=
num_block
/
max_num_regs
;
const
size_t
block_size
=
sizeof
(
float
)
*
block
;
std
::
vector
<
int
>
groups
(
num_groups
,
max_num_regs
);
int
rest_num_regs
=
num_block
%
max_num_regs
;
if
(
rest_num_regs
>
0
)
{
groups
.
push_back
(
rest_num_regs
);
}
// protect param_dst
mov
(
reg_ptr_param_dst
,
param_dst
);
mov
(
reg_idx_width_in_byte
,
qword
[
param_attr
+
offsetof
(
emb_seq_pool_attr_t
,
index_width
)]);
mov
(
reg_idx_height
,
qword
[
param_attr
+
offsetof
(
emb_seq_pool_attr_t
,
index_height
)]);
mov
(
rax
,
sizeof
(
int64_t
));
mul
(
reg_idx_width_in_byte
);
mov
(
reg_idx_width_in_byte
,
rax
);
const
size_t
tbl_width_in_byte
=
sizeof
(
float
)
*
tbl_w_
;
int
acc_num_regs
=
0
;
for
(
int
num_regs
:
groups
)
{
Label
l_next_idx_w
,
l_next_idx_h
,
l_save_now
;
xor_
(
reg_idx_w_i_in_byte
,
reg_idx_w_i_in_byte
);
mov
(
reg_ptr_dst_i
,
reg_ptr_param_dst
);
add
(
reg_ptr_dst_i
,
acc_num_regs
*
block_size
);
L
(
l_next_idx_w
);
{
// h == 0
mov
(
reg_ptr_idx_i
,
param_idx
);
add
(
reg_ptr_idx_i
,
reg_idx_w_i_in_byte
);
mov
(
reg_idx
,
qword
[
reg_ptr_idx_i
]);
mov
(
rax
,
tbl_width_in_byte
);
mul
(
reg_idx
);
mov
(
reg_ptr_tbl_i
,
rax
);
// reg is offset now
add
(
reg_ptr_tbl_i
,
param_tbl
);
// reg is ptr_i now
size_t
w_offset
=
0
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ymm_t
(
reg_i
+
num_regs
),
ptr
[
reg_ptr_tbl_i
+
w_offset
]);
w_offset
+=
block_size
;
}
add
(
reg_ptr_idx_i
,
reg_idx_width_in_byte
);
// end condition of idx h
mov
(
reg_idx_h_end
,
reg_idx_height
);
mov
(
rax
,
reg_idx_width_in_byte
);
mul
(
reg_idx_h_end
);
mov
(
reg_idx_h_end
,
rax
);
add
(
reg_idx_h_end
,
reg_idx_w_i_in_byte
);
add
(
reg_idx_h_end
,
param_idx
);
cmp
(
reg_ptr_idx_i
,
reg_idx_h_end
);
jge
(
l_save_now
,
T_NEAR
);
L
(
l_next_idx_h
);
{
mov
(
reg_idx
,
qword
[
reg_ptr_idx_i
]);
mov
(
reg_ptr_tbl_i
,
reg_idx
);
mov
(
rax
,
tbl_width_in_byte
);
mul
(
reg_idx
);
mov
(
reg_ptr_tbl_i
,
rax
);
add
(
reg_ptr_tbl_i
,
param_tbl
);
size_t
w_offset
=
0
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ymm_t
(
reg_i
),
ptr
[
reg_ptr_tbl_i
+
w_offset
]);
vaddps
(
ymm_t
(
reg_i
+
num_regs
),
ymm_t
(
reg_i
+
num_regs
),
ymm_t
(
reg_i
));
w_offset
+=
block_size
;
}
add
(
reg_ptr_idx_i
,
reg_idx_width_in_byte
);
cmp
(
reg_ptr_idx_i
,
reg_idx_h_end
);
jl
(
l_next_idx_h
,
T_NEAR
);
}
// end of idx h
L
(
l_save_now
);
// avg or sqrt here, if needed
w_offset
=
0
;
for
(
int
reg_i
=
0
;
reg_i
<
num_regs
;
++
reg_i
)
{
vmovups
(
ptr
[
reg_ptr_dst_i
+
w_offset
],
ymm_t
(
reg_i
+
num_regs
));
w_offset
+=
block_size
;
}
add
(
reg_ptr_dst_i
,
tbl_width_in_byte
);
add
(
reg_idx_w_i_in_byte
,
sizeof
(
int64_t
));
cmp
(
reg_idx_w_i_in_byte
,
reg_idx_width_in_byte
);
jl
(
l_next_idx_w
,
T_NEAR
);
}
// end of idx w
acc_num_regs
+=
num_regs
;
add
(
param_tbl
,
num_regs
*
block_size
);
// do not use acc_num_regs
}
// end of groups
postCode
();
}
class
EmbSeqPoolCreator
:
public
JitCodeCreator
<
emb_seq_pool_attr_t
>
{
public:
bool
UseMe
(
const
emb_seq_pool_attr_t
&
attr
)
const
override
{
return
platform
::
MayIUse
(
platform
::
avx
)
&&
attr
.
table_width
%
YMM_FLOAT_BLOCK
==
0
;
}
size_t
CodeSize
(
const
emb_seq_pool_attr_t
&
attr
)
const
override
{
return
96
+
(
attr
.
table_width
/
YMM_FLOAT_BLOCK
)
*
96
*
8
;
}
std
::
unique_ptr
<
GenBase
>
CreateJitCode
(
const
emb_seq_pool_attr_t
&
attr
)
const
override
{
PADDLE_ENFORCE_GT
(
attr
.
table_height
,
0
);
PADDLE_ENFORCE_GT
(
attr
.
table_width
,
0
);
PADDLE_ENFORCE_GT
(
attr
.
index_height
,
0
);
PADDLE_ENFORCE_GT
(
attr
.
index_width
,
0
);
PADDLE_ENFORCE_GT
(
attr
.
out_width
,
0
);
return
make_unique
<
EmbSeqPoolJitCode
>
(
attr
,
CodeSize
(
attr
));
}
};
}
// namespace gen
}
// namespace jit
}
// namespace operators
}
// namespace paddle
namespace
gen
=
paddle
::
operators
::
jit
::
gen
;
REGISTER_JITKERNEL_GEN
(
kEmbSeqPool
,
gen
::
EmbSeqPoolCreator
);
paddle/fluid/operators/jit/gen/embseqpool.h
0 → 100644
浏览文件 @
d12252e6
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#pragma once
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/operators/jit/gen/jitcode.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
namespace
jit
{
namespace
gen
{
class
EmbSeqPoolJitCode
:
public
JitCode
{
public:
explicit
EmbSeqPoolJitCode
(
const
emb_seq_pool_attr_t
&
attr
,
size_t
code_size
=
256
*
1024
,
void
*
code_ptr
=
nullptr
)
:
JitCode
(
code_size
,
code_ptr
),
tbl_w_
(
attr
.
table_width
),
type_
(
attr
.
pool_type
)
{
if
(
type_
!=
SeqPoolType
::
kSum
)
{
LOG
(
FATAL
)
<<
"Only support sum pool yet "
;
}
this
->
genCode
();
}
std
::
string
name
()
const
override
{
std
::
string
base
=
"EmbSeqPoolJitCode"
;
if
(
type_
==
SeqPoolType
::
kSum
)
{
base
+=
"_Sum"
;
}
else
if
(
type_
==
SeqPoolType
::
kAvg
)
{
base
+=
"_Avg"
;
}
else
if
(
type_
==
SeqPoolType
::
kSqrt
)
{
base
+=
"_Sqrt"
;
}
base
+=
(
"_W"
+
std
::
to_string
(
tbl_w_
));
return
base
;
}
void
genCode
()
override
;
private:
int
tbl_w_
;
SeqPoolType
type_
;
reg64_t
param_tbl
{
abi_param1
};
reg64_t
param_idx
{
abi_param2
};
reg64_t
param_dst
{
abi_param3
};
reg64_t
param_attr
{
abi_param4
};
reg64_t
reg_tmp
{
rax
};
reg64_t
reg_idx_width_in_byte
{
r8
};
reg64_t
reg_idx_height
{
r9
};
reg64_t
reg_ptr_tbl_i
{
r10
};
reg64_t
reg_idx
{
r10
};
// could use same of reg_ptr_tbl_i
reg64_t
reg_ptr_idx_i
{
r11
};
reg64_t
reg_ptr_dst_i
{
r12
};
reg64_t
reg_ptr_param_dst
{
r13
};
// rdx is used in mul so protect param_dst
reg64_t
reg_idx_w_i_in_byte
{
r14
};
reg64_t
reg_idx_h_end
{
r15
};
};
}
// namespace gen
}
// namespace jit
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/jit/gen/seqpool.h
浏览文件 @
d12252e6
...
...
@@ -32,7 +32,7 @@ class SeqPoolJitCode : public JitCode {
:
JitCode
(
code_size
,
code_ptr
),
w_
(
attr
.
w
),
type_
(
attr
.
type
)
{
if
(
!
(
type_
==
SeqPoolType
::
kSum
||
type_
==
SeqPoolType
::
kAvg
||
type_
==
SeqPoolType
::
kSqrt
))
{
LOG
(
FATAL
)
<<
"Only support
sum pool yet
"
;
LOG
(
FATAL
)
<<
"Only support
ed pool type: sum, avg and sqrt.
"
;
}
fp_h_
[
0
]
=
1.
f
;
this
->
genCode
();
...
...
paddle/fluid/operators/jit/helper.cc
浏览文件 @
d12252e6
...
...
@@ -54,6 +54,7 @@ const char* to_string(KernelType kt) {
ONE_CASE
(
kHMax
);
ONE_CASE
(
kHSum
);
ONE_CASE
(
kSoftmax
);
ONE_CASE
(
kEmbSeqPool
);
default:
PADDLE_THROW
(
"Not support type: %d, or forget to add it."
,
kt
);
return
"NOT JITKernel"
;
...
...
paddle/fluid/operators/jit/helper.h
浏览文件 @
d12252e6
...
...
@@ -172,6 +172,15 @@ inline std::ostream& operator<<(std::ostream& os, const seq_pool_attr_t& attr) {
return
os
;
}
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
emb_seq_pool_attr_t
&
attr
)
{
os
<<
"table_height["
<<
attr
.
table_height
<<
"],table_width["
<<
attr
.
table_width
<<
"],index_height["
<<
attr
.
index_height
<<
"],index_width["
<<
attr
.
index_width
<<
"],output_width["
<<
attr
.
out_width
<<
"],pool_type["
<<
to_string
(
attr
.
pool_type
)
<<
"]"
;
return
os
;
}
inline
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
matmul_attr_t
&
attr
)
{
os
<<
"M["
<<
attr
.
m
<<
"],N["
<<
attr
.
n
<<
"],K["
<<
attr
.
k
<<
"]"
;
return
os
;
...
...
paddle/fluid/operators/jit/kernel_base.h
浏览文件 @
d12252e6
...
...
@@ -13,6 +13,7 @@
* limitations under the License. */
#pragma once
#include <cstdint>
#include "paddle/fluid/operators/jit/macro.h"
#include "paddle/fluid/platform/macros.h"
...
...
@@ -20,34 +21,35 @@ namespace paddle {
namespace
operators
{
namespace
jit
{
// TODO(TJ): reorder by alphabet
typedef
enum
{
kNone
=
0
,
kVMul
=
1
,
kVAdd
=
2
,
kVAddRelu
,
kVSub
,
kVScal
,
kVAddBias
,
kVRelu
,
kVIdentity
,
kVSquare
,
kVExp
,
kVSigmoid
,
kVTanh
,
kLSTMCtHt
,
kLSTMC1H1
,
// sort by alphabet
kCRFDecoding
=
1
,
kEmbSeqPool
=
2
,
kGRUH1
,
kGRUHtPart1
,
kGRUHtPart2
,
kCRFDecoding
,
kHSum
,
// horizontal max
kHMax
,
// horizontal sum
kLSTMCtHt
,
kLSTMC1H1
,
kLayerNorm
,
kMatMul
,
kNCHW16CMulNC
,
kSeqPool
,
kMatMul
,
kHSum
,
// horizontal max
kHMax
,
// horizontal sum
kSoftmax
,
kVAdd
,
kVAddBias
,
kVAddRelu
,
kVExp
,
kVIdentity
,
kVMul
,
kVRelu
,
kVScal
,
kVSigmoid
,
kVSquare
,
kVSub
,
kVTanh
,
}
KernelType
;
typedef
enum
{
...
...
@@ -145,6 +147,32 @@ struct SeqPoolTuples {
typedef
void
(
*
func_type
)(
const
T
*
,
T
*
,
const
seq_pool_attr_t
*
);
};
typedef
struct
emb_seq_pool_attr_s
{
int64_t
table_height
,
table_width
;
int64_t
index_height
,
index_width
;
int64_t
out_width
;
SeqPoolType
pool_type
;
emb_seq_pool_attr_s
()
=
default
;
explicit
emb_seq_pool_attr_s
(
int64_t
tbl_height
,
int64_t
tbl_width
,
int64_t
idx_height
,
int64_t
idx_width
,
int64_t
output_width
,
SeqPoolType
seqpool_type
=
SeqPoolType
::
kSum
)
:
table_height
(
tbl_height
),
table_width
(
tbl_width
),
index_height
(
idx_height
),
index_width
(
idx_width
),
out_width
(
output_width
),
pool_type
(
seqpool_type
)
{}
}
emb_seq_pool_attr_t
;
template
<
typename
T
>
struct
EmbSeqPoolTuples
{
typedef
T
data_type
;
typedef
emb_seq_pool_attr_t
attr_type
;
typedef
void
(
*
func_type
)(
const
T
*
,
const
int64_t
*
,
T
*
,
const
emb_seq_pool_attr_t
*
);
};
typedef
struct
matmul_attr_s
{
int
m
,
n
,
k
;
void
*
packed_weight
{
nullptr
};
...
...
paddle/fluid/operators/jit/kernel_key.cc
浏览文件 @
d12252e6
...
...
@@ -56,6 +56,11 @@ size_t JitCodeKey<matmul_attr_t>(const matmul_attr_t& attr) {
return
(
key
<<
shift
*
2
)
+
((
static_cast
<
size_t
>
(
attr
.
n
))
<<
shift
)
+
attr
.
k
;
}
template
<
>
size_t
JitCodeKey
<
emb_seq_pool_attr_t
>
(
const
emb_seq_pool_attr_t
&
attr
)
{
return
attr
.
table_width
;
}
}
// namespace jit
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
浏览文件 @
d12252e6
...
...
@@ -13,3 +13,4 @@ USE_JITKERNEL_MORE(kVSigmoid, mkl)
USE_JITKERNEL_MORE
(
kVTanh, mkl
)
USE_JITKERNEL_MORE
(
kSeqPool, mkl
)
USE_JITKERNEL_MORE
(
kSoftmax, mkl
)
USE_JITKERNEL_MORE
(
kEmbSeqPool, mkl
)
paddle/fluid/operators/jit/more/mkl/mkl.cc
浏览文件 @
d12252e6
...
...
@@ -174,6 +174,16 @@ bool SeqPoolKernel<double>::UseMe(const seq_pool_attr_t& attr) const {
return
true
;
}
template
<
>
bool
EmbSeqPoolKernel
<
float
>::
UseMe
(
const
emb_seq_pool_attr_t
&
attr
)
const
{
return
true
;
}
template
<
>
bool
EmbSeqPoolKernel
<
double
>::
UseMe
(
const
emb_seq_pool_attr_t
&
attr
)
const
{
return
true
;
}
template
<
>
bool
MatMulKernel
<
float
>::
UseMe
(
const
matmul_attr_t
&
attr
)
const
{
return
platform
::
MayIUse
(
platform
::
avx
);
...
...
@@ -227,6 +237,7 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare);
REGISTER_MKL_KERNEL
(
kVSigmoid
,
VSigmoid
);
REGISTER_MKL_KERNEL
(
kVTanh
,
VTanh
);
REGISTER_MKL_KERNEL
(
kSeqPool
,
SeqPool
);
REGISTER_MKL_KERNEL
(
kEmbSeqPool
,
EmbSeqPool
);
REGISTER_MKL_KERNEL
(
kSoftmax
,
Softmax
);
#undef REGISTER_MKL_KERNEL
paddle/fluid/operators/jit/more/mkl/mkl.h
浏览文件 @
d12252e6
...
...
@@ -18,6 +18,7 @@
#include <type_traits>
#include <vector>
#include "paddle/fluid/operators/jit/kernel_base.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -91,6 +92,32 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) {
}
}
template
<
typename
T
>
void
EmbSeqPool
(
const
T
*
table
,
const
int64_t
*
idx
,
T
*
out
,
const
emb_seq_pool_attr_t
*
attr
)
{
PADDLE_ENFORCE_EQ
(
attr
->
table_width
*
attr
->
index_width
,
attr
->
out_width
);
auto
check_idx_value_valid
=
[
&
](
int64_t
i
)
{
PADDLE_ENFORCE_LT
(
idx
[
i
],
attr
->
table_height
,
"idx value: %d, i: %d"
,
idx
[
i
],
i
);
PADDLE_ENFORCE_GE
(
idx
[
i
],
0
,
"idx value: %d, i: %d"
,
idx
[
i
],
i
);
};
for
(
int64_t
w
=
0
;
w
!=
attr
->
index_width
;
++
w
)
{
check_idx_value_valid
(
w
);
VCopy
<
T
>
(
table
+
idx
[
w
]
*
attr
->
table_width
,
out
+
w
*
attr
->
table_width
,
attr
->
table_width
);
}
for
(
int64_t
h
=
1
;
h
<
attr
->
index_height
;
++
h
)
{
for
(
int64_t
w
=
0
;
w
<
attr
->
index_width
;
++
w
)
{
int64_t
i
=
h
*
attr
->
index_width
+
w
;
check_idx_value_valid
(
i
);
VAXPY
<
T
>
(
static_cast
<
T
>
(
1
),
table
+
idx
[
i
]
*
attr
->
table_width
,
out
+
w
*
attr
->
table_width
,
attr
->
table_width
);
}
}
}
template
<
typename
T
>
void
ASum
(
const
T
*
x
,
T
*
res
,
int
n
);
...
...
@@ -142,6 +169,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples);
DECLARE_MKL_KERNEL
(
SeqPool
,
SeqPoolTuples
);
DECLARE_MKL_KERNEL
(
EmbSeqPool
,
EmbSeqPoolTuples
);
DECLARE_MKL_KERNEL
(
Softmax
,
SoftmaxTuples
);
#undef DECLARE_MKL_KERNEL
...
...
paddle/fluid/operators/jit/refer/CMakeLists.txt
浏览文件 @
d12252e6
...
...
@@ -32,3 +32,4 @@ USE_JITKERNEL_REFER(kVSquare)
USE_JITKERNEL_REFER
(
kHSum
)
USE_JITKERNEL_REFER
(
kHMax
)
USE_JITKERNEL_REFER
(
kSoftmax
)
USE_JITKERNEL_REFER
(
kEmbSeqPool
)
paddle/fluid/operators/jit/refer/refer.cc
浏览文件 @
d12252e6
...
...
@@ -57,4 +57,6 @@ REGISTER_REFER_KERNEL(kHSum, HSum);
REGISTER_REFER_KERNEL
(
kSoftmax
,
Softmax
);
REGISTER_REFER_KERNEL
(
kEmbSeqPool
,
EmbSeqPool
);
#undef REGISTER_REFER_KERNEL
paddle/fluid/operators/jit/refer/refer.h
浏览文件 @
d12252e6
...
...
@@ -16,6 +16,7 @@
#include <cmath>
#include <limits>
#include <string>
#include "paddle/fluid/operators/jit/helper.h"
#include "paddle/fluid/operators/jit/kernel_base.h"
#include "paddle/fluid/platform/enforce.h"
...
...
@@ -414,6 +415,37 @@ void Softmax(const T* x, T* y, int n, int bs = 1) {
}
}
// embedding seq pool
// table is a matrix with (tbl_h, tbl_w)
// idx is a matrix with (idx_h, idx_w)
// output is a vector with length tbl_w * idx_w
template
<
typename
T
>
void
EmbSeqPool
(
const
T
*
table
,
const
int64_t
*
idx
,
T
*
out
,
const
emb_seq_pool_attr_t
*
attr
)
{
PADDLE_ENFORCE_EQ
(
attr
->
table_width
*
attr
->
index_width
,
attr
->
out_width
);
auto
check_idx_value_valid
=
[
&
](
int64_t
i
)
{
PADDLE_ENFORCE_LT
(
idx
[
i
],
attr
->
table_height
,
"idx value: %d, i: %d"
,
idx
[
i
],
i
);
PADDLE_ENFORCE_GE
(
idx
[
i
],
0
,
"idx value: %d, i: %d"
,
idx
[
i
],
i
);
};
for
(
int64_t
w
=
0
;
w
!=
attr
->
index_width
;
++
w
)
{
check_idx_value_valid
(
w
);
std
::
memcpy
(
out
+
w
*
attr
->
table_width
,
table
+
idx
[
w
]
*
attr
->
table_width
,
attr
->
table_width
*
sizeof
(
T
));
}
for
(
int64_t
h
=
1
;
h
<
attr
->
index_height
;
++
h
)
{
for
(
int64_t
w
=
0
;
w
<
attr
->
index_width
;
++
w
)
{
int64_t
i
=
h
*
attr
->
index_width
+
w
;
check_idx_value_valid
(
i
);
VAdd
(
table
+
idx
[
i
]
*
attr
->
table_width
,
out
+
w
*
attr
->
table_width
,
out
+
w
*
attr
->
table_width
,
attr
->
table_width
);
}
}
}
#define DECLARE_REFER_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public ReferKernel<tuples<T>> { \
...
...
@@ -462,6 +494,8 @@ DECLARE_REFER_KERNEL(HSum, XRNTuples);
DECLARE_REFER_KERNEL
(
Softmax
,
SoftmaxTuples
);
DECLARE_REFER_KERNEL
(
EmbSeqPool
,
EmbSeqPoolTuples
);
#undef DECLARE_REFER_KERNEL
}
// namespace refer
...
...
paddle/fluid/operators/jit/test.cc
浏览文件 @
d12252e6
...
...
@@ -270,6 +270,32 @@ struct TestFuncWithRefer<jit::SeqPoolTuples<T>, std::vector<T>, std::vector<T>,
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
EmbSeqPoolTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
int64_t
>
,
std
::
vector
<
T
>
,
typename
jit
::
EmbSeqPoolTuples
<
T
>::
attr_type
>
{
void
operator
()(
const
typename
jit
::
EmbSeqPoolTuples
<
T
>::
func_type
tgt
,
const
std
::
vector
<
T
>&
table
,
const
std
::
vector
<
int64_t
>&
idx
,
const
std
::
vector
<
T
>&
oref
,
const
typename
jit
::
EmbSeqPoolTuples
<
T
>::
attr_type
&
attr
)
{
EXPECT_TRUE
(
tgt
!=
nullptr
);
EXPECT_EQ
(
table
.
size
(),
static_cast
<
size_t
>
(
attr
.
table_height
*
attr
.
table_width
));
EXPECT_EQ
(
idx
.
size
(),
static_cast
<
size_t
>
(
attr
.
index_height
*
attr
.
index_width
));
EXPECT_EQ
(
oref
.
size
(),
static_cast
<
size_t
>
(
attr
.
table_width
*
attr
.
index_width
));
const
T
*
table_data
=
table
.
data
();
const
int64_t
*
idx_data
=
idx
.
data
();
const
T
*
oref_data
=
oref
.
data
();
int
o_w
=
oref
.
size
();
std
::
vector
<
T
>
out
(
o_w
);
T
*
o_data
=
out
.
data
();
tgt
(
table_data
,
idx_data
,
o_data
,
&
attr
);
ExpectEQ
<
T
>
(
o_data
,
oref_data
,
o_w
);
}
};
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
MatMulTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
...
...
@@ -644,6 +670,40 @@ void TestSoftmaxKernel() {
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestEmbSeqPoolKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
int64_t
tbl_h
=
1e4
;
std
::
vector
<
jit
::
SeqPoolType
>
pool_types
=
{
jit
::
SeqPoolType
::
kSum
};
// only support sum yet
for
(
int
tbl_w
:
TestSizes
())
{
std
::
vector
<
T
>
table
(
tbl_h
*
tbl_w
);
RandomVec
<
T
>
(
tbl_h
*
tbl_w
,
table
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
table_data
=
table
.
data
();
for
(
auto
type
:
pool_types
)
{
for
(
int
idx_w
:
{
1
,
2
,
10
,
16
})
{
for
(
int
idx_h
:
{
1
,
2
,
9
,
13
,
16
})
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
EmbSeqPoolTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
std
::
vector
<
int64_t
>
idx
(
idx_h
*
idx_w
);
RandomVec
<
int64_t
>
(
idx_h
*
idx_w
,
idx
.
data
(),
0
,
tbl_h
-
1
);
int64_t
out_w
=
tbl_w
*
idx_w
;
std
::
vector
<
T
>
oref
(
out_w
);
const
int64_t
*
idx_data
=
idx
.
data
();
T
*
o_data
=
oref
.
data
();
jit
::
emb_seq_pool_attr_t
attr
(
tbl_h
,
tbl_w
,
idx_h
,
idx_w
,
out_w
,
type
);
ref
(
table_data
,
idx_data
,
o_data
,
&
attr
);
TestAllImpls
<
KT
,
jit
::
EmbSeqPoolTuples
<
T
>
,
PlaceType
,
std
::
vector
<
T
>
,
std
::
vector
<
int64_t
>
,
std
::
vector
<
T
>>
(
attr
,
table
,
idx
,
oref
,
attr
);
}
}
}
}
}
template
<
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestNCHW16CMulNCKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
...
...
@@ -878,6 +938,11 @@ TEST(JITKernel, kSoftmax) {
TestSoftmaxKernel
<
jit
::
kSoftmax
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kEmbSeqPool
)
{
TestEmbSeqPoolKernel
<
jit
::
kEmbSeqPool
,
float
,
CPUPlace
>
();
TestEmbSeqPoolKernel
<
jit
::
kEmbSeqPool
,
double
,
CPUPlace
>
();
}
TEST
(
JITKernel
,
kNCHW16CMulNC
)
{
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
float
,
CPUPlace
>
();
TestNCHW16CMulNCKernel
<
jit
::
kNCHW16CMulNC
,
double
,
CPUPlace
>
();
...
...
paddle/fluid/operators/ngraph/ngraph_bridge.cc
浏览文件 @
d12252e6
...
...
@@ -36,6 +36,8 @@ std::map<std::string,
{
"conv2d_grad"
,
NG_OPS
::
BuildConv2dGradNode
},
{
"batch_norm"
,
NG_OPS
::
BuildBatchNormNode
},
{
"batch_norm_grad"
,
NG_OPS
::
BuildBatchNormGradNode
},
{
"cross_entropy"
,
NG_OPS
::
BuildCrossEntropyNode
},
{
"cross_entropy_grad"
,
NG_OPS
::
BuildCrossEntropyGradNode
},
{
"elementwise_add"
,
NG_OPS
::
BuildElementwiseAddNode
},
{
"elementwise_add_grad"
,
NG_OPS
::
BuildElementwiseAddGradNode
},
{
"fill_constant"
,
NG_OPS
::
BuildFillConstantNode
},
...
...
paddle/fluid/operators/ngraph/ngraph_ops.h
浏览文件 @
d12252e6
...
...
@@ -26,6 +26,7 @@ limitations under the License. */
#include "ops/batch_norm_op.h"
#include "ops/binary_unary_op.h"
#include "ops/conv2d_op.h"
#include "ops/cross_entropy_op.h"
#include "ops/elementwise_add_op.h"
#include "ops/fill_constant_op.h"
#include "ops/mean_op.h"
...
...
paddle/fluid/operators/ngraph/ops/batch_norm_op.h
浏览文件 @
d12252e6
...
...
@@ -44,6 +44,10 @@ void BuildBatchNormNode(
const
float
epsilon
=
op_attrs
.
Get
<
float
>
(
"epsilon"
);
const
float
momentum
=
op_attrs
.
Get
<
float
>
(
"momentum"
);
PADDLE_ENFORCE
(
data_layout
==
"NHWC"
||
data_layout
==
"NCHW"
||
data_layout
==
"NC"
,
"The BatchNorm operator only supports NHWC/NCHW/NC data format"
);
if
(
data_layout
==
"NHWC"
)
{
x
=
paddle
::
platform
::
Nhwc2Nchw
(
x
);
}
...
...
@@ -110,6 +114,9 @@ void BuildBatchNormGradNode(
"BN grap input size needs to be 2 or 4"
);
PADDLE_ENFORCE_EQ
(
x_shape
.
size
(),
dy_shape
.
size
(),
"BN grap input and delta size needs to be equal"
);
PADDLE_ENFORCE
(
data_layout
==
"NHWC"
||
data_layout
==
"NCHW"
||
data_layout
==
"NC"
,
"The BatchNorm operator only supports NHWC/NCHW/NC data format"
);
if
(
x_shape
.
size
()
==
2
)
{
x
=
std
::
make_shared
<
ngraph
::
op
::
Reshape
>
(
...
...
paddle/fluid/operators/ngraph/ops/cross_entropy_op.h
0 → 100644
浏览文件 @
d12252e6
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
ngraphs
{
void
BuildCrossEntropyNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
x
=
paddle
::
platform
::
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
label
=
paddle
::
platform
::
GetInputNode
(
op
,
"Label"
,
ngb_node_map
);
auto
label_shape
=
label
->
get_shape
();
auto
x_shape
=
x
->
get_shape
();
auto
label_rank
=
label_shape
.
size
();
auto
x_rank
=
x_shape
.
size
();
std
::
shared_ptr
<
ngraph
::
Node
>
x_2d
=
x
,
label_2d
=
label
;
auto
label_2d_shape
=
label_shape
,
x_2d_shape
=
x_shape
;
if
(
label_rank
>
2
)
{
label_2d_shape
=
paddle
::
platform
::
FlattenTo2d
(
label_shape
,
label_rank
-
1
);
label_2d
=
paddle
::
platform
::
NgReshaper
(
label
,
label_2d_shape
);
}
if
(
x_rank
>
2
)
{
x_2d_shape
=
paddle
::
platform
::
FlattenTo2d
(
x_shape
,
x_rank
-
1
);
x_2d
=
paddle
::
platform
::
NgReshaper
(
x
,
x_2d_shape
);
}
auto
batch_size
=
x_2d_shape
.
at
(
0
);
auto
op_attrs
=
paddle
::
framework
::
AttrReader
(
op
->
Attrs
());
const
bool
is_soft_label
=
op_attrs
.
Get
<
bool
>
(
"soft_label"
);
std
::
shared_ptr
<
ngraph
::
Node
>
node_1_hot
=
label_2d
;
if
(
!
is_soft_label
)
{
auto
label_1d
=
paddle
::
platform
::
NgReshaper
(
label_2d
,
ngraph
::
Shape
{
label_2d_shape
.
at
(
0
)});
node_1_hot
=
std
::
make_shared
<
ngraph
::
op
::
OneHot
>
(
label_1d
,
x_2d_shape
,
1
);
}
if
(
x
->
get_element_type
()
!=
node_1_hot
->
get_element_type
())
{
node_1_hot
=
std
::
make_shared
<
ngraph
::
op
::
Convert
>
(
node_1_hot
,
x
->
get_element_type
());
}
auto
node_log
=
std
::
make_shared
<
ngraph
::
op
::
Log
>
(
x_2d
);
auto
high_clip
=
ngraph
::
op
::
Constant
::
create
(
node_log
->
get_element_type
(),
node_log
->
get_shape
(),
{
1e20
});
auto
low_clip
=
ngraph
::
op
::
Constant
::
create
(
node_log
->
get_element_type
(),
node_log
->
get_shape
(),
{
-
1e20
});
auto
node_min
=
std
::
make_shared
<
ngraph
::
op
::
Minimum
>
(
node_log
,
high_clip
);
auto
node_max
=
std
::
make_shared
<
ngraph
::
op
::
Maximum
>
(
node_min
,
low_clip
);
auto
node_mul
=
node_1_hot
*
node_log
;
auto
node_sum
=
std
::
make_shared
<
ngraph
::
op
::
Sum
>
(
node_mul
,
ngraph
::
AxisSet
{
1
});
auto
node_neg
=
std
::
make_shared
<
ngraph
::
op
::
Negative
>
(
node_sum
);
auto
xe
=
paddle
::
platform
::
NgReshaper
(
node_neg
,
ngraph
::
Shape
{
batch_size
,
1
});
if
(
!
is_soft_label
)
{
auto
ignore_index
=
op_attrs
.
Get
<
int
>
(
"ignore_index"
);
auto
ignore_node
=
ngraph
::
op
::
Constant
::
create
(
label
->
get_element_type
(),
label_2d_shape
,
{
ignore_index
});
auto
not_equal_node
=
std
::
make_shared
<
ngraph
::
op
::
NotEqual
>
(
label_2d
,
ignore_node
);
auto
mask
=
std
::
make_shared
<
ngraph
::
op
::
Convert
>
(
not_equal_node
,
xe
->
get_element_type
());
xe
=
xe
*
mask
;
}
paddle
::
platform
::
SetOutputNode
(
op
,
"Y"
,
xe
,
ngb_node_map
);
}
void
BuildCrossEntropyGradNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
op_attrs
=
paddle
::
framework
::
AttrReader
(
op
->
Attrs
());
const
bool
is_soft_label
=
op_attrs
.
Get
<
bool
>
(
"soft_label"
);
auto
x
=
paddle
::
platform
::
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
label
=
paddle
::
platform
::
GetInputNode
(
op
,
"Label"
,
ngb_node_map
);
auto
dy
=
paddle
::
platform
::
GetInputNode
(
op
,
"Y@GRAD"
,
ngb_node_map
);
auto
x_shape
=
x
->
get_shape
();
auto
rank
=
x_shape
.
size
();
std
::
shared_ptr
<
ngraph
::
Node
>
mask
;
if
(
!
is_soft_label
)
{
auto
label_shape
=
label
->
get_shape
();
label_shape
.
pop_back
();
label
=
paddle
::
platform
::
NgReshaper
(
label
,
label_shape
);
auto
ignore_index
=
op_attrs
.
Get
<
int
>
(
"ignore_index"
);
auto
ignore_node
=
ngraph
::
op
::
Constant
::
create
(
label
->
get_element_type
(),
label_shape
,
{
ignore_index
});
auto
not_equal_node
=
std
::
make_shared
<
ngraph
::
op
::
NotEqual
>
(
label
,
ignore_node
);
mask
=
std
::
make_shared
<
ngraph
::
op
::
Convert
>
(
not_equal_node
,
x
->
get_element_type
());
mask
=
std
::
make_shared
<
ngraph
::
op
::
Broadcast
>
(
mask
,
x_shape
,
ngraph
::
AxisSet
{
rank
-
1
});
label
=
std
::
make_shared
<
ngraph
::
op
::
OneHot
>
(
label
,
x_shape
,
rank
-
1
);
}
auto
dy_shape
=
dy
->
get_shape
();
dy_shape
.
pop_back
();
auto
dy_reshape
=
paddle
::
platform
::
NgReshaper
(
dy
,
dy_shape
);
auto
dy_bcast
=
std
::
make_shared
<
ngraph
::
op
::
Broadcast
>
(
dy_reshape
,
x_shape
,
ngraph
::
AxisSet
{
rank
-
1
});
if
(
x
->
get_element_type
()
!=
label
->
get_element_type
())
{
label
=
std
::
make_shared
<
ngraph
::
op
::
Convert
>
(
label
,
x
->
get_element_type
());
}
auto
xe_grad
=
-
label
*
dy_bcast
/
x
;
if
(
!
is_soft_label
)
{
xe_grad
=
xe_grad
*
mask
;
}
paddle
::
platform
::
SetOutputNode
(
op
,
"X@GRAD"
,
xe_grad
,
ngb_node_map
);
}
}
// namespace ngraphs
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/ngraph/ops/fill_constant_op.h
浏览文件 @
d12252e6
...
...
@@ -46,8 +46,6 @@ void BuildFillConstantNode(
ng_dtype
=
ngraph
::
element
::
i64
;
}
else
if
(
data_type
==
paddle
::
framework
::
proto
::
VarType
::
INT32
)
{
ng_dtype
=
ngraph
::
element
::
i32
;
}
else
if
(
data_type
==
paddle
::
framework
::
proto
::
VarType
::
BOOL
)
{
ng_dtype
=
ngraph
::
element
::
boolean
;
}
else
{
PADDLE_THROW
(
"unsupported data type: %s"
,
data_type
);
}
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
d12252e6
...
...
@@ -373,7 +373,13 @@ PYBIND11_MODULE(core, m) {
PADDLE_ENFORCE
(
CheckLoD
(
new_lod
,
vectorize
(
self
.
dims
()).
front
()),
"the provided lod info is invalid"
);
self
.
set_lod
(
new_lod
);
})
},
py
::
arg
(
"lod"
),
R"DOC(
Set LoD of the LoDTensor.
Args:
lod (List[List[int]]): the lod to be set.
)DOC"
)
.
def
(
"set_recursive_sequence_lengths"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
recursive_sequence_lengths
)
{
...
...
@@ -389,7 +395,17 @@ PYBIND11_MODULE(core, m) {
CheckLoD
(
new_offset_lod
,
vectorize
(
self
.
dims
()).
front
()),
"the provided recursive_sequence_lengths info is invalid"
);
self
.
set_lod
(
new_offset_lod
);
})
},
py
::
arg
(
"recursive_sequence_lengths"
),
R"DOC(
Set LoD of the LoDTensor according to recursive sequence length.
For example, if recursive_sequence_lengths=[[2, 3]], meaning that
there are two sequences with length 2 and 3 respectively, the
corresponding lod would be [[0, 2, 2+3]], i.e, [[0, 2, 5]].
Args:
recursive_sequence_lengths (List[List[int]]): sequence lengths.
)DOC"
)
.
def
(
"lod"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
// output the offset-based lod info
...
...
@@ -398,7 +414,13 @@ PYBIND11_MODULE(core, m) {
new_lod
.
reserve
(
lod
.
size
());
std
::
copy
(
lod
.
begin
(),
lod
.
end
(),
std
::
back_inserter
(
new_lod
));
return
new_lod
;
})
},
R"DOC(
Return the LoD of the LoDTensor.
Returns:
out (List[List[int]]): the lod of the LoDTensor.
)DOC"
)
// Set above comments of set_lod.
.
def
(
"recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
...
...
@@ -408,12 +430,25 @@ PYBIND11_MODULE(core, m) {
new_lod
.
reserve
(
lod
.
size
());
std
::
copy
(
lod
.
begin
(),
lod
.
end
(),
std
::
back_inserter
(
new_lod
));
return
new_lod
;
})
.
def
(
"has_valid_recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
bool
{
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return
CheckLoD
(
self
.
lod
(),
vectorize
(
self
.
dims
()).
front
());
});
},
R"DOC(
Return the sequence length of the LoDTensor corresponding to LoD.
Returns:
out (List[List[int]): the sequence lengths.
)DOC"
)
.
def
(
"has_valid_recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
bool
{
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return
CheckLoD
(
self
.
lod
(),
vectorize
(
self
.
dims
()).
front
());
},
R"DOC(
Check whether the lod of the LoDTensor is valid.
Returns:
out (bool): whether the lod is valid.
)DOC"
);
py
::
class_
<
SelectedRows
>
(
m
,
"SelectedRows"
)
.
def
(
"__init__"
,
...
...
@@ -549,11 +584,45 @@ All parameter, weight, gradient are variables in Paddle.
[](
Scope
&
self
,
const
std
::
string
&
name
)
->
Variable
*
{
return
self
.
Var
(
name
);
},
py
::
arg
(
"name"
),
R"DOC(
Find or create variable named :code:`name` in the current scope.
If the variable named :code:`name` does not exist in the
current scope, the variable would be created. Otherwise,
return the existing variable.
Args:
name (str): the variable name.
Returns:
out (core.Variable): the found or created variable.
)DOC"
,
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
arg
(
"name"
),
R"DOC(
Find variable named :code:`name` in the current scope or
its parent scope. Return None if not found.
Args:
name (str): the variable name.
Returns:
out (core.Variable|None): the found variable or None.
)DOC"
,
py
::
return_value_policy
::
reference
)
.
def
(
"find_var"
,
&
Scope
::
FindVar
,
py
::
return_value_policy
::
reference
)
.
def
(
"new_scope"
,
[](
Scope
&
self
)
->
Scope
*
{
return
&
self
.
NewScope
();
},
R"DOC(
Create a new sub-scope of the current scope.
Returns:
out (core._Scope): the created sub-scope.
)DOC"
,
py
::
return_value_policy
::
reference
)
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
);
.
def
(
"drop_kids"
,
&
Scope
::
DropKids
,
R"DOC(
Delete all sub-scopes of the current scope.
)DOC"
);
m
.
def
(
"Scope"
,
[]()
->
Scope
*
{
...
...
@@ -561,6 +630,12 @@ All parameter, weight, gradient are variables in Paddle.
ScopePool
::
Instance
().
Insert
(
std
::
unique_ptr
<
Scope
>
(
s
));
return
s
;
},
R"DOC(
Create a new scope.
Returns:
out (core._Scope): the created scope.
)DOC"
,
py
::
return_value_policy
::
reference
);
//! @note: Be careful! PyBind will return std::string as an unicode, not
...
...
@@ -789,11 +864,13 @@ All parameter, weight, gradient are variables in Paddle.
self
[
i
].
ShareDataWith
(
t
);
self
[
i
].
set_lod
(
t
.
lod
());
})
.
def
(
"append"
,
[](
LoDTensorArray
&
self
,
const
LoDTensor
&
t
)
{
self
.
emplace_back
();
self
.
back
().
ShareDataWith
(
t
);
self
.
back
().
set_lod
(
t
.
lod
());
});
.
def
(
"append"
,
[](
LoDTensorArray
&
self
,
const
LoDTensor
&
t
)
{
self
.
emplace_back
();
self
.
back
().
ShareDataWith
(
t
);
self
.
back
().
set_lod
(
t
.
lod
());
},
py
::
arg
(
"tensor"
),
"Append a LoDensor to LoDTensorArray."
);
m
.
def
(
"IsInplace"
,
[](
std
::
string
op
)
->
bool
{
return
operators
::
IsInplace
(
op
);
});
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
d12252e6
...
...
@@ -88,6 +88,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
pip3.5 uninstall
-y
protobuf
pip3.5
install
--user
-r
${
PADDLE_ROOT
}
/python/requirements.txt
else
exit
1
...
...
@@ -101,6 +102,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
pip3.6 uninstall
-y
protobuf
pip3.6
install
--user
-r
${
PADDLE_ROOT
}
/python/requirements.txt
else
exit
1
...
...
@@ -114,6 +116,7 @@ function cmake_gen() {
-DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/
-DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib"
WITH_FLUID_ONLY
=
${
WITH_FLUID_ONLY
:-
ON
}
pip3.7 uninstall
-y
protobuf
pip3.7
install
--user
-r
${
PADDLE_ROOT
}
/python/requirements.txt
else
exit
1
...
...
@@ -128,31 +131,44 @@ function cmake_gen() {
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
pip uninstall
-y
protobuf
pip
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp27-cp27mu"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-2.7.11-ucs4/lib:
${
LD_LIBRARY_PATH
#/opt/_internal/cpython-2.7.11-ucs2/lib
:
}
export
PATH
=
/opt/python/cp27-cp27mu/bin/:
${
PATH
}
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
pip uninstall
-y
protobuf
pip
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp35-cp35m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.5.1/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.5.1/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.5.1/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.5.1/include/python3.5m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.5.1/lib/libpython3.so"
pip3.5 uninstall
-y
protobuf
pip3.5
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp36-cp36m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.6.0/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.6.0/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.6.0/bin/python3
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.6.0/include/python3.6m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.6.0/lib/libpython3.so"
pip3.6 uninstall
-y
protobuf
pip3.6
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
elif
[
"
$1
"
==
"cp37-cp37m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-3.7.0/lib/:
${
LD_LIBRARY_PATH
}
export
PATH
=
/opt/_internal/cpython-3.7.0/bin/:
${
PATH
}
export
PYTHON_FLAGS
=
"-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.7.0/bin/python3.7
-DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so"
pip3.7 uninstall
-y
protobuf
pip3.7
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
fi
else
pip uninstall
-y
protobuf
pip
install
-r
${
PADDLE_ROOT
}
/python/requirements.txt
fi
fi
...
...
python/paddle/fluid/compiler.py
浏览文件 @
d12252e6
...
...
@@ -177,7 +177,10 @@ class CompiledProgram(object):
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
self
.
_build_strategy
.
enable_inplace
=
False
if
self
.
_program
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
memory_optimize
is
None
:
self
.
_build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
enable_inplace
is
None
:
self
.
_build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
if
self
.
_build_strategy
.
num_trainers
>
1
and
trainers_endpoints
:
assert
self
.
_build_strategy
.
num_trainers
==
len
(
...
...
python/paddle/fluid/framework.py
浏览文件 @
d12252e6
...
...
@@ -557,7 +557,8 @@ class OpProtoHolder(object):
return
{
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
()
core
.
op_proto_and_checker_maker
.
kOpNameScopeAttrName
(),
core
.
op_proto_and_checker_maker
.
kOpCreationCallstackAttrName
()
}
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
d12252e6
...
...
@@ -506,9 +506,9 @@ class While(object):
while loop control flow.
Args:
cond
(Variable): condition used to compare.
cond(Variable): condition used to compare.
is_test(bool): A flag indicating whether execution is in test phase.
name
(str): The name of this layer.
name(str): The name of this layer.
Examples:
.. code-block:: python
...
...
@@ -589,7 +589,8 @@ class While(object):
def
lod_rank_table
(
x
,
level
=
0
):
"""LoD Rank Table Operator. Given an input variable **x** and a level number
"""
LoD Rank Table Operator. Given an input variable **x** and a level number
of LoD, this layer creates a LodRankTable object. A LoDRankTable object
contains a list of bi-element tuples. Each tuple consists of an index and
a length, both of which are int type. Refering to specified level of LoD,
...
...
@@ -883,10 +884,8 @@ def less_than(x, y, force_cpu=None, cond=None, **ignored):
return
cond
def
equal
(
x
,
y
,
cond
=
None
,
**
ignored
):
def
equal
(
x
,
y
,
cond
=
None
):
"""
**equal**
This layer returns the truth value of :math:`x == y` elementwise.
Args:
...
...
@@ -1458,7 +1457,6 @@ class DynamicRNN(object):
Returns:
The current timestep in the input sequence.
"""
self
.
_assert_in_rnn_block_
(
"step_input"
)
if
not
isinstance
(
x
,
Variable
):
...
...
@@ -1535,8 +1533,7 @@ class DynamicRNN(object):
@
signature_safe_contextmanager
def
block
(
self
):
"""
The block for user to define operators in RNN. See the class docstring
for more details.
The block for user to define operators in RNN.
"""
if
self
.
status
!=
DynamicRNN
.
BEFORE_RNN
:
raise
ValueError
(
"rnn.block() can only be invoke once"
)
...
...
@@ -1640,8 +1637,7 @@ class DynamicRNN(object):
dtype(str|numpy.dtype): The data type of the initialized memory.
Returns:
the memory variable.
The memory variable.
"""
self
.
_assert_in_rnn_block_
(
'memory'
)
self
.
_init_zero_idx_
()
...
...
@@ -1740,7 +1736,7 @@ class DynamicRNN(object):
def
output
(
self
,
*
outputs
):
"""
m
ark the RNN output variables.
M
ark the RNN output variables.
Args:
outputs: The output variables.
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
d12252e6
...
...
@@ -56,7 +56,10 @@ def data(name,
Args:
name(str): The name/alias of the function
shape(list): Tuple declaring the shape.
shape(list): Tuple declaring the shape. If :code:`append_batch_size` is
True and there is no -1 inside :code:`shape`, it should be
considered as the shape of the each sample. Otherwise, it
should be considered as the shape of the batched data.
append_batch_size(bool):
1. If true, it prepends -1 to the shape.
For example if shape=[1], the resulting shape is [-1, 1].
...
...
python/paddle/fluid/layers/layer_function_generator.py
浏览文件 @
d12252e6
...
...
@@ -24,7 +24,7 @@ from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype
from
..layer_helper
import
LayerHelper
__all__
=
[
'deprecated'
,
'generate_layer_fn'
,
'generate_
layer_fn_noattr
'
,
'autodoc'
,
'deprecated'
,
'generate_layer_fn'
,
'generate_
activation_fn
'
,
'autodoc'
,
'templatedoc'
]
...
...
@@ -89,6 +89,9 @@ def _generate_doc_string_(op_proto, additional_args_lines=None):
buf
.
write
(
'
\n
'
)
skip_attrs
=
OpProtoHolder
.
generated_op_attr_names
()
# attr use_mkldnn and is_test also should not be visible to users.
skip_attrs
.
add
(
"use_mkldnn"
)
skip_attrs
.
add
(
"is_test"
)
for
each_attr
in
op_proto
.
attrs
:
if
each_attr
.
name
in
skip_attrs
:
...
...
@@ -226,7 +229,7 @@ def generate_layer_fn(op_type):
return
func
def
generate_
layer_fn_noattr
(
op_type
):
def
generate_
activation_fn
(
op_type
):
"""Register the Python layer for an Operator without Attribute.
Args:
...
...
@@ -246,6 +249,7 @@ def generate_layer_fn_noattr(op_type):
func
.
__name__
=
op_type
func
.
__doc__
=
_generate_doc_string_
(
op_proto
)
return
func
...
...
python/paddle/fluid/layers/ops.py
浏览文件 @
d12252e6
...
...
@@ -14,7 +14,7 @@
from
__future__
import
print_function
import
os
from
.layer_function_generator
import
generate_layer_fn
,
generate_
layer_fn_noattr
from
.layer_function_generator
import
generate_layer_fn
,
generate_
activation_fn
from
..
import
core
from
..framework
import
convert_np_dtype_to_dtype_
...
...
@@ -53,7 +53,7 @@ globals()['_elementwise_div'] = generate_layer_fn('elementwise_div')
__all__
+=
__activations_noattr__
for
_OP
in
set
(
__activations_noattr__
):
globals
()[
_OP
]
=
generate_
layer_fn_noattr
(
_OP
)
globals
()[
_OP
]
=
generate_
activation_fn
(
_OP
)
__all__
+=
[
"uniform_random"
]
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
d12252e6
...
...
@@ -1368,9 +1368,9 @@ class FtrlOptimizer(Optimizer):
Args:
learning_rate (float|Variable): global learning rate.
l1 (float):
l2 (float):
lr_power (float):
l1 (float):
L1 regularization strength.
l2 (float):
L2 regularization strength.
lr_power (float):
Learning Rate Power.
regularization: A Regularizer, such as
fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix.
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
d12252e6
...
...
@@ -148,6 +148,8 @@ class ParallelExecutor(object):
else
framework
.
default_main_program
()
# FIXME(dzhwinter): enable_inplace should be after memory_optimize
# if turn on python memory optimize, turn off the inplace_pass.
if
build_strategy
.
memory_optimize
is
None
:
build_strategy
.
memory_optimize
=
False
if
main
.
_is_mem_optimized
else
True
if
build_strategy
.
enable_inplace
is
None
:
build_strategy
.
enable_inplace
=
False
if
main
.
_is_mem_optimized
else
True
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
d12252e6
...
...
@@ -77,6 +77,7 @@ list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list
(
REMOVE_ITEM TEST_OPS test_nearest_interp_op
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_resnet
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_optimizer
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
...
...
@@ -107,6 +108,9 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
if
(
NOT WIN32
)
py_test_modules
(
test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL
)
endif
()
if
(
NOT APPLE
)
py_test_modules
(
test_image_classification_resnet MODULES test_image_classification_resnet SERIAL
)
if
(
CMAKE_BUILD_TYPE STREQUAL
"Debug"
)
...
...
python/paddle/fluid/tests/unittests/ngraph/test_accuracy_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -15,39 +15,7 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.op_test
import
OpTest
class
TestNGRAPHAccuracyOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"accuracy"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
n
=
128
infer
=
np
.
random
.
random
((
n
,
1
)).
astype
(
self
.
dtype
)
indices
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
label
=
np
.
random
.
randint
(
0
,
2
,
(
n
,
1
))
self
.
inputs
=
{
'Out'
:
infer
,
'Indices'
:
indices
,
"Label"
:
label
}
num_correct
=
0
for
rowid
in
range
(
n
):
for
ele
in
indices
[
rowid
]:
if
ele
==
label
[
rowid
]:
num_correct
+=
1
break
self
.
outputs
=
{
'Accuracy'
:
np
.
array
([
num_correct
/
float
(
n
)]).
astype
(
self
.
dtype
),
'Correct'
:
np
.
array
([
num_correct
]).
astype
(
"int64"
),
'Total'
:
np
.
array
([
n
]).
astype
(
"int64"
)
}
self
.
_cpu_only
=
True
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
from
paddle.fluid.tests.unittests.test_accuracy_op
import
TestAccuracyOp
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_batch_norm_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -17,21 +17,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_batch_norm_op
import
TestBatchNormOpTraining
,
TestBatchNormOpInference
class
TestNGRAPHBatchNormOpTraining
(
TestBatchNormOpTraining
):
def
init_kernel_type
(
self
):
super
(
TestNGRAPHBatchNormOpTraining
,
self
).
init_kernel_type
()
class
TestNGRAPHBatchNormOpInference
(
TestBatchNormOpInference
):
def
init_kernel_type
(
self
):
super
(
TestNGRAPHBatchNormOpInference
,
self
).
init_kernel_type
()
class
TestNGRAPHBatchNormOpWithReluInference
(
TestBatchNormOpInference
):
def
init_kernel_type
(
self
):
super
(
TestNGRAPHBatchNormOpWithReluInference
,
self
).
init_kernel_type
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_conv2d_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -17,60 +17,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
,
TestWithGroup
,
TestWith1x1
,
TestWithInput1x1Filter1x1
class
TestNGRAPH
(
TestConv2dOp
):
def
setUp
(
self
):
super
(
TestNGRAPH
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPH
,
self
).
init_kernel_type
()
class
TestNGRAPHWithPad
(
TestWithPad
):
def
setUp
(
self
):
super
(
TestNGRAPHWithPad
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithPad
,
self
).
init_kernel_type
()
class
TestNGRAPHWithStride
(
TestWithStride
):
def
setUp
(
self
):
super
(
TestNGRAPHWithStride
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithStride
,
self
).
init_kernel_type
()
class
TestNGRAPHWithGroup
(
TestWithGroup
):
def
setUp
(
self
):
super
(
TestNGRAPHWithGroup
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithGroup
,
self
).
init_kernel_type
()
class
TestNGRAPHWith1x1
(
TestWith1x1
):
def
setUp
(
self
):
super
(
TestNGRAPHWith1x1
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWith1x1
,
self
).
init_kernel_type
()
class
TestNGRAPHWithInput1x1Filter1x1
(
TestWithInput1x1Filter1x1
):
def
setUp
(
self
):
super
(
TestNGRAPHWithInput1x1Filter1x1
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_kernel_type
(
self
):
super
(
TestNGRAPHWithInput1x1Filter1x1
,
self
).
init_kernel_type
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_cross_entropy_ngraph_op.py
0 → 100644
浏览文件 @
d12252e6
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.tests.unittests.op_test
import
OpTest
,
randomize_probability
class
TestCrossEntropyOp
(
OpTest
):
"""Test cross-entropy with discrete one-hot labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
self
.
soft_label
=
False
self
.
ignore_index
=
-
100
self
.
dtype
=
np
.
float64
self
.
batch_size
=
30
self
.
class_num
=
10
self
.
_cpu_only
=
True
self
.
init_dtype_type
()
self
.
init_attr_type
()
self
.
init_bs_class_num
()
self
.
init_x
()
self
.
init_label
()
self
.
get_cross_entropy
()
self
.
inputs
=
{
"X"
:
self
.
x
,
"Label"
:
self
.
label
}
self
.
outputs
=
{
"Y"
:
self
.
cross_entropy
}
self
.
attrs
=
{
"soft_label"
:
self
.
soft_label
,
"ignore_index"
:
self
.
ignore_index
}
def
init_x
(
self
):
self
.
x
=
randomize_probability
(
self
.
batch_size
,
self
.
class_num
,
dtype
=
self
.
dtype
)
def
init_label
(
self
):
self
.
label
=
np
.
random
.
randint
(
0
,
self
.
class_num
,
(
self
.
batch_size
,
1
),
dtype
=
"int64"
)
def
get_cross_entropy
(
self
):
self
.
cross_entropy
=
np
.
asmatrix
(
[[
-
np
.
log
(
self
.
x
[
i
][
self
.
label
[
i
][
0
]])]
for
i
in
range
(
self
.
x
.
shape
[
0
])],
dtype
=
"float64"
)
def
init_attr_type
(
self
):
pass
def
init_dtype_type
(
self
):
pass
def
init_bs_class_num
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Y"
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp2
(
TestCrossEntropyOp
):
"""Test cross-entropy with vectorized soft labels.
"""
def
init_label
(
self
):
self
.
label
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
self
.
batch_size
,
self
.
class_num
]).
astype
(
self
.
dtype
)
self
.
label
/=
self
.
label
.
sum
(
axis
=
1
,
keepdims
=
True
)
def
get_cross_entropy
(
self
):
self
.
cross_entropy
=
(
-
self
.
label
*
np
.
log
(
self
.
x
)).
sum
(
axis
=
1
,
keepdims
=
True
).
astype
(
self
.
dtype
)
def
init_attr_type
(
self
):
self
.
soft_label
=
True
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float32
def
init_bs_class_num
(
self
):
self
.
batch_size
=
5
self
.
class_num
=
37
def
test_check_grad
(
self
):
self
.
check_grad
(
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp3
(
TestCrossEntropyOp
):
"""Test cross-entropy with vectorized one-hot representation of labels.
"""
def
init_label
(
self
):
self
.
label_index
=
np
.
random
.
randint
(
0
,
self
.
class_num
,
(
self
.
batch_size
))
self
.
label
=
np
.
zeros
(
self
.
x
.
shape
).
astype
(
self
.
dtype
)
self
.
label
[
np
.
arange
(
self
.
batch_size
),
self
.
label_index
]
=
1
def
get_cross_entropy
(
self
):
self
.
cross_entropy
=
np
.
asmatrix
(
[[
-
np
.
log
(
self
.
x
[
i
][
self
.
label_index
[
i
]])]
for
i
in
range
(
self
.
x
.
shape
[
0
])]).
astype
(
self
.
dtype
)
def
init_attr_type
(
self
):
self
.
soft_label
=
True
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float32
def
init_bs_class_num
(
self
):
self
.
batch_size
=
5
self
.
class_num
=
17
def
test_check_grad
(
self
):
self
.
check_grad
(
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp4
(
TestCrossEntropyOp
):
"""Test high rank tensor cross-entropy with discrete one-hot labels.
"""
def
init_x
(
self
):
self
.
shape
=
[
10
,
2
,
4
]
self
.
ins_num
=
np
.
prod
(
np
.
array
(
self
.
shape
))
self
.
X_2d
=
randomize_probability
(
self
.
ins_num
,
self
.
class_num
).
astype
(
self
.
dtype
)
self
.
x
=
self
.
X_2d
.
reshape
(
self
.
shape
+
[
self
.
class_num
])
def
init_label
(
self
):
self
.
label_2d
=
np
.
random
.
randint
(
0
,
self
.
class_num
,
(
self
.
ins_num
,
1
),
dtype
=
"int64"
)
self
.
label
=
self
.
label_2d
.
reshape
(
self
.
shape
+
[
1
])
def
get_cross_entropy
(
self
):
cross_entropy_2d
=
np
.
asmatrix
(
[[
-
np
.
log
(
self
.
X_2d
[
i
][
self
.
label_2d
[
i
][
0
]])]
for
i
in
range
(
self
.
X_2d
.
shape
[
0
])]).
astype
(
self
.
dtype
)
self
.
cross_entropy
=
np
.
array
(
cross_entropy_2d
).
reshape
(
self
.
shape
+
[
1
])
def
init_attr_type
(
self
):
self
.
soft_label
=
False
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float64
def
init_bs_class_num
(
self
):
self
.
class_num
=
10
class
TestCrossEntropyOp5
(
TestCrossEntropyOp
):
"""Test high rank tensor cross-entropy with vectorized soft labels.
"""
def
init_x
(
self
):
self
.
shape
=
[
4
,
3
]
self
.
ins_num
=
np
.
prod
(
np
.
array
(
self
.
shape
))
self
.
X_2d
=
randomize_probability
(
self
.
ins_num
,
self
.
class_num
).
astype
(
self
.
dtype
)
self
.
x
=
self
.
X_2d
.
reshape
(
self
.
shape
+
[
self
.
class_num
])
def
init_label
(
self
):
self
.
label_2d
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
self
.
ins_num
,
self
.
class_num
]).
astype
(
self
.
dtype
)
self
.
label_2d
/=
self
.
label_2d
.
sum
(
axis
=
1
,
keepdims
=
True
)
self
.
label
=
self
.
label_2d
.
reshape
(
self
.
shape
+
[
self
.
class_num
])
def
get_cross_entropy
(
self
):
cross_entropy_2d
=
(
-
self
.
label_2d
*
np
.
log
(
self
.
X_2d
)).
sum
(
axis
=
1
,
keepdims
=
True
).
astype
(
self
.
dtype
)
self
.
cross_entropy
=
np
.
array
(
cross_entropy_2d
).
reshape
(
self
.
shape
+
[
1
])
def
init_attr_type
(
self
):
self
.
soft_label
=
True
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float32
def
init_bs_class_num
(
self
):
self
.
class_num
=
37
def
test_check_grad
(
self
):
self
.
check_grad
(
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp6
(
TestCrossEntropyOp
):
"""Test high rank tensor cross-entropy with vectorized one-hot representation of labels.
"""
def
init_x
(
self
):
self
.
shape
=
[
4
,
3
,
2
]
self
.
ins_num
=
np
.
prod
(
np
.
array
(
self
.
shape
))
self
.
X_2d
=
randomize_probability
(
self
.
ins_num
,
self
.
class_num
).
astype
(
self
.
dtype
)
self
.
x
=
self
.
X_2d
.
reshape
(
self
.
shape
+
[
self
.
class_num
])
def
init_label
(
self
):
self
.
label_index_2d
=
np
.
random
.
randint
(
0
,
self
.
class_num
,
(
self
.
ins_num
),
dtype
=
"int64"
)
label_2d
=
np
.
zeros
(
self
.
X_2d
.
shape
)
label_2d
[
np
.
arange
(
self
.
ins_num
),
self
.
label_index_2d
]
=
1
self
.
label
=
label_2d
.
reshape
(
self
.
shape
+
[
self
.
class_num
]).
astype
(
self
.
dtype
)
def
get_cross_entropy
(
self
):
cross_entropy_2d
=
np
.
asmatrix
(
[[
-
np
.
log
(
self
.
X_2d
[
i
][
self
.
label_index_2d
[
i
]])]
for
i
in
range
(
self
.
X_2d
.
shape
[
0
])])
self
.
cross_entropy
=
np
.
array
(
cross_entropy_2d
).
reshape
(
self
.
shape
+
[
1
]).
astype
(
self
.
dtype
)
def
init_attr_type
(
self
):
self
.
soft_label
=
True
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float32
def
init_bs_class_num
(
self
):
self
.
class_num
=
17
def
test_check_grad
(
self
):
self
.
check_grad
(
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp7
(
TestCrossEntropyOp
):
"""Test cross-entropy with ignore index.
"""
def
init_label
(
self
):
self
.
label
=
np
.
random
.
randint
(
0
,
self
.
class_num
,
(
self
.
batch_size
,
1
),
dtype
=
"int64"
)
def
get_cross_entropy
(
self
):
self
.
cross_entropy
=
np
.
asmatrix
(
[[
-
np
.
log
(
self
.
x
[
i
][
self
.
label
[
i
][
0
]])]
if
self
.
label
[
i
][
0
]
!=
self
.
ignore_index
else
[
0
]
for
i
in
range
(
self
.
x
.
shape
[
0
])]).
astype
(
self
.
dtype
)
def
init_attr_type
(
self
):
self
.
soft_label
=
False
self
.
ignore_index
=
3
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float64
def
init_bs_class_num
(
self
):
self
.
batch_size
=
30
self
.
class_num
=
10
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_elementwise_add_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -13,18 +13,9 @@
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
paddle.fluid.tests.unittests.test_elementwise_add_op
import
TestElementwiseAddOp
class
TestNGRAPHElementwiseAddOp
(
TestElementwiseAddOp
):
def
setUp
(
self
):
super
(
TestNGRAPHElementwiseAddOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_input_output
(
self
):
super
(
TestNGRAPHElementwiseAddOp
,
self
).
init_input_output
()
import
unittest
from
paddle.fluid.tests.unittests.test_elementwise_add_op
import
TestElementwiseAddOp
,
TestElementwiseAddOp_broadcast_0
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -13,24 +13,34 @@
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.test_fill_constant_op
import
TestFillConstantOp1
,
TestFillConstantOp2
,
TestFillConstantOpWithSelectedRows
class
TestNGRAPHFillConstant
Op1
(
TestFillConstantOp1
):
class
TestNGRAPHFillConstant
FP64
(
TestFillConstantOp1
):
def
setUp
(
self
):
super
(
TestNGRAPHFillConstantOp1
,
self
).
setUp
()
super
(
TestNGRAPHFillConstantFP64
,
self
).
setUp
()
self
.
attrs
=
{
'shape'
:
[
123
,
92
],
'value'
:
3.8
,
'dtype'
:
6
}
self
.
outputs
=
{
'Out'
:
np
.
full
((
123
,
92
),
3.8
)}
class
TestNGRAPHFillConstant
Op
2
(
TestFillConstantOp2
):
class
TestNGRAPHFillConstant
INT3
2
(
TestFillConstantOp2
):
def
setUp
(
self
):
super
(
TestNGRAPHFillConstant
Op
2
,
self
).
setUp
()
super
(
TestNGRAPHFillConstant
INT3
2
,
self
).
setUp
()
self
.
attrs
=
{
'shape'
:
[
123
,
92
],
'dtype'
:
2
}
self
.
outputs
=
{
'Out'
:
np
.
full
((
123
,
92
),
0
)}
class
TestNGRAPHFillConstantOpWithSelectedRows
(
TestFillConstantOpWithSelectedRows
):
class
TestNGRAPHFillConstantINT64
(
TestFillConstantOp2
):
def
setUp
(
self
):
super
(
TestFillConstantOpWithSelectedRows
,
self
).
setUp
()
super
(
TestNGRAPHFillConstantINT64
,
self
).
setUp
()
self
.
attrs
=
{
'shape'
:
[
123
,
92
],
'dtype'
:
3
}
self
.
outputs
=
{
'Out'
:
np
.
full
((
123
,
92
),
0
)}
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/ngraph/test_mean_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -16,12 +16,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_mean_op
import
TestMeanOp
class
TestNGRAPHMeanOp
(
TestMeanOp
):
def
setUp
(
self
):
super
(
TestNGRAPHMeanOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -15,39 +15,7 @@
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.tests.unittests.op_test
import
OpTest
class
TestNGRAPHMulOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"mul"
self
.
dtype
=
np
.
float32
self
.
init_dtype_type
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
2
,
4
)).
astype
(
self
.
dtype
),
'Y'
:
np
.
random
.
random
((
4
,
4
)).
astype
(
self
.
dtype
)
}
self
.
outputs
=
{
'Out'
:
np
.
dot
(
self
.
inputs
[
'X'
],
self
.
inputs
[
'Y'
])}
self
.
_cpu_only
=
True
def
init_dtype_type
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad_normal
(
self
):
self
.
check_grad
([
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
0.5
)
def
test_check_grad_ingore_x
(
self
):
self
.
check_grad
(
[
'Y'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
self
.
check_grad
(
[
'X'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
'Y'
))
from
paddle.fluid.tests.unittests.test_mul_op
import
TestMulOp
,
TestMulOp2
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_pool2d_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -14,61 +14,25 @@
from
__future__
import
print_function
from
paddle.fluid.tests.unittests.test_pool2d_op
import
TestPool2D_Op
,
TestCase1
,
TestCase2
,
TestCase3
,
TestCase4
,
TestCase5
class
TestNGRAPHPool2D_Op
(
TestPool2D_Op
):
def
setUp
(
self
):
super
(
TestNGRAPHPool2D_Op
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_test_case
(
self
):
super
(
TestNGRAPHPool2D_Op
,
self
).
init_test_case
()
class
TestNGRAPHCase1
(
TestCase1
):
def
setUp
(
self
):
super
(
TestNGRAPHCase1
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_test_case
(
self
):
super
(
TestNGRAPHCase1
,
self
).
init_test_case
()
import
unittest
class
TestNGRAPHCase2
(
TestCase2
):
def
setUp
(
self
):
super
(
TestNGRAPHCase2
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_test_case
(
self
):
super
(
TestNGRAPHCase2
,
self
).
init_test_case
()
class
TestNGRAPHCase3
(
TestCase3
):
def
setUp
(
self
):
super
(
TestNGRAPHCase3
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_pool_type
(
self
):
super
(
TestNGRAPHCase3
,
self
).
init_pool_type
()
from
paddle.fluid.tests.unittests.test_pool2d_op
import
TestPool2D_Op
,
TestCase1
,
TestCase2
,
TestCase3
,
TestCase4
,
TestCase5
class
TestNGRAPHC
ase4
(
TestCase4
):
class
TestNGRAPHC
eilMode
(
TestCase1
):
def
setUp
(
self
):
super
(
TestNGRAPHCase4
,
self
).
setUp
()
self
.
_cpu_only
=
True
super
(
TestNGRAPHCeilMode
,
self
).
setUp
()
def
init_
pool_typ
e
(
self
):
s
uper
(
TestNGRAPHCase4
,
self
).
init_pool_type
()
def
init_
ceil_mod
e
(
self
):
s
elf
.
ceil_mode
=
True
class
TestNGRAPH
Case5
(
TestCase5
):
class
TestNGRAPH
Adaptive
(
TestCase1
):
def
setUp
(
self
):
super
(
TestNGRAPHCase5
,
self
).
setUp
()
self
.
_cpu_only
=
True
super
(
TestNGRAPHAdaptive
,
self
).
setUp
()
def
init_
pool_typ
e
(
self
):
s
uper
(
TestNGRAPHCase5
,
self
).
init_pool_type
()
def
init_
adaptiv
e
(
self
):
s
elf
.
adaptive
=
True
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/ngraph/test_scale_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -15,24 +15,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_scale_op
import
TestScaleOp
,
TestScaleOpSelectedRows
class
TestNGRAPHScaleOp
(
TestScaleOp
):
def
setUp
(
self
):
super
(
TestNGRAPHScaleOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_dtype_type
(
self
):
pass
class
TestNGRAPHScaleOpSelectedRows
(
TestScaleOpSelectedRows
):
def
setUp
(
self
):
super
(
TestNGRAPHScaleOpSelectedRows
,
self
).
setUp
()
self
.
_cpu_only
=
True
def
init_dtype_type
(
self
):
pass
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_softmax_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -16,11 +16,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_softmax_op
import
TestSoftmaxOp
class
TestSoftmaxNGRAPHOp
(
TestSoftmaxOp
):
def
setUp
(
self
):
super
(
TestSoftmaxNGRAPHOp
,
self
).
setUp
()
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
浏览文件 @
d12252e6
...
...
@@ -16,30 +16,5 @@ from __future__ import print_function
import
unittest
from
paddle.fluid.tests.unittests.test_top_k_op
import
TestTopkOp
,
TestTopkOp3d
,
TestTopkOp2
,
TestTopkOp3
,
TestTopkOp4
class
TestNGRAPHTopkOp
(
TestTopkOp
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp
,
self
).
setUp
()
self
.
_cpu_only
=
True
class
TestNGRAPHTopkOp2
(
TestTopkOp2
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp2
,
self
).
setUp
()
self
.
_cpu_only
=
True
class
TestNGRAPHTopkOp3
(
TestTopkOp3
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp3
,
self
).
setUp
()
self
.
_cpu_only
=
True
class
TestNGRAPHTopkOp4
(
TestTopkOp4
):
def
setUp
(
self
):
super
(
TestNGRAPHTopkOp4
,
self
).
setUp
()
self
.
_cpu_only
=
True
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
d12252e6
...
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
import
os
import
unittest
import
numpy
as
np
import
random
...
...
@@ -374,6 +375,9 @@ class OpTest(unittest.TestCase):
return
[]
places
=
[
fluid
.
CPUPlace
()]
cpu_only
=
self
.
_cpu_only
if
hasattr
(
self
,
'_cpu_only'
)
else
False
use_ngraph
=
bool
(
os
.
getenv
(
"FLAGS_use_ngraph"
,
False
))
if
use_ngraph
:
cpu_only
=
True
if
core
.
is_compiled_with_cuda
()
and
core
.
op_support_gpu
(
self
.
op_type
)
\
and
not
cpu_only
:
places
.
append
(
core
.
CUDAPlace
(
0
))
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
d12252e6
...
...
@@ -79,7 +79,7 @@ class TestParallelExecutorBase(unittest.TestCase):
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
build_strategy
.
memory_optimize
=
False
if
memory_opt
else
use_ir_memory_optimize
# python memory optimization is conflict with inplace pass.
# Use ir graph memory optimization after inplace pass is the correct way.
build_strategy
.
enable_inplace
=
False
if
memory_opt
else
enable_inplace
...
...
python/paddle/fluid/tests/unittests/test_fuse_elewise_add_act_pass.py
浏览文件 @
d12252e6
...
...
@@ -121,6 +121,8 @@ class TestMNIST(TestParallelExecutorBase):
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-6
))
return
optimizer
# NOTE(dzh):
# need to make it compatible with elewise fuse act
not_fuse_op_first_loss
,
not_fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
feed_dict
=
{
"image"
:
img
,
...
...
@@ -128,6 +130,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
False
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
fuse_op_first_loss
,
fuse_op_last_loss
=
self
.
check_network_convergence
(
model
,
...
...
@@ -136,6 +139,7 @@ class TestMNIST(TestParallelExecutorBase):
use_cuda
=
use_cuda
,
fuse_elewise_add_act_ops
=
True
,
memory_opt
=
False
,
use_ir_memory_optimize
=
False
,
optimizer
=
_optimizer
)
for
loss
in
zip
(
not_fuse_op_first_loss
,
fuse_op_first_loss
):
...
...
python/paddle/fluid/tests/unittests/test_ir_memory_optimize_transformer.py
0 → 100644
浏览文件 @
d12252e6
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
os
.
environ
[
'FLAGS_eager_delete_tensor_gb'
]
=
"0.0"
os
.
environ
[
'RECORDIO_FILENAME'
]
=
'/tmp/ir_memory_optimize_transformer.wmt16.recordio'
from
test_parallel_executor_transformer
import
TestTransformer
from
test_parallel_executor_transformer
import
transformer
# NOTE(dzhwinter): test diferent strategy colisions.
# open the eager delete tensor strategy by default.
class
TestTransformerWithIR
(
TestTransformer
):
def
test_main
(
self
):
if
core
.
is_compiled_with_cuda
():
# check python transpiler
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
memory_opt
=
True
,
use_ir_memory_optimize
=
False
)
# check IR memory optimize
self
.
check_network_convergence
(
transformer
,
use_cuda
=
True
,
memory_opt
=
False
,
use_ir_memory_optimize
=
True
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/requirements.txt
浏览文件 @
d12252e6
requests==2.9.2
numpy>=1.12
protobuf
==3.1
protobuf
>=3.6
recordio>=0.1.0
matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib
rarfile
...
...
tools/manylinux1/Dockerfile.x64
浏览文件 @
d12252e6
...
...
@@ -31,10 +31,10 @@ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH}
# protobuf 3.
1.0
RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.
1.0/protobuf-cpp-3.1.0
.tar.gz && \
tar xzf protobuf-cpp-3.
1.0
.tar.gz && \
cd protobuf-3.
1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0
.tar.gz
# protobuf 3.
6.1
RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.
6.1/protobuf-cpp-3.6.1
.tar.gz && \
tar xzf protobuf-cpp-3.
6.1
.tar.gz && \
cd protobuf-3.
6.1 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.6.1
.tar.gz
RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt
...
...
tools/manylinux1/build_scripts/build.sh
浏览文件 @
d12252e6
...
...
@@ -17,7 +17,7 @@ OPENSSL_ROOT=openssl-1.1.0i
OPENSSL_HASH
=
ebbfc844a8c8cc0ea5dc10b86c9ce97f401837f3fa08c17b2cdadc118253cf99
EPEL_RPM_HASH
=
e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
DEVTOOLS_HASH
=
a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
PATCHELF_HASH
=
d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
PATCHELF_HASH
=
f2aa40a6148cb3b0ca807a1bf836b081793e55ec9e5540a5356d800132be7e0a
CURL_ROOT
=
curl-7.49.1
CURL_HASH
=
eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1
AUTOCONF_ROOT
=
autoconf-2.69
...
...
@@ -107,11 +107,11 @@ curl-config --features
rm
-rf
/usr/local/ssl
# Install patchelf (latest with unreleased bug fixes)
curl
-sLO
http
://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2
.tar.gz
check_sha256sum patchelf-0.9
njs2
.tar.gz
$PATCHELF_HASH
tar
-xzf
patchelf-0.9
njs2
.tar.gz
(
cd
patchelf-0.9
njs2
&&
./configure
&&
make
&&
make
install
)
rm
-rf
patchelf-0.9
njs2.tar.gz patchelf-0.9njs2
curl
-sLO
http
s://nixos.org/releases/patchelf/patchelf-0.9/patchelf-0.9
.tar.gz
check_sha256sum patchelf-0.9.tar.gz
$PATCHELF_HASH
tar
-xzf
patchelf-0.9.tar.gz
(
cd
patchelf-0.9
&&
./configure
&&
make
&&
make
install
)
rm
-rf
patchelf-0.9
.tar.gz patchelf-0.9
# Install latest pypi release of auditwheel
LD_LIBRARY_PATH
=
"
${
ORIGINAL_LD_LIBRARY_PATH
}
:
$(
dirname
${
PY35_BIN
}
)
/lib"
$PY35_BIN
/pip
install
auditwheel
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录