Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Commit 13509da6
Authored Aug 28, 2018 by Yibing Liu

Merge upstream to branch wrap_squeezes

Parents: 03f6292b, 9be39bb4
Showing 79 changed files with 3144 additions and 737 deletions (+3144 −737).
Changed files:

paddle/fluid/API.spec (+4 −1)
paddle/fluid/framework/data_type.cc (+1 −0)
paddle/fluid/framework/data_type.h (+3 −0)
paddle/fluid/framework/details/multi_devices_graph_pass.cc (+41 −24)
paddle/fluid/framework/framework.proto (+1 −0)
paddle/fluid/framework/ir/graph.cc (+0 −57)
paddle/fluid/framework/tensor.cc (+5 −1)
paddle/fluid/inference/analysis/analyzer.cc (+1 −1)
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc (+2 −0)
paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc (+1 −0)
paddle/fluid/inference/api/demo_ci/CMakeLists.txt (+2 −0)
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt (+4 −4)
paddle/fluid/inference/tensorrt/convert/concat_op.cc (+57 −0)
paddle/fluid/inference/tensorrt/convert/op_converter.h (+8 −0)
paddle/fluid/inference/tensorrt/convert/test_concat_op.cc (+49 −0)
paddle/fluid/memory/detail/system_allocator.cc (+35 −14)
paddle/fluid/operators/CMakeLists.txt (+2 −0)
paddle/fluid/operators/auc_op.h (+14 −0)
paddle/fluid/operators/batch_norm_mkldnn_op.cc (+277 −119)
paddle/fluid/operators/fake_dequantize_op.cc (+25 −12)
paddle/fluid/operators/fake_dequantize_op.cu (+36 −0)
paddle/fluid/operators/fake_dequantize_op.h (+15 −8)
paddle/fluid/operators/fetch_barrier_op.cc (+2 −0)
paddle/fluid/operators/fusion_lstm_op.cc (+149 −7)
paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc (+201 −0)
paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h (+42 −0)
paddle/fluid/operators/math/concat.cu (+6 −0)
paddle/fluid/operators/math/cpu_vec_test.cc (+1 −0)
paddle/fluid/operators/math/math_function.cc (+2 −1)
paddle/fluid/operators/math/math_function.cu (+5 −4)
paddle/fluid/operators/math/padding.h (+124 −0)
paddle/fluid/operators/math/sequence_padding.cc (+97 −103)
paddle/fluid/operators/math/sequence_padding.cu (+103 −144)
paddle/fluid/operators/math/sequence_padding.h (+38 −14)
paddle/fluid/operators/math/sequence_padding_test.cc (+19 −4)
paddle/fluid/operators/pad_constant_like_op.cc (+212 −0)
paddle/fluid/operators/pad_constant_like_op.cu (+27 −0)
paddle/fluid/operators/pad_constant_like_op.h (+93 −0)
paddle/fluid/operators/pad_op.h (+20 −93)
paddle/fluid/operators/scale_op.cc (+20 −1)
paddle/fluid/operators/scale_op.h (+20 −8)
paddle/fluid/operators/send_barrier_op.cc (+4 −0)
paddle/fluid/operators/sequence_pad_op.cc (+194 −0)
paddle/fluid/operators/sequence_pad_op.cu (+29 −0)
paddle/fluid/operators/sequence_pad_op.h (+66 −0)
paddle/fluid/operators/unstack_op.cc (+26 −0)
paddle/fluid/operators/unstack_op.h (+135 −0)
paddle/fluid/operators/warpctc_op.h (+18 −6)
paddle/fluid/platform/CMakeLists.txt (+5 −0)
paddle/fluid/platform/cpu_info.cc (+17 −4)
paddle/fluid/platform/device_tracer.h (+9 −1)
paddle/fluid/platform/dynload/CMakeLists.txt (+2 −0)
paddle/fluid/platform/dynload/dynamic_loader.cc (+1 −2)
paddle/fluid/platform/enforce.h (+27 −3)
paddle/fluid/platform/profiler.h (+10 −0)
paddle/fluid/pybind/protobuf.cc (+1 −0)
paddle/fluid/pybind/pybind.cc (+3 −0)
paddle/fluid/pybind/tensor_py.h (+1 −1)
paddle/scripts/paddle_build.sh (+11 −4)
python/paddle/dataset/movielens.py (+3 −2)
python/paddle/fluid/framework.py (+2 −0)
python/paddle/fluid/layers/io.py (+9 −2)
python/paddle/fluid/layers/metric_op.py (+8 −4)
python/paddle/fluid/layers/nn.py (+191 −22)
python/paddle/fluid/optimizer.py (+7 −1)
python/paddle/fluid/tests/unittests/dist_se_resnext.py (+14 −8)
python/paddle/fluid/tests/unittests/dist_word2vec.py (+10 −6)
python/paddle/fluid/tests/unittests/test_dist_train.py (+1 −1)
python/paddle/fluid/tests/unittests/test_dist_word2vec.py (+1 −1)
python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py (+21 −12)
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py (+36 −26)
python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py (+139 −0)
python/paddle/fluid/tests/unittests/test_pad_constant_like.py (+69 −0)
python/paddle/fluid/tests/unittests/test_scale_op.py (+54 −0)
python/paddle/fluid/tests/unittests/test_sequence_pad_op.py (+131 −0)
python/paddle/fluid/tests/unittests/test_tensor.py (+21 −0)
python/paddle/fluid/tests/unittests/test_unstack_op.py (+81 −0)
python/paddle/fluid/tests/unittests/test_variable.py (+2 −1)
python/paddle/fluid/transpiler/distribute_transpiler.py (+21 −10)
paddle/fluid/API.spec (+4 −1)
@@ -113,6 +113,7 @@ paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size
 paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
 paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
 paddle.fluid.layers.sequence_expand ArgSpec(args=['x', 'y', 'ref_level', 'name'], varargs=None, keywords=None, defaults=(-1, None))
+paddle.fluid.layers.sequence_pad ArgSpec(args=['x', 'pad_value', 'maxlen'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.lstm_unit ArgSpec(args=['x_t', 'hidden_t_prev', 'cell_t_prev', 'forget_bias', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(0.0, None, None, None))
 paddle.fluid.layers.reduce_sum ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
 paddle.fluid.layers.reduce_mean ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None))
@@ -148,6 +149,7 @@ paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=No
 paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.lrn ArgSpec(args=['input', 'n', 'k', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(5, 1.0, 0.0001, 0.75, None))
 paddle.fluid.layers.pad ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
+paddle.fluid.layers.pad_constant_like ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None))
 paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None))
 paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
 paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
@@ -166,6 +168,7 @@ paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], vara
 paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
 paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
 paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
+paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
@@ -380,7 +383,7 @@ paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, a
 paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool
 paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
 paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
-paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None
+paddle.fluid.LoDTensor.set 1. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CPUPlace) -> None 2. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CPUPlace) -> None 3. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CPUPlace) -> None 4. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CPUPlace) -> None 5. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CPUPlace) -> None 6. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CPUPlace) -> None 7. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CPUPlace) -> None 8. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CPUPlace) -> None 9. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPlace) -> None 10. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPlace) -> None 11. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPlace) -> None 12. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPlace) -> None 13. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPlace) -> None 14. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPlace) -> None 15. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPlace) -> None 16. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPlace) -> None 17. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float32], arg1: paddle::platform::CUDAPinnedPlace) -> None 18. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int32], arg1: paddle::platform::CUDAPinnedPlace) -> None 19. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[float64], arg1: paddle::platform::CUDAPinnedPlace) -> None 20. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int64], arg1: paddle::platform::CUDAPinnedPlace) -> None 21. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[bool], arg1: paddle::platform::CUDAPinnedPlace) -> None 22. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint16], arg1: paddle::platform::CUDAPinnedPlace) -> None 23. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[uint8], arg1: paddle::platform::CUDAPinnedPlace) -> None 24. set(self: paddle.fluid.core.Tensor, arg0: numpy.ndarray[int8], arg1: paddle::platform::CUDAPinnedPlace) -> None
 paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None
 paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None
 paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int]
paddle/fluid/framework/data_type.cc (+1 −0)
@@ -64,6 +64,7 @@ static DataTypeMap* InitDataTypeMap() {
   RegType(size_t, proto::VarType::SIZE_T);
   RegType(int16_t, proto::VarType::INT16);
   RegType(uint8_t, proto::VarType::UINT8);
+  RegType(int8_t, proto::VarType::INT8);
 #undef RegType
   return retv;
paddle/fluid/framework/data_type.h (+3 −0)
@@ -54,6 +54,9 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
     case proto::VarType::INT16:
       visitor.template operator()<int16_t>();
       break;
+    case proto::VarType::INT8:
+      visitor.template operator()<int8_t>();
+      break;
     default:
       PADDLE_THROW("Not supported %d", type);
   }
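The VisitDataType hunk above dispatches a visitor whose operator() is a template instantiated per data type; the new case simply adds int8_t to that dispatch. A minimal standalone sketch of the pattern (illustration only, not Paddle's actual code — the VarType enum and PrintSizeVisitor below are made-up stand-ins):

#include <cstdint>
#include <cstdio>

// Toy stand-ins for proto::VarType::Type and a Paddle data-type visitor.
enum class VarType { INT16, INT8 };

struct PrintSizeVisitor {
  template <typename T>
  void operator()() const {
    std::printf("%zu byte(s)\n", sizeof(T));
  }
};

template <typename Visitor>
void VisitDataType(VarType type, Visitor visitor) {
  switch (type) {
    // The `template` keyword is required because operator() is a
    // dependent template member, exactly as in the hunk above.
    case VarType::INT16: visitor.template operator()<int16_t>(); break;
    case VarType::INT8:  visitor.template operator()<int8_t>();  break;
  }
}

int main() {
  VisitDataType(VarType::INT8, PrintSizeVisitor{});  // prints "1 byte(s)"
  return 0;
}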
paddle/fluid/framework/details/multi_devices_graph_pass.cc (+41 −24)
@@ -754,17 +754,26 @@ void MultiDevSSAGraphBuilder::CreateDistTrainOp(ir::Graph *result,
                  node->Op()->Type());

   CreateComputationalOp(result, node, op_dev_id);
-  if (node->Op()->Type() == "concat") {
-    ConnectOp(result, result->Get<GraphOps>(kGraphOps).back().get(),
-              "fetch_barrier");
+}
+
+void SetOpInputsAllPlaces(ir::Graph *result, ir::Node *node, int num_places) {
+  auto *op_handle = result->Get<GraphOps>(kGraphOps).back().get();
+  for (ir::Node *input : node->inputs) {
+    VarHandle *var = nullptr;
+    for (int place_offset = 0; place_offset < num_places; ++place_offset) {
+      auto &var_holders = result->Get<GraphVars>(kGraphVars)[place_offset];
+      auto &var_holder = var_holders[input->Name()];
+      if (!var_holder.empty()) {
+        var = var_holder.rbegin()->get();
+        op_handle->AddInput(var);
+      }
+    }
   }
 }

 // Create RPC related op handles that connects its in ops and out ops.
 void MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
                                           ir::Node *node) const {
+  // FIXME(typhoonzero): Cleanup this deps for both sync mode and async mode
+  // put them into transpiler.
   int op_dev_id = -1;
   if (node->Op()->Type() == "send") {
     // TODO(paddle-dev): getting the first var is not safe.
@@ -799,8 +808,6 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
   }
   auto recv_param_grad = boost::get<std::vector<std::string>>(
       node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
-  // FIXME(typhoonzero): assume each recv op output one param
-  // Use the same place as send.
   if (recv_param_grad.size() == 2U) {
     op_dev_id = GetVarDeviceID(*result, recv_param_grad[1]);
     VLOG(10) << "recv param " << recv_param_grad[0]
@@ -814,34 +821,44 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
         .emplace(varname, op_dev_id);
     }
   } else {
-    // send_barrier and fetch_barrier op can be scheduled on device 0
+    // send_barrier, fetch_barrier will run on place 0;
     op_dev_id = 0;
   }

   PADDLE_ENFORCE(op_dev_id != -1, "can not find the right place for rpc op: %s",
                  node->Op()->Type());
   result->Get<GraphOps>(kGraphOps).emplace_back(new RPCOpHandle(
       result->CreateOpNode(node->Op()), *node->Op(), local_scopes_[op_dev_id],
       node->Op()->Type(), places_[op_dev_id]));

-  // TODO(panyx0718): This might not be needed anymore.
-  if (node->Op()->Type() == "send_barrier") {
-    ConnectOp(result, result->Get<GraphOps>(kGraphOps).back().get(), "send");
-  } else if (node->Op()->Type() == "recv") {
-    ConnectOp(result, result->Get<GraphOps>(kGraphOps).back().get(),
-              "send_barrier");
-  } else if (node->Op()->Type() == "fetch_barrier") {
-    ConnectOp(result, result->Get<GraphOps>(kGraphOps).back().get(), "recv");
-  } else if (node->Op()->Type() == "send") {
-    // do nothing
+  if (node->Op()->Type() == "send") {
+    CreateOpHandleIOs(result, node, op_dev_id);
   } else {
-    PADDLE_THROW("rpc op should be in ["
-                 "send, send_barrier. recv, fetch_barrier]");
-  }
-
-  CreateOpHandleIOs(result, node, op_dev_id);
+    // send_barrier, recv, fetch_barrier's inputs are deps var, get them from
+    // all places
+    auto p = places_[op_dev_id];
+    auto *op_handle = result->Get<GraphOps>(kGraphOps).back().get();
+    op_handle->SetDeviceContext(p,
+                                platform::DeviceContextPool::Instance().Get(p));
+
+    SetOpInputsAllPlaces(result, node, places_.size());
+    for (ir::Node *output : node->outputs) {
+      int outvar_dev_id = op_dev_id;
+      if (node->Op()->Type() == "fetch_barrier") {
+        outvar_dev_id = GetVarDeviceID(*result, output->Name());
+        PADDLE_ENFORCE_NE(outvar_dev_id, -1);
+      }
+      p = places_[outvar_dev_id];
+      ir::Node *new_node = nullptr;
+      if (output->Var()) {
+        new_node = result->CreateVarNode(output->Var());
+      } else {
+        new_node =
+            result->CreateEmptyNode(output->Name(), ir::Node::Type::kVariable);
+      }
+      CreateOpOutput(result, op_handle, new_node, p, outvar_dev_id);
+    }
+  }
 }

 bool MultiDevSSAGraphBuilder::IsScaleLossOp(ir::Node *node) const {
paddle/fluid/framework/framework.proto (+1 −0)
@@ -107,6 +107,7 @@ message VarType {
     // Tensor<size_t> is used in C++.
     SIZE_T = 19;
     UINT8 = 20;
+    INT8 = 21;

     // Other types that may need additional descriptions
     LOD_TENSOR = 7;
paddle/fluid/framework/ir/graph.cc (+0 −57)
@@ -132,63 +132,6 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
     }
   }

-  std::vector<ir::Node *> send_ops;
-  ir::Node *send_bar = nullptr;
-  std::vector<ir::Node *> recv_ops;
-  ir::Node *fetch_bar = nullptr;
-  for (ir::Node *node : Nodes()) {
-    if (node->Name() == "send") {
-      send_ops.push_back(node);
-    } else if (node->Name() == "send_barrier") {
-      PADDLE_ENFORCE(!send_bar, "only has one send barrier");
-      send_bar = node;
-    } else if (node->Name() == "recv") {
-      recv_ops.push_back(node);
-    } else if (node->Name() == "fetch_barrier") {
-      PADDLE_ENFORCE(!fetch_bar, "only has one fetch barrier");
-      fetch_bar = node;
-    }
-  }
-  if (send_bar) {
-    for (ir::Node *send : send_ops) {
-      ir::Node *dep_var = CreateControlDepVar();
-      send->outputs.push_back(dep_var);
-      dep_var->inputs.push_back(send);
-      send_bar->inputs.push_back(dep_var);
-      dep_var->outputs.push_back(send_bar);
-    }
-    for (ir::Node *recv : recv_ops) {
-      ir::Node *dep_var = CreateControlDepVar();
-      recv->inputs.push_back(dep_var);
-      dep_var->outputs.push_back(recv);
-      send_bar->outputs.push_back(dep_var);
-      dep_var->inputs.push_back(send_bar);
-    }
-  }
-  if (fetch_bar) {
-    for (ir::Node *recv : recv_ops) {
-      ir::Node *dep_var = CreateControlDepVar();
-      recv->outputs.push_back(dep_var);
-      dep_var->inputs.push_back(recv);
-      fetch_bar->inputs.push_back(dep_var);
-      dep_var->outputs.push_back(fetch_bar);
-    }
-  }
-
-  std::vector<std::string> send_vars = FindDistTrainSendVars(send_ops);
-  std::vector<std::string> recv_vars = FindDistTrainRecvVars(recv_ops);
-  for (ir::Node *node : Nodes()) {
-    if (IsDistTrainOp(node, send_vars, recv_vars)) {
-      if (fetch_bar && node->Name() == "concat") {
-        ir::Node *dep_var = CreateControlDepVar();
-        fetch_bar->outputs.push_back(dep_var);
-        dep_var->inputs.push_back(fetch_bar);
-        node->inputs.push_back(dep_var);
-        dep_var->outputs.push_back(node);
-      }
-    }
-  }
-
   /**
    * We should handle write after read(WAR) and write after write(WAW) here.
    * Because some of the operators of the program can be executed parallelly.
paddle/fluid/framework/tensor.cc (+5 −1)
@@ -40,7 +40,11 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type,
       "When calling this method, the Tensor's numel must be "
       "equal or larger than zero. "
       "Please check Tensor::Resize has been called first.");
-  size_t size = requested_size ? requested_size : numel() * SizeOfType(type);
+  size_t size = numel() * SizeOfType(type);
+  if (requested_size) {
+    PADDLE_ENFORCE_GE(requested_size, size);
+    size = requested_size;
+  }
   /* some versions of boost::variant don't have operator!= */
   if (holder_ == nullptr || !(holder_->place() == place) ||
       holder_->size() < size + offset_) {
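The new mutable_data logic makes requested_size a lower-bounded override of the tensor's natural byte size rather than an unconditional replacement. A minimal sketch of that size rule (my reading of the hunk, with assert standing in for PADDLE_ENFORCE_GE):

#include <cassert>
#include <cstddef>

// Buffer size is numel * sizeof(type) unless a *larger* size is requested;
// requesting less than the tensor needs is treated as an error.
size_t BufferSize(size_t natural_size, size_t requested_size) {
  size_t size = natural_size;
  if (requested_size) {
    assert(requested_size >= size);  // mirrors PADDLE_ENFORCE_GE
    size = requested_size;
  }
  return size;
}

int main() {
  assert(BufferSize(256, 0) == 256);      // no request: natural size wins
  assert(BufferSize(256, 1024) == 1024);  // over-allocation is allowed
  return 0;
}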
paddle/fluid/inference/analysis/analyzer.cc (+1 −1)
@@ -72,7 +72,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
     auto trt_teller = [&](const Node* node) {
       std::unordered_set<std::string> teller_set(
           {"elementwise_add", "mul", "conv2d", "pool2d", "relu", "softmax",
-           "depthwise_conv2d", "batch_norm"});
+           "depthwise_conv2d", "batch_norm", "concat"});
       if (!node->IsFunction()) return false;

       const auto* func = static_cast<const Function*>(node);
paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc (+2 −0)
@@ -32,6 +32,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
       : NativePaddlePredictor(config), config_(config) {}

   bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
+    FLAGS_IA_enable_tensorrt_subgraph_engine = true;
     VLOG(3) << "Predictor::init()";
     FLAGS_tensorrt_max_batch_size = config_.max_batch_size;
     FLAGS_tensorrt_workspace_size = config_.workspace_size;
@@ -161,3 +162,4 @@ USE_TRT_CONVERTER(fc);
 USE_TRT_CONVERTER(pool2d);
 USE_TRT_CONVERTER(softmax);
 USE_TRT_CONVERTER(batch_norm);
+USE_TRT_CONVERTER(concat);
paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc (+1 −0)
@@ -37,6 +37,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
   config1.use_gpu = true;
   config1.fraction_of_gpu_memory = 0.3;
   config1.device = 0;
+  config1.max_batch_size = 10;

   auto predictor0 =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
paddle/fluid/inference/api/demo_ci/CMakeLists.txt (+2 −0)
@@ -23,9 +23,11 @@ include_directories("${PADDLE_LIB}")
 include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
 include_directories("${PADDLE_LIB}/third_party/install/glog/include")
 include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
+if (NOT WIN32)
 include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
 include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
 include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
+endif(NOT WIN32)

 include_directories("${PADDLE_LIB}/third_party/boost")
 include_directories("${PADDLE_LIB}/third_party/eigen3")
paddle/fluid/inference/tensorrt/convert/CMakeLists.txt (+4 −4)
 # Add TRT tests
 nv_library(tensorrt_converter
   SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
-  batch_norm_op.cc activation_op.cc softmax_op.cc
+  batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc
   DEPS tensorrt_engine operator scope framework_proto op_registry)

 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
@@ -18,12 +18,12 @@ nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL)
 nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
 nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine elementwise_add_op SERIAL)
 nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine softmax_op SERIAL)
 nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine batch_norm_op SERIAL)
+nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
+        DEPS ${FLUID_CORE_MODULES} tensorrt_engine concat_op SERIAL)
paddle/fluid/inference/tensorrt/convert/concat_op.cc (new file, +57)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

namespace paddle {
namespace inference {
namespace tensorrt {

/*
 * MulOp, IMatrixMultiplyLayer in TRT. This Layer doesn't has weights.
 */
class ConcatOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";

    framework::OpDesc op_desc(op, nullptr);
    // Declare inputs
    std::vector<nvinfer1::ITensor*> itensors;
    for (auto& input_name : op_desc.Input("X")) {
      itensors.push_back(engine_->GetITensor(input_name));
    }
    int axis = boost::get<int>(op_desc.GetAttr("axis"));
    PADDLE_ENFORCE(axis > 0,
                   "The axis attr of Concat op should be large than 0 for trt");

    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
                                       itensors.size());
    axis = axis - 1;  // Remove batch dim
    layer->setAxis(axis);
    auto output_name = op_desc.Output("Out")[0];
    engine_->SetITensor(output_name, layer->getOutput(0));
    if (test_mode) {  // the test framework can not determine which is the
                      // output, so place the declaration inside.
      engine_->DeclareOutput(output_name);
    }
  }
};

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle

REGISTER_TRT_OP_CONVERTER(concat, ConcatOpConverter);
paddle/fluid/inference/tensorrt/convert/op_converter.h (+8 −0)
@@ -79,6 +79,14 @@ class OpConverter {
         it = Registry<OpConverter>::Lookup("elementwise_" + op_type +
                                            "_tensor");
       }
       PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
                               op_desc.Type());
     }
+
+    if (op_desc.Type() == "depthwise_conv2d") {
+      it = Registry<OpConverter>::Lookup("conv2d");
+      PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]",
+                              op_desc.Type());
+    }
+
     if (!it) {
paddle/fluid/inference/tensorrt/convert/test_concat_op.cc (new file, +49)

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"

namespace paddle {
namespace inference {
namespace tensorrt {

TEST(concat_op, test) {
  std::unordered_set<std::string> parameters({""});
  framework::Scope scope;
  TRTConvertValidation validator(10, parameters, scope, 1000);
  validator.DeclInputVar("concat_x1", nvinfer1::DimsCHW(10, 3, 1));
  validator.DeclInputVar("concat_x2", nvinfer1::DimsCHW(3, 3, 1));
  validator.DeclInputVar("concat_x3", nvinfer1::DimsCHW(7, 3, 1));
  validator.DeclOutputVar("concat_out", nvinfer1::DimsCHW(20, 3, 1));

  // Prepare Op description
  framework::OpDesc desc;
  desc.SetType("concat");
  desc.SetInput("X", {"concat_x1", "concat_x2", "concat_x3"});
  desc.SetOutput("Out", {"concat_out"});

  int axis = 1;
  desc.SetAttr("axis", axis);

  validator.SetOp(*desc.Proto());

  validator.Execute(5);
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle

USE_OP(concat);
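The test's dimensions make the converter's axis handling concrete: fluid's concat axis 1 (axis 0 is the batch) becomes TensorRT axis 0 in the batch-less CHW dims, and (10,3,1) + (3,3,1) + (7,3,1) concatenated on C gives (20,3,1). A small standalone sketch of that shape arithmetic (illustration only, not Paddle or TensorRT code):

#include <array>
#include <cassert>
#include <vector>

// Output shape of concat: the chosen axis is summed; all others must match.
std::array<int, 3> ConcatCHW(const std::vector<std::array<int, 3>>& ins,
                             int trt_axis) {
  std::array<int, 3> out = ins.front();
  for (size_t i = 1; i < ins.size(); ++i) {
    for (int d = 0; d < 3; ++d) {
      if (d == trt_axis) {
        out[d] += ins[i][d];
      } else {
        assert(ins[i][d] == out[d]);  // non-concat dims must agree
      }
    }
  }
  return out;
}

int main() {
  int fluid_axis = 1;             // as in the test above
  int trt_axis = fluid_axis - 1;  // TRT dims carry no batch dim
  auto out = ConcatCHW({{10, 3, 1}, {3, 3, 1}, {7, 3, 1}}, trt_axis);
  assert((out == std::array<int, 3>{20, 3, 1}));
  return 0;
}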
paddle/fluid/memory/detail/system_allocator.cc (+35 −14)
@@ -11,12 +11,18 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#define GLOG_NO_ABBREVIATED_SEVERITIES

 #include "paddle/fluid/memory/detail/system_allocator.h"

-#include <stdlib.h>    // for malloc and free
+#ifdef _WIN32
+#include <malloc.h>
+#include <windows.h>  // VirtualLock/VirtualUnlock
+#else
 #include <sys/mman.h>  // for mlock and munlock
-#include <algorithm>   // for std::max
+#endif
+#include <stdlib.h>   // for malloc and free
+#include <algorithm>  // for std::max

 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/assert.h"
@@ -35,31 +41,42 @@ namespace paddle {
 namespace memory {
 namespace detail {

-void* CPUAllocator::Alloc(size_t* index, size_t size) {
-  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
-  // malloc might not return nullptr if size is zero, but the returned
-  // pointer shall not be dereferenced -- so we make it nullptr.
-  if (size <= 0) return nullptr;
-  *index = 0;  // unlock memory
-
+void* AlignedMalloc(size_t size) {
   void* p = nullptr;
+  size_t alignment = 32ul;
 #ifdef PADDLE_WITH_MKLDNN
   // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
   // memory alignment
-  PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0, "Alloc %ld error!",
-                    size);
+  alignment = 4096ul;
+#endif
+#ifdef _WIN32
+  p = _aligned_malloc(size, alignment);
 #else
-  PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0, "Alloc %ld error!",
-                    size);
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, alignment, size), 0, "Alloc %ld error!",
+                    size);
 #endif
   PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
+  return p;
+}
+
+void* CPUAllocator::Alloc(size_t* index, size_t size) {
+  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
+  // malloc might not return nullptr if size is zero, but the returned
+  // pointer shall not be dereferenced -- so we make it nullptr.
+  if (size <= 0) return nullptr;
+
+  *index = 0;  // unlock memory
+
+  void* p = AlignedMalloc(size);

   if (p != nullptr) {
     if (FLAGS_use_pinned_memory) {
       *index = 1;
+#ifdef _WIN32
+      VirtualLock(p, size);
+#else
       mlock(p, size);  // lock memory
+#endif
     }
   }
@@ -68,7 +85,11 @@ void* CPUAllocator::Alloc(size_t* index, size_t size) {
 void CPUAllocator::Free(void* p, size_t size, size_t index) {
   if (p != nullptr && index == 1) {
+#ifdef _WIN32
+    VirtualUnlock(p, size);
+#else
     munlock(p, size);
+#endif
   }
   free(p);
 }
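As a quick illustration of the allocation contract AlignedMalloc now centralizes — 32-byte alignment by default, 4096 under MKL-DNN, _aligned_malloc on Windows — here is a standalone sketch (not Paddle code) that allocates and verifies alignment; note that _aligned_malloc must be paired with _aligned_free rather than free:

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  const size_t alignment = 32;  // Paddle bumps this to 4096ul with MKL-DNN
  const size_t size = 1024;
  void* p = nullptr;
#ifdef _WIN32
  p = _aligned_malloc(size, alignment);
#else
  int rc = posix_memalign(&p, alignment, size);
  assert(rc == 0);  // stands in for PADDLE_ENFORCE_EQ(..., 0, ...)
#endif
  assert(p != nullptr);
  assert(reinterpret_cast<uintptr_t>(p) % alignment == 0);
#ifdef _WIN32
  _aligned_free(p);  // _aligned_malloc pairs with _aligned_free
#else
  free(p);
#endif
  return 0;
}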
paddle/fluid/operators/CMakeLists.txt (+2 −0)
@@ -291,6 +291,8 @@ op_library(unsqueeze_op DEPS reshape_op)
 op_library(squeeze_op DEPS reshape_op)
 op_library(extract_rows_op DEPS memory)
 op_library(flatten_op DEPS reshape_op)
+op_library(sequence_pad_op DEPS sequence_padding)
+op_library(unstack_op DEPS stack_op)

 if (WITH_GPU)
     op_library(conv_op DEPS vol2col depthwise_conv im2col)
paddle/fluid/operators/auc_op.h (+14 −0)
@@ -60,6 +60,20 @@ class AucKernel : public framework::OpKernel<T> {
     const T* inference_data = predict->data<T>();
     const auto* label_data = label->data<int64_t>();

+    // check if states are inited.
+    auto* tp_in = ctx.Input<Tensor>("TP");
+    auto* fp_in = ctx.Input<Tensor>("FP");
+    auto* tn_in = ctx.Input<Tensor>("TN");
+    auto* fn_in = ctx.Input<Tensor>("FN");
+    PADDLE_ENFORCE(tp_in->IsInitialized(), "true_positive is not inited!");
+    PADDLE_ENFORCE(fp_in->IsInitialized(), "false_negative is not inited!");
+    PADDLE_ENFORCE(tn_in->IsInitialized(), "true_negative is not inited!");
+    PADDLE_ENFORCE(fn_in->IsInitialized(), "false_positive is not inited!");
+    PADDLE_ENFORCE_EQ(tp_in->numel(), num_thresholds, "");
+    PADDLE_ENFORCE_EQ(fp_in->numel(), num_thresholds, "");
+    PADDLE_ENFORCE_EQ(tn_in->numel(), num_thresholds, "");
+    PADDLE_ENFORCE_EQ(fn_in->numel(), num_thresholds, "");
+
     auto* tp_data = true_positive->mutable_data<int64_t>(ctx.GetPlace());
     auto* fn_data = false_negative->mutable_data<int64_t>(ctx.GetPlace());
     auto* tn_data = true_negative->mutable_data<int64_t>(ctx.GetPlace());
paddle/fluid/operators/batch_norm_mkldnn_op.cc (+277 −119)
(This diff is collapsed in the original view.)
paddle/fluid/operators/fake_dequantize_op.cc (+25 −12)
@@ -18,15 +18,32 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+template <typename T>
+struct DequantizeFunctor<platform::CPUDeviceContext, T> {
+  void operator()(const platform::CPUDeviceContext& dev_ctx,
+                  const framework::Tensor* in, const framework::Tensor* scale,
+                  T max_range, framework::Tensor* out) {
+    auto in_e = framework::EigenVector<T>::Flatten(*in);
+    const T* scale_factor = scale->data<T>();
+    auto out_e = framework::EigenVector<T>::Flatten(*out);
+
+    auto& dev = *dev_ctx.eigen_device();
+    out_e.device(dev) = (scale_factor[0] / max_range) * in_e;
+  }
+};
+
+template struct DequantizeFunctor<platform::CPUDeviceContext, float>;
+template struct DequantizeFunctor<platform::CPUDeviceContext, double>;
+
 class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel {
  public:
-  FakeDequantizeMaxAbsOp(const std::string &type,
-                         const framework::VariableNameMap &inputs,
-                         const framework::VariableNameMap &outputs,
-                         const framework::AttributeMap &attrs)
+  FakeDequantizeMaxAbsOp(const std::string& type,
+                         const framework::VariableNameMap& inputs,
+                         const framework::VariableNameMap& outputs,
+                         const framework::AttributeMap& attrs)
       : OperatorWithKernel(type, inputs, outputs, attrs) {}

-  void InferShape(framework::InferShapeContext *ctx) const override {
+  void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of FakeDequantizeMaxAbsOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
@@ -42,21 +59,17 @@ class FakeDequantizeMaxAbsOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X",
              "(Tensor) The input with float-32/64 type is the "
              "low precision tensor.");
+    AddInput("Scale", "(float) The scale in quantization stage.");
     AddOutput("Out",
               "(Tensor) The output is the dequantized high "
               "precision tensor.");
-    AddAttr<int>("num_bits",
-                 "(int) `num_bits` is the quantization level bits, "
-                 "such as 2, 5, 8.");
-    AddAttr<float>("scale",
-                   "(float) The maximum absolute value of low precision tensor."
-                   "It is usually calculated by the fake_quantize_max_abs_op.");
+    AddAttr<float>("max_range", "(float) The max range in quantization stage.");
     AddComment(R"DOC(
 FakeDequantizeMaxAbsOp operator.

 This calculation is an opposite operation of FakeQuantizeMaxAbsOp:

-$$Out = \frac{scale*X}{2^{num_bits} - 1}$$
+$$Out = \frac{scale*X}{max\_range}$$

 )DOC");
   }
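To make the new formula $$Out = \frac{scale*X}{max\_range}$$ concrete: for 8-bit quantization, max_range is 2^8 − 1 = 255, so a quantized value X = 100 with scale = 12.75 dequantizes to 12.75 × 100 / 255 = 5.0. A minimal sketch with these hypothetical numbers (not the Paddle kernel itself):

#include <cassert>
#include <cmath>

// Out = scale * X / max_range, where max_range is typically 2^num_bits - 1.
float Dequantize(float x, float scale, float max_range) {
  return scale * x / max_range;
}

int main() {
  const float max_range = std::pow(2.0f, 8) - 1.0f;  // 255 for num_bits = 8
  assert(Dequantize(100.0f, 12.75f, max_range) == 5.0f);
  return 0;
}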
paddle/fluid/operators/fake_dequantize_op.cu (+36 −0)
@@ -14,6 +14,42 @@ limitations under the License. */
 #include "paddle/fluid/operators/fake_dequantize_op.h"

 namespace paddle {
 namespace operators {

+template <typename T>
+__global__ void KeDequantize(const T* in, const T* scale, T max_range, int num,
+                             T* out) {
+  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  if (idx < num) {
+    out[idx] = in[idx] * scale[0] / max_range;
+  }
+}
+
+template <typename T>
+struct DequantizeFunctor<platform::CUDADeviceContext, T> {
+  void operator()(const platform::CUDADeviceContext& dev_ctx,
+                  const framework::Tensor* in, const framework::Tensor* scale,
+                  T max_range, framework::Tensor* out) {
+    const T* in_data = in->data<T>();
+    const T* scale_factor = scale->data<T>();
+    T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
+
+    int num = in->numel();
+    int block = 512;
+    int grid = (num + block - 1) / block;
+
+    KeDequantize<T><<<grid, block, 0, dev_ctx.stream()>>>(
+        in_data, scale_factor, max_range, num, out_data);
+  }
+};
+
+template struct DequantizeFunctor<platform::CUDADeviceContext, float>;
+template struct DequantizeFunctor<platform::CUDADeviceContext, double>;
+
 }  // namespace operators
 }  // namespace paddle

 namespace ops = paddle::operators;
 using CUDA = paddle::platform::CUDADeviceContext;
 REGISTER_OP_CUDA_KERNEL(fake_dequantize_max_abs,
paddle/fluid/operators/fake_dequantize_op.h (+15 −8)
@@ -19,22 +19,29 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+template <typename DeviceContext, typename T>
+struct DequantizeFunctor {
+  void operator()(const DeviceContext& dev_ctx, const framework::Tensor* in,
+                  const framework::Tensor* scale, T max_range,
+                  framework::Tensor* out);
+};
+
 template <typename DeviceContext, typename T>
 class FakeDequantizeMaxAbsKernel : public framework::OpKernel<T> {
  public:
   virtual void Compute(const framework::ExecutionContext& ctx) const {
     auto* in = ctx.Input<framework::Tensor>("X");
+    auto* scale = ctx.Input<framework::Tensor>("Scale");
     auto* out = ctx.Output<framework::Tensor>("Out");
-    out->mutable_data<T>(in->place());

-    int num_bits = ctx.Attr<int>("num_bits");
-    T scale = static_cast<T>(ctx.Attr<float>("scale"));
-    int range = std::pow(2, num_bits) - 1;
+    float max_range = ctx.Attr<float>("max_range");

-    auto eigen_out = framework::EigenVector<T>::Flatten(*out);
-    auto eigen_in = framework::EigenVector<T>::Flatten(*in);
-    auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(dev) = (scale / range) * eigen_in;
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    out->mutable_data<T>(dev_ctx.GetPlace());
+
+    DequantizeFunctor<DeviceContext, T>()(dev_ctx, in, scale,
+                                          static_cast<T>(max_range), out);
   }
 };
paddle/fluid/operators/fetch_barrier_op.cc (+2 −0)
@@ -52,6 +52,8 @@ class FetchBarrierOp : public framework::OperatorBase {
 class FetchBarrierOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() {
+    AddOutput("Out", "(Any) Dummy outputs, used for control dependency")
+        .AsDuplicable();
     AddComment(R"DOC(
 SendBarrier operator
paddle/fluid/operators/fusion_lstm_op.cc (+149 −7)
@@ -15,10 +15,14 @@ limitations under the License. */
 #include "paddle/fluid/operators/fusion_lstm_op.h"
 #include <string>
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
 #include "paddle/fluid/operators/math/fc_compute.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"
 #include "paddle/fluid/operators/math/sequence2batch.h"
+#include "paddle/fluid/platform/cpu_info.h"
+
+DEFINE_bool(seq_mode, true, "Use sequence mode");

 namespace paddle {
 namespace operators {
@@ -98,7 +102,12 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->ShareLoD("X", "Hidden");
   ctx->ShareLoD("X", "Cell");

-  int xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1];
+  int xx_width;
+  if (FLAGS_seq_mode) {
+    xx_width = wx_dims[1];
+  } else {
+    xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1];
+  }
   ctx->SetOutputDim("XX", {x_dims[0], xx_width});
   ctx->ShareLoD("X", "XX");
 }
@@ -205,10 +214,138 @@ inline void ReorderInitState(const DeviceContext& ctx,
   row_shuffle(ctx, src, index_lod, dst, indexed_src);
 }

-template <typename DeviceContext, typename T>
+template <typename T>
 class FuisonLSTMKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
+  void SeqCompute(const framework::ExecutionContext& ctx) const {
+    using DeviceContext = paddle::platform::CPUDeviceContext;
     auto* x = ctx.Input<LoDTensor>("X");
+    auto* h0 = ctx.Input<Tensor>("H0");
+    auto* c0 = ctx.Input<Tensor>("C0");
     auto* wx = ctx.Input<Tensor>("WeightX");
     auto* wh = ctx.Input<Tensor>("WeightH");
+    auto* bias = ctx.Input<Tensor>("Bias");
+
+    auto* xx = ctx.Output<LoDTensor>("XX");
+    auto* hidden_out = ctx.Output<LoDTensor>("Hidden");
+    auto* cell_out = ctx.Output<LoDTensor>("Cell");
+    bool is_reverse = ctx.Attr<bool>("is_reverse");
+
+    std::function<void(const int, const T *, T *)> act_gate, act_cell, act_cand;
+    auto& act_gate_str = ctx.Attr<std::string>("gate_activation");
+    auto& act_cell_str = ctx.Attr<std::string>("cell_activation");
+    auto& act_cand_str = ctx.Attr<std::string>("candidate_activation");
+    if (platform::jit::MayIUse(platform::jit::avx)) {
+      math::VecActivations<T, platform::jit::avx> act_functor;
+      act_gate = act_functor(act_gate_str);
+      act_cell = act_functor(act_cell_str);
+      act_cand = act_functor(act_cand_str);
+    } else {
+      math::VecActivations<T, platform::jit::isa_any> act_functor;
+      act_gate = act_functor(act_gate_str);
+      act_cell = act_functor(act_cell_str);
+      act_cand = act_functor(act_cand_str);
+    }
+
+    auto x_lod = x->lod();
+    auto x_dims = x->dims();    // T x M
+    auto wh_dims = wh->dims();  // D x 4D
+    const int total_T = x_dims[0];
+    const int N = x_lod[0].size() - 1;  // batch size
+    const int M = x_dims[1];            // x frame size
+    const int D = wh_dims[0];
+    const int D2 = D * 2;
+    const int D3 = D * 3;
+    const int D4 = wh_dims[1];
+
+    const T* x_data = x->data<T>();
+    const T* h0_data = h0 ? h0->data<T>() : NULL;
+    const T* c0_data = c0 ? c0->data<T>() : NULL;
+    const T* wx_data = wx->data<T>();
+    const T* wh_data = wh->data<T>();
+    T* xx_data = xx->mutable_data<T>(ctx.GetPlace());
+    T* hidden_out_data = hidden_out->mutable_data<T>(ctx.GetPlace());
+    T* cell_out_data = cell_out->mutable_data<T>(ctx.GetPlace());
+
+    auto blas = math::GetBlas<DeviceContext, T>(ctx);
+    math::FCCompute<DeviceContext, T>(blas, total_T, D4, M, x_data, wx_data,
+                                      xx_data, bias->data<T>());
+    int xx_offset = D4;
+    int gate_offset = D;
+    if (is_reverse) {
+      const int offset = (total_T - 1) * D;
+      xx_data = xx_data + offset * 4;
+      hidden_out_data = hidden_out_data + offset;
+      cell_out_data = cell_out_data + offset;
+      xx_offset = -D4;
+      gate_offset = -D;
+    }
+
+    auto move_step = [&]() {
+      xx_data = xx_data + xx_offset;
+      hidden_out_data = hidden_out_data + gate_offset;
+      cell_out_data = cell_out_data + gate_offset;
+    };
+
+    for (int i = 0; i < N; ++i) {
+      int bid = is_reverse ? N - 1 - i : i;
+      int seq_len = x_lod[0][bid + 1] - x_lod[0][bid];
+      const T* prev_cell_data = NULL;
+      const T* prev_hidden_data = NULL;
+      int tstart = 0;
+      if (h0_data) {
+        prev_hidden_data = h0_data + bid * D;
+        prev_cell_data = c0_data + bid * D;
+      } else {
+        // W_ch, W_ih, W_fh, W_oh
+        act_gate(D3, xx_data + D, xx_data + D);
+        act_cand(D, xx_data, xx_data);
+        // cell out= input*tilde
+        blas.VMUL(D, xx_data, xx_data + D, cell_out_data);
+        // hidden out= act_state(cellout) * outgate
+        act_cell(D, cell_out_data, xx_data + D2);
+        blas.VMUL(D, xx_data + D2, xx_data + D3, hidden_out_data);
+
+        // prev
+        prev_hidden_data = hidden_out_data;
+        prev_cell_data = cell_out_data;
+        tstart = 1;
+
+        move_step();
+      }
+      for (int step = tstart; step < seq_len; ++step) {
+        blas.GEMM(CblasNoTrans, CblasNoTrans, 1, D4, D, static_cast<T>(1),
+                  prev_hidden_data, D, wh_data, D4, static_cast<T>(1), xx_data,
+                  D4);
+
+        // W_ch, W_ih, W_fh, W_oh
+        act_gate(D3, xx_data + D, xx_data + D);
+        act_cand(D, xx_data, xx_data);
+
+        // a = forget * prev_cell
+        blas.VMUL(D, xx_data + D2, prev_cell_data, xx_data + D2);
+
+        // b = input * tilde
+        blas.VMUL(D, xx_data, xx_data + D, xx_data + D);
+
+        // cell out= a+b
+        blas.VADD(D, xx_data + D, xx_data + D2, cell_out_data);
+
+        // hidden out= act_state(cellout) * outgate
+        act_cell(D, cell_out_data, xx_data + D2);
+        blas.VMUL(D, xx_data + D2, xx_data + D3, hidden_out_data);
+
+        // prev
+        prev_hidden_data = hidden_out_data;
+        prev_cell_data = cell_out_data;
+
+        move_step();
+      }
+    }
+  }
+
+  void BatchCompute(const framework::ExecutionContext& ctx) const {
+    using DeviceContext = platform::CPUDeviceContext;
+    auto* x = ctx.Input<LoDTensor>("X");
+    auto* wx = ctx.Input<Tensor>("WeightX");
+    auto* wh = ctx.Input<Tensor>("WeightH");
@@ -339,6 +476,13 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
     // restore the output cell state in LoDTensor from the batch cell
     to_seq(dev_ctx, batch_cell, cell_out);
   }
+
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    if (FLAGS_seq_mode) {
+      SeqCompute(ctx);
+    } else {
+      BatchCompute(ctx);
+    }
+  }
 };

 }  // namespace operators
@@ -348,7 +492,5 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(fusion_lstm, ops::FusionLSTMOp, ops::FusionLSTMOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);

-REGISTER_OP_CPU_KERNEL(
-    fusion_lstm,
-    ops::FuisonLSTMKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::FuisonLSTMKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(fusion_lstm, ops::FuisonLSTMKernel<float>,
+                       ops::FuisonLSTMKernel<double>);
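The SeqCompute step loop above is a standard LSTM cell update expressed through VMUL/VADD over a gate buffer whose layout (by my reading of the offsets) is candidate at 0 and input/forget/output gates at D, D2, D3, after the activations have been applied. A scalar sketch of one step under that assumption (act_cell taken as tanh for illustration; this is not Paddle code):

#include <cmath>

// One LSTM step per element; gates are already activated, as in the kernel.
struct Gates {
  float cand;      // xx_data[0 .. D)   after act_cand
  float input_g;   // xx_data[D .. 2D)  after act_gate
  float forget_g;  // xx_data[2D .. 3D) after act_gate
  float output_g;  // xx_data[3D .. 4D) after act_gate
};

void LstmStep(const Gates& g, float prev_cell, float* cell, float* hidden) {
  float a = g.forget_g * prev_cell;         // a = forget * prev_cell
  float b = g.input_g * g.cand;             // b = input * tilde
  *cell = a + b;                            // cell out = a + b
  *hidden = std::tanh(*cell) * g.output_g;  // hidden = act_cell(cell) * outgate
}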
paddle/fluid/operators/fusion_seqexpand_concat_fc_op.cc
0 → 100644
浏览文件 @
13509da6
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h"
#include <string>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/operators/math/fc_compute.h"
#include "paddle/fluid/platform/cpu_info.h"
namespace
paddle
{
namespace
operators
{
void
FusionSeqExpandConcatFCOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE_GT
(
ctx
->
Inputs
(
"X"
).
size
(),
1UL
,
"Inputs(X) of FusionSeqExpandConcatFCOp should larger than 1."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"FCWeight"
),
"Input(FCWeight) of FusionSeqExpandConcatFCOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of FusionSeqExpandConcatFCOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"FCOut"
),
"Output(FCOut) of FusionSeqExpandConcatFCOp should not be null."
);
auto
ins_dims
=
ctx
->
GetInputsDim
(
"X"
);
auto
w_dims
=
ctx
->
GetInputDim
(
"FCWeight"
);
// (M0+M1+M2+..) x D
PADDLE_ENFORCE_EQ
(
w_dims
.
size
(),
2UL
,
"Input(FCWeight)'s rank must be 2."
);
const
int
D
=
w_dims
[
1
];
int
sum
=
ins_dims
[
0
][
1
];
for
(
size_t
i
=
1
;
i
<
ins_dims
.
size
();
++
i
)
{
sum
+=
ins_dims
[
i
][
1
];
}
PADDLE_ENFORCE_EQ
(
sum
,
w_dims
[
0
],
"FC height should be sum of all inputs width."
);
if
(
ctx
->
HasInput
(
"FCBias"
))
{
auto
b_dims
=
ctx
->
GetInputDim
(
"FCBias"
);
PADDLE_ENFORCE_EQ
(
b_dims
.
size
(),
2
,
"Input(FCBias)'s rank must be 2."
);
PADDLE_ENFORCE_EQ
(
b_dims
[
0
],
1
,
"FCBias shapes must be 1 * %d."
,
D
);
PADDLE_ENFORCE_EQ
(
b_dims
[
1
],
D
,
"FCBias shapes must be 1 * %d."
,
D
);
}
ctx
->
SetOutputDim
(
"Out"
,
{
ins_dims
[
0
][
0
],
D
});
// fcout should be reshape when run since can not get lod in infershape
// explicit share the ref lod
ctx
->
ShareLoD
(
"X"
,
"Out"
,
0
);
}
framework
::
OpKernelType
FusionSeqExpandConcatFCOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
MultiInput
<
LoDTensor
>
(
"X"
)[
0
]
->
type
()),
ctx
.
device_context
());
}
void
FusionSeqExpandConcatFCOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) input LodDTensors, the first one must be have ref lod "
"for sequence expand, and the rest input should have same lod."
)
.
AsDuplicable
();
AddInput
(
"FCWeight"
,
"(Tensor) the weights of fc."
);
AddInput
(
"FCBias"
,
"(Tensor, optional) the bias of fc."
).
AsDispensable
();
AddOutput
(
"Out"
,
"(LoDTensor) Output LodTensor."
);
AddOutput
(
"FCOut"
,
"(Tensor) the intermediate tensor to keep the result of fc."
"Shape is (N x D), where N is the batch size, D is the output dim of fc"
)
.
AsIntermediate
();
AddAttr
<
std
::
string
>
(
"fc_activation"
,
"(string, default: identity)"
"The activation for the result of fc."
"`identity` by default."
)
.
SetDefault
(
"identity"
)
.
InEnum
({
"sigmoid"
,
"tanh"
,
"relu"
,
"identity"
});
AddComment
(
R"DOC(
Fusion Sequence expand + concat + fc Operator.
All below conditions should be meet:
The ref_level of seq_expand should be 0.
The ref lod of seq_expand level is the first input of concat.
The other inputs should have same lod and same batch size of ref lod.
The seq len of other inputs should be 1.
The concat axis should be 1.
)DOC"
);
}
template <typename T>
class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using DeviceContext = paddle::platform::CPUDeviceContext;
    auto ins = ctx.MultiInput<LoDTensor>("X");
    auto* w = ctx.Input<Tensor>("FCWeight");
    auto* b = ctx.Input<Tensor>("FCBias");
    auto* out = ctx.Output<LoDTensor>("Out");
    auto* fc_out = ctx.Output<Tensor>("FCOut");

    auto* ref_in = ins[0];
    auto ref_lod = ref_in->lod();
    auto in1_lod = ins[1]->lod();
    auto ref_dims = ref_in->dims();  // T x M0
    auto in1_dims = ins[1]->dims();  // N x M1
    auto w_dims = w->dims();
    const int N = ref_lod[0].size() - 1;
    const int total_T = ref_dims[0];
    const int M0 = ref_dims[1];
    const int M1 = in1_dims[1];
    const int D = w_dims[1];

    // some checks, and fc_out should be reshaped here,
    // since infershape cannot get the lod info
    PADDLE_ENFORCE_EQ(ref_lod.size(), 1UL, "Only support input lod size is 1.");
    PADDLE_ENFORCE_EQ(in1_lod.size(), 1UL, "Only support input lod size is 1.");
    PADDLE_ENFORCE_EQ(in1_lod[0].size() - 1, N,
                      "Batch size of all inputs should be equal.");
    PADDLE_ENFORCE_EQ(in1_lod[0][N], N,
                      "Seq_length of other inputs should be 1.");
    PADDLE_ENFORCE_EQ(in1_dims[0], N, "input height should be batch size.");
    for (size_t i = 2; i < ins.size(); ++i) {
      PADDLE_ENFORCE_EQ(ins[i]->dims()[0], N,
                        "All other inputs height should be equal");
      PADDLE_ENFORCE_EQ(ins[i]->lod(), in1_lod,
                        "All other inputs should have same lod");
    }
    fc_out->Resize({N, D});

    std::function<void(const int, const T*, T*)> fc_act;
    auto& fc_act_str = ctx.Attr<std::string>("fc_activation");
    if (platform::jit::MayIUse(platform::jit::avx)) {
      math::VecActivations<T, platform::jit::avx> act_functor;
      fc_act = act_functor(fc_act_str);
    } else {
      math::VecActivations<T, platform::jit::isa_any> act_functor;
      fc_act = act_functor(fc_act_str);
    }

    const T* ref_in_data = ref_in->data<T>();
    const T* in1_data = ins[1]->data<T>();
    const T* w_data = w->data<T>();
    T* out_data = out->mutable_data<T>(ctx.GetPlace());
    T* fc_out_data = fc_out->mutable_data<T>(ctx.GetPlace());

    auto blas = math::GetBlas<DeviceContext, T>(ctx);
    math::FCCompute<DeviceContext, T>(blas, total_T, D, M0, ref_in_data,
                                      w_data, out_data,
                                      b ? b->data<T>() : NULL);
    w_data = w_data + M0 * D;

    // first write on
    blas.MatMul(N, D, M1, in1_data, w_data, fc_out_data);
    w_data = w_data + M1 * D;
    for (size_t i = 2; i < ins.size(); ++i) {
      // add on
      const T* in_data = ins[i]->data<T>();
      const int K = ins[i]->dims()[1];
      blas.GEMM(CblasNoTrans, CblasNoTrans, N, D, K, static_cast<T>(1),
                in_data, K, w_data, D, static_cast<T>(1), fc_out_data, D);
      w_data = w_data + K * D;
    }

    T* cur_out_data = out_data;
    for (int i = 0; i < N; ++i) {
      int seq_len = ref_lod[0][i + 1] - ref_lod[0][i];
      T* src = fc_out_data + i * D;
      for (int step = 0; step < seq_len; ++step) {
        blas.VADD(D, cur_out_data, src, cur_out_data);
        cur_out_data = cur_out_data + D;
      }
    }
    fc_act(total_T * D, out_data, out_data);
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(fusion_seqexpand_concat_fc, ops::FusionSeqExpandConcatFCOp,
                  ops::FusionSeqExpandConcatFCOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);

REGISTER_OP_CPU_KERNEL(fusion_seqexpand_concat_fc,
                       ops::FusionSeqExpandConcatFCOpKernel<float>,
                       ops::FusionSeqExpandConcatFCOpKernel<double>);
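As a reading aid (not part of the commit), here is a minimal sketch of the computation this fused kernel performs, written over raw arrays; the function name `seq_expand_concat_fc_reference`, the identity activation, and all shapes are assumptions for illustration only:

#include <cstddef>
#include <vector>

// Hypothetical reference: for each time step t belonging to sequence n,
// Out[t] = ref_in[t] * W0 + x1[n] * W1 + ... + b, i.e. the sequence-expanded
// and concatenated inputs multiplied by a vertically stacked weight matrix.
std::vector<float> seq_expand_concat_fc_reference(
    const std::vector<float>& ref_in,           // total_T x M0
    const std::vector<std::vector<float>>& xs,  // each N x Ms[i]
    const std::vector<float>& w,                // (M0 + sum(Ms)) x D, stacked
    const std::vector<float>& b,                // D, may be empty
    const std::vector<size_t>& lod,             // N+1 offsets into total_T
    size_t M0, const std::vector<size_t>& Ms, size_t D) {
  size_t total_T = lod.back();
  std::vector<float> out(total_T * D, 0.f);
  for (size_t n = 0; n + 1 < lod.size(); ++n) {
    for (size_t t = lod[n]; t < lod[n + 1]; ++t) {
      for (size_t d = 0; d < D; ++d) {
        float v = b.empty() ? 0.f : b[d];
        const float* w_row = w.data();  // walk down the stacked weight
        for (size_t k = 0; k < M0; ++k)
          v += ref_in[t * M0 + k] * w_row[k * D + d];
        w_row += M0 * D;
        for (size_t i = 0; i < xs.size(); ++i) {  // batch inputs, seq len 1
          for (size_t k = 0; k < Ms[i]; ++k)
            v += xs[i][n * Ms[i] + k] * w_row[k * D + d];
          w_row += Ms[i] * D;
        }
        out[t * D + d] = v;  // identity activation assumed
      }
    }
  }
  return out;
}

The real kernel gets the same result with one FCCompute over the ref input, one GEMM per batch-level input into FCOut, and a per-step VADD broadcast.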
paddle/fluid/operators/fusion_seqexpand_concat_fc_op.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {

using LoDTensor = framework::LoDTensor;
using Tensor = framework::Tensor;

class FusionSeqExpandConcatFCOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};

class FusionSeqExpandConcatFCOpMaker
    : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override;
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/math/concat.cu
...
...
@@ -177,6 +177,9 @@ class ConcatFunctor<platform::CUDADeviceContext, T> {
          dev_ins_data, dev_ins_col_data, static_cast<int>(inputs_col.size()),
          out_row, out_col, output->data<T>());
    }
    // Wait() must be called because `inputs_data` may be destructed before
    // kernel ends
    context.Wait();
  }
};
...
...
@@ -252,6 +255,9 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
          input.data<T>(), in_row, in_col, dev_outs_col_data,
          static_cast<int>(outputs_cols.size()), dev_out_gpu_data);
    }
    // Wait() must be called because `outputs_data` may be destructed before
    // kernel ends
    context.Wait();
  }
};
...
...
paddle/fluid/operators/math/cpu_vec_test.cc
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include <sys/time.h>
#include <cmath>
#include <cstring>
#include <random>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
...
...
paddle/fluid/operators/math/math_function.cc
...
...
@@ -41,7 +41,8 @@ template struct SetConstant<platform::CPUDeviceContext, uint8_t>;
template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, bool, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>;
template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int8_t, RANK>;
DEFINE_CPU_TRANS(1);
DEFINE_CPU_TRANS(2);
...
...
paddle/fluid/operators/math/math_function.cu
...
...
@@ -33,10 +33,11 @@ template struct SetConstant<platform::CUDADeviceContext, int>;
template struct SetConstant<platform::CUDADeviceContext, int64_t>;
template struct SetConstant<platform::CUDADeviceContext, bool>;
#define DEFINE_GPU_TRANS(RANK) \
template struct Transpose<platform::CUDADeviceContext, float, RANK>; \
template struct Transpose<platform::CUDADeviceContext, double, RANK>; \
template struct Transpose<platform::CUDADeviceContext, float16, RANK>;
#define DEFINE_GPU_TRANS(RANK) \
template struct Transpose<platform::CUDADeviceContext, float, RANK>; \
template struct Transpose<platform::CUDADeviceContext, double, RANK>; \
template struct Transpose<platform::CUDADeviceContext, float16, RANK>; \
template struct Transpose<platform::CUDADeviceContext, int8_t, RANK>;
DEFINE_GPU_TRANS(1);
DEFINE_GPU_TRANS(2);
...
...
paddle/fluid/operators/math/padding.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <utility>
#include <vector>
#include "paddle/fluid/framework/tensor.h"
namespace paddle {
namespace operators {
namespace math {

template <typename T, size_t D, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;

template <typename DeviceContext, typename T, size_t D>
void PadFunction(const framework::ExecutionContext& context,
                 const std::vector<int>& pads, const framework::Tensor& src,
                 T pad_value, framework::Tensor* out) {
  Eigen::array<std::pair<int, int>, D> paddings;

  for (size_t i = 0; i < paddings.size(); ++i) {
    paddings[i].first = pads[i * 2];
    paddings[i].second = pads[i * 2 + 1];
  }

  auto src_tensor = EigenTensor<T, D>::From(src);
  auto out_tensor = EigenTensor<T, D>::From(*out);

  auto& place =
      *context.template device_context<DeviceContext>().eigen_device();
  out_tensor.device(place) = src_tensor.pad(paddings, pad_value);
}

template <typename DeviceContext, typename T, size_t D>
void PadGradFunction(const framework::ExecutionContext& context,
                     const std::vector<int>& pads,
                     const framework::Tensor& src, framework::Tensor* d_out) {
  Eigen::array<std::pair<int, int>, D> paddings;
  for (size_t i = 0; i < paddings.size(); ++i) {
    paddings[i].first = -pads[i * 2];
    paddings[i].second = -pads[i * 2 + 1];
  }

  auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
  auto src_tensor = EigenTensor<T, D>::From(src);

  auto& place =
      *context.template device_context<DeviceContext>().eigen_device();
  d_out_tensor.device(place) = src_tensor.pad(paddings, 0);
}

template <typename DeviceContext, typename T>
void PaddingFunctor(int rank, const framework::ExecutionContext& context,
                    const std::vector<int>& pads, T pad_value,
                    const framework::Tensor& src, framework::Tensor* out) {
  switch (rank) {
    case 1:
      PadFunction<DeviceContext, T, 1>(context, pads, src, pad_value, out);
      break;
    case 2:
      PadFunction<DeviceContext, T, 2>(context, pads, src, pad_value, out);
      break;
    case 3:
      PadFunction<DeviceContext, T, 3>(context, pads, src, pad_value, out);
      break;
    case 4:
      PadFunction<DeviceContext, T, 4>(context, pads, src, pad_value, out);
      break;
    case 5:
      PadFunction<DeviceContext, T, 5>(context, pads, src, pad_value, out);
      break;
    case 6:
      PadFunction<DeviceContext, T, 6>(context, pads, src, pad_value, out);
      break;
    default:
      PADDLE_THROW(
          "PadOp only support tensors with no more than 6 dimensions.");
  }
}

template <typename DeviceContext, typename T>
void PaddingGradFunctor(int rank, const framework::ExecutionContext& context,
                        const std::vector<int>& pads,
                        const framework::Tensor& src, framework::Tensor* out) {
  switch (rank) {
    case 1:
      PadGradFunction<DeviceContext, T, 1>(context, pads, src, out);
      break;
    case 2:
      PadGradFunction<DeviceContext, T, 2>(context, pads, src, out);
      break;
    case 3:
      PadGradFunction<DeviceContext, T, 3>(context, pads, src, out);
      break;
    case 4:
      PadGradFunction<DeviceContext, T, 4>(context, pads, src, out);
      break;
    case 5:
      PadGradFunction<DeviceContext, T, 5>(context, pads, src, out);
      break;
    case 6:
      PadGradFunction<DeviceContext, T, 6>(context, pads, src, out);
      break;
    default:
      PADDLE_THROW(
          "PadOp only support tensors with no more than 6 dimensions.");
  }
}

}  // namespace math
}  // namespace operators
}  // namespace paddle
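A quick note on the `pads` layout these helpers assume: entry 2*i is the amount of padding before dimension i and entry 2*i+1 the amount after it. A standalone sketch with made-up shapes (not part of the commit):

#include <vector>

int main() {
  // For a rank-2 tensor of shape (2, 3), pads = {0, 1, 2, 0} means:
  //   dim 0: 0 rows before, 1 row after  -> padded size 3
  //   dim 1: 2 cols before, 0 cols after -> padded size 5
  // so PaddingFunctor would produce a (3, 5) output, pad_value elsewhere.
  std::vector<int> pads = {0, 1, 2, 0};
  return static_cast<int>(pads.size()) - 4;  // 0 on success
}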
paddle/fluid/operators/math/sequence_padding.cc
...
...
@@ -18,65 +18,86 @@ namespace paddle {
namespace operators {
namespace math {

template <typename T>
void CopyValidData(framework::Tensor* dst_tensor,
                   const framework::Tensor* src_tensor,
                   const framework::Vector<size_t>& seq_offsets,
                   int pad_seq_len, int step_width, bool norm_by_len,
                   CopyType type, PadLayout layout) {
  int seq_num = seq_offsets.size() - 1;
  const T* src_data = src_tensor->data<T>();
  T* dst_data = dst_tensor->data<T>();

  int seq_cpy_gap = step_width;
  int pad_cpy_gap =
      layout == kBatchLengthWidth ? step_width : seq_num * step_width;
  for (int seq_idx = 0; seq_idx < seq_num; ++seq_idx) {
    int valid_seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx];
    PADDLE_ENFORCE_GE(
        pad_seq_len, valid_seq_len,
        "The padded sequence length can not be less than its original length.");
    int seq_data_offset = seq_offsets[seq_idx] * step_width;
    int pad_data_offset = layout == kBatchLengthWidth
                              ? seq_idx * pad_seq_len * step_width
                              : seq_idx * step_width;
    float scale = 1.0f / static_cast<float>(valid_seq_len);

    for (int step_idx = 0; step_idx < valid_seq_len; ++step_idx) {
      const T* src =
          src_data + (type == kSeqToPad ? seq_data_offset : pad_data_offset);
      T* dst =
          dst_data + (type == kSeqToPad ? pad_data_offset : seq_data_offset);
      memcpy(dst, src, step_width * sizeof(T));
      if (norm_by_len) {
        for (int i = 0; i < step_width; ++i) {
          *(dst + i) *= scale;
        }
      }
      seq_data_offset += seq_cpy_gap;
      pad_data_offset += pad_cpy_gap;
    }
  }
}

template <typename T>
class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& seq, framework::Tensor* padding,
                  bool norm_by_times) {
    auto lod = seq.lod();
    PADDLE_ENFORCE_GT(lod.size(), 0UL,
                      "The LoD of LoDTensor seq should not be null.");
    const size_t level = 0;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
    auto seq_dims = seq.dims();
    PADDLE_ENFORCE_EQ(seq_dims[0],
                      static_cast<int64_t>(abs_offset_lod[level].back()),
                      "The first dimension of LoDTensor seq should be "
                      "equal to the sum of all sequences's length.");
    auto padding_dims = padding->dims();
    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
                      "The input padding should be a 3-D Tensor of shape "
                      "[max_sequence_length, num_sequences, sequence_width].");
    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                      "The first dimension of Tensor padding should be the "
                      "maximum length of all sequences in LoDTensor seq.");
    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                      "The second dimension of Tensor padding should be the "
                      "number of sequences in LoDTensor seq.");
    const int64_t sequence_width = seq.numel() / seq_dims[0];
    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                      "The third dimension of Tensor padding should be the "
                      "width of sequence in LoDTensor seq.");
    const T* seq_data = seq.data<T>();
    T* padding_data = padding->data<T>();
    for (int64_t i = 0; i < max_sequence_length; ++i) {
      for (int64_t j = 0; j < num_sequences; ++j) {
        int64_t start_pos = abs_offset_lod[level][j];
        int64_t sequence_length = abs_offset_lod[level][j + 1] - start_pos;
        if (i < sequence_length) {
          // i > 0 => sequence_length > 0
          T scale =
              norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
          for (int64_t k = 0; k < sequence_width; ++k) {
            padding_data[(i * num_sequences + j) * sequence_width + k] =
                seq_data[(start_pos + i) * sequence_width + k] * scale;
          }
        } else {
          memset(padding_data + (i * num_sequences + j) * sequence_width, 0,
                 sequence_width * sizeof(T));
        }
                  const framework::LoDTensor& seq_tensor,
                  framework::LoDTensor* pad_tensor,
                  const framework::LoDTensor& pad_value, int pad_seq_len = -1,
                  int lod_level = 0, bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth) {
    auto seq_lod = seq_tensor.lod();
    const auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level];
    const auto& seq_tensor_dims = seq_tensor.dims();
    const auto& pad_tensor_dims = pad_tensor->dims();
    if (pad_seq_len == -1) {
      pad_seq_len = MaximumSequenceLength(seq_offsets);
    }
    int step_width = seq_tensor.numel() / seq_tensor_dims[0];

    CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
              step_width, layout);
    PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width,
                   "The numel of 'pad_value' can only be 1 or be equal to the "
                   "'step_width'.");

    // fill padding value
    T* pad_data = pad_tensor->data<T>();
    const T* pad_value_data = pad_value.data<T>();
    if (pad_value.numel() == 1) {
      for (int i = 0; i < pad_tensor->numel(); ++i) {
        pad_data[i] = *pad_value_data;
      }
    } else {
      for (int i = 0; i < pad_tensor->numel(); i += step_width) {
        memcpy(pad_data + i, pad_value_data, step_width * sizeof(T));
      }
    }

    CopyValidData<T>(pad_tensor, &seq_tensor, seq_offsets, pad_seq_len,
                     step_width, norm_by_times, kSeqToPad, layout);
  }
};
...
...
@@ -84,62 +105,35 @@ template <typename T>
class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  framework::LoDTensor* seq, const framework::Tensor& padding,
                  bool norm_by_times) {
    auto lod = seq->lod();
    PADDLE_ENFORCE_GT(lod.size(), 0UL,
                      "The LoD of LoDTensor seq should not be null.");
    const size_t level = 0;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
    auto seq_dims = seq->dims();
    PADDLE_ENFORCE_EQ(seq_dims[0],
                      static_cast<int64_t>(abs_offset_lod[level].back()),
                      "The first dimension of LoDTensor seq should be "
                      "equal to the sum of all sequences's length.");
    auto padding_dims = padding.dims();
    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
                      "The input padding should be a 3-D Tensor of shape "
                      "[max_sequence_length, num_sequences, sequence_width].");
    const int64_t max_sequence_length = MaximumSequenceLength(lod, level);
    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                      "The first dimension of Tensor padding should be "
                      "the maximum length of all sequences in LoDTensor seq.");
    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                      "The second dimension of Tensor padding should be "
                      "the number of sequences in LoDTensor seq.");
    const int64_t sequence_width = seq->numel() / seq_dims[0];
    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                      "The third dimension of Tensor padding should be the "
                      "width of sequence in LoDTensor seq.");
    const T* padding_data = padding.data<T>();
    T* seq_data = seq->data<T>();
    for (int64_t i = 0; i < num_sequences; ++i) {
      int64_t start_pos = abs_offset_lod[level][i];
      int64_t sequence_length = abs_offset_lod[level][i + 1] - start_pos;
      for (int64_t j = 0; j < sequence_length; ++j) {
        // sequence_width > j > 0
        T scale =
            norm_by_times ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
        for (int64_t k = 0; k < sequence_width; ++k) {
          seq_data[(start_pos + j) * sequence_width + k] =
              padding_data[(j * num_sequences + i) * sequence_width + k] *
              scale;
        }
      }
                  const framework::LoDTensor& pad_tensor,
                  framework::LoDTensor* seq_tensor, int pad_seq_len = -1,
                  int lod_level = 0, bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth) {
    auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level];
    const auto& seq_tensor_dims = seq_tensor->dims();
    const auto& pad_tensor_dims = pad_tensor.dims();
    if (pad_seq_len == -1) {
      pad_seq_len = MaximumSequenceLength(seq_offsets);
    }
    int step_width = seq_tensor->numel() / seq_tensor_dims[0];

    CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
              step_width, layout);

    CopyValidData<T>(seq_tensor, &pad_tensor, seq_offsets, pad_seq_len,
                     step_width, norm_by_times, kPadToSeq, layout);
  }
};

template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, int>;
template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, int64_t>;
template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
template class PaddingLoDTensorFunctor<platform::CPUDeviceContext, double>;

template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, int>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, int64_t>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, float>;
template class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, double>;

}  // namespace math
}  // namespace operators
...
...
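The offset arithmetic in `CopyValidData` is the heart of the two pad layouts; a standalone reproduction of the start-offset and step-gap math for a toy case (values are illustrative, not from the commit):

#include <cstdio>

int main() {
  // Two sequences, step_width = 3, pad_seq_len = 2.
  // kBatchLengthWidth: pad buffer is (seq_num x pad_seq_len x width);
  //   sequence n starts at n * pad_seq_len * width, step gap = width.
  // kLengthBatchWidth: pad buffer is (pad_seq_len x seq_num x width);
  //   sequence n starts at n * width, step gap = seq_num * width.
  const int seq_num = 2, pad_seq_len = 2, width = 3;
  for (int n = 0; n < seq_num; ++n) {
    int batch_major = n * pad_seq_len * width;  // kBatchLengthWidth start
    int length_major = n * width;               // kLengthBatchWidth start
    std::printf("seq %d: batch-major offset %d (gap %d), "
                "length-major offset %d (gap %d)\n",
                n, batch_major, width, length_major, seq_num * width);
  }
  return 0;
}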
paddle/fluid/operators/math/sequence_padding.cu
...
...
@@ -19,41 +19,32 @@ namespace paddle {
namespace operators {
namespace math {

template <typename T, bool NormByTimes, bool Padding>
__global__ void SequencePaddingKernel(
    T* padding, T* sequence, const size_t* sequence_start_positions,
    const size_t sequence_width, const size_t max_sequence_length,
    const size_t num_sequences) {
  size_t padding_idx = blockIdx.y;
  size_t start_pos = sequence_start_positions[padding_idx];
  size_t sequence_length =
      sequence_start_positions[padding_idx + 1] - start_pos;

  size_t sequence_idx = blockIdx.x * blockDim.y + threadIdx.y;
  size_t padding_base_idx =
      (sequence_idx * num_sequences + padding_idx) * sequence_width;
  size_t sequence_base_idx = (start_pos + sequence_idx) * sequence_width;

  if (sequence_idx < sequence_length) {
    T scale = NormByTimes ? (1.0f / static_cast<T>(sequence_length)) : 1.0f;
    if (Padding) {
      /* sequence -> padding */
      for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) {
        padding[padding_base_idx + i] =
            scale * sequence[sequence_base_idx + i];
      }
    } else {
      /* padding -> sequence */
      for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) {
        sequence[sequence_base_idx + i] =
            scale * padding[padding_base_idx + i];
      }

template <typename T, CopyType Type>
__global__ void SequencePaddingKernel(
    T* dst, const T* src, const T* pad_value, bool is_constant_pad,
    const size_t* seq_offsets, const size_t seq_num, const size_t pad_seq_len,
    const size_t step_width, bool norm_by_len, const PadLayout layout) {
  size_t seq_idx = blockIdx.y;
  size_t seq_len = seq_offsets[seq_idx + 1] - seq_offsets[seq_idx];

  size_t step_idx = blockIdx.x * blockDim.y + threadIdx.y;
  size_t seq_data_offset = (seq_offsets[seq_idx] + step_idx) * step_width;
  size_t pad_data_offset =
      layout == kBatchLengthWidth
          ? (seq_idx * pad_seq_len + step_idx) * step_width
          : (step_idx * seq_num + seq_idx) * step_width;

  T* dst_data = dst + (Type == kSeqToPad ? pad_data_offset : seq_data_offset);
  const T* src_data =
      src + (Type == kSeqToPad ? seq_data_offset : pad_data_offset);

  if (step_idx < seq_len) {
    float scale = norm_by_len ? (1.0f / static_cast<float>(seq_len)) : 1.0f;
    for (size_t i = threadIdx.x; i < step_width; i += blockDim.x) {
      dst_data[i] = scale * src_data[i];
    }
  } else if (sequence_idx < max_sequence_length) {
    if (Padding) {
      /* sequence -> padding */
      for (size_t i = threadIdx.x; i < sequence_width; i += blockDim.x) {
        padding[padding_base_idx + i] = 0;
      }
  } else if (step_idx < pad_seq_len && Type == kSeqToPad) {
    for (size_t i = threadIdx.x; i < step_width; i += blockDim.x) {
      dst_data[i] = is_constant_pad ? pad_value[0] : pad_value[i];
    }
  }
}
...
...
@@ -62,74 +53,59 @@ template <typename T>
class PaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
 public:
  void operator()(const platform::CUDADeviceContext& context,
                  const framework::LoDTensor& seq, framework::Tensor* padding,
                  bool norm_by_times) {
    auto lod = seq.lod();
    PADDLE_ENFORCE_GT(lod.size(), 0UL,
                      "The lod of LoDTensor seq should not be null.");
    const size_t level = 0;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
    auto seq_dims = seq.dims();
    PADDLE_ENFORCE_EQ(seq_dims[0],
                      static_cast<int64_t>(abs_offset_lod[level].back()),
                      "The first dimension of LoDTensor seq should be "
                      "equal to the sum of all sequences's length.");
    auto padding_dims = padding->dims();
    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
                      "The input padding should be a 3-D Tensor of shape "
                      "[max_sequence_length, num_sequences, sequence_width].");
    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                      "The first dimension of Tensor padding should be the "
                      "maximum length of all sequences in LoDTensor seq.");
    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                      "The second dimension of Tensor padding should be the "
                      "number of sequences in LoDTensor seq.");
    const int64_t sequence_width = seq.numel() / seq_dims[0];
    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                      "The third dimension of Tensor padding should be the "
                      "width of sequence in LoDTensor seq.");
    if (!norm_by_times && num_sequences == 1UL) {
      TensorCopy(seq, context.GetPlace(), context, padding);
      padding->Resize(padding_dims);
                  const framework::LoDTensor& seq_tensor,
                  framework::LoDTensor* pad_tensor,
                  const framework::LoDTensor& pad_value, int pad_seq_len = -1,
                  int lod_level = 0, bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth) {
    auto seq_lod = seq_tensor.lod();
    const auto seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level];
    const auto& seq_tensor_dims = seq_tensor.dims();
    const auto& pad_tensor_dims = pad_tensor->dims();
    int max_seq_len = MaximumSequenceLength(seq_offsets);
    if (pad_seq_len == -1) {
      pad_seq_len = max_seq_len;
    }
    PADDLE_ENFORCE_GE(pad_seq_len, max_seq_len,
                      "The pad_seq_len must be equal to or greater than the "
                      "original max sequence length.");
    int step_width = seq_tensor.numel() / seq_tensor_dims[0];
    int seq_num = seq_offsets.size() - 1;

    CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
              step_width, layout);
    PADDLE_ENFORCE(pad_value.numel() == 1 || pad_value.numel() == step_width,
                   "The numel of 'pad_value' can only be 1 or be equal to the "
                   "'step_width'.");

    if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) {
      TensorCopy(seq_tensor, context.GetPlace(), context, pad_tensor);
      pad_tensor->Resize(pad_tensor_dims);
      return;
    }

    const int64_t kBlockSize = 512;
    const int kBlockSize = 512;

    /* At least use 32 threads to copy sequence_width elements,
     * and at least 8 elements for each thread.
     */
    size_t block_dim_x =
        std::min(((((sequence_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
    size_t block_dim_x =
        std::min(((((step_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
    size_t block_dim_y = kBlockSize / block_dim_x;
    dim3 threads(block_dim_x, block_dim_y);

    size_t grid_dim_x = (max_sequence_length + block_dim_y - 1) / block_dim_y;
    size_t grid_dim_y = num_sequences;
    size_t grid_dim_x = (pad_seq_len + block_dim_y - 1) / block_dim_y;
    size_t grid_dim_y = seq_num;
    dim3 grid(grid_dim_x, grid_dim_y);

    const T* seq_data = seq.data<T>();
    T* padding_data = padding->data<T>();
    if (norm_by_times) {
      SequencePaddingKernel<T, 1, 1><<<grid, threads, 0, context.stream()>>>(
          padding_data, const_cast<T*>(seq_data),
          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
          max_sequence_length, num_sequences);
    } else {
      SequencePaddingKernel<T, 0, 1><<<grid, threads, 0, context.stream()>>>(
          padding_data, const_cast<T*>(seq_data),
          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
          max_sequence_length, num_sequences);
    }
    const T* seq_data = seq_tensor.data<T>();
    T* pad_data = pad_tensor->data<T>();
    const T* pad_value_data = pad_value.data<T>();

    SequencePaddingKernel<T, kSeqToPad><<<grid, threads, 0, context.stream()>>>(
        pad_data, seq_data, pad_value_data, pad_value.numel() == 1,
        seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len,
        step_width, norm_by_times, layout);
  }
};
...
...
@@ -137,79 +113,62 @@ template <typename T>
class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, T> {
 public:
  void operator()(const platform::CUDADeviceContext& context,
                  framework::LoDTensor* seq, const framework::Tensor& padding,
                  bool norm_by_times) {
    auto lod = seq->lod();
    PADDLE_ENFORCE_GT(lod.size(), 0UL,
                      "The lod of LoDTensor seq should not be null.");
    const size_t level = 0;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
    auto seq_dims = seq->dims();
    PADDLE_ENFORCE_EQ(seq_dims[0],
                      static_cast<int64_t>(abs_offset_lod[level].back()),
                      "The first dimension of LoDTensor seq should be "
                      "equal to the sum of all sequences's length.");
    auto padding_dims = padding.dims();
    PADDLE_ENFORCE_EQ(padding_dims.size(), 3UL,
                      "The input padding should be a 3-D Tensor of shape "
                      "[max_sequence_length, num_sequences, sequence_width].");
    int64_t max_sequence_length = MaximumSequenceLength(lod, level);
    PADDLE_ENFORCE_EQ(padding_dims[0], max_sequence_length,
                      "The first dimension of Tensor padding should be "
                      "the maximum length of all sequences in LoDTensor seq.");
    const int64_t num_sequences = abs_offset_lod[level].size() - 1;
    PADDLE_ENFORCE_EQ(padding_dims[1], num_sequences,
                      "The second dimension of Tensor padding should be "
                      "the number of sequences in LoDTensor seq.");
    const int64_t sequence_width = seq->numel() / seq_dims[0];
    PADDLE_ENFORCE_EQ(padding_dims[2], sequence_width,
                      "The third dimension of Tensor padding should be the "
                      "width of sequence in LoDTensor seq.");
    if (!norm_by_times && num_sequences == 1UL) {
      TensorCopy(padding, context.GetPlace(), context, seq);
      seq->Resize(seq_dims);
                  const framework::LoDTensor& pad_tensor,
                  framework::LoDTensor* seq_tensor, int pad_seq_len = -1,
                  int lod_level = 0, bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth) {
    auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level];
    const auto& seq_tensor_dims = seq_tensor->dims();
    const auto& pad_tensor_dims = pad_tensor.dims();
    int max_seq_len = MaximumSequenceLength(seq_offsets);
    if (pad_seq_len == -1) {
      pad_seq_len = max_seq_len;
    }
    int step_width = seq_tensor->numel() / seq_tensor_dims[0];
    int seq_num = seq_offsets.size() - 1;

    CheckDims(seq_tensor_dims, pad_tensor_dims, seq_offsets, pad_seq_len,
              step_width, layout);

    if (!norm_by_times && seq_num == 1UL && pad_seq_len == max_seq_len) {
      TensorCopy(pad_tensor, context.GetPlace(), context, seq_tensor);
      seq_tensor->Resize(seq_tensor_dims);
      return;
    }

    const int64_t kBlockSize = 512;
    const int kBlockSize = 512;

    /* At least use 32 threads to copy sequence_width elements,
     * and at least 8 elements for each thread.
     */
    size_t block_dim_x =
        std::min(((((sequence_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
    size_t block_dim_x =
        std::min(((((step_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
    size_t block_dim_y = kBlockSize / block_dim_x;
    dim3 threads(block_dim_x, block_dim_y);

    size_t grid_dim_x = (max_sequence_length + block_dim_y - 1) / block_dim_y;
    size_t grid_dim_y = num_sequences;
    size_t grid_dim_x = (pad_seq_len + block_dim_y - 1) / block_dim_y;
    size_t grid_dim_y = seq_num;
    dim3 grid(grid_dim_x, grid_dim_y);

    const T* padding_data = padding.data<T>();
    T* seq_data = seq->data<T>();
    if (norm_by_times) {
      SequencePaddingKernel<T, 1, 0><<<grid, threads, 0, context.stream()>>>(
          const_cast<T*>(padding_data), seq_data,
          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
          max_sequence_length, num_sequences);
    } else {
      SequencePaddingKernel<T, 0, 0><<<grid, threads, 0, context.stream()>>>(
          const_cast<T*>(padding_data), seq_data,
          abs_offset_lod[level].CUDAData(context.GetPlace()), sequence_width,
          max_sequence_length, num_sequences);
    }
    const T* pad_data = pad_tensor.data<T>();
    T* seq_data = seq_tensor->data<T>();

    SequencePaddingKernel<T, kPadToSeq><<<grid, threads, 0, context.stream()>>>(
        seq_data, pad_data, nullptr, false,
        seq_offsets.CUDAData(context.GetPlace()), seq_num, pad_seq_len,
        step_width, norm_by_times, layout);
  }
};
template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, int>;
template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, int64_t>;
template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, float>;
template class PaddingLoDTensorFunctor<platform::CUDADeviceContext, double>;

template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, int>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, int64_t>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, float>;
template class UnpaddingLoDTensorFunctor<platform::CUDADeviceContext, double>;

}  // namespace math
}  // namespace operators
...
...
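The bit-twiddling in `block_dim_x` rounds ceil(step_width / 8) up to a multiple of 32, capped at `kBlockSize`; a standalone check of that arithmetic with illustrative widths (not from the commit):

#include <algorithm>
#include <cstdio>

int main() {
  const int kBlockSize = 512;
  for (int step_width : {1, 40, 300, 5000}) {
    // ceil(step_width / 8), rounded up to a multiple of 32, capped at 512.
    int x = std::min(((((step_width + 7) >> 3) + 31) >> 5) << 5, kBlockSize);
    std::printf("step_width=%4d -> block_dim_x=%3d, block_dim_y=%d\n",
                step_width, x, kBlockSize / x);  // e.g. 1 -> 32, 16
  }
  return 0;
}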
paddle/fluid/operators/math/sequence_padding.h
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/platform/device_context.h"
...
...
@@ -22,17 +23,33 @@ namespace paddle {
namespace operators {
namespace math {

inline static size_t MaximumSequenceLength(const framework::LoD& lod,
                                           const size_t level) {
  const size_t num_sequences = lod[level].size() - 1;
  size_t max_sequence_length = 0;
  framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
  for (size_t i = 0; i < num_sequences; ++i) {
    max_sequence_length =
        std::max(max_sequence_length,
                 abs_offset_lod[level][i + 1] - abs_offset_lod[level][i]);
enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };

enum CopyType { kSeqToPad, kPadToSeq };

inline static size_t MaximumSequenceLength(
    const framework::Vector<size_t>& seq_offset) {
  size_t seq_num = seq_offset.size() - 1;
  size_t max_seq_len = 0;
  for (size_t i = 0; i < seq_num; ++i) {
    max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
  }
  return max_sequence_length;
  return max_seq_len;
}

inline static void CheckDims(const framework::DDim& seq_tensor_dims,
                             const framework::DDim& pad_tensor_dims,
                             const framework::Vector<size_t>& seq_offset,
                             int64_t padded_seq_len, int64_t step_width,
                             const PadLayout& layout) {
  PADDLE_ENFORCE_EQ(static_cast<size_t>(seq_tensor_dims[0]), seq_offset.back(),
                    "Value of 1st dimension of the sequence tensor should be "
                    "equal to sum of lengths of all sequences.");

  PADDLE_ENFORCE(seq_tensor_dims.size() + 1 == pad_tensor_dims.size() ||
                     seq_tensor_dims.size() == pad_tensor_dims.size(),
                 "pad_tensor's rank should be 1 greater than seq_tensor's "
                 "rank, or be equal with it.");
}
/*
...
...
@@ -64,15 +81,22 @@ inline static size_t MaximumSequenceLength(const framework::LoD& lod,
template <typename DeviceContext, typename T>
class PaddingLoDTensorFunctor {
 public:
  void operator()(const DeviceContext& context,
                  const framework::LoDTensor& seq, framework::Tensor* padding,
                  bool norm_by_times);
  void operator()(const DeviceContext& context,
                  const framework::LoDTensor& seq_tensor,
                  framework::LoDTensor* pad_tensor,
                  const framework::LoDTensor& pad_value, int pad_seq_len = -1,
                  int lod_level = 0, bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth);
};

template <typename DeviceContext, typename T>
class UnpaddingLoDTensorFunctor {
 public:
  void operator()(const DeviceContext& context, framework::LoDTensor* seq,
                  const framework::Tensor& padding, bool norm_by_times);
  void operator()(const DeviceContext& context,
                  const framework::LoDTensor& pad_tensor,
                  framework::LoDTensor* seq_tensor, int pad_seq_len = -1,
                  int lod_level = 0, bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth);
};

}  // namespace math
...
...
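For intuition, the new `MaximumSequenceLength` works directly on an absolute-offset vector; a tiny standalone sketch with std::vector standing in for `framework::Vector` (values made up):

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // Offsets {0, 2, 5, 9} describe three sequences of lengths 2, 3 and 4.
  std::vector<size_t> seq_offset = {0, 2, 5, 9};
  size_t max_seq_len = 0;
  for (size_t i = 0; i + 1 < seq_offset.size(); ++i) {
    max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
  }
  std::printf("max_seq_len = %zu\n", max_seq_len);  // prints 4
  return 0;
}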
paddle/fluid/operators/math/sequence_padding_test.cc
...
...
@@ -23,7 +23,9 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
  paddle::framework::LoDTensor cpu_seq_back;
  paddle::framework::LoDTensor seq;
  paddle::framework::LoDTensor seq_back;
  paddle::framework::Tensor padding;
  paddle::framework::LoDTensor padding;
  paddle::framework::LoDTensor cpu_pad_value;
  paddle::framework::LoDTensor pad_value;

  const size_t level = lod.size() - 1;
  auto seq_dims =
...
@@ -46,20 +48,33 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
}
const
size_t
max_sequence_length
=
paddle
::
operators
::
math
::
MaximumSequenceLength
(
lod
,
level
);
paddle
::
operators
::
math
::
MaximumSequenceLength
(
lod
[
level
]
);
const
size_t
num_sequences
=
lod
[
level
].
size
()
-
1
;
auto
padding_dims
=
paddle
::
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
max_sequence_length
),
static_cast
<
int64_t
>
(
num_sequences
),
static_cast
<
int64_t
>
(
sequence_width
)});
padding
.
mutable_data
<
T
>
(
padding_dims
,
*
place
);
T
*
pad_value_data
=
cpu_pad_value
.
mutable_data
<
T
>
({
1
},
paddle
::
platform
::
CPUPlace
());
*
pad_value_data
=
static_cast
<
T
>
(
0
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
pad_value
=
cpu_pad_value
;
}
else
{
TensorCopySync
(
cpu_pad_value
,
*
place
,
&
pad_value
);
}
paddle
::
operators
::
math
::
PaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
*
context
,
seq
,
&
padding
,
false
);
*
context
,
seq
,
&
padding
,
pad_value
,
-
1
,
0
,
false
,
paddle
::
operators
::
math
::
kLengthBatchWidth
);
seq_back
.
set_lod
(
lod
);
seq_back
.
mutable_data
<
T
>
(
seq_dims
,
*
place
);
paddle
::
operators
::
math
::
UnpaddingLoDTensorFunctor
<
DeviceContext
,
T
>
()(
*
context
,
&
seq_back
,
padding
,
false
);
*
context
,
padding
,
&
seq_back
,
-
1
,
0
,
false
,
paddle
::
operators
::
math
::
kLengthBatchWidth
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
cpu_seq_back
=
seq_back
;
...
...
paddle/fluid/operators/pad_constant_like_op.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pad_constant_like_op.h"
namespace paddle {
namespace operators {

using framework::Tensor;

class PadConstantLikeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of PadConstantLikeOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Y"),
                   "Input(Y) of PadConstantLikeOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of PadConstantLikeOp should not be null.");

    auto x_dim = ctx->GetInputDim("X");
    auto y_dim = ctx->GetInputDim("Y");

    PADDLE_ENFORCE_EQ(x_dim.size(), y_dim.size(),
                      "The dimension of X and Y should be the same.");

    for (int i = 0; i < x_dim.size(); ++i) {
      PADDLE_ENFORCE_GE(x_dim[i], y_dim[i]);
    }
    ctx->SetOutputDim("Out", x_dim);
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<Tensor>("Y")->type()),
        ctx.device_context());
  }
};
class PadConstantLikeOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "The input of pad_constant_like op. "
             "The input should be a k-D tensor(k > 0 and k < 7)");
    AddInput("Y",
             "The input of pad_constant_like op. "
             "The input should be a k-D tensor(k > 0 and k < 7)");
    AddOutput("Out",
              "The output of pad_constant_like op. "
              "A tensor with the same shape as X.");
    AddAttr<float>("pad_value",
                   "(float, default 0.0) "
                   "The value to fill the padded areas.")
        .SetDefault(0.0f);
    AddComment(R"DOC(
PadConstantLikeOp Operator.

Pad input(Y) with a pad_value, the number of values padded to the edges of each
axis is specified by the difference of the shape of X and Y.
((0, shape_x_0 - shape_y_0), … (0, shape_x_n - shape_y_n)) unique pad widths for
each axis.
The input should be a k-D tensor(k > 0 and k < 7). As an example:

case1:
    Given:
        X = [[1, 2],
             [3, 4],
             [1, 2],
             [3, 4]]],
        X.shape = (4, 2)

        Y = [[5, 6],
             [7, 8]],
        Y.shape = (2, 2)

    And
        pad_value = 0,

    Return:
        Out = [[5, 6],
               [7, 8],
               [0, 0],
               [0, 0]]
        Out.shape = (4, 2)

case2:
    Given:
        X = [[[[ 0,  1,  2],
               [ 3,  4,  5]],
              [[ 6,  7,  8],
               [ 9, 10, 11]],
              [[12, 13, 14],
               [15, 16, 17]]],
             [[[18, 19, 20],
               [21, 22, 23]],
              [[24, 25, 26],
               [27, 28, 29]],
              [[30, 31, 32],
               [33, 34, 35]]]]
        X.shape = (2, 3, 2, 3)

        Y = [[[[35, 36, 37]],
              [[38, 39, 40]],
              [[41, 42, 43]]]]
        Y.shape = (1, 3, 1, 3)

    And
        pad_value = -1,

    Return:
        Out = [[[[35, 36, 37],
                 [-1, -1, -1]],
                [[38, 39, 40],
                 [-1, -1, -1]],
                [[41, 42, 43],
                 [-1, -1, -1]]],
               [[[-1, -1, -1],
                 [-1, -1, -1]],
                [[-1, -1, -1],
                 [-1, -1, -1]],
                [[-1, -1, -1],
                 [-1, -1, -1]]]]
        Out.shape = (2, 3, 2, 3)
)DOC");
  }
};
class PadConstantLikeOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
    auto y_dim = ctx->GetInputDim("Y");
    auto dout_dim = ctx->GetInputDim(framework::GradVarName("Out"));

    PADDLE_ENFORCE_EQ(dout_dim.size(), y_dim.size(),
                      "The dimension of X and Y should be the same.");

    auto y_grad_name = framework::GradVarName("Y");
    if (ctx->HasOutput(y_grad_name)) {
      ctx->SetOutputDim(y_grad_name, y_dim);
      ctx->ShareLoD("Y", /*->*/ y_grad_name);

      for (int i = 0; i < y_dim.size(); ++i) {
        PADDLE_ENFORCE_GE(dout_dim[i], y_dim[i]);
      }
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<Tensor>("Y")->type()),
        ctx.device_context());
  }
};
class PadConstantLikeOpGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto* bind = new framework::OpDesc();
    bind->SetType("pad_constant_like_grad");
    bind->SetInput("Y", Input("Y"));
    bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    bind->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
    bind->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(bind);
  }
};

}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;

REGISTER_OPERATOR(pad_constant_like, ops::PadConstantLikeOp,
                  ops::PadConstantLikeOpMaker,
                  ops::PadConstantLikeOpGradMaker);
REGISTER_OPERATOR(pad_constant_like_grad, ops::PadConstantLikeOpGrad);

REGISTER_OP_CPU_KERNEL(
    pad_constant_like,
    ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, float>,
    ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    pad_constant_like_grad,
    ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext,
                                   double>);
paddle/fluid/operators/pad_constant_like_op.cu
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/fluid/operators/pad_constant_like_op.h"
namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
    pad_constant_like,
    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, float>,
    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    pad_constant_like_grad,
    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
                                   double>);
paddle/fluid/operators/pad_constant_like_op.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <utility>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/padding.h"
namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class PadConstantLikeKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto in_x = context.Input<framework::Tensor>("X");
    auto in_y = context.Input<framework::Tensor>("Y");
    auto* out = context.Output<framework::Tensor>("Out");

    if (in_x->dims() == in_y->dims()) {
      // TensorCopy(in_y, context.GetPlace(), context, out);
      out->ShareDataWith(*in_y);
      return;
    }

    T pad_value = context.Attr<T>("pad_value");
    out->mutable_data<T>(context.GetPlace());

    int rank = context.Input<framework::Tensor>("X")->dims().size();

    std::vector<int> pads(rank * 2, 0);
    for (int j = 0; j < rank; ++j) {
      pads[j * 2] = 0;
      pads[j * 2 + 1] = static_cast<int>(in_x->dims()[j] - in_y->dims()[j]);
    }

    math::PaddingFunctor<DeviceContext, T>(rank, context, pads, pad_value,
                                           *in_y, out);
  }
};

template <typename DeviceContext, typename T>
class PadConstantLikeGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto in_y = context.Input<framework::Tensor>("Y");
    auto in_dout =
        context.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* d_y = context.Output<framework::Tensor>(framework::GradVarName("Y"));

    if (d_y == nullptr) {
      return;
    }

    if (in_dout->dims() == in_y->dims()) {
      // TensorCopy(in_dout, context.GetPlace(), context, d_y);
      d_y->ShareDataWith(*in_dout);
      return;
    }

    d_y->mutable_data<T>(context.GetPlace());
    int rank = in_dout->dims().size();

    std::vector<int> pads(static_cast<size_t>(rank) * 2, 0);
    for (int j = 0; j < rank; ++j) {
      pads[j * 2] = 0;
      pads[j * 2 + 1] =
          static_cast<int>(in_dout->dims()[j] - in_y->dims()[j]);
    }

    math::PaddingGradFunctor<DeviceContext, T>(rank, context, pads, *in_dout,
                                               d_y);
  }
};

}  // namespace operators
}  // namespace paddle
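The forward kernel only ever pads at the high end of each axis; a quick standalone check of the `pads` vector it builds, with made-up shapes (not from the commit):

#include <cstdio>
#include <vector>

int main() {
  // X.shape = (4, 5), Y.shape = (2, 3): pad Y by (0, 2) on dim 0 and
  // (0, 2) on dim 1, i.e. only after the existing data on each axis.
  std::vector<int> x_dims = {4, 5}, y_dims = {2, 3};
  int rank = static_cast<int>(x_dims.size());
  std::vector<int> pads(rank * 2, 0);
  for (int j = 0; j < rank; ++j) {
    pads[j * 2] = 0;                          // nothing before
    pads[j * 2 + 1] = x_dims[j] - y_dims[j];  // difference goes after
  }
  for (int p : pads) std::printf("%d ", p);   // prints: 0 2 0 2
  std::printf("\n");
  return 0;
}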
paddle/fluid/operators/pad_op.h
...
...
@@ -18,117 +18,44 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/padding.h"
namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T, size_t D, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;

template <typename DeviceContext, typename T, size_t D>
void PadFunction(const framework::ExecutionContext& context) {
  auto pads = context.Attr<std::vector<int>>("paddings");
  Eigen::array<std::pair<int, int>, D> paddings;
  for (size_t i = 0; i < paddings.size(); ++i) {
    paddings[i].first = pads[i * 2];
    paddings[i].second = pads[i * 2 + 1];
  }
  T pad_value = context.Attr<T>("pad_value");

  auto* x = context.Input<Tensor>("X");
  auto* out = context.Output<Tensor>("Out");
  out->mutable_data<T>(context.GetPlace());

  auto x_tensor = EigenTensor<T, D>::From(*x);
  auto out_tensor = EigenTensor<T, D>::From(*out);
  auto& place =
      *context.template device_context<DeviceContext>().eigen_device();
  out_tensor.device(place) = x_tensor.pad(paddings, pad_value);
}

template <typename DeviceContext, typename T>
class PadKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    int rank = context.Input<Tensor>("X")->dims().size();
    switch (rank) {
      case 1:
        PadFunction<DeviceContext, T, 1>(context);
        break;
      case 2:
        PadFunction<DeviceContext, T, 2>(context);
        break;
      case 3:
        PadFunction<DeviceContext, T, 3>(context);
        break;
      case 4:
        PadFunction<DeviceContext, T, 4>(context);
        break;
      case 5:
        PadFunction<DeviceContext, T, 5>(context);
        break;
      case 6:
        PadFunction<DeviceContext, T, 6>(context);
        break;
      default:
        PADDLE_THROW(
            "PadOp only support tensors with no more than 6 dimensions.");
    }
    auto pads = context.Attr<std::vector<int>>("paddings");
    T pad_value = context.Attr<T>("pad_value");
    auto* x = context.Input<Tensor>("X");
    auto* out = context.Output<Tensor>("Out");
    out->mutable_data<T>(context.GetPlace());

    int rank = x->dims().size();
    math::PaddingFunctor<DeviceContext, T>(rank, context, pads, pad_value, *x,
                                           out);
  }
};

template <typename DeviceContext, typename T, size_t D>
void PadGradFunction(const framework::ExecutionContext& context) {
  auto pads = context.Attr<std::vector<int>>("paddings");
  Eigen::array<std::pair<int, int>, D> paddings;
  for (size_t i = 0; i < paddings.size(); ++i) {
    paddings[i].first = -pads[i * 2];
    paddings[i].second = -pads[i * 2 + 1];
  }
  auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
  auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
  if (d_x != nullptr) {
    d_x->mutable_data<T>(context.GetPlace());
    auto d_x_tensor = EigenTensor<T, D>::From(*d_x);
    auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
    auto& place =
        *context.template device_context<DeviceContext>().eigen_device();
    d_x_tensor.device(place) = d_out_tensor.pad(paddings, 0);
  }
}

template <typename DeviceContext, typename T>
class PadGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    size_t rank =
        context.Input<Tensor>(framework::GradVarName("Out"))->dims().size();
    switch (rank) {
      case 1:
        PadGradFunction<DeviceContext, T, 1>(context);
        break;
      case 2:
        PadGradFunction<DeviceContext, T, 2>(context);
        break;
      case 3:
        PadGradFunction<DeviceContext, T, 3>(context);
        break;
      case 4:
        PadGradFunction<DeviceContext, T, 4>(context);
        break;
      case 5:
        PadGradFunction<DeviceContext, T, 5>(context);
        break;
      case 6:
        PadGradFunction<DeviceContext, T, 6>(context);
        break;
      default:
        PADDLE_THROW(
            "PadOp only support tensors with no more than 6 dimensions.");
    auto pads = context.Attr<std::vector<int>>("paddings");
    auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* d_x = context.Output<Tensor>(framework::GradVarName("X"));
    if (d_x == nullptr) {
      return;
    }

    d_x->mutable_data<T>(context.GetPlace());
    int rank = d_out->dims().size();
    math::PaddingGradFunctor<DeviceContext, T>(rank, context, pads, *d_out,
                                               d_x);
  }
};
...
...
paddle/fluid/operators/scale_op.cc
...
...
@@ -13,8 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/scale_op.h"
#include <string>
#include "paddle/fluid/operators/detail/safe_ref.h"
namespace paddle {
namespace operators {
...
...
@@ -52,6 +55,21 @@ $$Out = scale*X$$
  }
};

class ScaleOpVarTypeInference : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc& op_desc,
                  framework::BlockDesc* block) const override {
    auto& in_var_name = op_desc.Input("X").front();
    auto& in_var = detail::Ref(block->FindVarRecursive(in_var_name));

    auto out_var_name = op_desc.Output("Out").front();
    auto* out_var = block->FindVarRecursive(out_var_name);

    out_var->SetType(in_var.GetType());
    out_var->SetDataType(in_var.GetDataType());
  }
};

class ScaleGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
...
...
@@ -71,7 +89,8 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
namespace ops = paddle::operators;

REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradMaker);
REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradMaker,
                  ops::ScaleOpVarTypeInference);
REGISTER_OP_CPU_KERNEL(
    scale, ops::ScaleKernel<paddle::platform::CPUDeviceContext, float>,
    ops::ScaleKernel<paddle::platform::CPUDeviceContext, double>,
...
...
paddle/fluid/operators/scale_op.h
...
...
@@ -22,17 +22,29 @@ namespace operators {
template <typename DeviceContext, typename T>
class ScaleKernel : public framework::OpKernel<T> {
 public:
  virtual void Compute(const framework::ExecutionContext& context) const {
    auto* tensor = context.Output<framework::Tensor>("Out");
    auto* in = context.Input<framework::Tensor>("X");
    tensor->mutable_data<T>(in->place());
  virtual void Compute(const framework::ExecutionContext& ctx) const {
    auto* in_var = ctx.InputVar("X");
    auto* in = ctx.Input<framework::Tensor>("X");

    auto scale = static_cast<T>(context.Attr<float>("scale"));
    auto* out_var = ctx.OutputVar("Out");
    auto* out = ctx.Output<framework::Tensor>("Out");
    out->mutable_data<T>(in->place());

    auto eigen_out = framework::EigenVector<T>::Flatten(*tensor);
    PADDLE_ENFORCE_EQ(in->dims(), out->dims(),
                      "in and out should have the same dim");

    auto scale = static_cast<T>(ctx.Attr<float>("scale"));

    if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
      auto& in_slr = in_var->Get<framework::SelectedRows>();
      auto* out_slr = out_var->GetMutable<framework::SelectedRows>();
      out_slr->set_rows(in_slr.rows());
      out_slr->set_height(in_slr.height());
    }

    auto eigen_out = framework::EigenVector<T>::Flatten(*out);
    auto eigen_in = framework::EigenVector<T>::Flatten(*in);
    auto& dev =
        *context.template device_context<DeviceContext>().eigen_device();
    auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
    eigen_out.device(dev) = scale * eigen_in;
  }
};
...
...
paddle/fluid/operators/send_barrier_op.cc
...
...
@@ -56,6 +56,10 @@ class SendBarrierOp : public framework::OperatorBase {
class SendBarrierOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() {
    AddInput("X", "(Any) Dummy inputs, used for control dependency")
        .AsDuplicable();
    AddOutput("Out", "(Any) Dummy outputs, used for control dependency")
        .AsDuplicable();
    AddComment(R"DOC(
SendBarrier operator
...
...
paddle/fluid/operators/sequence_pad_op.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/sequence_pad_op.h"
namespace
paddle
{
namespace
operators
{
class
SequencePadOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of SequencePadOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"PadValue"
),
"Input(PadValue) of SequencePadOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of SequencePadOp should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_GE
(
x_dims
.
size
(),
2
,
"The rank of Input(x) can't be less than 2."
);
auto
time_step_dims
=
framework
::
slice_ddim
(
x_dims
,
1
,
x_dims
.
size
());
auto
pad_value_dims
=
ctx
->
GetInputDim
(
"PadValue"
);
PADDLE_ENFORCE
(
pad_value_dims
==
framework
::
make_ddim
({
1
})
||
pad_value_dims
==
time_step_dims
,
"The Input(PadValue) must be a scalar or a tensor whose "
"shape equals to time steps in sequences"
);
int
out_dim_0
=
-
1
;
int
out_dim_1
=
-
1
;
if
(
ctx
->
IsRuntime
())
{
// run time
framework
::
Variable
*
x_var
=
boost
::
get
<
framework
::
Variable
*>
(
ctx
->
GetInputVarPtrs
(
"X"
)[
0
]);
const
auto
&
x_lod
=
x_var
->
Get
<
LoDTensor
>
().
lod
();
PADDLE_ENFORCE
(
!
x_lod
.
empty
(),
"The Input(X) must hold lod info."
);
const
auto
&
x_lod_0
=
x_lod
[
0
];
PADDLE_ENFORCE_GE
(
x_lod_0
.
size
(),
2
,
"The Input(X)'s lod info is corrupted."
);
PADDLE_ENFORCE_EQ
(
x_dims
[
0
],
static_cast
<
int64_t
>
(
x_lod_0
.
back
()),
"The Input(X)'s lod info mismatches the actual tensor shape."
);
int
seq_num
=
x_lod_0
.
size
()
-
1
;
int
max_seq_len
=
math
::
MaximumSequenceLength
(
x_lod_0
);
int
padded_length
=
ctx
->
Attrs
().
Get
<
int
>
(
"padded_length"
);
if
(
padded_length
==
-
1
)
{
padded_length
=
max_seq_len
;
}
PADDLE_ENFORCE_GE
(
padded_length
,
max_seq_len
,
"The Attr(padded_length) must be -1 or an int greater "
"than the length of the longest original sequence."
);
out_dim_0
=
seq_num
;
out_dim_1
=
padded_length
;
}
else
{
// compile time
framework
::
VarDesc
*
x_desc
=
boost
::
get
<
framework
::
VarDesc
*>
(
ctx
->
GetInputVarPtrs
(
"X"
)[
0
]);
PADDLE_ENFORCE_GE
(
x_desc
->
GetLoDLevel
(),
1
);
}
std
::
vector
<
int
>
out_dims_vec
{
out_dim_0
,
out_dim_1
};
auto
time_step_dims_vec
=
framework
::
vectorize2int
(
time_step_dims
);
out_dims_vec
.
insert
(
out_dims_vec
.
end
(),
time_step_dims_vec
.
begin
(),
time_step_dims_vec
.
end
());
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
out_dims_vec
));
}
};
class
SequencePadOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor, default LoDTensor<float>) Input variable which "
"should contain lod information."
);
AddInput
(
"PadValue"
,
"(LoDTensor), this Tensor holds values that will be fill into "
"padded steps. It can be a scalar or a tensor whose shape equals "
"to time steps in sequences. If it's a scalar, it will be "
"automatically broadcasted to the shape of time step."
);
AddOutput
(
"Out"
,
"(LoDTensor) The output vairable, which contains padded sequences."
);
AddAttr
<
int
>
(
"padded_length"
,
"The length of padded sequences. It can be setted to -1 or "
"any positive int. When it is -1, all sequences will be padded up to "
"the length of the longest one among them; when it a certain positive "
"value, it must be greater than the length of the longest original "
"sequence."
)
.
SetDefault
(
-
1
);
AddComment
(
R"DOC(
Sequence Pad Operator
This operator pads sequences in a same batch to a consistent length.
The length is specified by attribute 'padded_length'. New elements,
whose values are specified by input 'PadValue', will be appended to
the end of each sequence, to make their final lengths consistent.
Following are cases to better explain how this works:
Case 1:
Given a 1-level LoDTensor input(X):
X.lod = [[0, 2, 5]]
X.data = [a, b, c, d, e]
and Input(PadValue):
PadValue.data = [0]
and attribite 'padded_length' = 4,
then we get LoDTensor:
Out.data = [[a, b, 0, 0],
[c, d, e, 0]]
Case 2:
Given a 1-level LoDTensor input(X):
X.lod = [[0, 2, 5]]
X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
and Input(PadValue):
PadValue.data = [0]
and attribite 'padded_length' = -1, which mean using the length
of longest input sequence(3 in this case),
then we get LoDTensor:
Out.data = [[[a1, a2], [b1, b2], [0, 0]],
[[c1, c2], [d1, d2], [e1, e2]]]
Case 3:
Given a 1-level LoDTensor input(X):
X.lod = [[0, 2, 5]]
X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
and Input(PadValue):
PadValue.data = [p1, p2]
and attribite 'padded_length' = -1, which mean using the length
of longest input sequence(3 in this case),
then we get LoDTensor:
Out.data = [[[a1, a2], [b1, b2], [p1, p2]],
[[c1, c2], [d1, d2], [e1, e2]]]
)DOC"
);
}
};
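Case 1 above can be reproduced with a few lines of numpy; this is an illustrative sketch of the padding rule, not part of the operator itself:

    import numpy as np

    # lod [[0, 2, 5]] splits the data into sequences [a, b] and [c, d, e].
    x = np.array([1., 2., 3., 4., 5.])  # stands in for [a, b, c, d, e]
    lod0, pad_value, padded_length = [0, 2, 5], 0., 4

    out = np.full((len(lod0) - 1, padded_length), pad_value)
    for i in range(len(lod0) - 1):
        seq = x[lod0[i]:lod0[i + 1]]
        out[i, :len(seq)] = seq
    # out == [[1., 2., 0., 0.],
    #         [3., 4., 5., 0.]]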
class SequencePadGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SequencePadGradOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) of SequencePadGradOp should not be null.");

    if (ctx->HasOutput(framework::GradVarName("X"))) {
      ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
      ctx->ShareLoD("X", /*->*/ framework::GradVarName("X"));
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(sequence_pad, ops::SequencePadOp, ops::SequencePadOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(sequence_pad_grad, ops::SequencePadGradOp);
REGISTER_OP_CPU_KERNEL(
    sequence_pad,
    ops::SequencePadOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SequencePadOpKernel<paddle::platform::CPUDeviceContext, double>,
    ops::SequencePadOpKernel<paddle::platform::CPUDeviceContext, int>,
    ops::SequencePadOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
    sequence_pad_grad,
    ops::SequencePadGradOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::SequencePadGradOpKernel<paddle::platform::CPUDeviceContext, double>,
    ops::SequencePadGradOpKernel<paddle::platform::CPUDeviceContext, int>,
    ops::SequencePadGradOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
paddle/fluid/operators/sequence_pad_op.cu
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/sequence_pad_op.h"

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    sequence_pad,
    ops::SequencePadOpKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SequencePadOpKernel<paddle::platform::CUDADeviceContext, double>,
    ops::SequencePadOpKernel<paddle::platform::CUDADeviceContext, int>,
    ops::SequencePadOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
    sequence_pad_grad,
    ops::SequencePadGradOpKernel<paddle::platform::CUDADeviceContext, float>,
    ops::SequencePadGradOpKernel<paddle::platform::CUDADeviceContext, double>,
    ops::SequencePadGradOpKernel<paddle::platform::CUDADeviceContext, int>,
    ops::SequencePadGradOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
paddle/fluid/operators/sequence_pad_op.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_padding.h"

namespace paddle {
namespace operators {

using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;

template <typename DeviceContext, typename T>
class SequencePadOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* x = ctx.Input<LoDTensor>("X");
    auto* out = ctx.Output<LoDTensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());

    const auto* pad_value = ctx.Input<LoDTensor>("PadValue");

    int padded_length = ctx.Attr<int>("padded_length");

    math::PaddingLoDTensorFunctor<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), *x, out, *pad_value,
        padded_length, 0, false, math::kBatchLengthWidth);
  }
};

template <typename DeviceContext, typename T>
class SequencePadGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* d_x = ctx.Output<LoDTensor>(framework::GradVarName("X"));
    if (d_x) {
      const auto* d_out = ctx.Input<LoDTensor>(framework::GradVarName("Out"));
      d_x->mutable_data<T>(ctx.GetPlace());

      int padded_length = ctx.Attr<int>("padded_length");

      math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
          ctx.template device_context<DeviceContext>(), *d_out, d_x,
          padded_length, 0, false, math::kBatchLengthWidth);
    }
  }
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/unstack_op.cc
0 → 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/unstack_op.h"

namespace plat = paddle::platform;
namespace ops = paddle::operators;

USE_OP(stack);

REGISTER_OPERATOR(unstack, ops::UnStackOp, ops::UnStackOpMaker,
                  ops::UnStackOpInferShape, ops::UnStackGradOpDescMaker);

REGISTER_OPERATOR(unstack_grad, ops::UnStackGradOp,
                  ops::UnStackOpGradInferShape);
paddle/fluid/operators/unstack_op.h
0 → 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

class UnStackOpInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must exist.");

    int axis = ctx->Attrs().Get<int>("axis");
    int num = ctx->Attrs().Get<int>("num");
    auto x_dim = ctx->GetInputDim("X");
    int rank = x_dim.size();
    PADDLE_ENFORCE(axis >= -rank && axis < rank,
                   "Attr(axis) must be inside [-rank, rank), where rank = %d",
                   rank);
    if (axis < 0) axis += rank;

    PADDLE_ENFORCE_EQ(ctx->Outputs("Y").size(), static_cast<size_t>(num),
                      "Number of Outputs(Y) is wrong");
    if (x_dim[axis] > 0) {
      PADDLE_ENFORCE_EQ(num, x_dim[axis], "Number of Outputs(Y) is wrong");
    }

    auto vec = framework::vectorize2int(x_dim);
    vec.erase(vec.begin() + axis);
    ctx->SetOutputsDim("Y", std::vector<framework::DDim>(  // NOLINT
                                x_dim[axis], framework::make_ddim(vec)));
  }
};

class UnStackOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input of unstack op.");
    AddOutput("Y", "The output of unstack op.").AsDuplicable();
    AddAttr<int>("axis", "The axis along which Input(X) should be unstacked.")
        .SetDefault(0);
    AddAttr<int>("num", "The number of outputs(Y).").GreaterThan(0);
    AddComment(R"DOC(
      UnStack Operator.

      UnStack Input(X) into several tensors along Attr(axis).
    )DOC");
  }
};

class UnStackOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto stack_grad_op = framework::OpRegistry::CreateOp(
        "stack_grad", {{framework::GradVarName("Y"), {Input("X")}}},
        {{framework::GradVarName("X"), Outputs("Y")}}, Attrs());
    stack_grad_op->Run(scope, place);
  }
};

class UnStackOpGradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_GT(ctx->Inputs(framework::GradVarName("Y")).size(), 0,
                      "Number of Inputs(Y@Grad) must be larger than 0");
    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
                   "Output(X@Grad) must exist.");

    auto input_dims = ctx->GetInputsDim(framework::GradVarName("Y"));
    for (size_t i = 1; i < input_dims.size(); ++i) {
      PADDLE_ENFORCE_EQ(input_dims[i], input_dims[0],
                        "Dims of all Inputs(Y@Grad) must be the same");
    }

    int axis = ctx->Attrs().Get<int>("axis");
    int rank = input_dims[0].size();
    PADDLE_ENFORCE(
        axis >= -(rank + 1) && axis < rank + 1,
        "Attr(axis) must be inside [-(rank+1), rank+1), where rank = %d", rank);
    if (axis < 0) axis += (rank + 1);

    auto vec = framework::vectorize2int(input_dims[0]);
    vec.insert(vec.begin() + axis, input_dims.size());
    ctx->SetOutputDim(framework::GradVarName("X"), framework::make_ddim(vec));
  }
};

class UnStackGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
    op->SetType("unstack_grad");
    op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    op->SetAttrMap(Attrs());
    return op;
  }
};

class UnStackGradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto stack_op = framework::OpRegistry::CreateOp(
        "stack", {{"X", Inputs(framework::GradVarName("Y"))}},
        {{"Y", {Output(framework::GradVarName("X"))}}}, Attrs());
    stack_op->Run(scope, place);
  }
};

}  // namespace operators
}  // namespace paddle
paddle/fluid/operators/warpctc_op.h
...
...
@@ -153,17 +153,29 @@ class WarpCTCKernel : public framework::OpKernel<T> {
         framework::make_ddim({static_cast<int64_t>(num_sequences), 1});

     // warpctc needs sequences data stored in transposed padding format
-    Tensor warpctc_logits;
+    LoDTensor warpctc_logits;
     const size_t max_sequence_length =
-        math::MaximumSequenceLength(logits_lod, level);
+        math::MaximumSequenceLength(logits_lod[level]);
     auto warpctc_logits_dims =
         framework::make_ddim({static_cast<int64_t>(max_sequence_length),
                               static_cast<int64_t>(num_sequences),
                               static_cast<int64_t>(sequence_width)});
     warpctc_logits.mutable_data<T>(warpctc_logits_dims, ctx.GetPlace());
+
+    LoDTensor cpu_pad_value;
+    T* pad_value_data =
+        cpu_pad_value.mutable_data<T>({1}, platform::CPUPlace());
+    *pad_value_data = static_cast<T>(0);
+    LoDTensor pad_value;
+    if (platform::is_cpu_place(ctx.GetPlace())) {
+      pad_value = cpu_pad_value;
+    } else {
+      TensorCopySync(cpu_pad_value, ctx.GetPlace(), &pad_value);
+    }
+
     math::PaddingLoDTensorFunctor<DeviceContext, T>()(
-        ctx.template device_context<DeviceContext>(), *logits, &warpctc_logits,
-        false);
+        ctx.template device_context<DeviceContext>(), *logits, &warpctc_logits,
+        pad_value, -1, 0, false /* norm_by_times */, math::kLengthBatchWidth);
     const T* warpctc_logits_data = warpctc_logits.data<T>();

     std::vector<int> warpctc_label_lengths(num_sequences);
...
...
@@ -209,15 +221,15 @@ template <typename DeviceContext, typename T>
 class WarpCTCGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* warpctc_grad = ctx.Input<Tensor>("WarpCTCGrad");
+    auto* warpctc_grad = ctx.Input<LoDTensor>("WarpCTCGrad");
     auto* logits_grad = ctx.Output<LoDTensor>(framework::GradVarName("Logits"));
     const Tensor* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));

     logits_grad->mutable_data<T>(ctx.GetPlace());
     bool norm_by_times = ctx.Attr<bool>("norm_by_times");
     math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
-        ctx.template device_context<DeviceContext>(), logits_grad,
-        *warpctc_grad, norm_by_times);
+        ctx.template device_context<DeviceContext>(), *warpctc_grad,
+        logits_grad, -1, 0, norm_by_times, math::kLengthBatchWidth);

     const T* loss_grad_data = loss_grad->data<T>();
     math::ScaleLoDTensorFunctor<DeviceContext, T>()(
...
...
paddle/fluid/platform/CMakeLists.txt
if (NOT WIN32)
proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto)
py_proto_compile(profiler_py_proto SRCS profiler.proto)
...
...
@@ -10,6 +11,7 @@ add_custom_command(TARGET profiler_py_proto POST_BUILD
    COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
    COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif(NOT WIN32)

if(WITH_GPU)
    nv_library(enforce SRCS enforce.cc)
...
...
@@ -58,9 +60,12 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)

if (NOT WIN32)
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer)
cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
endif(NOT WIN32)

nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor)
cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor)
...
...
paddle/fluid/platform/cpu_info.cc
...
...
@@ -22,9 +22,13 @@ limitations under the License. */
 #ifdef __APPLE__
 #include <sys/sysctl.h>
 #include <sys/types.h>
+#elif defined(_WIN32)
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#include <windows.h>
 #else
 #include <unistd.h>
-#endif
+#endif  // _WIN32

 #include <algorithm>
 #include "gflags/gflags.h"
...
...
@@ -32,16 +36,20 @@ limitations under the License. */
 DEFINE_double(fraction_of_cpu_memory_to_use, 1,
               "Default use 100% of CPU memory for PaddlePaddle,"
               "reserve the rest for page tables, etc");

+#if !defined(_WIN32)
 DEFINE_uint64(initial_cpu_memory_in_mb,
 #ifdef PADDLE_WITH_MKLDNN
               /* Aligned with mozga-intel, MKLDNN need at least 5000 MB
                * to obtain the best performance*/
-              5000,
+              5000ul,
 #else
-              500,
+              500ul,
 #endif
               "Initial CPU memory for PaddlePaddle, in MD unit.");
+#else
+DEFINE_uint64(initial_cpu_memory_in_mb, 500ul,
+              "Initial CPU memory for PaddlePaddle, in MD unit.");
+#endif  // !defined(_WIN32)

 DEFINE_double(fraction_of_cuda_pinned_memory_to_use, 0.5,
...
...
@@ -60,6 +68,11 @@ inline size_t CpuTotalPhysicalMemory() {
   size_t len = sizeof(size);
   if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size;
   return 0L;
+#elif defined(_WIN32)
+  MEMORYSTATUSEX sMeminfo;
+  sMeminfo.dwLength = sizeof(sMeminfo);
+  GlobalMemoryStatusEx(&sMeminfo);
+  return sMeminfo.ullTotalPhys;
 #else
   int64_t pages = sysconf(_SC_PHYS_PAGES);
   int64_t page_size = sysconf(_SC_PAGE_SIZE);
...
...
paddle/fluid/platform/device_tracer.h
...
...
@@ -13,7 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once

+#if !defined(_WIN32)
 #include <sys/time.h>
+#else
+#include <windows.h>
+#endif  // !_WIN32
+
 #include <time.h>
 #include <chrono>  // NOLINT
 #include <string>
...
...
@@ -27,12 +32,15 @@ namespace platform {
 ///////////////////////
 // WARN: Under Development. Don't depend on it yet.
 //////////////////////

+#if !defined(_WIN32)
 inline uint64_t PosixInNsec() {
   struct timeval tv;
   gettimeofday(&tv, nullptr);
   return 1000 * (static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec);
 }
+#else
+inline uint64_t PosixInNsec() { return static_cast<uint64_t>(0); }
+#endif  // !_WIN32

 // DeviceTracer performs the following tasks:
 // 1. Register cuda callbacks for various events: kernel, memcpy, etc.
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
...
...
@@ -16,7 +16,9 @@ if (CUPTI_FOUND)
    list(APPEND CUDA_SRCS cupti.cc)
endif(CUPTI_FOUND)
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader)
+if (NOT WIN32)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
+endif(NOT WIN32)
if (WITH_MKLML)
    cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml)
endif()
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
...
...
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/platform/dynload/dynamic_loader.h"
-#include <dlfcn.h>
 #include <memory>
 #include <mutex>  // NOLINT
 #include <string>
...
...
@@ -23,6 +21,7 @@ limitations under the License. */
 #include "glog/logging.h"
 #include "paddle/fluid/platform/dynload/cupti_lib_path.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/port.h"

 DEFINE_string(cudnn_dir, "",
               "Specify path for loading libcudnn.so. For instance, "
...
...
paddle/fluid/platform/enforce.h
...
...
@@ -18,6 +18,11 @@ limitations under the License. */
 #include <cxxabi.h>  // for __cxa_demangle
 #endif  // __GNUC__

+#if defined(_WIN32)
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
+#endif
+
 #ifdef PADDLE_WITH_CUDA
 #include <cublas_v2.h>
 #include <cudnn.h>
...
...
@@ -117,7 +122,12 @@ struct EOFException : public std::exception {
 // always forces branch prediction of true.
 // This generates faster binary code. __builtin_expect is since C++11.
 // For more details, please check https://stackoverflow.com/a/43870188/724872.
+#if !defined(_WIN32)
 #define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
+#else
+// there is no equivalent intrinsics in msvc.
+#define UNLIKELY(condition) (condition == 0)
+#endif

 template <typename... Args>
 inline typename std::enable_if<sizeof...(Args) != 0, void>::type
 throw_on_error(
...
...
@@ -230,6 +240,7 @@ inline void throw_on_error(T e) {
   throw_on_error(e, "");
 }

+#if !defined(_WIN32)
 #define PADDLE_THROW(...) \
   do { \
     throw ::paddle::platform::EnforceNotMet( \
...
...
@@ -248,15 +259,28 @@ inline void throw_on_error(T e) {
       __FILE__, __LINE__); \
     } \
   } while (false)
-#else
-#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
-#endif

 #define PADDLE_THROW_EOF() \
   do { \
     throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
                                            __LINE__); \
   } while (false)
+#else
+#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
+#endif  // REPLACE_ENFORCE_GLOG
+#else   // !_WIN32
+// disable enforce, caused by the varardic macro exception error
+#define PADDLE_THROW(x) \
+  do { \
+    throw std::make_exception_ptr( \
+        std::runtime_error("Windows disable the enforce.")); \
+  } while (false)
+#define PADDLE_ENFORCE(x, ...) x
+#endif  // !_WIN32

 /*
  * Some enforce helpers here, usage:
  *    int a = 1;
...
...
paddle/fluid/platform/profiler.h
...
...
@@ -69,6 +69,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
 void PopEvent(const std::string& name, const DeviceContext* dev_ctx);

+#if !defined(_WIN32)
 struct RecordEvent {
   RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
...
...
@@ -94,6 +95,15 @@ struct RecordBlock {
   std::string name_;
   uint64_t start_ns_;
 };
+#else
+// windows do not support profiler temporarily.
+struct RecordEvent {
+  RecordEvent(const std::string& name, const DeviceContext* dev_ctx) {}
+};
+struct RecordBlock {
+  explicit RecordBlock(int block_id) {}
+};
+#endif

 // Return the event list of all threads. Assumed the returned value calls
 // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
...
...
paddle/fluid/pybind/protobuf.cc
...
...
@@ -234,6 +234,7 @@ void BindVarDsec(pybind11::module *m) {
   pybind11::enum_<pd::proto::VarType::Type>(var_desc, "VarType", "")
       .value("BOOL", pd::proto::VarType::BOOL)
       .value("UINT8", pd::proto::VarType::UINT8)
+      .value("INT8", pd::proto::VarType::INT8)
       .value("INT16", pd::proto::VarType::INT16)
       .value("INT32", pd::proto::VarType::INT32)
       .value("INT64", pd::proto::VarType::INT64)
...
...
paddle/fluid/pybind/pybind.cc
...
...
@@ -130,6 +130,7 @@ PYBIND11_PLUGIN(core) {
       .def("set", PyCPUTensorSetFromArray<bool>)
       .def("set", PyCPUTensorSetFromArray<uint16_t>)
       .def("set", PyCPUTensorSetFromArray<uint8_t>)
+      .def("set", PyCPUTensorSetFromArray<int8_t>)
 #ifdef PADDLE_WITH_CUDA
       .def("set", PyCUDATensorSetFromArray<float>)
       .def("set", PyCUDATensorSetFromArray<int>)
...
...
@@ -138,6 +139,7 @@ PYBIND11_PLUGIN(core) {
       .def("set", PyCUDATensorSetFromArray<bool>)
       .def("set", PyCUDATensorSetFromArray<uint16_t>)
       .def("set", PyCUDATensorSetFromArray<uint8_t>)
+      .def("set", PyCUDATensorSetFromArray<int8_t>)
       .def("set", PyCUDAPinnedTensorSetFromArray<float>)
       .def("set", PyCUDAPinnedTensorSetFromArray<int>)
       .def("set", PyCUDAPinnedTensorSetFromArray<double>)
...
...
@@ -145,6 +147,7 @@ PYBIND11_PLUGIN(core) {
       .def("set", PyCUDAPinnedTensorSetFromArray<bool>)
       .def("set", PyCUDAPinnedTensorSetFromArray<uint16_t>)
       .def("set", PyCUDAPinnedTensorSetFromArray<uint8_t>)
+      .def("set", PyCUDAPinnedTensorSetFromArray<int8_t>)
 #endif
       .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
       .def("_set_float_element", TensorSetElement<float>)
...
...
paddle/fluid/pybind/tensor_py.h
...
...
@@ -97,7 +97,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
 inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
   auto buffer_info =
       details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
-                                  uint8_t, platform::float16>()(tensor);
+                                  uint8_t, int8_t, platform::float16>()(tensor);
   return buffer_info;
 }
...
...
paddle/scripts/paddle_build.sh
...
...
@@ -335,12 +335,18 @@ function assert_api_not_changed() {
     fi
     python ${PADDLE_ROOT}/tools/diff_api.py ${PADDLE_ROOT}/paddle/fluid/API.spec new.spec
     deactivate
 }

 function assert_api_spec_approvals() {
+    if [ -z ${BRANCH} ]; then
+        BRANCH="develop"
+    fi
+
-    API_CHANGE=`git diff --name-only upstream/develop | grep "paddle/fluid/API.spec" || true`
+    API_CHANGE=`git diff --name-only upstream/$BRANCH | grep "paddle/fluid/API.spec" || true`
     echo "checking API.spec change, PR: ${GIT_PR_ID}, changes: ${API_CHANGE}"
     if [ ${API_CHANGE} ] && [ "${GIT_PR_ID}" != "" ]; then
         # TODO: curl -H 'Authorization: token ${TOKEN}'
-        APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews | \
+        # NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
+        APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
         python ${PADDLE_ROOT}/tools/check_pr_approval.py 2 7845005 2887803 728699 13348433`
         echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
         if [ "${APPROVALS}" == "FALSE" ]; then
...
...
@@ -622,11 +628,12 @@ function main() {
       cicheck)
         cmake_gen ${PYTHON_ABI:-""}
         build
-        assert_api_not_changed ${PYTHON_ABI:-""}
         run_test
         gen_capi_package
         gen_fluid_inference_lib
         test_fluid_inference_lib
+        assert_api_not_changed ${PYTHON_ABI:-""}
+        assert_api_spec_approvals
         ;;
       *)
         print_usage
...
...
python/paddle/dataset/movielens.py
...
...
@@ -24,6 +24,7 @@ set and test set into paddle reader creators.
 from __future__ import print_function

+import numpy as np
 import zipfile
 import paddle.dataset.common
 import re
...
...
@@ -150,12 +151,12 @@ def __initialize_meta_info__():
 def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
     fn = __initialize_meta_info__()
-    rand = random.Random(x=rand_seed)
+    np.random.seed(rand_seed)
     with zipfile.ZipFile(file=fn) as package:
         with package.open('ml-1m/ratings.dat') as rating:
             for line in rating:
                 line = cpt.to_text(line, encoding='latin')
-                if (rand.random() < test_ratio) == is_test:
+                if (np.random.random() < test_ratio) == is_test:
                     uid, mov_id, rating, _ = line.strip().split("::")
                     uid = int(uid)
                     mov_id = int(mov_id)
...
...
python/paddle/fluid/framework.py
...
...
@@ -95,6 +95,8 @@ def convert_np_dtype_to_dtype_(np_dtype):
         return core.VarDesc.VarType.INT16
     elif dtype == np.uint8:
         return core.VarDesc.VarType.UINT8
+    elif dtype == np.int8:
+        return core.VarDesc.VarType.INT8
     else:
         raise ValueError("Not supported numpy dtype %s" % dtype)
...
...
python/paddle/fluid/layers/io.py
...
...
@@ -246,7 +246,11 @@ def Send(endpoints, send_vars, dummy_output=None, sync=True):
             rpc_op_role_name: core.op_proto_and_checker_maker.OpRole.RPC
         })
     if sync:
-        helper.append_op(type="send_barrier", attrs={"endpoints": endpoints})
+        helper.append_op(
+            type="send_barrier",
+            inputs={"X": dummy_output},
+            outputs={"Out": []},
+            attrs={"endpoints": endpoints})


 def Recv(endpoints, get_vars, dummy_input=None, sync=True):
...
...
@@ -282,7 +286,10 @@ def Recv(endpoints, get_vars, dummy_input=None, sync=True):
         attrs={"endpoints": endpoints,
                "epmap": epmap})
     if sync:
-        helper.append_op(type="fetch_barrier", attrs={"endpoints": endpoints})
+        helper.append_op(
+            type="fetch_barrier",
+            outputs={"Out": get_vars},
+            attrs={"endpoints": endpoints})
     return get_vars
...
...
python/paddle/fluid/layers/metric_op.py
...
...
@@ -119,10 +119,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
     helper = LayerHelper("auc", **locals())
     auc_out = helper.create_tmp_variable(dtype="float64")
     # make tp, tn, fp, fn persistable, so that can accumulate all batches.
-    tp = helper.create_global_variable(persistable=True, dtype='int64')
-    tn = helper.create_global_variable(persistable=True, dtype='int64')
-    fp = helper.create_global_variable(persistable=True, dtype='int64')
-    fn = helper.create_global_variable(persistable=True, dtype='int64')
+    tp = helper.create_global_variable(
+        persistable=True, dtype='int64', shape=[num_thresholds])
+    tn = helper.create_global_variable(
+        persistable=True, dtype='int64', shape=[num_thresholds])
+    fp = helper.create_global_variable(
+        persistable=True, dtype='int64', shape=[num_thresholds])
+    fn = helper.create_global_variable(
+        persistable=True, dtype='int64', shape=[num_thresholds])
     for var in [tp, tn, fp, fn]:
         helper.set_variable_initializer(
             var, Constant(
...
...
python/paddle/fluid/layers/nn.py
...
...
@@ -17,6 +17,7 @@ All layers just related to the neural network.
 from __future__ import print_function

+import numpy as np
 from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
...
...
@@ -24,7 +25,6 @@ from ..param_attr import ParamAttr
 from .layer_function_generator import autodoc, templatedoc
 from .tensor import concat
 from . import utils
-import random
 from .. import unique_name
 from functools import reduce
...
...
@@ -54,6 +54,7 @@ __all__ = [
     'conv2d_transpose',
     'conv3d_transpose',
     'sequence_expand',
+    'sequence_pad',
     'lstm_unit',
     'reduce_sum',
     'reduce_mean',
...
...
@@ -89,6 +90,7 @@ __all__ = [
     'lod_reset',
     'lrn',
     'pad',
+    'pad_constant_like',
     'label_smooth',
     'roi_pool',
     'dice_loss',
...
...
@@ -107,6 +109,7 @@ __all__ = [
     'flatten',
     'sequence_mask',
     'stack',
+    'unstack',
 ]
...
...
@@ -2657,6 +2660,51 @@ def sequence_expand(x, y, ref_level=-1, name=None):
     return tmp


+@templatedoc()
+def sequence_pad(x, pad_value, maxlen=None):
+    """
+    ${comment}
+
+    Args:
+        x(Variable): Input variable which should contain lod information.
+        pad_value(Variable): The Variable that holds values that will be
+            filled into padded steps. It can be a scalar or a tensor whose
+            shape equals to time steps in sequences. If it's a scalar, it
+            will be automatically broadcasted to the shape of time step.
+        maxlen(int, default None): The length of padded sequences. It can be
+            None or any positive int. When it is None, all sequences will be
+            padded up to the length of the longest one among them; when it is
+            a certain positive value, it must be greater than the length of
+            the longest original sequence.
+
+    Returns:
+        Variable: The padded sequence batch. All sequences have the same
+            length.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy
+
+            x = fluid.layers.data(name='y', shape=[10, 5],
+                             dtype='float32', lod_level=1)
+            pad_value = fluid.layers.assign(input=numpy.array([0]))
+            out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
+    """
+
+    helper = LayerHelper('sequence_pad', input=x, **locals())
+    dtype = helper.input_dtype()
+    out = helper.create_tmp_variable(dtype)
+    if maxlen is None:
+        maxlen = -1
+    helper.append_op(
+        type='sequence_pad',
+        inputs={'X': x, 'PadValue': pad_value},
+        outputs={'Out': out},
+        attrs={'padded_length': maxlen})
+    return out
+
+
 def beam_search(pre_ids, pre_scores, ids,
...
...
@@ -4793,6 +4841,86 @@ def pad(x, paddings, pad_value=0., name=None):
     return out


+def pad_constant_like(x, y, pad_value=0., name=None):
+    """
+    Pad input(Y) with :attr:`pad_value`; the number of values padded to
+    the edges of each axis is specified by the difference of the shape
+    of X and Y, i.e. ((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n))
+    are the unique pad widths for each axis. The input should be a k-D
+    tensor (k > 0 and k < 7).
+
+    See below for an example.
+
+    .. code-block:: text
+
+        Given:
+            X = [[[[ 0, 1, 2],
+                   [ 3, 4, 5]],
+                  [[ 6, 7, 8],
+                   [ 9, 10, 11]],
+                  [[12, 13, 14],
+                   [15, 16, 17]]],
+                 [[[18, 19, 20],
+                   [21, 22, 23]],
+                  [[24, 25, 26],
+                   [27, 28, 29]],
+                  [[30, 31, 32],
+                   [33, 34, 35]]]]
+            X.shape = (2, 3, 2, 3)
+
+            Y = [[[[35, 36, 37]],
+                  [[38, 39, 40]],
+                  [[41, 42, 43]]]]
+            Y.shape = (1, 3, 1, 3)
+
+        And
+            pad_value = -1,
+
+        Return:
+            Out = [[[[35, 36, 37],
+                     [-1, -1, -1]],
+                    [[38, 39, 40],
+                     [-1, -1, -1]],
+                    [[41, 42, 43],
+                     [-1, -1, -1]]],
+                   [[[-1, -1, -1],
+                     [-1, -1, -1]],
+                    [[-1, -1, -1],
+                     [-1, -1, -1]],
+                    [[-1, -1, -1],
+                     [-1, -1, -1]]]]
+            Out.shape = (2, 3, 2, 3)
+
+    Args:
+        x (Variable): The input tensor variable.
+        y (Variable): The input tensor variable.
+        pad_value (float): The constant value used to pad.
+        name(str|None): A name for this layer(optional). If set None, the
+            layer will be named automatically.
+
+    Returns:
+        Variable: The padded tensor variable.
+
+    Examples:
+        .. code-block:: python
+
+            # x is a rank 4 tensor variable, x.shape = (2, 3, 2, 3)
+            # y is a rank 4 tensor variable, y.shape = (1, 3, 1, 3)
+            out = fluid.layers.pad_constant_like(x=x, y=y, pad_value=0.)
+            # out is a rank 4 tensor variable, and out.shape = [2, 3, 2, 3]
+    """
+    helper = LayerHelper('pad_constant_like', input=x, **locals())
+    dtype = helper.input_dtype()
+    out = helper.create_tmp_variable(dtype)
+    helper.append_op(
+        type='pad_constant_like',
+        inputs={'X': x, 'Y': y},
+        outputs={'Out': out},
+        attrs={'pad_value': float(pad_value)})
+    return out
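The worked example in the docstring can be checked with numpy's own pad, which is also how the unit test below (test_pad_constant_like.py) builds its expected output; a sketch under the same shapes (illustration only):

    import numpy as np

    x_shape, y_shape, pad_value = (2, 3, 2, 3), (1, 3, 1, 3), -1.
    y = np.arange(35, 44, dtype='float32').reshape(y_shape)  # the Y of the example
    paddings = [(0, dx - dy) for dx, dy in zip(x_shape, y_shape)]
    out = np.pad(y, paddings, mode='constant', constant_values=pad_value)
    assert out.shape == x_shape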
def label_smooth(label, prior_dist=None, epsilon=0.1,
...
...
@@ -5187,7 +5315,7 @@ def random_crop(x, shape, seed=None):
     dtype = x.dtype
     out = helper.create_tmp_variable(dtype)
     if seed is None:
-        seed = random.randint(-65536, 65535)
+        seed = np.random.randint(-65536, 65536)
     op_attrs = {"shape": shape}
     if isinstance(seed, int):
         op_attrs["startup_seed"] = seed
...
...
@@ -5389,7 +5517,7 @@ def crop(x, shape=None, offsets=None, name=None):
     helper = LayerHelper('crop', **locals())

     if not (isinstance(shape, list) or isinstance(shape, tuple) or \
             isinstance(shape, Variable)):
         raise ValueError("The shape should be a list, tuple or Variable.")

     if offsets is None:
...
...
@@ -5501,7 +5629,7 @@ def prelu(x, mode, param_attr=None, name=None):
         channel: elements in a channel share same weight
         element: each element has a weight
     name(str|None): A name for this layer(optional). If set None, the layer
         will be named automatically.

     Returns:
         Variable: The output tensor with the same shape as input.
...
...
@@ -5615,23 +5743,23 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
     Supposing :code:`x` is a Tensor with shape [d_1, d_2, ..., d_n], the
     :code:`y` is a mask with shape [d_1, d_2, ..., d_n, maxlen], where:

     .. math::

         y(i_1, i_2,..., i_n, j) = (j < x(i_1, i_2,..., i_n))

     Args:
         x (Variable): Input tensor of sequence_mask layer,
             whose elements are integers less than :code:`maxlen`.
         maxlen (int|None): Maximum length of the sequence. If :code:`maxlen`
             is None, it would be replaced with :math:`max(x)`.
         dtype (np.dtype|core.VarDesc.VarType|str): Data type of the output.
         name (str|None): A name for this layer(optional). If set None, the
             layer will be named automatically.

     Returns:
         Variable: The output sequence mask.
     """

     helper = LayerHelper('sequence_mask', **locals())
...
...
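The sequence_mask formula above, y(i_1, ..., i_n, j) = (j < x(i_1, ..., i_n)), is easy to verify in numpy (illustrative sketch, not part of the patch):

    import numpy as np

    x = np.array([2, 3, 1])  # valid length per row
    maxlen = 4
    y = (np.arange(maxlen)[None, :] < x[:, None]).astype('int64')
    # y == [[1, 1, 0, 0],
    #       [1, 1, 1, 0],
    #       [1, 0, 0, 0]]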
@@ -5656,23 +5784,23 @@ def stack(x, axis=0):
     **Stack Layer**

     This layer stacks all of the input :code:`x` along axis.

     Input :code:`x` can be a single variable, a :code:`list` of variables,
     or a :code:`tuple` of variables. If :code:`x` is a :code:`list` or
     :code:`tuple`, the shapes of all these variables must be the same.
     Supposing the shape of each input is :math:`[d_0, d_1, ..., d_{n-1}]`,
     the shape of the output variable would be
     :math:`[d_0, d_1, ..., d_{axis}=len(x), ..., d_{n-1}]`.

     If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x[0])+1`.
     If :code:`axis` is None, it would be replaced with 0.

     Args:
         x (Variable|list(Variable)|tuple(Variable)): Input variables.
         axis (int|None): The axis along which all inputs are stacked.

     Returns:
         Variable: The stacked variable.
     """

     helper = LayerHelper('stack', **locals())
...
...
@@ -5686,3 +5814,44 @@ def stack(x, axis=0):
         type='stack', inputs={'X': x}, outputs={'Y': out},
         attrs={'axis': axis})
     return out


+def unstack(x, axis=0, num=None):
+    """
+    **UnStack Layer**
+
+    This layer unstacks input :code:`x` into several tensors along axis.
+
+    If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`.
+    If :code:`num` is None, it would be inferred from :code:`x.shape[axis]`,
+    and if :code:`x.shape[axis]` <= 0 or is unknown, :code:`ValueError` is
+    raised.
+
+    Args:
+        x (Variable): Input variable.
+        axis (int): The axis along which the input is unstacked.
+        num (int|None): The number of output variables.
+
+    Returns:
+        list(Variable): The unstacked variables.
+    """
+
+    helper = LayerHelper('unstack', **locals())
+    if num is None:
+        if axis is None or x.shape[axis] <= 0:
+            raise ValueError('unknown unstack number')
+        else:
+            num = x.shape[axis]
+
+    outs = []
+    for _ in range(num):
+        outs.append(helper.create_tmp_variable(x.dtype))
+
+    helper.append_op(
+        type='unstack',
+        inputs={'X': [x]},
+        outputs={'Y': outs},
+        attrs={'axis': axis, 'num': num})
+    return outs
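Since unstack is the inverse of stack, its behavior mirrors splitting along an axis and squeezing the result; a numpy sketch of the same semantics (illustration only):

    import numpy as np

    x = np.arange(12).reshape(3, 4)
    ys = [np.squeeze(p, 0) for p in np.split(x, x.shape[0], axis=0)]
    # len(ys) == 3, each ys[i].shape == (4,), and np.stack(ys, axis=0) == x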
python/paddle/fluid/optimizer.py
...
...
@@ -46,10 +46,12 @@ class Optimizer(object):
     def __init__(self,
                  learning_rate,
                  regularization=None,
-                 LARS_weight_decay=0.0):
+                 LARS_weight_decay=0.0,
+                 name=None):
         if not isinstance(learning_rate, float) and \
                 not isinstance(learning_rate, framework.Variable):
             raise TypeError("learning rate should be float or Variable")
+        self._name = name
         self.regularization = regularization
         self._learning_rate = learning_rate
         # the learning rate type should be inferred from loss
...
...
@@ -153,6 +155,8 @@ class Optimizer(object):
             dtype: data type of the accumulator variable
             fill_value: value to initialize the accumulator variable
         """
+        if self._name is not None:
+            name = self._name + "_" + name
         if (name in self._accumulators and
                 param.name in self._accumulators[name]):
             raise Exception("Accumulator {} already exists for parameter {}".
...
...
@@ -181,6 +185,8 @@ class Optimizer(object):
         Returns:
             accumulator variable for the parameter
         """
+        if self._name is not None:
+            name = self._name + "_" + name
         if (name not in self._accumulators or
                 param.name not in self._accumulators[name]):
             raise Exception("Accumulator {} does not exist for parameter {}".
...
...
python/paddle/fluid/tests/unittests/dist_se_resnext.py
...
...
@@ -134,7 +134,7 @@ class SE_ResNeXt():
                               size=class_dim,
                               act='softmax',
                               param_attr=fluid.ParamAttr(
-                                  initializer=fluid.initializer.Constant(value=0.2)))
+                                  initializer=fluid.initializer.Constant(value=0.05)))
         return out

     def shortcut(self, input, ch_out, stride):
...
...
@@ -184,7 +184,7 @@ class SE_ResNeXt():
             act=None,
             # avoid pserver CPU init differs from GPU
             param_attr=fluid.ParamAttr(
-                initializer=fluid.initializer.Constant(value=0.2)),
+                initializer=fluid.initializer.Constant(value=0.05)),
             bias_attr=False)
         return fluid.layers.batch_norm(input=conv, act=act)
...
...
@@ -192,13 +192,19 @@ class SE_ResNeXt():
         pool = fluid.layers.pool2d(
             input=input, pool_size=0, pool_type='avg', global_pooling=True)
         stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-        squeeze = fluid.layers.fc(input=pool,
-                                  size=num_channels // reduction_ratio,
-                                  act='relu')
+        squeeze = fluid.layers.fc(
+            input=pool,
+            size=num_channels // reduction_ratio,
+            param_attr=fluid.ParamAttr(
+                initializer=fluid.initializer.Constant(value=0.05)),
+            act='relu')
         stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
-        excitation = fluid.layers.fc(input=squeeze,
-                                     size=num_channels,
-                                     act='sigmoid')
+        excitation = fluid.layers.fc(
+            input=squeeze,
+            size=num_channels,
+            param_attr=fluid.ParamAttr(
+                initializer=fluid.initializer.Constant(value=0.05)),
+            act='sigmoid')
         scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
         return scale
...
...
python/paddle/fluid/tests/unittests/dist_word2vec.py
...
...
@@ -49,28 +49,32 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
                 dtype='float32',
                 is_sparse=IS_SPARSE,
                 param_attr=fluid.ParamAttr(
-                    name='shared_w',
-                    initializer=fluid.initializer.Constant()))
+                    name='shared_w',
+                    initializer=fluid.initializer.Constant(value=0.1)))
             embed_second = fluid.layers.embedding(
                 input=words[1],
                 size=[dict_size, EMBED_SIZE],
                 dtype='float32',
                 is_sparse=IS_SPARSE,
                 param_attr=fluid.ParamAttr(
-                    name='shared_w',
-                    initializer=fluid.initializer.Constant()))
+                    name='shared_w',
+                    initializer=fluid.initializer.Constant(value=0.1)))
             embed_third = fluid.layers.embedding(
                 input=words[2],
                 size=[dict_size, EMBED_SIZE],
                 dtype='float32',
                 is_sparse=IS_SPARSE,
                 param_attr=fluid.ParamAttr(
-                    name='shared_w',
-                    initializer=fluid.initializer.Constant()))
+                    name='shared_w',
+                    initializer=fluid.initializer.Constant(value=0.1)))
             embed_forth = fluid.layers.embedding(
                 input=words[3],
                 size=[dict_size, EMBED_SIZE],
                 dtype='float32',
                 is_sparse=IS_SPARSE,
                 param_attr=fluid.ParamAttr(
-                    name='shared_w',
-                    initializer=fluid.initializer.Constant()))
+                    name='shared_w',
+                    initializer=fluid.initializer.Constant(value=0.1)))

             concat_embed = fluid.layers.concat(
                 input=[embed_first, embed_second, embed_third, embed_forth],
...
...
@@ -80,13 +84,13 @@ class TestDistWord2vec2x2(TestDistRunnerBase):
                 size=HIDDEN_SIZE,
                 act='sigmoid',
                 param_attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.Constant()))
+                    initializer=fluid.initializer.Constant(value=0.1)))
             predict_word = fluid.layers.fc(
                 input=hidden1,
                 size=dict_size,
                 act='softmax',
                 param_attr=fluid.ParamAttr(
-                    initializer=fluid.initializer.Constant()))
+                    initializer=fluid.initializer.Constant(value=0.1)))
             cost = fluid.layers.cross_entropy(
                 input=predict_word, label=words[4])
             avg_cost = fluid.layers.mean(cost)
...
...
python/paddle/fluid/tests/unittests/test_dist_train.py
...
...
@@ -100,7 +100,7 @@ class TestSendOp(unittest.TestCase):
         main.global_block().append_op(
             type="fetch_barrier",
             inputs={},
-            outputs={},
+            outputs={"Out": []},
             attrs={
                 "endpoints": ["127.0.0.1:{0}".format(port)],
                 RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE
...
...
python/paddle/fluid/tests/unittests/test_dist_word2vec.py
...
...
@@ -22,7 +22,7 @@ class TestDistSeResneXt2x2(TestDistBase):
         self._sync_mode = True

     def test_se_resnext(self):
-        self.check_with_place("dist_word2vec.py", delta=1e-7)
+        self.check_with_place("dist_word2vec.py", delta=1e-4)


 class TestDistSeResneXt2x2Async(TestDistBase):
...
...
python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
...
...
@@ -20,41 +20,50 @@ import math
 from op_test import OpTest


-def quantize_max_abs(x, num_bits):
-    range = math.pow(2, num_bits) - 1
+def quantize_max_abs(x, max_range):
     scale = np.max(np.abs(x).flatten())
-    y = np.round(x / scale * range)
+    y = np.round(x / scale * max_range)
     return y, scale


-def dequantize_max_abs(x, num_bits, scale):
-    range = math.pow(2, num_bits) - 1
-    y = (scale / range) * x
+def dequantize_max_abs(x, scale, max_range):
+    y = (scale / max_range) * x
     return y
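A quick round trip through these two reference helpers shows the expected reconstruction error; max_range here is 2^(num_bits-1) - 1, matching the tests below (illustrative sketch, not part of the patch):

    import numpy as np

    x = np.random.randn(4, 5).astype('float32')
    max_range = 2 ** (8 - 1) - 1                # 8-bit symmetric quantization
    scale = np.max(np.abs(x))
    yq = np.round(x / scale * max_range)        # quantize_max_abs
    ydq = (scale / max_range) * yq              # dequantize_max_abs
    assert np.allclose(x, ydq, atol=scale / max_range)  # error within one step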
 class TestFakeDequantizeMaxAbsOp(OpTest):
     def set_args(self):
         self.num_bits = 8
+        self.max_range = math.pow(2, self.num_bits - 1) - 1
+        self.data_type = "float32"

     def setUp(self):
         self.set_args()
         self.op_type = "fake_dequantize_max_abs"
-        x = np.random.randn(31, 65).astype("float32")
-        yq, scale = quantize_max_abs(x, self.num_bits)
-        ydq = dequantize_max_abs(yq, self.num_bits, scale)
+        x = np.random.randn(31, 65).astype(self.data_type)
+        yq, scale = quantize_max_abs(x, self.max_range)
+        ydq = dequantize_max_abs(yq, scale, self.max_range)

-        self.inputs = {'X': yq}
-        self.attrs = {'num_bits': self.num_bits, 'scale': float(scale)}
+        self.inputs = {
+            'X': yq,
+            'Scale': np.array(scale).astype(self.data_type)
+        }
+        self.attrs = {'max_range': self.max_range}
         self.outputs = {'Out': ydq}

     def test_check_output(self):
         self.check_output()


-class TestFakeDequantizeMaxAbsOp5Bits(OpTest):
+class TestFakeDequantizeMaxAbsOpDouble(TestFakeDequantizeMaxAbsOp):
     def set_args(self):
         self.num_bits = 8
+        self.max_range = math.pow(2, self.num_bits - 1) - 1
+        self.data_type = "float64"
+
+
+class TestFakeDequantizeMaxAbsOp5Bits(TestFakeDequantizeMaxAbsOp):
+    def set_args(self):
         self.num_bits = 5
+        self.max_range = math.pow(2, self.num_bits - 1) - 1
+        self.data_type = "float32"


 if __name__ == "__main__":
...
...
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py
...
...
@@ -43,13 +43,13 @@ def fusion_lstm(
         act_cell, act_cand)


-class TestLstmOp(OpTest):
-    def set_argument(self):
-        self.lod = [[2, 3, 2]]
+class TestFusionLSTMOp(OpTest):
+    def set_conf(self):
+        pass

     def setUp(self):
         self.op_type = 'fusion_lstm'
-        self.lod = [[2, 3, 2]]
+        self.lod = [[2, 3, 5, 4]]
         self.M = 8
         self.D = 16
         self.has_initial_state = False
...
...
@@ -58,33 +58,33 @@ class TestLstmOp(OpTest):
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
         self.use_peepholes = False
-        self.set_argument()
+        self.set_conf()

         T = sum(self.lod[0])
         bs = len(self.lod[0])

-        x = np.random.normal(size=(T, self.M)).astype('float64')
+        x = np.random.normal(size=(T, self.M)).astype('float32')
         if self.has_initial_state:
-            h0 = np.random.normal(size=(bs, self.D)).astype('float64')
-            c0 = np.random.normal(size=(bs, self.D)).astype('float64')
+            h0 = np.random.normal(size=(bs, self.D)).astype('float32')
+            c0 = np.random.normal(size=(bs, self.D)).astype('float32')
         else:
-            h0 = np.zeros((bs, self.D)).astype('float64')
-            c0 = np.zeros((bs, self.D)).astype('float64')
+            h0 = np.zeros((bs, self.D)).astype('float32')
+            c0 = np.zeros((bs, self.D)).astype('float32')

-        wh = np.random.normal(size=(self.D, 4 * self.D)).astype('float64')
+        wh = np.random.normal(size=(self.D, 4 * self.D)).astype('float32')
         if self.use_peepholes:
-            b = np.random.normal(size=(1, 7 * self.D)).astype('float64')
+            b = np.random.normal(size=(1, 7 * self.D)).astype('float32')
         else:
-            b = np.random.normal(size=(1, 4 * self.D)).astype('float64')
+            b = np.random.normal(size=(1, 4 * self.D)).astype('float32')

         w_b = np.copy(b[:, 0:4 * self.D])
         w_c = b[:, 4 * self.D:] if self.use_peepholes else None

         # this is the weight of fc
-        wx = np.random.normal(size=(self.M, 4 * self.D)).astype('float64')
+        wx = np.random.normal(size=(self.M, 4 * self.D)).astype('float32')
         # this is the bias of fc
         # and it should be manually added into the bias of this fusion LSTM
-        bx = np.random.normal(size=(1, 4 * self.D)).astype('float64')
+        bx = np.random.normal(size=(1, 4 * self.D)).astype('float32')
         b[0, 0:4 * self.D] += bx[0, :]
         h, c = fusion_lstm(x, self.lod, wx, bx, h0, c0, wh, w_b, w_c,
                            self.is_reverse, ACTIVATION[self.act_gate],
...
...
@@ -114,35 +114,45 @@ class TestLstmOp(OpTest):
         }

     def test_check_output(self):
-        self.check_output(atol=1e-8)
+        self.check_output()


-class TestLstmOpInitReverse(TestLstmOp):
-    def set_argument(self):
+class TestFusionLSTMOpInit(TestFusionLSTMOp):
+    def set_conf(self):
         self.has_initial_state = True


+class TestFusionLSTMOpReverse(TestFusionLSTMOp):
+    def set_conf(self):
+        self.is_reverse = True
+
+
+class TestFusionLSTMOpInitReverse(TestFusionLSTMOp):
+    def set_conf(self):
+        self.has_initial_state = True
         self.is_reverse = True


-class TestLstmOpMD1(TestLstmOp):
-    def set_argument(self):
+class TestFusionLSTMOpMD1(TestFusionLSTMOp):
+    def set_conf(self):
         self.M = 36
         self.D = 8


-class TestLstmOpMD2(TestLstmOp):
-    def set_argument(self):
+class TestFusionLSTMOpMD2(TestFusionLSTMOp):
+    def set_conf(self):
         self.M = 8
         self.D = 8


-class TestLstmOpMD3(TestLstmOp):
-    def set_argument(self):
+class TestFusionLSTMOpMD3(TestFusionLSTMOp):
+    def set_conf(self):
         self.M = 15
         self.D = 3


-class TestLstmOpBS1(TestLstmOp):
-    def set_argument(self):
+class TestFusionLSTMOpBS1(TestFusionLSTMOp):
+    def set_conf(self):
         self.lod = [[3]]
         self.D = 16
...
...
python/paddle/fluid/tests/unittests/test_fusion_seqexpand_concat_fc_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest
from test_fusion_lstm_op import fc, ACTIVATION


def fusion_seqexpand_concat_fc(xs, lod, w, b, fc_act):
    T = sum(lod[0])
    N = len(lod[0])
    num_inputs = len(xs)
    D = w.shape[1]

    expanded_inputs = [xs[0]]
    for i in range(num_inputs - 1):
        x = xs[i + 1]
        assert x.shape[0] == N
        expanded = np.repeat(x, lod[0], axis=0)
        assert expanded.shape[0] == T
        assert expanded.shape[1] == x.shape[1]
        expanded_inputs.append(expanded)

    fc_input = np.concatenate(expanded_inputs, axis=1)
    assert fc_input.shape[0] == T
    assert fc_input.shape[1] == w.shape[0]
    fc_out = fc(fc_input, w, b)
    fc_out = fc_act(fc_out)
    assert fc_out.shape[0] == T
    assert fc_out.shape[1] == D
    return fc_out
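The core of this reference is the sequence-expand step: every batch-level row of the later inputs is repeated to the length of the corresponding sequence before the concat and FC. A tiny standalone numpy demonstration with made-up sizes (illustration only):

    import numpy as np

    lod0 = [3, 5]                        # two sequences, lengths 3 and 5
    x = np.array([[1., 2.], [3., 4.]])   # one row per sequence (N = 2)
    expanded = np.repeat(x, lod0, axis=0)
    # expanded.shape == (8, 2): row [1, 2] repeated 3 times, row [3, 4] 5 times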
class
TestFusionSeqExpandConcatFCOp
(
OpTest
):
def
set_conf
(
self
):
pass
def
setUp
(
self
):
self
.
op_type
=
'fusion_seqexpand_concat_fc'
self
.
lod
=
[[
3
,
5
,
8
,
2
]]
self
.
inputs_M
=
[
15
,
10
,
10
]
self
.
D
=
20
self
.
with_bias
=
True
self
.
fc_act
=
'relu'
self
.
set_conf
()
T
=
sum
(
self
.
lod
[
0
])
bs
=
len
(
self
.
lod
[
0
])
num_inputs
=
len
(
self
.
inputs_M
)
x0
=
np
.
random
.
normal
(
size
=
(
T
,
self
.
inputs_M
[
0
])).
astype
(
'float32'
)
xs
=
[
x0
]
for
i
in
range
(
num_inputs
-
1
):
xi
=
np
.
random
.
normal
(
size
=
(
bs
,
self
.
inputs_M
[
i
+
1
])).
astype
(
'float32'
)
xs
.
append
(
xi
)
# fc weight and bias
w
=
np
.
random
.
normal
(
size
=
(
sum
(
self
.
inputs_M
),
self
.
D
)).
astype
(
'float32'
)
b
=
np
.
random
.
normal
(
size
=
(
1
,
self
.
D
)).
astype
(
'float32'
)
if
self
.
with_bias
else
np
.
zeros
(
(
1
,
self
.
D
)).
astype
(
'float32'
)
out
=
fusion_seqexpand_concat_fc
(
xs
,
self
.
lod
,
w
,
b
,
ACTIVATION
[
self
.
fc_act
])
self
.
inputs
=
{
'X'
:
[(
'x0'
,
(
x0
,
self
.
lod
))],
'FCWeight'
:
w
}
normal_lod
=
[[
1
]
*
bs
]
for
i
in
range
(
num_inputs
-
1
):
self
.
inputs
[
'X'
].
append
((
'x%d'
%
(
i
+
1
),
(
xs
[
i
+
1
],
normal_lod
)))
if
self
.
with_bias
:
self
.
inputs
[
'FCBias'
]
=
b
self
.
outputs
=
{
'Out'
:
(
out
,
self
.
lod
)}
self
.
attrs
=
{
'fc_activation'
:
self
.
fc_act
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestFusionSECFCOpNonBias
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
with_bias
=
False
class
TestFusionSECFCOpNonAct
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
fc_act
=
'identity'
class
TestFusionSECFCOpMD1
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
inputs_M
=
[
3
,
4
,
2
,
1
,
5
]
self
.
D
=
8
class
TestFusionSECFCOpMD2
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
lod
=
[[
5
,
6
]]
self
.
inputs_M
=
[
1
,
1
]
class
TestFusionSECFCOpBS1_1
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
lod
=
[[
1
]]
self
.
inputs_M
=
[
3
,
4
,
2
]
class
TestFusionSECFCOpBS1_2
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
lod
=
[[
1
]]
self
.
inputs_M
=
[
3
,
4
]
class
TestFusionSECFCOpBS1_3
(
TestFusionSeqExpandConcatFCOp
):
def
set_conf
(
self
):
self
.
lod
=
[[
5
]]
self
.
inputs_M
=
[
6
,
3
]
if
__name__
==
'__main__'
:
unittest
.
main
()
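As the reference function above shows, the fused op repeats each batch-level input along the sequence dimension, concatenates all inputs on the feature axis, and applies a single FC. A standalone toy sketch of that data flow, with invented shapes and a plain matmul standing in for the fc() helper:

import numpy as np

# Toy illustration of what fusion_seqexpand_concat_fc computes.
lod = [[2, 1]]                                     # two sequences, lengths 2 and 1, T = 3
x0 = np.arange(6, dtype='float32').reshape(3, 2)   # sequence-level input (T x 2)
x1 = np.ones((2, 1), dtype='float32')              # batch-level input (N x 1)

expanded = np.repeat(x1, lod[0], axis=0)           # repeat each row to its sequence length
fc_input = np.concatenate([x0, expanded], axis=1)  # (T, 3)
w = np.full((3, 4), 0.5, dtype='float32')          # weight: sum of input widths x D
out = fc_input.dot(w)                              # identity activation for the sketch
print(out.shape)                                   # (3, 4)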
python/paddle/fluid/tests/unittests/test_pad_constant_like.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest


class TestPadOp(OpTest):
    def setUp(self):
        self.initTestCase()
        self.op_type = "pad_constant_like"
        self.inputs = {
            'X': np.random.random(self.x_shape).astype("float32"),
            'Y': np.random.random(self.y_shape).astype("float32")
        }
        self.attrs = {}
        self.attrs['pad_value'] = self.pad_value
        self.outputs = {
            'Out': np.pad(self.inputs['Y'],
                          self.paddings,
                          mode='constant',
                          constant_values=self.pad_value)
        }

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['Y'], 'Out', max_relative_error=0.006)

    def initTestCase(self):
        self.x_shape = (16, 16)
        self.y_shape = (3, 16)
        self.pad_value = 0.1
        self.paddings = [(0, 13), (0, 0)]


class TestCase1(TestPadOp):
    def initTestCase(self):
        self.x_shape = (4, 3, 4, 4)
        self.y_shape = (2, 3, 4, 4)
        self.paddings = [(0, 2), (0, 0), (0, 0), (0, 0)]
        self.pad_value = 0.5


class TestCase2(TestPadOp):
    def initTestCase(self):
        self.x_shape = (4, 3, 4, 4)
        self.y_shape = (2, 3, 2, 4)
        self.paddings = [(0, 2), (0, 0), (0, 2), (0, 0)]
        self.pad_value = 0.5


if __name__ == '__main__':
    unittest.main()
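These cases pin down pad_constant_like's semantics: Y is padded with pad_value at the high end of each dimension until it matches X's shape. A quick numpy check of that reading (toy shapes, not part of the test file):

import numpy as np

# pad_constant_like pads Y up to X's shape with a constant; only trailing
# padding per axis, matching the (0, k) padding tuples used in the tests.
x = np.zeros((4, 3))
y = np.ones((2, 3))
paddings = [(0, x.shape[i] - y.shape[i]) for i in range(y.ndim)]
out = np.pad(y, paddings, mode='constant', constant_values=0.5)
assert out.shape == x.shape
print(out)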
python/paddle/fluid/tests/unittests/test_scale_op.py
...
...
@@ -17,6 +17,8 @@ from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator


class TestScaleOp(OpTest):
...
...
@@ -33,5 +35,57 @@ class TestScaleOp(OpTest):
        self.check_grad(['X'], 'Out')

+class TestScaleOpSelectedRows(unittest.TestCase):
+    def check_with_place(self, place, in_name, out_name):
+        scope = core.Scope()
+
+        # create and initialize the input SelectedRows variable
+        in_height = 10
+        in_rows = [0, 4, 7]
+        in_row_numel = 12
+        scale = 2.0
+
+        in_selected_rows = scope.var(in_name).get_selected_rows()
+        in_selected_rows.set_height(in_height)
+        in_selected_rows.set_rows(in_rows)
+        in_array = np.random.random(
+            (len(in_rows), in_row_numel)).astype("float32")
+
+        in_tensor = in_selected_rows.get_tensor()
+        in_tensor.set(in_array, place)
+
+        # create and initialize the output SelectedRows variable
+        out_selected_rows = scope.var(out_name).get_selected_rows()
+        out_tensor = out_selected_rows.get_tensor()
+        out_tensor._set_dims(in_tensor._get_dims())
+
+        # create and run the scale operator
+        scale_op = Operator("scale", X=in_name, Out=out_name, scale=scale)
+        scale_op.run(scope, place)
+
+        # get and compare results
+        out_height = out_selected_rows.height()
+        out_rows = out_selected_rows.rows()
+        result_array = np.array(out_tensor)
+
+        assert (in_array * scale == result_array).all()
+        assert in_height == out_height
+        assert in_rows == out_rows
+
+    def test_scale_selected_rows(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            self.check_with_place(place, 'in', 'out')
+
+    def test_scale_selected_rows_inplace(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            self.check_with_place(place, 'in', 'in')
+
+
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_sequence_pad_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest


class TestSequencePadOp(OpTest):
    def set_attr(self):
        self.x_shape = [12, 4]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [1.0]
        self.padded_length = -1
        self.dtype = 'float32'

    def set_data(self):
        x_data = np.random.uniform(0.1, 0.5, self.x_shape).astype(self.dtype)
        pad_value_data = np.array(self.pad_value).astype(self.dtype)
        self.inputs = {
            'X': (x_data, self.x_len_lod),
            'PadValue': pad_value_data
        }
        self.attrs = {'padded_length': self.padded_length}

    def compute(self):
        # get padded length
        padded_length = self.padded_length
        x_len_lod_0 = self.x_len_lod[0]
        if padded_length == -1:
            max_seq_len = 0
            for l in x_len_lod_0:
                max_seq_len = max(max_seq_len, l)
            padded_length = max_seq_len

        # do padding
        x_data = self.inputs['X'][0]
        pad_value_data = self.inputs['PadValue']
        if pad_value_data.shape == (1, ):
            pad_value_data = np.broadcast_to(
                pad_value_data, shape=x_data.shape[1:])
        padded_sequences = []
        start_idx = 0
        for l in x_len_lod_0:
            end_idx = start_idx + l
            seq = x_data[start_idx:end_idx]
            to_pad_len = padded_length - l
            for _ in range(to_pad_len):
                seq = np.append(seq, pad_value_data[np.newaxis, :], axis=0)
            padded_sequences.append(seq)
            start_idx = end_idx

        out_data = np.array(padded_sequences)
        self.outputs = {'Out': out_data}

    def setUp(self):
        self.op_type = 'sequence_pad'
        self.set_attr()
        self.set_data()
        self.compute()

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(["X"], "Out")


class TestSequencePadOp2(TestSequencePadOp):
    def set_attr(self):
        self.x_shape = [12, 4]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [1.0, 2.0, 3.0, 4.0]
        self.padded_length = -1
        self.dtype = 'float32'


class TestSequencePadOp3(TestSequencePadOp):
    def set_attr(self):
        self.x_shape = [12, 4]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [1.0]
        self.padded_length = 7
        self.dtype = 'float32'


class TestSequencePadOp4(TestSequencePadOp):
    def set_attr(self):
        self.x_shape = [12, 4]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [1.0, 2.0, 3.0, 4.0]
        self.padded_length = 7
        self.dtype = 'float32'


class TestSequencePadOp5(TestSequencePadOp):
    def set_attr(self):
        self.x_shape = [12, 2, 2]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [1.0]
        self.padded_length = -1
        self.dtype = 'float32'


class TestSequencePadOp6(TestSequencePadOp):
    def set_attr(self):
        self.x_shape = [12, 2, 2]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [[1.0, 2.0], [3.0, 4.0]]
        self.padded_length = -1
        self.dtype = 'float32'


class TestSequencePadOp7(TestSequencePadOp):
    def set_attr(self):
        self.x_shape = [12, 2, 2]
        self.x_len_lod = [[2, 3, 4, 3]]
        self.pad_value = [1.0]
        self.padded_length = 7
        self.dtype = 'float32'
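The compute() reference above derives the padded length from the LoD when padded_length is -1 and tops each sequence up with the pad value. A condensed standalone sketch of the same padding logic (toy data, outside the OpTest harness):

import numpy as np

# sequence_pad in miniature: lengths come from the LoD, every sequence is
# topped up with pad_value to the longest length, then stacked.
x = np.arange(12, dtype='float32').reshape(6, 2)  # flattened sequences (sum(lens) x 2)
lens = [2, 1, 3]
pad_value = np.zeros(2, dtype='float32')
padded_len = max(lens)

out, start = [], 0
for l in lens:
    seq = x[start:start + l]
    pad = np.tile(pad_value, (padded_len - l, 1))
    out.append(np.concatenate([seq, pad], axis=0))
    start += l
out = np.stack(out)                               # (batch, padded_len, 2)
print(out.shape)                                  # (3, 3, 2)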
python/paddle/fluid/tests/unittests/test_tensor.py
...
...
@@ -59,6 +59,27 @@ class TestTensor(unittest.TestCase):
        self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
        self.assertAlmostEqual(2.0, tensor_array_2[19, 11])

+    def test_int8_tensor(self):
+        scope = core.Scope()
+        var = scope.var("int8_tensor")
+        cpu_tensor = var.get_tensor()
+        tensor_array = numpy.random.randint(
+            -127, high=128, size=[100, 200], dtype=numpy.int8)
+        place = core.CPUPlace()
+        cpu_tensor.set(tensor_array, place)
+        cpu_tensor_array_2 = numpy.array(cpu_tensor)
+        self.assertAlmostEqual(cpu_tensor_array_2.all(), tensor_array.all())
+
+        if core.is_compiled_with_cuda():
+            cuda_tensor = var.get_tensor()
+            tensor_array = numpy.random.randint(
+                -127, high=128, size=[100, 200], dtype=numpy.int8)
+            place = core.CUDAPlace(0)
+            cuda_tensor.set(tensor_array, place)
+            cuda_tensor_array_2 = numpy.array(cuda_tensor)
+            self.assertAlmostEqual(cuda_tensor_array_2.all(),
+                                   tensor_array.all())
+
    def test_int_lod_tensor(self):
        place = core.CPUPlace()
        scope = core.Scope()
...
...
python/paddle/fluid/tests/unittests/test_unstack_op.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from op_test import OpTest
import numpy as np
import unittest


class TestUnStackOpBase(OpTest):
    def initDefaultParameters(self):
        self.input_dim = (5, 6, 7)
        self.axis = 0
        self.dtype = 'float32'

    def initParameters(self):
        pass

    def get_y_names(self):
        y_names = []
        for i in range(self.input_dim[self.axis]):
            y_names.append('y{}'.format(i))
        return y_names

    def setUp(self):
        self.initDefaultParameters()
        self.initParameters()
        self.op_type = 'unstack'
        self.x = np.random.random(size=self.input_dim).astype(self.dtype)

        outs = np.split(self.x, self.input_dim[self.axis], self.axis)
        new_shape = list(self.input_dim)
        del new_shape[self.axis]
        y_names = self.get_y_names()
        tmp = []
        for i in range(self.input_dim[self.axis]):
            tmp.append((y_names[i], np.reshape(outs[i], new_shape)))

        self.inputs = {'X': self.x}
        self.outputs = {'Y': tmp}
        self.attrs = {'axis': self.axis, 'num': self.input_dim[self.axis]}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad('X', self.get_y_names())


class TestStackOp3(TestUnStackOpBase):
    def initParameters(self):
        self.axis = -1


class TestStackOp4(TestUnStackOpBase):
    def initParameters(self):
        self.axis = -3


class TestStackOp5(TestUnStackOpBase):
    def initParameters(self):
        self.axis = 1


class TestStackOp6(TestUnStackOpBase):
    def initParameters(self):
        self.axis = 2


if __name__ == '__main__':
    unittest.main()
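The setUp above builds the expected outputs with np.split plus a reshape that drops the split axis; the same relationship in a few lines of standalone numpy (toy shapes, not part of the test file):

import numpy as np

# unstack(x, axis): one slice per index along axis, with that axis removed.
x = np.arange(24).reshape(2, 3, 4)
axis = 1
pieces = [np.squeeze(p, axis=axis)
          for p in np.split(x, x.shape[axis], axis=axis)]
print(len(pieces), pieces[0].shape)  # 3 (2, 4)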
python/paddle/fluid/tests/unittests/test_variable.py
...
...
@@ -31,7 +31,8 @@ class TestVariable(unittest.TestCase):
        self.assertEqual(DT.INT16, convert("int16"))
        self.assertEqual(DT.INT64, convert("int64"))
        self.assertEqual(DT.BOOL, convert("bool"))
-        self.assertRaises(ValueError, lambda: convert("int8"))
+        self.assertEqual(DT.INT8, convert("int8"))
+        self.assertEqual(DT.UINT8, convert("uint8"))

    def test_var(self):
        b = default_main_program().current_block()
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
This diff is collapsed.