Commit 7a517dc9: merge develop
Authored April 18, 2018 by dzhwinter
Parents: 61cb4f2f, 387e10c6

Showing 107 changed files with 2,207 additions and 462 deletions.
Changed files:

doc/fluid/api/layers.rst  +5 -0
doc/fluid/dev/contribute_to_paddle_cn.md  +1 -0
doc/fluid/dev/contribute_to_paddle_en.md  +1 -0
doc/fluid/dev/index_cn.rst  +2 -0
doc/fluid/dev/index_en.rst  +2 -0
doc/fluid/dev/write_docs_cn.rst  +1 -0
doc/fluid/dev/write_docs_en.rst  +1 -0
doc/v2/api/data/data_reader.rst  +37 -1
doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md  +672 -1
doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md  +672 -1
paddle/fluid/framework/CMakeLists.txt  +1 -1
paddle/fluid/framework/details/fetch_op_handle.cc  +9 -9
paddle/fluid/framework/details/multi_devices_graph_builder.cc  +106 -94
paddle/fluid/framework/details/multi_devices_graph_builder.h  +14 -0
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc  +10 -5
paddle/fluid/framework/details/scale_loss_grad_op_handle.cc  +4 -3
paddle/fluid/framework/grad_op_desc_maker.h  +2 -2
paddle/fluid/framework/op_registry.h  +2 -30
paddle/fluid/framework/parallel_executor.cc  +19 -5
paddle/fluid/framework/parallel_executor.h  +2 -0
paddle/fluid/operators/CMakeLists.txt  +3 -3
paddle/fluid/operators/activation_op.cc  +0 -2
paddle/fluid/operators/activation_op.h  +1 -1
paddle/fluid/operators/bilinear_tensor_product_op.cc  +5 -3
paddle/fluid/operators/clip_op.cc  +3 -2
paddle/fluid/operators/concat_op.cc  +4 -2
paddle/fluid/operators/conv_op.cc  +9 -6
paddle/fluid/operators/conv_shift_op.cc  +3 -2
paddle/fluid/operators/conv_transpose_op.cc  +8 -4
paddle/fluid/operators/cos_sim_op.cc  +3 -2
paddle/fluid/operators/crop_op.cc  +3 -1
paddle/fluid/operators/cross_entropy_op.cc  +3 -2
paddle/fluid/operators/detail/sendrecvop_utils.cc  +5 -4
paddle/fluid/operators/dropout_op.cc  +3 -2
paddle/fluid/operators/elementwise_div_op.cc  +4 -2
paddle/fluid/operators/elementwise_max_op.cc  +4 -2
paddle/fluid/operators/elementwise_min_op.cc  +4 -2
paddle/fluid/operators/elementwise_mul_op.cc  +4 -2
paddle/fluid/operators/elementwise_sub_op.cc  +4 -2
paddle/fluid/operators/expand_op.cc  +5 -2
paddle/fluid/operators/fc_op.cc  +3 -2
paddle/fluid/operators/gather_op.cc  +3 -2
paddle/fluid/operators/gru_op.cc  +3 -1
paddle/fluid/operators/gru_unit_op.cc  +3 -2
paddle/fluid/operators/hinge_loss_op.cc  +3 -2
paddle/fluid/operators/huber_loss_op.cc  +3 -2
paddle/fluid/operators/im2sequence_op.cc  +3 -2
paddle/fluid/operators/l1_norm_op.cc  +3 -2
paddle/fluid/operators/label_smooth_op.cc  +3 -2
paddle/fluid/operators/layer_norm_op.cc  +3 -2
paddle/fluid/operators/linear_chain_crf_op.cc  +4 -2
paddle/fluid/operators/lod_reset_op.cc  +3 -2
paddle/fluid/operators/log_loss_op.cc  +3 -2
paddle/fluid/operators/lrn_op.cc  +3 -1
paddle/fluid/operators/lstm_op.cc  +3 -1
paddle/fluid/operators/lstm_unit_op.cc  +3 -2
paddle/fluid/operators/lstmp_op.cc  +3 -2
paddle/fluid/operators/margin_rank_loss_op.cc  +4 -3
paddle/fluid/operators/matmul_op.cc  +3 -2
paddle/fluid/operators/maxout_op.cc  +3 -2
paddle/fluid/operators/modified_huber_loss_op.cc  +4 -3
paddle/fluid/operators/mul_op.cc  +3 -1
paddle/fluid/operators/nce_op.cc  +5 -1
paddle/fluid/operators/norm_op.cc  +3 -2
paddle/fluid/operators/pool_op.cc  +6 -4
paddle/fluid/operators/pool_with_index_op.cc  +8 -6
paddle/fluid/operators/prelu_op.cc  +3 -2
paddle/fluid/operators/rank_loss_op.cc  +3 -2
paddle/fluid/operators/reduce_op.cc  +24 -16
paddle/fluid/operators/reshape_op.cc  +3 -2
paddle/fluid/operators/roi_pool_op.cc  +3 -2
paddle/fluid/operators/row_conv_op.cc  +3 -2
paddle/fluid/operators/scatter_op.cc  +3 -2
paddle/fluid/operators/sequence_concat_op.cc  +5 -3
paddle/fluid/operators/sequence_conv_op.cc  +5 -2
paddle/fluid/operators/sequence_expand_op.cc  +4 -2
paddle/fluid/operators/sequence_slice_op.cc  +4 -2
paddle/fluid/operators/sequence_softmax_op.cc  +4 -3
paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc  +6 -5
paddle/fluid/operators/smooth_l1_loss_op.cc  +3 -2
paddle/fluid/operators/softmax_op.cc  +3 -2
paddle/fluid/operators/split_byref_op.cc  +101 -0
paddle/fluid/operators/split_byref_op.cu.cc  +19 -0
paddle/fluid/operators/split_byref_op.h  +43 -0
paddle/fluid/operators/split_op.cc  +0 -15
paddle/fluid/operators/split_op.h  +15 -0
paddle/fluid/operators/spp_op.cc  +3 -1
paddle/fluid/operators/squared_l2_distance_op.cc  +4 -3
paddle/fluid/operators/squared_l2_norm_op.cc  +4 -2
paddle/fluid/operators/top_k_op.h  +3 -4
paddle/fluid/operators/transpose_op.cc  +3 -2
paddle/fluid/operators/unpool_op.cc  +3 -2
paddle/fluid/operators/warpctc_op.cc  +3 -2
paddle/fluid/platform/nccl_helper.h  +16 -34
python/paddle/fluid/distribute_transpiler.py  +4 -3
python/paddle/fluid/layers/control_flow.py  +0 -38
python/paddle/fluid/layers/learning_rate_scheduler.py  +30 -3
python/paddle/fluid/layers/metric.py  +4 -15
python/paddle/fluid/layers/nn.py  +49 -9
python/paddle/fluid/parallel_executor.py  +6 -5
python/paddle/fluid/tests/unittests/test_conv3d_op.py  +7 -4
python/paddle/fluid/tests/unittests/test_layers.py  +9 -0
python/paddle/fluid/tests/unittests/test_parallel_executor.py  +52 -9
python/paddle/fluid/tests/unittests/test_pool2d_op.py  +5 -2
python/paddle/fluid/tests/unittests/test_pool3d_op.py  +5 -2
python/paddle/fluid/tests/unittests/test_split_op.py  +9 -1
python/paddle/v2/dataset/imdb.py  +2 -2
doc/fluid/api/layers.rst
@@ -815,3 +815,8 @@ zeros
 .. autofunction:: paddle.fluid.layers.zeros
     :noindex:
+
+topk
+----
+
+.. autofunction:: paddle.fluid.layers.topk
+    :noindex:

doc/fluid/dev/contribute_to_paddle_cn.md
new file mode 120000 (symlink)
+../../v2/dev/contribute_to_paddle_cn.md
\ No newline at end of file

doc/fluid/dev/contribute_to_paddle_en.md
new file mode 120000 (symlink)
+../../v2/dev/contribute_to_paddle_en.md
\ No newline at end of file

doc/fluid/dev/index_cn.rst
@@ -4,6 +4,8 @@
 .. toctree::
    :maxdepth: 1

+   contribute_to_paddle_cn.md
+   write_docs_cn.md
    api_doc_std_cn.md
    new_op_cn.md
    new_op_kernel.md

doc/fluid/dev/index_en.rst
@@ -4,6 +4,8 @@ Development
 .. toctree::
    :maxdepth: 1

+   contribute_to_paddle_en.md
+   write_docs_en.md
    api_doc_std_en.md
    new_op_en.md
    new_op_kernel.md

doc/fluid/dev/write_docs_cn.rst
new file mode 120000 (symlink)
+../../v2/dev/write_docs_cn.rst
\ No newline at end of file

doc/fluid/dev/write_docs_en.rst
new file mode 120000 (symlink)
+../../v2/dev/write_docs_en.rst
\ No newline at end of file

doc/v2/api/data/data_reader.rst
@@ -6,7 +6,43 @@ Data Reader Interface
 DataTypes
 =========

-.. automodule:: paddle.v2.data_type
+.. autofunction:: paddle.v2.data_type.dense_array
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.integer_value
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.integer_value_sequence
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.integer_value_sub_sequence
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_binary_vector
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_binary_vector_sequence
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_binary_vector_sub_sequence
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_float_vector
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_float_vector_sequence
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_float_vector_sub_sequence
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_non_value_slot
+   :noindex:
+
+.. autofunction:: paddle.v2.data_type.sparse_value_slot
+   :noindex:
+
+.. autoclass:: paddle.v2.data_type.InputType
+   :members:
+   :noindex:

doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md
deleted file mode 120000 (symlink)
-k8s_aws_en.md
\ No newline at end of file

doc/v2/howto/cluster/multi_cluster/k8s_aws_cn.md
new file mode 100644
(This diff is collapsed in the source page and its contents are not shown here.)

paddle/fluid/framework/CMakeLists.txt
@@ -102,7 +102,7 @@ cc_test(init_test SRCS init_test.cc DEPS init)
 cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto)
 cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
-cc_test(channel_test SRCS channel_test.cc)
+# cc_test(channel_test SRCS channel_test.cc)
 cc_test(tuple_test SRCS tuple_test.cc)
 cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op
         channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op

paddle/fluid/framework/details/fetch_op_handle.cc
@@ -51,23 +51,23 @@ void FetchOpHandle::RunImpl() {
     auto *var = static_cast<VarHandle *>(input);
     var->generated_op_->Wait(cpu_ctx);
   }
   tensors_.resize(inputs_.size());
-  auto *var = static_cast<VarHandle *>(inputs_[0]);
-  auto &var_name = var->name_;
+  auto *var_handle = static_cast<VarHandle *>(inputs_[0]);
+  auto &var_name = var_handle->name_;
   platform::CPUPlace cpu;
   auto &scopes = *local_scopes_;

   for (size_t i = 0; i < scopes.size(); ++i) {
     auto &scope = scopes[i];
-    auto &t = scope->FindVar(kLocalExecScopeName)
-                  ->Get<Scope *>()
-                  ->FindVar(var_name)
-                  ->Get<framework::LoDTensor>();
-    if (platform::is_gpu_place(var->place_)) {
+    auto *var = scope->FindVar(kLocalExecScopeName)
+                    ->Get<Scope *>()
+                    ->FindVar(var_name);
+    PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope",
+                            var_name);
+    auto &t = var->Get<framework::LoDTensor>();
+    if (platform::is_gpu_place(t.place())) {
 #ifdef PADDLE_WITH_CUDA
       TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);
-      dev_ctxes_[t.place()]->Wait();
+      dev_ctxes_.at(t.place())->Wait();
 #endif
     } else {
       tensors_[i].ShareDataWith(t);

paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -89,101 +89,25 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
   bool is_forwarding = true;

   for (auto *op : program.Block(0).AllOps()) {
-    bool change_forward = false;
-    if (!is_forwarding) {
-      // FIXME(yy): Do not hard code like this
-      if (op->OutputArgumentNames().size() == 1 &&
-          op->OutputArgumentNames()[0] == GradVarName(loss_var_name_)) {
-        continue;  // Drop fill 1. for backward coeff;
-      }
-    }
-
-    // append send op if program is distributed trainer main program.
-    // always use the first device
-    if (!is_forwarding && op->Type() == "send") {
-      auto &p = places_[0];
-      auto *s = local_scopes_[0];
-      // FIXME(wuyi): send op always copy from GPU 0
-      result.ops_.emplace_back(new SendOpHandle(*op, s, p));
-      // Create inputs for output on original place and no ssa output
-      // is created for send op.
-      CreateOpHandleIOs(&result, *op, p, 0);
-      continue;
-    }
-
-    for (size_t i = 0; i < places_.size(); ++i) {
-      auto &p = places_[i];
-      auto *s = local_scopes_[i];
-
-      result.ops_.emplace_back(new ComputationOpHandle(*op, s, p));
-      auto *op_handle = result.ops_.back().get();
-      CreateOpHandleIOs(&result, *op, p, i);
-
-      auto var_names = op->OutputArgumentNames();
-
-      if (is_forwarding) {
-        if (var_names.size() == 1 && var_names[0] == loss_var_name_) {
-          // Insert ScaleCost OpHandle
-#ifdef PADDLE_WITH_CUDA
-          auto *communication_dev_ctx = nccl_ctxs_->DevCtx(p);
-#else
-          auto *communication_dev_ctx =
-              platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
-#endif
-
-          op_handle = new ScaleLossGradOpHandle(local_scopes_.size(), s, p,
-                                                communication_dev_ctx);
-          result.ops_.emplace_back(op_handle);
-
-          // FIXME: Currently ScaleLossGradOp only use device_count as scale
-          // factor. So it does not depend on any other operators.
-          // VarHandle *loss = GetVarHandle(loss_var_name, place);
-          // loss->pending_ops_.emplace_back(op_handle);
-          // op_handle->inputs_.emplace_back(loss);
-
-          CreateOpOutput(&result, op_handle, GradVarName(loss_var_name_), p, i);
-          change_forward = true;
-        }
-      }
-    }
-
-    if (change_forward) {
+    if (op->Type() == "send") {
+      // append send op if program is distributed trainer main program.
+      // always use the first device
+      CreateSendOp(&result, *op);
+    } else if (IsScaleLossOp(*op)) {
+      CreateScaleLossGradOp(&result);
       is_forwarding = false;
-    }
-
-    if (!is_forwarding) {
-      auto var_names = op->OutputArgumentNames();
-      // Currently, we assume that once gradient is generated, it can be
-      // broadcast, and each gradient is only broadcast once. But there are no
-      // other cases, for example, we need to adjust the gradient according to
-      // the input when we get the gradient, which is not considered at present.
-      for (auto &og : var_names) {
-        if (grad_names_.count(og) != 0 &&
-            og_has_been_broadcast.count(og) == 0) {  // is param grad
-          // Insert NCCL AllReduce Op
-          og_has_been_broadcast.insert(og);
-#ifdef PADDLE_WITH_CUDA
-          result.ops_.emplace_back(
-              new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_));
-          auto *op_handle = result.ops_.back().get();
-
-          for (size_t i = 0; i < places_.size(); ++i) {
-            auto &p = places_[i];
-            auto &vars = result.vars_[i][og];
-
-            if (vars.empty()) {  // This device has no data. continue.
-              continue;
-            }
-            auto &prev_grad = vars[vars.size() - 1];
-            op_handle->AddInput(prev_grad.get());
-
-            auto var = new VarHandle(vars.size() - 1, i, og, p);
-            vars.emplace_back(var);
-            op_handle->AddOutput(var);
-          }
-#else
-          PADDLE_ENFORCE("Not implemented");
-#endif
+    } else {
+      CreateComputationalOps(&result, *op);
+      if (!is_forwarding) {
+        // Currently, we assume that once gradient is generated, it can be
+        // broadcast, and each gradient is only broadcast once. But there are
+        // no other cases, for example, we need to adjust the gradient
+        // according to the input when we get the gradient, which is not
+        // considered at present.
+        for (auto &og : op->OutputArgumentNames()) {
+          if (IsParameterGradientOnce(og, &og_has_been_broadcast)) {
+            InsertNCCLAllReduceOp(&result, og);
+          }
         }
       }
     }
@@ -207,7 +131,95 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
   }

   return std::unique_ptr<SSAGraph>(graph);
 }

+void MultiDevSSAGraphBuilder::InsertNCCLAllReduceOp(
+    SSAGraph *result, const std::string &og) const {
+#ifdef PADDLE_WITH_CUDA
+  result->ops_.emplace_back(
+      new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_));
+  auto *op_handle = result->ops_.back().get();
+
+  for (size_t i = 0; i < places_.size(); ++i) {
+    auto &p = places_[i];
+    auto &vars = result->vars_[i][og];
+    PADDLE_ENFORCE(!vars.empty());
+    auto &prev_grad = vars.back();
+    op_handle->AddInput(prev_grad.get());
+
+    auto var = new VarHandle(vars.size() - 1, i, og, p);
+    vars.emplace_back(var);
+    op_handle->AddOutput(var);
+  }
+#else
+  PADDLE_ENFORCE("Not implemented");
+#endif
+}
+
+bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
+    const std::string &og,
+    std::unordered_set<std::string> *og_has_been_broadcast) const {
+  bool is_pg_once =
+      grad_names_.count(og) != 0 && og_has_been_broadcast->count(og) == 0;
+  if (is_pg_once) {
+    // Insert NCCL AllReduce Op
+    og_has_been_broadcast->insert(og);
+  }
+  return is_pg_once;
+}
+
+void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(SSAGraph *result) const {
+  for (size_t i = 0; i < places_.size(); ++i) {
+    // Insert ScaleCost OpHandle
+#ifdef PADDLE_WITH_CUDA
+    auto *communication_dev_ctx = nccl_ctxs_->DevCtx(places_[i]);
+#else
+    auto *communication_dev_ctx =
+        platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
+#endif
+
+    auto *op_handle = new ScaleLossGradOpHandle(
+        local_scopes_.size(), local_scopes_[i], places_[i],
+        communication_dev_ctx);
+    result->ops_.emplace_back(op_handle);
+
+    // FIXME: Currently ScaleLossGradOp only use device_count as scale
+    // factor. So it does not depend on any other operators.
+    // VarHandle *loss = GetVarHandle(loss_var_name, place);
+    // loss->pending_ops_.emplace_back(op_handle);
+    // op_handle->inputs_.emplace_back(loss);
+
+    CreateOpOutput(result, op_handle, GradVarName(loss_var_name_), places_[i],
+                   i);
+  }
+}
+
+void MultiDevSSAGraphBuilder::CreateComputationalOps(SSAGraph *result,
+                                                     const OpDesc &op) const {
+  for (size_t scope_idx = 0; scope_idx < places_.size(); ++scope_idx) {
+    auto p = places_[scope_idx];
+    auto s = local_scopes_[scope_idx];
+    result->ops_.emplace_back(new ComputationOpHandle(op, s, p));
+    CreateOpHandleIOs(result, op, p, scope_idx);
+  }
+}
+
+void MultiDevSSAGraphBuilder::CreateSendOp(SSAGraph *result,
+                                           const OpDesc &op) const {
+  auto &p = places_[0];
+  auto *s = local_scopes_[0];
+  // FIXME(wuyi): send op always copy from GPU 0
+  result->ops_.emplace_back(new SendOpHandle(op, s, p));
+  // Create inputs for output on original place and no ssa output
+  // is created for send op.
+  CreateOpHandleIOs(result, op, p, 0);
+}
+
+bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const {
+  // FIXME(yy): Do not hard code like this
+  return op.OutputArgumentNames().size() == 1 &&
+         op.OutputArgumentNames()[0] == GradVarName(loss_var_name_);
+}
+
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle

paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -57,6 +57,20 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #ifdef PADDLE_WITH_CUDA
   platform::NCCLContextMap *nccl_ctxs_;
 #endif
+
+  bool IsScaleLossOp(const OpDesc &op) const;
+
+  void CreateSendOp(SSAGraph *result, const OpDesc &op) const;
+
+  void CreateComputationalOps(SSAGraph *result, const OpDesc &op) const;
+
+  void CreateScaleLossGradOp(SSAGraph *result) const;
+
+  bool IsParameterGradientOnce(
+      const std::string &og,
+      std::unordered_set<std::string> *og_has_been_broadcast) const;
+
+  void InsertNCCLAllReduceOp(SSAGraph *result, const std::string &og) const;
 };
 }  // namespace details
 }  // namespace framework

paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
@@ -73,8 +73,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
     for (size_t i = 0; i < local_scopes_.size(); ++i) {
       auto *s = local_scopes_[i];
+      auto &local_scope = *s->FindVar(kLocalExecScopeName)->Get<Scope *>();

-      auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>();
+      auto &lod_tensor = local_scope.FindVar(var_name)->Get<LoDTensor>();
       lod_tensors.emplace_back(lod_tensor);
     }
@@ -110,17 +111,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
         }
       });
     } else {  // Special handle CPU only Operator's gradient. Like CRF
-      auto &trg = *this->local_scopes_[0]
-                       ->Var()
-                       ->GetMutable<framework::LoDTensor>();
+      auto &trg = *this->local_scopes_[0]
+                       ->FindVar(kLocalExecScopeName)
+                       ->Get<Scope *>()
+                       ->Var()
+                       ->GetMutable<framework::LoDTensor>();

       // Reduce All Tensor to trg in CPU
       ReduceLoDTensor func(lod_tensors, &trg);
       VisitDataType(ToDataType(lod_tensors[0].type()), func);

       for (size_t i = 0; i < local_scopes_.size(); ++i) {
-        auto &scope = local_scopes_[i];
+        auto &scope =
+            *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get<Scope *>();
         auto &p = places_[i];
-        auto *var = scope->FindVar(var_name);
+        auto *var = scope.FindVar(var_name);
         auto *dev_ctx = dev_ctxes_[p];

         RunAndRecordEvent(p, [&trg, var, dev_ctx, p] {

paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
@@ -30,10 +30,11 @@ ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {}
 void ScaleLossGradOpHandle::RunImpl() {
   std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_;
+  auto &local_scope = *scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();

-  float *tmp = scope_->FindVar(var_name)
-                   ->GetMutable<LoDTensor>()
-                   ->mutable_data<float>(make_ddim({1}), place_);
+  float *tmp = local_scope.FindVar(var_name)
+                   ->GetMutable<LoDTensor>()
+                   ->mutable_data<float>(make_ddim({1}), place_);

   if (platform::is_cpu_place(place_)) {
     *tmp = coeff_;

paddle/fluid/framework/grad_op_desc_maker.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
 #include <algorithm>
+#include <string>
 #include <unordered_set>
 #include <vector>
@@ -69,8 +70,7 @@ class GradOpDescMakerBase {
             " for input argument with a list of variables, "
             " drop_empty_grad is not allowed because it makes"
             " the correspondence bewteen a variable and its gradient"
-            " ambiguous. Use REGISTER_OP_EX to register the op"
-            " or call InputGrad(?,false) in GradOpDescMaker."
+            " ambiguous."
             " Op type %s",
             fwd_op_.Type());

paddle/fluid/framework/op_registry.h
@@ -16,6 +16,8 @@ limitations under the License. */
 #include <algorithm>
 #include <atomic>
 #include <string>
+#include <tuple>
+#include <type_traits>
 #include <typeinfo>
 #include <unordered_map>
@@ -141,36 +143,6 @@ class OpKernelRegistrar : public Registrar {
     return 0;                                                   \
   }

-/**
- * Macro to register Operator. When the input is duplicable, you should
- * use REGISTER_OP_EX with drop_empty_grad=false instead.
- */
-#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \
-                    grad_op_class)                                   \
-  REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type,    \
-                 grad_op_class, true)
-
-// When an argument is duplicable, we need to use this version.
-// Perhaps we can omit DropEmptyIG template parameter and
-// only have one version of REGISTER_OP.
-#define REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type,       \
-                       grad_op_class, drop_empty_grad)                        \
-  REGISTER_OPERATOR(grad_op_type, grad_op_class);                             \
-  class _GradOpDescMaker_##grad_op_type##_                                    \
-      : public ::paddle::framework::DefaultGradOpDescMaker<drop_empty_grad> { \
-    using ::paddle::framework::DefaultGradOpDescMaker<                        \
-        drop_empty_grad>::DefaultGradOpDescMaker;                             \
-                                                                              \
-   protected:                                                                 \
-    virtual std::string GradOpType() const { return #grad_op_type; }          \
-  };                                                                          \
-  REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_,    \
-                    op_maker_class);
-
 #define REGISTER_OP_WITH_KERNEL(op_type, ...)                         \
   REGISTER_OPERATOR(op_type, ::paddle::framework::OperatorWithKernel, \
                     ##__VA_ARGS__)

 #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \
   REGISTER_OPERATOR(op_type, op_class, op_maker_class)

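Note: REGISTER_OP and REGISTER_OP_EX are deleted here, so every *_op.cc call site in this commit migrates to plain REGISTER_OPERATOR, as the per-operator diffs below show. A minimal sketch of the pattern (my_op, MyOp, MyOpMaker, and MyOpGrad are hypothetical placeholder names, not code from this commit):

    // Before this commit: one macro registered the forward op, generated a
    // DefaultGradOpDescMaker behind the scenes, and registered the grad op.
    // REGISTER_OP(my_op, ops::MyOp, ops::MyOpMaker, my_op_grad, ops::MyOpGrad);

    // After this commit: the forward op names its grad-desc maker explicitly,
    // and the grad op is registered separately. The template argument carries
    // the old drop_empty_grad flag (true for most ops; false for ops with
    // duplicable inputs, such as concat below).
    REGISTER_OPERATOR(my_op, ops::MyOp, ops::MyOpMaker,
                      paddle::framework::DefaultGradOpDescMaker<true>);
    REGISTER_OPERATOR(my_op_grad, ops::MyOpGrad);
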
paddle/fluid/framework/parallel_executor.cc
@@ -44,6 +44,7 @@ class ParallelExecutorPrivate {
 #endif
   std::vector<std::tuple<std::string, proto::VarType::Type, bool>> var_types_;
+  bool own_local_scope;
 };

 std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
@@ -63,13 +64,16 @@ ParallelExecutor::ParallelExecutor(
   // Step 1. Bcast the params to devs.
   // Create local scopes
   if (local_scopes.empty()) {
-    for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.push_back(&scope->NewScope());
+    member_->own_local_scope = true;
+    member_->local_scopes_.emplace_back(member_->global_scope_);
+    for (size_t i = 1; i < member_->places_.size(); ++i) {
+      member_->local_scopes_.emplace_back(&scope->NewScope());
     }
   } else {
+    member_->own_local_scope = false;
     PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size());
     for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.push_back(local_scopes[i]);
+      member_->local_scopes_.emplace_back(local_scopes[i]);
     }
   }
@@ -159,7 +163,9 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                            const std::string &fetched_var_name) {
   platform::RecordBlock b(0);
   // Create local scopes.
-  for (auto &scope : member_->local_scopes_) {
+  for (auto it = member_->local_scopes_.rbegin();
+       it != member_->local_scopes_.rend(); ++it) {
+    auto &scope = *it;
     Scope &local_scope = scope->NewScope();
     *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
         &local_scope;
@@ -173,7 +179,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
         InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
                            std::get<1>(name_type_pair));
       } else {
-        InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
+        InitializeVariable(local_scope.Var(std::get<0>(name_type_pair)),
                            std::get<1>(name_type_pair));
       }
     }
@@ -228,5 +234,13 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes(
   }
 }

+ParallelExecutor::~ParallelExecutor() {
+  if (member_->own_local_scope) {
+    for (size_t i = 1; i < member_->local_scopes_.size(); ++i) {
+      member_->global_scope_->DeleteScope(member_->local_scopes_[i]);
+    }
+  }
+}
+
 }  // namespace framework
 }  // namespace paddle

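The constructor and destructor changes above establish an ownership rule: local scope 0 now aliases the caller's global scope, scopes 1..N-1 are created by the executor itself, and on teardown only executor-created scopes are deleted; caller-provided scopes are never touched. A self-contained sketch of that rule (MiniExecutor and the toy Scope are illustrative stand-ins, not Paddle code):

    #include <cassert>
    #include <memory>
    #include <vector>

    struct Scope {
      std::vector<std::unique_ptr<Scope>> children;
      Scope *NewScope() {
        children.emplace_back(new Scope);
        return children.back().get();
      }
      void DeleteScope(Scope *s) {
        for (auto it = children.begin(); it != children.end(); ++it) {
          if (it->get() == s) {
            children.erase(it);
            return;
          }
        }
      }
    };

    class MiniExecutor {
     public:
      MiniExecutor(Scope *global, const std::vector<Scope *> &provided,
                   size_t num_places)
          : global_(global) {
        if (provided.empty()) {
          own_local_scope_ = true;
          local_.push_back(global_);  // scope 0 is the global scope itself
          for (size_t i = 1; i < num_places; ++i) {
            local_.push_back(global_->NewScope());
          }
        } else {
          own_local_scope_ = false;
          assert(provided.size() == num_places);
          local_ = provided;  // caller retains ownership
        }
      }
      ~MiniExecutor() {
        if (own_local_scope_) {
          // Delete only the scopes this executor created; skip scope 0,
          // which belongs to the caller.
          for (size_t i = 1; i < local_.size(); ++i) {
            global_->DeleteScope(local_[i]);
          }
        }
      }

     private:
      Scope *global_;
      std::vector<Scope *> local_;
      bool own_local_scope_ = false;
    };

    int main() {
      Scope global;
      { MiniExecutor ex(&global, {}, 4); }  // creates and cleans up scopes 1..3
      assert(global.children.empty());      // nothing leaked
    }
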
paddle/fluid/framework/parallel_executor.h
@@ -42,6 +42,8 @@ class ParallelExecutor {
                    const std::vector<Scope *> &local_scopes,
                    bool allow_op_delay);

+  ~ParallelExecutor();
+
   std::vector<Scope *> &GetLocalScopes();

   /**

paddle/fluid/operators/CMakeLists.txt
@@ -110,12 +110,12 @@ function(op_library TARGET)
   # Note that it's enough to just adding one operator to pybind in a *_op.cc file.
   # And for detail pybind information, please see generated paddle/pybind/pybind.h.
   file(READ ${TARGET}.cc TARGET_CONTENT)
-  string(REGEX MATCH "REGISTER_OP\\(.*REGISTER_OP\\(" multi_register "${TARGET_CONTENT}")
-  string(REGEX MATCH "REGISTER_OP\\([a-z0-9_]*," one_register "${multi_register}")
+  string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
+  string(REGEX MATCH "REGISTER_OPERATOR\\([a-z0-9_]*," one_register "${multi_register}")
   if (one_register STREQUAL "")
     string(REPLACE "_op" "" TARGET "${TARGET}")
   else ()
-    string(REPLACE "REGISTER_OP(" "" TARGET "${one_register}")
+    string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
     string(REPLACE "," "" TARGET "${TARGET}")
   endif()

...
paddle/fluid/operators/activation_op.cc
浏览文件 @
7a517dc9
...
...
@@ -469,8 +469,6 @@ REGISTER_ACTIVATION_OP_GRAD_MAKER(HardSigmoid, hard_sigmoid);
namespace
ops
=
paddle
::
operators
;
void
DummyFunctor
()
{}
#define FOR_EACH_INPLACE_OP_FUNCTOR(__macro) \
__macro(Sigmoid, sigmoid); \
__macro(Relu, relu); \
...
...
paddle/fluid/operators/activation_op.h
@@ -648,7 +648,7 @@ struct SoftReluGradFunctor : public BaseActivationFunctor<T> {
   typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
     return {{"threshold", &threshold}};
   }
-  bool Inplace() const { return IsInplace("softrelu"); }
+  bool Inplace() const { return IsInplace("soft_relu"); }

   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {

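A small but behavioral fix: IsInplace presumably keys on the operator's registered type name, which is "soft_relu", so the old "softrelu" lookup could never match and the in-place optimization was silently skipped for this functor.
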
paddle/fluid/operators/bilinear_tensor_product_op.cc
@@ -153,9 +153,11 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp,
-            ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad,
-            ops::BilinearTensorProductOpGrad);
+REGISTER_OPERATOR(bilinear_tensor_product, ops::BilinearTensorProductOp,
+                  ops::BilinearTensorProductOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(bilinear_tensor_product_grad,
+                  ops::BilinearTensorProductOpGrad);
 REGISTER_OP_CPU_KERNEL(
     bilinear_tensor_product,
     ops::BilinearTensorProductKernel<paddle::platform::CPUDeviceContext,
                                      float>,

paddle/fluid/operators/clip_op.cc
@@ -81,8 +81,9 @@ class ClipOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(clip, ops::ClipOp, ops::ClipOpMaker<float>, clip_grad,
-            ops::ClipOpGrad);
+REGISTER_OPERATOR(clip, ops::ClipOp, ops::ClipOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad);
 REGISTER_OP_CPU_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/concat_op.cc
@@ -103,8 +103,10 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP_EX(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad,
-               ops::ConcatOpGrad, false)
+REGISTER_OPERATOR(concat, ops::ConcatOp, ops::ConcatOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<
+                      false> /* set false to disable empty grad */);
+REGISTER_OPERATOR(concat_grad, ops::ConcatOpGrad);
 REGISTER_OP_CPU_KERNEL(
     concat, ops::ConcatKernel<paddle::platform::CPUDeviceContext, float>)
 REGISTER_OP_CPU_KERNEL(

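concat is the one migrated operator that keeps drop_empty_grad = false (the DefaultGradOpDescMaker<false> above): its input is a list of variables, and, as the updated enforce message in grad_op_desc_maker.h earlier in this diff explains, dropping empty gradients for a duplicable input would make the correspondence between a variable and its gradient ambiguous.
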
paddle/fluid/operators/conv_op.cc
@@ -335,14 +335,17 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad,
-            ops::ConvOpGrad);
+REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad);

 // depthwise convolution op
-REGISTER_OP(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
-            depthwise_conv2d_grad, ops::ConvOpGrad);
-REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad,
-            ops::ConvOpGrad);
+REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(depthwise_conv2d_grad, ops::ConvOpGrad);
+REGISTER_OPERATOR(conv3d, ops::ConvOp, ops::Conv3DOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad);

 // depthwise conv kernel
 // TODO(xingzhaolong): neon kernel for mobile

paddle/fluid/operators/conv_shift_op.cc
@@ -193,8 +193,9 @@ class ConvShiftGradKernel<platform::CPUPlace, T>
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(conv_shift, ops::ConvShiftOp, ops::ConvShiftOpMaker,
-            conv_shift_grad, ops::ConvShiftGradOp);
+REGISTER_OPERATOR(conv_shift, ops::ConvShiftOp, ops::ConvShiftOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(conv_shift_grad, ops::ConvShiftGradOp);
 REGISTER_OP_CPU_KERNEL(
     conv_shift, ops::ConvShiftKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/conv_transpose_op.cc
@@ -298,8 +298,10 @@ framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
 namespace ops = paddle::operators;

-REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp,
-            ops::Conv2DTransposeOpMaker, conv2d_transpose_grad,
-            ops::ConvTransposeOpGrad);
+REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp,
+                  ops::Conv2DTransposeOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad);

 REGISTER_OP_CPU_KERNEL(
     conv2d_transpose,
@@ -311,8 +313,10 @@ REGISTER_OP_CPU_KERNEL(
     ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
                                      double>);

-REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp,
-            ops::Conv3DTransposeOpMaker, conv3d_transpose_grad,
-            ops::ConvTransposeOpGrad);
+REGISTER_OPERATOR(conv3d_transpose, ops::ConvTransposeOp,
+                  ops::Conv3DTransposeOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(conv3d_transpose_grad, ops::ConvTransposeOpGrad);

 REGISTER_OP_CPU_KERNEL(
     conv3d_transpose,

paddle/fluid/operators/cos_sim_op.cc
@@ -153,8 +153,9 @@ class CosSimOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(cos_sim, ops::CosSimOp, ops::CosSimOpMaker, cos_sim_grad,
-            ops::CosSimOpGrad);
+REGISTER_OPERATOR(cos_sim, ops::CosSimOp, ops::CosSimOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(cos_sim_grad, ops::CosSimOpGrad);
 REGISTER_OP_CPU_KERNEL(
     cos_sim, ops::CosSimKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/crop_op.cc
@@ -153,7 +153,9 @@ class CropOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(crop, ops::CropOp, ops::CropOpMaker, crop_grad, ops::CropOpGrad);
+REGISTER_OPERATOR(crop, ops::CropOp, ops::CropOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(crop_grad, ops::CropOpGrad);
 REGISTER_OP_CPU_KERNEL(crop, ops::CropKernel<float>);
 REGISTER_OP_CPU_KERNEL(
     crop_grad, ops::CropGradKernel<paddle::platform::CPUDeviceContext, float>);

paddle/fluid/operators/cross_entropy_op.cc
@@ -164,8 +164,9 @@ or not. But the output only shares the LoD information with input X.
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
-            cross_entropy_grad, ops::CrossEntropyGradientOp);
+REGISTER_OPERATOR(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(cross_entropy_grad, ops::CrossEntropyGradientOp);
 REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<float>,
                        ops::CrossEntropyOpKernel<double>);
 REGISTER_OP_CPU_KERNEL(cross_entropy_grad,

paddle/fluid/operators/detail/sendrecvop_utils.cc
@@ -82,7 +82,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
         platform::CPUPlace cpu;
         auto& gpu_dev_ctx =
             static_cast<const platform::CUDADeviceContext&>(ctx);
-        auto copy_size = tensor.memory_size();
+        auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type());
         payload = memory::Alloc(cpu, copy_size);

         memory::Copy(cpu, payload,
@@ -99,7 +99,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       } else {
         payload = tensor.data<void>();
       }
-      payload_size = tensor.memory_size();
+      payload_size = tensor.numel() * framework::SizeOfType(tensor.type());
       e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size);
     } break;
     case framework::proto::VarType_Type_SELECTED_ROWS: {
@@ -118,7 +118,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
         platform::CPUPlace cpu;
         auto& gpu_dev_ctx =
             static_cast<const platform::CUDADeviceContext&>(ctx);
-        auto copy_size = tensor->memory_size();
+        auto copy_size =
+            tensor->numel() * framework::SizeOfType(tensor->type());
         payload = memory::Alloc(cpu, copy_size);
         memory::Copy(cpu, payload,
                      boost::get<platform::CUDAPlace>(tensor->place()),
@@ -133,7 +134,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
       } else {
         payload = slr->mutable_value()->data<void>();
       }
-      payload_size = tensor->memory_size();
+      payload_size = tensor->numel() * framework::SizeOfType(tensor->type());
       e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size);
     } break;
     default:

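The serialized length switches from tensor.memory_size() to tensor.numel() * framework::SizeOfType(tensor.type()) in all four places; presumably memory_size() reports the size of the underlying holder allocation, which can be larger than the bytes actually occupied by the tensor's current elements, while numel() times the element size is the exact payload to put on the wire.
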
paddle/fluid/operators/dropout_op.cc
@@ -101,8 +101,9 @@ class DropoutOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(dropout, ops::DropoutOp, ops::DropoutOpMaker, dropout_grad,
-            ops::DropoutOpGrad);
+REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
 REGISTER_OP_CPU_KERNEL(
     dropout, ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/elementwise_div_op.cc
@@ -30,8 +30,10 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker,
-            elementwise_div_grad, ops::ElementwiseOpGrad);
+REGISTER_OPERATOR(elementwise_div, ops::ElementwiseOp,
+                  ops::ElementwiseDivOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(elementwise_div_grad, ops::ElementwiseOpGrad);
 REGISTER_OP_CPU_KERNEL(
     elementwise_div,
     ops::ElementwiseDivKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/elementwise_max_op.cc
@@ -29,8 +29,10 @@ class ElementwiseMaxOpMaker : public ElementwiseOpMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(elementwise_max, ops::ElementwiseOp, ops::ElementwiseMaxOpMaker,
-            elementwise_max_grad, ops::ElementwiseOpGrad);
+REGISTER_OPERATOR(elementwise_max, ops::ElementwiseOp,
+                  ops::ElementwiseMaxOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(elementwise_max_grad, ops::ElementwiseOpGrad);
 REGISTER_OP_CPU_KERNEL(
     elementwise_max,
     ops::ElementwiseMaxKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/elementwise_min_op.cc
@@ -29,8 +29,10 @@ class ElementwiseMinOpMaker : public ElementwiseOpMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(elementwise_min, ops::ElementwiseOp, ops::ElementwiseMinOpMaker,
-            elementwise_min_grad, ops::ElementwiseOpGrad);
+REGISTER_OPERATOR(elementwise_min, ops::ElementwiseOp,
+                  ops::ElementwiseMinOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(elementwise_min_grad, ops::ElementwiseOpGrad);
 REGISTER_OP_CPU_KERNEL(
     elementwise_min,
     ops::ElementwiseMinKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/elementwise_mul_op.cc
@@ -31,8 +31,10 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker,
-            elementwise_mul_grad, ops::ElementwiseOpGrad);
+REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp,
+                  ops::ElementwiseMulOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad);
 REGISTER_OP_CPU_KERNEL(
     elementwise_mul,
     ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/elementwise_sub_op.cc
@@ -29,8 +29,10 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker,
-            elementwise_sub_grad, ops::ElementwiseOpGrad);
+REGISTER_OPERATOR(elementwise_sub, ops::ElementwiseOp,
+                  ops::ElementwiseSubOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(elementwise_sub_grad, ops::ElementwiseOpGrad);
 REGISTER_OP_CPU_KERNEL(
     elementwise_sub,
     ops::ElementwiseSubKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/expand_op.cc
@@ -14,6 +14,8 @@ limitations under the License. */

 #include "paddle/fluid/operators/expand_op.h"

+#include <vector>
+
 namespace paddle {
 namespace operators {
@@ -128,8 +130,9 @@ class ExpandGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(expand, ops::ExpandOp, ops::ExpandOpMaker, expand_grad,
-            ops::ExpandGradOp);
+REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp);
 REGISTER_OP_CPU_KERNEL(
     expand, ops::ExpandKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/fc_op.cc
@@ -98,5 +98,6 @@ FCOpMaker::FCOpMaker(OpProto* proto, OpAttrChecker* op_checker)
 }  // namespace operators
 }  // namespace paddle

-REGISTER_OP(fc, paddle::operators::FCOp, paddle::operators::FCOpMaker, fc_grad,
-            paddle::operators::FCOpGrad);
+REGISTER_OPERATOR(fc, paddle::operators::FCOp, paddle::operators::FCOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(fc_grad, paddle::operators::FCOpGrad);

paddle/fluid/operators/gather_op.cc
@@ -100,7 +100,8 @@ Out = [[3, 4],
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad,
-            ops::GatherGradOp);
+REGISTER_OPERATOR(gather, ops::GatherOp, ops::GatherOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(gather_grad, ops::GatherGradOp);
 REGISTER_OP_CPU_KERNEL(gather, ops::GatherOpKernel<float>);
 REGISTER_OP_CPU_KERNEL(gather_grad, ops::GatherGradientOpKernel<float>);

paddle/fluid/operators/gru_op.cc
@@ -216,7 +216,9 @@ class GRUGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(gru, ops::GRUOp, ops::GRUOpMaker, gru_grad, ops::GRUGradOp);
+REGISTER_OPERATOR(gru, ops::GRUOp, ops::GRUOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(gru_grad, ops::GRUGradOp);
 REGISTER_OP_CPU_KERNEL(
     gru, ops::GRUKernel<paddle::platform::CPUDeviceContext, float>,
     ops::GRUKernel<paddle::platform::CPUDeviceContext, double>);

paddle/fluid/operators/gru_unit_op.cc
@@ -198,8 +198,9 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad,
-            ops::GRUUnitGradOp);
+REGISTER_OPERATOR(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(gru_unit_grad, ops::GRUUnitGradOp);
 REGISTER_OP_CPU_KERNEL(
     gru_unit, ops::GRUUnitKernel<paddle::platform::CPUDeviceContext, float>,
     ops::GRUUnitKernel<paddle::platform::CPUDeviceContext, double>);

paddle/fluid/operators/hinge_loss_op.cc
@@ -103,8 +103,9 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(hinge_loss, ops::HingeLossOp, ops::HingeLossOpMaker<float>,
-            hinge_loss_grad, ops::HingeLossGradOp);
+REGISTER_OPERATOR(hinge_loss, ops::HingeLossOp, ops::HingeLossOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(hinge_loss_grad, ops::HingeLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     hinge_loss,
     ops::HingeLossKernel<paddle::platform::CPUDeviceContext, float>);

paddle/fluid/operators/huber_loss_op.cc
@@ -121,8 +121,9 @@ class HuberLossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker<float>,
-            huber_loss_grad, ops::HuberLossGradOp);
+REGISTER_OPERATOR(huber_loss, ops::HuberLossOp, ops::HuberLossOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(huber_loss_grad, ops::HuberLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     huber_loss,
     ops::HuberLossKernel<paddle::platform::CPUDeviceContext, float>);

paddle/fluid/operators/im2sequence_op.cc
@@ -148,8 +148,9 @@ class Im2SequenceGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
-            im2sequence_grad, ops::Im2SequenceGradOp);
+REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
 REGISTER_OP_CPU_KERNEL(
     im2sequence,
     ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);

paddle/fluid/operators/l1_norm_op.cc
@@ -67,8 +67,9 @@ $$Out = \sum{|X|}$$
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(l1_norm, ops::L1NormOp, ops::L1NormOpMaker, l1_norm_grad,
-            ops::L1NormGradOp);
+REGISTER_OPERATOR(l1_norm, ops::L1NormOp, ops::L1NormOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(l1_norm_grad, ops::L1NormGradOp);
 REGISTER_OP_CPU_KERNEL(
     l1_norm, ops::L1NormKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/label_smooth_op.cc
@@ -117,8 +117,9 @@ class LabelSmoothGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(label_smooth, ops::LabelSmoothOp, ops::LabelSmoothOpMaker,
-            label_smooth_grad, ops::LabelSmoothGradOp);
+REGISTER_OPERATOR(label_smooth, ops::LabelSmoothOp, ops::LabelSmoothOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(label_smooth_grad, ops::LabelSmoothGradOp);
 REGISTER_OP_CPU_KERNEL(
     label_smooth,
     ops::LabelSmoothKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/layer_norm_op.cc
@@ -162,8 +162,9 @@ class LayerNormGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(layer_norm, ops::LayerNormOp, ops::LayerNormOpMaker,
-            layer_norm_grad, ops::LayerNormGradOp);
+REGISTER_OPERATOR(layer_norm, ops::LayerNormOp, ops::LayerNormOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(layer_norm_grad, ops::LayerNormGradOp);
 REGISTER_OP_CPU_KERNEL(
     layer_norm, ops::LayerNormKernel<paddle::platform::CPUDeviceContext, float>,
     ops::LayerNormKernel<paddle::platform::CPUDeviceContext, double>);

paddle/fluid/operators/linear_chain_crf_op.cc
@@ -256,8 +256,10 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(linear_chain_crf, ops::LinearChainCRFOp,
-            ops::LinearChainCRFOpMaker, linear_chain_crf_grad,
-            ops::LinearChainCRFGradOp);
+REGISTER_OPERATOR(linear_chain_crf, ops::LinearChainCRFOp,
+                  ops::LinearChainCRFOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(linear_chain_crf_grad, ops::LinearChainCRFGradOp);
 REGISTER_OP_CPU_KERNEL(
     linear_chain_crf,
     ops::LinearChainCRFOpKernel<paddle::platform::CPUDeviceContext, float>,

paddle/fluid/operators/lod_reset_op.cc
@@ -155,8 +155,9 @@ class LoDResetGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, lod_reset_grad,
-            ops::LoDResetGradOp);
+REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp);
 REGISTER_OP_CPU_KERNEL(
     lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
     ops::LoDResetKernel<paddle::platform::CPUPlace, double>,

paddle/fluid/operators/log_loss_op.cc
@@ -106,8 +106,9 @@ class LogLossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(log_loss, ops::LogLossOp, ops::LogLossOpMaker<float>,
-            log_loss_grad, ops::LogLossGradOp);
+REGISTER_OPERATOR(log_loss, ops::LogLossOp, ops::LogLossOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(log_loss_grad, ops::LogLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     log_loss, ops::LogLossKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/lrn_op.cc
@@ -276,7 +276,9 @@ class LRNOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(lrn, ops::LRNOp, ops::LRNOpMaker<float>, lrn_grad, ops::LRNOpGrad);
+REGISTER_OPERATOR(lrn, ops::LRNOp, ops::LRNOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(lrn_grad, ops::LRNOpGrad);
 REGISTER_OP_CPU_KERNEL(
     lrn, ops::LRNKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(

paddle/fluid/operators/lstm_op.cc
@@ -273,7 +273,9 @@ class LSTMGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(lstm, ops::LSTMOp, ops::LSTMOpMaker, lstm_grad, ops::LSTMGradOp);
+REGISTER_OPERATOR(lstm, ops::LSTMOp, ops::LSTMOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(lstm_grad, ops::LSTMGradOp);
 REGISTER_OP_CPU_KERNEL(
     lstm, ops::LSTMKernel<paddle::platform::CPUDeviceContext, float>,
     ops::LSTMKernel<paddle::platform::CPUDeviceContext, double>);

paddle/fluid/operators/lstm_unit_op.cc
@@ -97,8 +97,9 @@ class LstmUnitGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(lstm_unit, ops::LstmUnitOp, ops::LstmUnitOpMaker, lstm_unit_grad,
-            ops::LstmUnitGradOp);
+REGISTER_OPERATOR(lstm_unit, ops::LstmUnitOp, ops::LstmUnitOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(lstm_unit_grad, ops::LstmUnitGradOp);
 REGISTER_OP_CPU_KERNEL(
     lstm_unit, ops::LstmUnitKernel<paddle::platform::CPUPlace, float>,
     ops::LstmUnitKernel<paddle::platform::CPUPlace, double>);

paddle/fluid/operators/lstmp_op.cc
@@ -322,8 +322,9 @@ class LSTMPGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker, lstmp_grad,
-            ops::LSTMPGradOp);
+REGISTER_OPERATOR(lstmp, ops::LSTMPOp, ops::LSTMPOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(lstmp_grad, ops::LSTMPGradOp);
 REGISTER_OP_CPU_KERNEL(
     lstmp, ops::LSTMPKernel<paddle::platform::CPUDeviceContext, float>,
     ops::LSTMPKernel<paddle::platform::CPUDeviceContext, double>);

paddle/fluid/operators/margin_rank_loss_op.cc
@@ -111,9 +111,10 @@ class MarginRankLossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP(margin_rank_loss, ops::MarginRankLossOp,
-            ops::MarginRankLossOpMaker<float>, margin_rank_loss_grad,
-            ops::MarginRankLossGradOp);
+REGISTER_OPERATOR(margin_rank_loss, ops::MarginRankLossOp,
+                  ops::MarginRankLossOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(margin_rank_loss_grad, ops::MarginRankLossGradOp);
 REGISTER_OP_CPU_KERNEL(
     margin_rank_loss,
     ops::MarginRankLossKernel<paddle::platform::CPUDeviceContext, float>);

paddle/fluid/operators/matmul_op.cc
...
@@ -237,8 +237,9 @@ class MatMulOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(matmul, ops::MatMulOp, ops::MatMulOpMaker, matmul_grad,
-            ops::MatMulOpGrad);
+REGISTER_OPERATOR(matmul, ops::MatMulOp, ops::MatMulOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(matmul_grad, ops::MatMulOpGrad)
 REGISTER_OP_CPU_KERNEL(
     matmul, ops::MatMulKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/maxout_op.cc
...
@@ -101,8 +101,9 @@ class MaxOutOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad,
-            ops::MaxOutOpGrad);
+REGISTER_OPERATOR(maxout, ops::MaxOutOp, ops::MaxOutOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(maxout_grad, ops::MaxOutOpGrad)
 REGISTER_OP_CPU_KERNEL(
     maxout, ops::MaxOutKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/modified_huber_loss_op.cc
...
@@ -108,9 +108,10 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(modified_huber_loss, ops::ModifiedHuberLossOp,
-            ops::ModifiedHuberLossOpMaker, modified_huber_loss_grad,
-            ops::ModifiedHuberLossGradOp);
+REGISTER_OPERATOR(modified_huber_loss, ops::ModifiedHuberLossOp,
+                  ops::ModifiedHuberLossOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(modified_huber_loss_grad, ops::ModifiedHuberLossGradOp)
 REGISTER_OP_CPU_KERNEL(
     modified_huber_loss,
...
paddle/fluid/operators/mul_op.cc
...
@@ -160,7 +160,9 @@ class MulGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulGradOp);
+REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(mul_grad, ops::MulGradOp)
 REGISTER_OP_CPU_KERNEL(
     mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/nce_op.cc
...
@@ -14,6 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/nce_op.h"
+#include <vector>
 namespace paddle {
 namespace operators {
...
@@ -179,7 +181,9 @@ class NCEOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(nce, ops::NCEOp, ops::NCEOpMaker, nce_grad, ops::NCEOpGrad);
+REGISTER_OPERATOR(nce, ops::NCEOp, ops::NCEOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad)
 REGISTER_OP_CPU_KERNEL(nce, ops::NCEKernel<paddle::platform::CPUPlace, float>,
                        ops::NCEKernel<paddle::platform::CPUPlace, double>);
 REGISTER_OP_CPU_KERNEL(nce_grad,
...
paddle/fluid/operators/norm_op.cc
...
@@ -85,8 +85,9 @@ class NormOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(norm, ops::NormOp, ops::NormOpMaker<float>, norm_grad,
-            ops::NormOpGrad);
+REGISTER_OPERATOR(norm, ops::NormOp, ops::NormOpMaker<float>,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(norm_grad, ops::NormOpGrad)
 REGISTER_OP_CPU_KERNEL(
     norm, ops::NormKernel<paddle::platform::CPUDeviceContext, float>,
     ops::NormKernel<paddle::platform::CPUDeviceContext, double, float>);
...
paddle/fluid/operators/pool_op.cc
...
@@ -333,8 +333,9 @@ Example:
 namespace ops = paddle::operators;
-REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad,
-            ops::PoolOpGrad);
+REGISTER_OPERATOR(pool2d, ops::PoolOp, ops::Pool2dOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(pool2d_grad, ops::PoolOpGrad)
 REGISTER_OP_CPU_KERNEL(
     pool2d, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
...
@@ -343,8 +344,9 @@ REGISTER_OP_CPU_KERNEL(
     pool2d_grad, ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
     ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
-REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad,
-            ops::PoolOpGrad);
+REGISTER_OPERATOR(pool3d, ops::PoolOp, ops::Pool3dOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(pool3d_grad, ops::PoolOpGrad)
 REGISTER_OP_CPU_KERNEL(
     pool3d, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
...
paddle/fluid/operators/pool_with_index_op.cc
...
@@ -258,9 +258,10 @@ Example:
 namespace ops = paddle::operators;
-REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
-            ops::MaxPool2dWithIndexOpMaker, max_pool2d_with_index_grad,
-            ops::MaxPoolWithIndexOpGrad);
+REGISTER_OPERATOR(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
+                  ops::MaxPool2dWithIndexOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(max_pool2d_with_index_grad, ops::MaxPoolWithIndexOpGrad)
 REGISTER_OP_CPU_KERNEL(
     max_pool2d_with_index,
...
@@ -274,9 +275,10 @@ REGISTER_OP_CPU_KERNEL(
     ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext,
                                     double, int>)
-REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
-            ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad,
-            ops::MaxPoolWithIndexOpGrad);
+REGISTER_OPERATOR(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
+                  ops::MaxPool3dWithIndexOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(max_pool3d_with_index_grad, ops::MaxPoolWithIndexOpGrad)
 REGISTER_OP_CPU_KERNEL(
     max_pool3d_with_index,
...
paddle/fluid/operators/prelu_op.cc
...
@@ -83,8 +83,9 @@ class PReluGradOp : public framework::OperatorWithKernel {
 namespace ops = paddle::operators;
-REGISTER_OP(prelu, ops::PReluOp, ops::PReluOpMaker, prelu_grad,
-            ops::PReluGradOp);
+REGISTER_OPERATOR(prelu, ops::PReluOp, ops::PReluOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(prelu_grad, ops::PReluGradOp)
 REGISTER_OP_CPU_KERNEL(
     prelu, ops::PReluKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/rank_loss_op.cc
...
@@ -121,8 +121,9 @@ class RankLossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(rank_loss, ops::RankLossOp, ops::RankLossOpMaker, rank_loss_grad,
-            ops::RankLossGradOp);
+REGISTER_OPERATOR(rank_loss, ops::RankLossOp, ops::RankLossOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(rank_loss_grad, ops::RankLossGradOp)
 REGISTER_OP_CPU_KERNEL(
     rank_loss, ops::RankLossKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/reduce_op.cc
...
@@ -14,6 +14,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/reduce_op.h"
+#include <string>
+#include <vector>
 namespace paddle {
 namespace operators {
...
@@ -122,18 +125,18 @@ If reduce_all is true, just reduce along all dimensions and output a scalar.
  protected:
   std::string comment_;

-  void Replace(std::string &src, std::string from, std::string to) {
+  void Replace(std::string *src, std::string from, std::string to) {
     std::size_t len_from = std::strlen(from.c_str());
     std::size_t len_to = std::strlen(to.c_str());
-    for (std::size_t pos = src.find(from); pos != std::string::npos;
-         pos = src.find(from, pos + len_to)) {
-      src.replace(pos, len_from, to);
+    for (std::size_t pos = src->find(from); pos != std::string::npos;
+         pos = src->find(from, pos + len_to)) {
+      src->replace(pos, len_from, to);
     }
   }

   void SetComment(std::string name, std::string op) {
-    Replace(comment_, "{ReduceOp}", name);
-    Replace(comment_, "{reduce}", op);
+    Replace(&comment_, "{ReduceOp}", name);
+    Replace(&comment_, "{reduce}", op);
   }
 };
...
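The Replace helper above now takes its mutated argument as a pointer rather than a non-const reference, following the common C++ style rule that in/out parameters be pointers so mutation is visible at call sites (Replace(&comment_, ...)). A self-contained sketch of the refactored helper with a toy call site:

#include <cstring>
#include <iostream>
#include <string>

// Same logic as the ReduceOpMaker helper: replace every occurrence of
// `from` in *src with `to`, advancing past each replacement.
void Replace(std::string *src, std::string from, std::string to) {
  std::size_t len_from = std::strlen(from.c_str());
  std::size_t len_to = std::strlen(to.c_str());
  for (std::size_t pos = src->find(from); pos != std::string::npos;
       pos = src->find(from, pos + len_to)) {
    src->replace(pos, len_from, to);
  }
}

int main() {
  std::string comment = "{ReduceOp} Operator computes the {reduce} of input.";
  Replace(&comment, "{ReduceOp}", "reduce_sum");
  Replace(&comment, "{reduce}", "sum");
  std::cout << comment << "\n";  // reduce_sum Operator computes the sum of input.
}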
@@ -187,20 +190,25 @@ class ReduceProdOpMaker : public ReduceOpMaker {
 namespace ops = paddle::operators;
-REGISTER_OP(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker, reduce_sum_grad,
-            ops::ReduceGradOp);
+REGISTER_OPERATOR(reduce_sum, ops::ReduceOp, ops::ReduceSumOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(reduce_sum_grad, ops::ReduceGradOp)
-REGISTER_OP(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker,
-            reduce_mean_grad, ops::ReduceGradOp);
+REGISTER_OPERATOR(reduce_mean, ops::ReduceOp, ops::ReduceMeanOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(reduce_mean_grad, ops::ReduceGradOp)
-REGISTER_OP(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker, reduce_max_grad,
-            ops::ReduceGradOp);
+REGISTER_OPERATOR(reduce_max, ops::ReduceOp, ops::ReduceMaxOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(reduce_max_grad, ops::ReduceGradOp)
-REGISTER_OP(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker, reduce_min_grad,
-            ops::ReduceGradOp);
+REGISTER_OPERATOR(reduce_min, ops::ReduceOp, ops::ReduceMinOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(reduce_min_grad, ops::ReduceGradOp)
-REGISTER_OP(reduce_prod, ops::ReduceOp, ops::ReduceProdOpMaker,
-            reduce_prod_grad, ops::ReduceGradOp);
+REGISTER_OPERATOR(reduce_prod, ops::ReduceOp, ops::ReduceProdOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(reduce_prod_grad, ops::ReduceGradOp)
 #define REGISTER_REDUCE_CPU_KERNEL(reduce_type, functor, grad_functor) \
   REGISTER_OP_CPU_KERNEL(reduce_type,                                  \
...
paddle/fluid/operators/reshape_op.cc
...
@@ -113,8 +113,9 @@ class ReshapeGradOp : public framework::OperatorWithKernel {
 namespace ops = paddle::operators;
 using CPU = paddle::platform::CPUDeviceContext;
-REGISTER_OP(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, reshape_grad,
-            ops::ReshapeGradOp);
+REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp)
 REGISTER_OP_CPU_KERNEL(reshape, ops::ReshapeKernel<CPU, float>,
                        ops::ReshapeKernel<CPU, double>,
                        ops::ReshapeKernel<CPU, int>,
...
paddle/fluid/operators/roi_pool_op.cc
...
@@ -153,8 +153,9 @@ https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, roi_pool_grad,
-            ops::ROIPoolGradOp);
+REGISTER_OPERATOR(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(roi_pool_grad, ops::ROIPoolGradOp)
 REGISTER_OP_CPU_KERNEL(
     roi_pool,
     ops::CPUROIPoolOpKernel<paddle::platform::CPUDeviceContext, float>,
...
paddle/fluid/operators/row_conv_op.cc
...
@@ -250,8 +250,9 @@ class RowConvGradKernel<platform::CPUDeviceContext, T>
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(row_conv, ops::RowConvOp, ops::RowConvOpMaker, row_conv_grad,
-            ops::RowConvGradOp);
+REGISTER_OPERATOR(row_conv, ops::RowConvOp, ops::RowConvOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(row_conv_grad, ops::RowConvGradOp)
 REGISTER_OP_CPU_KERNEL(
     row_conv, ops::RowConvKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/scatter_op.cc
...
@@ -102,7 +102,8 @@ $$
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(scatter, ops::ScatterOp, ops::ScatterOpMaker, scatter_grad,
-            ops::ScatterGradOp);
+REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp)
 REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>);
 REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>);
paddle/fluid/operators/sequence_concat_op.cc
...
@@ -124,9 +124,11 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_EX(sequence_concat, ops::SequenceConcatOp,
-               ops::SequenceConcatOpMaker, sequence_concat_grad,
-               ops::SequenceConcatGradOp, false);
+REGISTER_OPERATOR(sequence_concat, ops::SequenceConcatOp,
+                  ops::SequenceConcatOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<
+                      false> /* set false to disable empty grad */)
+REGISTER_OPERATOR(sequence_concat_grad, ops::SequenceConcatGradOp);
 REGISTER_OP_CPU_KERNEL(
     sequence_concat,
     ops::SequenceConcatOpKernel<paddle::platform::CPUDeviceContext, float>);
...
paddle/fluid/operators/sequence_conv_op.cc
...
@@ -14,6 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/sequence_conv_op.h"
+#include <algorithm>
 namespace paddle {
 namespace operators {
...
@@ -174,8 +176,9 @@ context_length, context_stride and context_start.
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker,
-            sequence_conv_grad, ops::SequenceConvGradOp);
+REGISTER_OPERATOR(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(sequence_conv_grad, ops::SequenceConvGradOp)
 REGISTER_OP_CPU_KERNEL(
     sequence_conv,
...
paddle/fluid/operators/sequence_expand_op.cc
...
@@ -200,8 +200,10 @@ class SequenceExpandOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(sequence_expand, ops::SequenceExpandOp, ops::SequenceExpandOpMaker,
-            sequence_expand_grad, ops::SequenceExpandOpGrad);
+REGISTER_OPERATOR(sequence_expand, ops::SequenceExpandOp,
+                  ops::SequenceExpandOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(sequence_expand_grad, ops::SequenceExpandOpGrad)
 REGISTER_OP_CPU_KERNEL(
     sequence_expand,
     ops::SequenceExpandKernel<paddle::platform::CPUDeviceContext, float>,
...
paddle/fluid/operators/sequence_slice_op.cc
...
@@ -120,8 +120,10 @@ NOTE: The first dimension size of input, the size of offset and Length, should b
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker,
-            sequence_slice_grad, ops::SequenceSliceGradOp);
+REGISTER_OPERATOR(sequence_slice, ops::SequenceSliceOp,
+                  ops::SequenceSliceOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(sequence_slice_grad, ops::SequenceSliceGradOp)
 REGISTER_OP_CPU_KERNEL(
     sequence_slice,
     ops::SequenceSliceOpKernel<paddle::platform::CPUDeviceContext, float>);
...
paddle/fluid/operators/sequence_softmax_op.cc
...
@@ -155,9 +155,10 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(sequence_softmax, ops::SequenceSoftmaxOp,
-            ops::SequenceSoftmaxOpMaker, sequence_softmax_grad,
-            ops::SequenceSoftmaxGradOp);
+REGISTER_OPERATOR(sequence_softmax, ops::SequenceSoftmaxOp,
+                  ops::SequenceSoftmaxOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(sequence_softmax_grad, ops::SequenceSoftmaxGradOp)
 REGISTER_OP_CPU_KERNEL(
     sequence_softmax,
     ops::SequenceSoftmaxKernel<paddle::platform::CPUDeviceContext, float>,
...
paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc
...
@@ -135,11 +135,12 @@ However the output only shares the LoD with input `X`.
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(sigmoid_cross_entropy_with_logits,
-            ops::SigmoidCrossEntropyWithLogitsOp,
-            ops::SigmoidCrossEntropyWithLogitsOpMaker,
-            sigmoid_cross_entropy_with_logits_grad,
-            ops::SigmoidCrossEntropyWithLogitsGradOp);
+REGISTER_OPERATOR(sigmoid_cross_entropy_with_logits,
+                  ops::SigmoidCrossEntropyWithLogitsOp,
+                  ops::SigmoidCrossEntropyWithLogitsOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(sigmoid_cross_entropy_with_logits_grad,
+                  ops::SigmoidCrossEntropyWithLogitsGradOp)
 REGISTER_OP_CPU_KERNEL(sigmoid_cross_entropy_with_logits,
                        ops::SigmoidCrossEntropyWithLogitsKernel<
                            paddle::platform::CPUDeviceContext, float>);
...
paddle/fluid/operators/smooth_l1_loss_op.cc
...
@@ -132,8 +132,9 @@ class SmoothL1LossGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker,
-            smooth_l1_loss_grad, ops::SmoothL1LossGradOp);
+REGISTER_OPERATOR(smooth_l1_loss, ops::SmoothL1LossOp, ops::SmoothL1LossOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(smooth_l1_loss_grad, ops::SmoothL1LossGradOp)
 REGISTER_OP_CPU_KERNEL(
     smooth_l1_loss,
     ops::SmoothL1LossKernel<paddle::platform::CPUDeviceContext, float>);
...
paddle/fluid/operators/softmax_op.cc
...
@@ -160,8 +160,9 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
 namespace ops = paddle::operators;
-REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker, softmax_grad,
-            ops::SoftmaxOpGrad);
+REGISTER_OPERATOR(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(softmax_grad, ops::SoftmaxOpGrad)
 REGISTER_OP_CPU_KERNEL(
     softmax, ops::SoftmaxKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/split_byref_op.cc
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/split_byref_op.h"
#include "paddle/fluid/operators/split_op.h"

namespace paddle {
namespace operators {
using framework::Tensor;

class SplitByrefOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SplitOp should not be null.");
    PADDLE_ENFORCE_GE(ctx->Outputs("Out").size(), 1UL,
                      "Outputs(Out) of SplitOp should not be empty.");
    auto in_dims = ctx->GetInputDim("X");
    auto outs_names = ctx->Outputs("Out");
    size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num"));
    std::vector<int> sections = static_cast<std::vector<int>>(
        ctx->Attrs().Get<std::vector<int>>("sections"));
    const size_t outs_number = outs_names.size();
    std::vector<framework::DDim> outs_dims;
    outs_dims.reserve(outs_number);

    if (num > 0) {
      int64_t in_axis_dim = in_dims[0];
      PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
                        "tensor split does not result"
                        " in an equal division");
      size_t out_axis_dim = in_axis_dim / num;
      for (size_t i = 0; i < outs_number; ++i) {
        auto dim = in_dims;
        dim[0] = out_axis_dim;
        outs_dims.push_back(dim);
      }
    } else if (sections.size() > 0) {
      PADDLE_ENFORCE_EQ(sections.size(), outs_number,
                        "tensor split sections size "
                        "should be equal to output size.");
      for (size_t i = 0; i < outs_number; ++i) {
        auto dim = in_dims;
        dim[0] = sections[i];
        outs_dims.push_back(dim);
      }
    }
    ctx->SetOutputsDim("Out", outs_dims);
  }
};

class SplitByrefOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SplitByrefOpMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "(Tensor) Input tensor of the split operator.");
    AddOutput("Out", "(Tensor) Output tensors of the split operator.")
        .AsDuplicable();
    AddComment(R"DOC(
SplitByref operator

Split source tensor to several tensors by axis 0. No copy in this operator
is performed, output tensor shares the same blocks of memory.
)DOC");
    AddAttr<std::vector<int>>("sections",
                              "(vector<int>) "
                              "the length of each output along the "
                              "specified axis.")
        .SetDefault(std::vector<int>{});
    AddAttr<int>("num",
                 "(int, default 0)"
                 "Number of sub-tensors. This must evenly divide "
                 "Input.dims()[axis]")
        .SetDefault(0);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// NOTE: concat op default axis must be 0!
USE_CPU_ONLY_OP(concat);

REGISTER_OPERATOR(split_byref, ops::SplitByrefOp, ops::SplitByrefOpMaker,
                  ops::SplitGradMaker);
REGISTER_OP_CPU_KERNEL(
    split_byref, ops::SplitByrefOpKernel<paddle::platform::CPUPlace, float>);
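SplitByrefOp's InferShape derives each output's axis-0 extent either by dividing evenly into num parts or by reading the explicit sections attribute. A plain-C++ sketch of that dimension arithmetic, detached from the framework types (SplitDims is a hypothetical name used only for illustration):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Given the axis-0 extent of the input, return the axis-0 extent of each
// output: an even num-way split if num > 0, otherwise the explicit sections.
std::vector<int64_t> SplitDims(int64_t in_axis_dim, int num,
                               const std::vector<int> &sections,
                               std::size_t outs_number) {
  std::vector<int64_t> out_dims;
  if (num > 0) {
    assert(in_axis_dim % num == 0);  // split must divide evenly
    for (std::size_t i = 0; i < outs_number; ++i)
      out_dims.push_back(in_axis_dim / num);
  } else {
    assert(sections.size() == outs_number);
    for (std::size_t i = 0; i < outs_number; ++i)
      out_dims.push_back(sections[i]);
  }
  return out_dims;
}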
paddle/fluid/operators/split_byref_op.cu.cc
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/split_byref_op.h"

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    split_byref,
    ops::SplitByrefOpKernel<paddle::platform::CUDADeviceContext, float>);
paddle/fluid/operators/split_byref_op.h
0 → 100644
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <vector>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class SplitByrefOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *in = ctx.Input<framework::Tensor>("X");
    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
    auto place = ctx.GetPlace();

    size_t row_offset = 0;
    for (size_t i = 0; i < outs.size(); ++i) {
      // NOTE: no need to call mutable_data here to allocate memory.
      auto *out = outs[i];
      VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0];
      *out = std::move(in->Slice(row_offset, row_offset + out->dims()[0]));
      row_offset += out->dims()[0];
    }
  }
};

}  // namespace operators
}  // namespace paddle
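The kernel above copies nothing: Tensor::Slice yields a view over rows of the input's allocation, and assigning that view to each output makes the outputs alias the input's memory. A standalone analogy using a hand-rolled view over a contiguous buffer (the View struct is hypothetical, purely for illustration):

#include <cstddef>
#include <iostream>
#include <vector>

// A minimal non-owning view: points into someone else's buffer.
struct View {
  float *data;
  std::size_t rows;
};

int main() {
  std::vector<float> buffer(6, 1.0f);      // a 6-row, 1-column "tensor"
  const std::size_t sections[] = {2, 4};   // split rows into 2 + 4

  // Split by reference, mirroring in->Slice(row_offset, row_offset + rows).
  std::vector<View> outs;
  std::size_t row_offset = 0;
  for (std::size_t rows : sections) {
    outs.push_back(View{buffer.data() + row_offset, rows});
    row_offset += rows;
  }

  outs[1].data[0] = 42.0f;                 // write through the second view
  std::cout << buffer[2] << "\n";          // prints 42: memory is shared
}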
paddle/fluid/operators/split_op.cc
...
@@ -108,21 +108,6 @@ Example:
   }
 };

-class SplitGradMaker : public framework::SingleGradOpDescMaker {
- public:
-  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
-
- protected:
-  std::unique_ptr<framework::OpDesc> Apply() const override {
-    auto op = new framework::OpDesc();
-    op->SetType("concat");
-    op->SetInput("X", OutputGrad("Out"));
-    op->SetOutput("Out", InputGrad("X"));
-    op->SetAttrMap(Attrs());
-    return std::unique_ptr<framework::OpDesc>(op);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
...
paddle/fluid/operators/split_op.h
...
@@ -44,5 +44,20 @@ class SplitOpKernel : public framework::OpKernel<T> {
   }
 };

+class SplitGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto op = new framework::OpDesc();
+    op->SetType("concat");
+    op->SetInput("X", OutputGrad("Out"));
+    op->SetOutput("Out", InputGrad("X"));
+    op->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(op);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/spp_op.cc
...
@@ -92,7 +92,9 @@ class SppOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(spp, ops::SppOp, ops::SppOpMaker, spp_grad, ops::SppOpGrad);
+REGISTER_OPERATOR(spp, ops::SppOp, ops::SppOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(spp_grad, ops::SppOpGrad)
 REGISTER_OP_CPU_KERNEL(
     spp, ops::SppKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SppKernel<paddle::platform::CPUDeviceContext, double>);
...
paddle/fluid/operators/squared_l2_distance_op.cc
...
@@ -109,9 +109,10 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(squared_l2_distance, ops::SquaredL2DistanceOp,
-            ops::SquaredL2DistanceOpMaker, squared_l2_distance_grad,
-            ops::SquaredL2DistanceGradOp);
+REGISTER_OPERATOR(squared_l2_distance, ops::SquaredL2DistanceOp,
+                  ops::SquaredL2DistanceOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(squared_l2_distance_grad, ops::SquaredL2DistanceGradOp)
 REGISTER_OP_CPU_KERNEL(
     squared_l2_distance,
     ops::SquaredL2DistanceKernel<paddle::platform::CPUDeviceContext, float>);
...
paddle/fluid/operators/squared_l2_norm_op.cc
...
@@ -67,8 +67,10 @@ $$Out = \sum_{i} X_{i}^2$$
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(squared_l2_norm, ops::SquaredL2NormOp, ops::SquaredL2NormOpMaker,
-            squared_l2_norm_grad, ops::SquaredL2NormGradOp);
+REGISTER_OPERATOR(squared_l2_norm, ops::SquaredL2NormOp,
+                  ops::SquaredL2NormOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(squared_l2_norm_grad, ops::SquaredL2NormGradOp)
 REGISTER_OP_CPU_KERNEL(
     squared_l2_norm,
     ops::SquaredL2NormKernel<paddle::platform::CPUDeviceContext, float>);
...
paddle/fluid/operators/top_k_op.h
...
@@ -24,7 +24,6 @@ namespace paddle {
 namespace operators {

 using Tensor = framework::Tensor;
-using LoDTensor = framework::LoDTensor;

 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
...
@@ -36,9 +35,9 @@ class TopkKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     // Get the top k elements of each row of input tensor
     // FIXME: only deal with matrix(2d tensor).
-    auto* input = ctx.Input<LoDTensor>("X");
-    auto* output = ctx.Output<LoDTensor>("Out");
-    auto* indices = ctx.Output<LoDTensor>("Indices");
+    auto* input = ctx.Input<Tensor>("X");
+    auto* output = ctx.Output<Tensor>("Out");
+    auto* indices = ctx.Output<Tensor>("Indices");
     // k is determined by Attr
     const size_t k = static_cast<int>(ctx.Attr<int>("k"));
...
paddle/fluid/operators/transpose_op.cc
...
@@ -118,8 +118,9 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(transpose, ops::TransposeOp, ops::TransposeOpMaker, transpose_grad,
-            ops::TransposeOpGrad);
+REGISTER_OPERATOR(transpose, ops::TransposeOp, ops::TransposeOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(transpose_grad, ops::TransposeOpGrad)
 REGISTER_OP_CPU_KERNEL(
     transpose, ops::TransposeKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/operators/unpool_op.cc
...
@@ -132,8 +132,9 @@ class UnpoolOpGrad : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(unpool, ops::UnpoolOp, ops::Unpool2dOpMaker, unpool_grad,
-            ops::UnpoolOpGrad);
+REGISTER_OPERATOR(unpool, ops::UnpoolOp, ops::Unpool2dOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(unpool_grad, ops::UnpoolOpGrad)
 REGISTER_OP_CPU_KERNEL(
     unpool, ops::UnpoolKernel<paddle::platform::CPUDeviceContext, float>,
     ops::UnpoolKernel<paddle::platform::CPUDeviceContext, double>);
...
paddle/fluid/operators/warpctc_op.cc
...
@@ -132,8 +132,9 @@ class WarpCTCGradOp : public framework::OperatorWithKernel {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker, warpctc_grad,
-            ops::WarpCTCGradOp);
+REGISTER_OPERATOR(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>)
+REGISTER_OPERATOR(warpctc_grad, ops::WarpCTCGradOp)
 REGISTER_OP_CPU_KERNEL(
     warpctc, ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, float>);
 REGISTER_OP_CPU_KERNEL(
...
paddle/fluid/platform/nccl_helper.h
...
@@ -39,20 +39,19 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) {
 class NCCLGroupGuard {
  public:
+  static std::mutex &NCCLMutex() {
+    static std::mutex mtx;
+    return mtx;
+  }
+
   inline NCCLGroupGuard() {
-    mutex().lock();
+    NCCLMutex().lock();
     PADDLE_ENFORCE(dynload::ncclGroupStart());
   }

   inline ~NCCLGroupGuard() {
     PADDLE_ENFORCE(dynload::ncclGroupEnd());
-    mutex().unlock();
-  }
-
- private:
-  static std::mutex &mutex() {
-    static std::mutex mtx;
-    return mtx;
+    NCCLMutex().unlock();
   }
 };
...
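Promoting the guard's mutex to a public static accessor lets NCCLContextMap (in the next hunk) hold the same lock around ncclCommInitAll, serializing communicator initialization against NCCL group sections. The accessor itself is the function-local-static ("Meyers") pattern, which C++11 guarantees to initialize thread-safely; a standalone sketch with a hypothetical GlobalNCCLMutex stand-in:

#include <mutex>

// One process-wide mutex, created on first use, with no out-of-line
// global definition needed in the header that declares it.
std::mutex &GlobalNCCLMutex() {
  static std::mutex mtx;
  return mtx;
}

void SerializedNCCLWork() {
  // Anything guarded this way cannot interleave with other holders,
  // e.g. a group start/end pair or communicator initialization.
  std::lock_guard<std::mutex> guard(GlobalNCCLMutex());
  // ... NCCL calls would go here ...
}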
@@ -68,26 +67,6 @@ struct NCCLContext {
   int device_id() const {
     return boost::get<platform::CUDAPlace>(ctx_->GetPlace()).device;
   }
-
-  static void InitNCCLContext(std::unordered_map<int, NCCLContext> *contexts,
-                              const std::vector<platform::Place> &places) {
-    std::vector<ncclComm_t> comms;
-    std::vector<int> devs;
-    comms.resize(contexts->size());
-    devs.reserve(contexts->size());
-    for (auto &p : places) {
-      devs.push_back(boost::get<platform::CUDAPlace>(p).device);
-    }
-    PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
-        &comms[0], static_cast<int>(contexts->size()), &devs[0]));
-    int i = 0;
-    for (auto &dev_id : devs) {
-      contexts->at(dev_id).comm_ = comms[i++];
-    }
-  }
 };

 struct NCCLContextMap {
...
@@ -107,12 +86,12 @@ struct NCCLContextMap {
         "NCCL Context Map does not support contain two or more same device");
     if (places.size() > 1) {
-      std::vector<ncclComm_t> comms;
-      comms.resize(order_.size());
+      std::unique_ptr<ncclComm_t[]> comms(new ncclComm_t[order_.size()]);
       {
         std::lock_guard<std::mutex> guard(NCCLGroupGuard::NCCLMutex());
         PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
-            &comms[0], static_cast<int>(order_.size()), &order_[0]));
+            comms.get(), static_cast<int>(order_.size()), order_.data()));
       }
       int i = 0;
       for (auto &dev_id : order_) {
         contexts_.at(dev_id).comm_ = comms[i++];
...
@@ -120,6 +99,9 @@ struct NCCLContextMap {
       }
     }

+  NCCLContextMap(const NCCLContextMap &other) = delete;
+  NCCLContextMap &operator=(const NCCLContextMap &other) = delete;
+
   CUDADeviceContext *DevCtx(int dev_id) const { return at(dev_id).ctx_.get(); }

   CUDADeviceContext *DevCtx(platform::Place p) const {
...
python/paddle/fluid/distribute_transpiler.py
...
@@ -420,12 +420,13 @@ class DistributeTranspiler:
             # append op to the current block
             per_opt_block = append_block
-            for _, opt_op in enumerate(opt_op_on_pserver):
+            for idx, opt_op in enumerate(opt_op_on_pserver):
                 for _, op in enumerate(self.optimize_ops):
                     # optimizer is connected to itself
                     if ufind.is_connected(op, opt_op) and \
                             op not in global_ops:
                         __append_optimize_op__(op, per_opt_block)
-                per_opt_block = pserver_program.create_block(append_block.idx)
+                if idx == len(opt_op_on_pserver) - 1 and global_ops:
+                    per_opt_block = pserver_program.create_block(append_block.idx)

             # append global ops
...
@@ -824,7 +825,7 @@ class DistributeTranspiler:
         for v in splited_vars:
             sections.append(v.shape[0])
         program.global_block().append_op(
-            type="split",
+            type="split_byref",
             inputs={"X": [orig_var]},
             outputs={"Out": splited_vars},
             attrs={"sections": sections}  # assume split evenly
...
python/paddle/fluid/layers/control_flow.py
...
@@ -32,7 +32,6 @@ __all__ = [
     'Switch',
     'lod_rank_table',
     'max_sequence_len',
-    'topk',
     'lod_tensor_to_array',
     'array_to_lod_tensor',
     'increment',
...
@@ -751,43 +750,6 @@ def max_sequence_len(rank_table):
     return res


-def topk(input, k):
-    """
-    **topk**
-
-    This function performs the operation that selects the k entries in the input
-    vector and outputs their values and indices as vectors. Thus topk_out[j] is
-    the j-th largest entry in input, and its index is topk_indices[j]
-
-    Args:
-        input (Variable|list): The input tensor that has all the data.
-        k (int): The number of top elements that the function will pick.
-
-    Returns:
-        Variable: The variable of type array that contains the k largest entries
-                  from input.
-        Variable: The variable of type array that contains the indices of k
-                  largest entries from input.
-
-    Examples:
-        .. code-block:: python
-
-          x = fluid.layers.data(name='x', shape=[10])
-          k = 5
-          array = fluid.layers.topk(x, k)
-    """
-    helper = LayerHelper('topk', **locals())
-    topk_out = helper.create_tmp_variable(dtype=input.dtype)
-    topk_indices = helper.create_tmp_variable(dtype='int64')
-    helper.append_op(
-        type='top_k',
-        inputs={'X': [input]},
-        outputs={'Out': [topk_out],
-                 'Indices': [topk_indices]},
-        attrs={'k': k})
-    return topk_out, topk_indices
-
-
 def lod_tensor_to_array(x, table):
     """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
...
python/paddle/fluid/layers/learning_rate_scheduler.py
...
@@ -20,7 +20,7 @@ from ..initializer import init_on_cpu
 __all__ = [
     'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
-    'polynomial_decay', 'piecewise_decay'
+    'polynomial_decay', 'piecewise_decay', 'noam_decay'
 ]
 """
 When training a model, it's often useful to decay the
...
@@ -32,14 +32,41 @@ strategy according to this module.
 """


-def _decay_step_counter():
+def _decay_step_counter(begin=0):
     # the first global step is zero in learning rate decay
     global_step = nn.autoincreased_step_counter(
-        counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
+        counter_name='@LR_DECAY_COUNTER@', begin=begin, step=1)
     global_step = tensor.cast(global_step, 'float32')
     return global_step


+def noam_decay(d_model, warmup_steps):
+    """Apply Noam decay to the learning rate.
+    ```python
+    lr_value = np.power(d_model, -0.5) * np.min([
+        np.power(current_steps, -0.5),
+        np.power(warmup_steps, -1.5) * current_steps
+    ])
+    ```
+    Args:
+        d_model(Variable): The dimensionality of input and output of the model.
+            Reference: "Attention Is All You Need",
+            https://arxiv.org/pdf/1706.03762.pdf
+        warmup_steps(Variable): A hyperparameter giving the number of warmup steps.
+
+    Returns:
+        The decayed learning rate.
+    """
+    global_step = _decay_step_counter(1)
+
+    with init_on_cpu():
+        a = global_step**-0.5
+        b = (warmup_steps**-1.5) * global_step
+        lr_value = (d_model**-0.5) * ops.elementwise_min(a, b)
+
+    return lr_value
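Restated in closed form, the schedule that noam_decay builds above is, writing $t$ for the current step:

$$\mathrm{lr}(t) = d_{\mathrm{model}}^{-1/2} \cdot \min\left(t^{-1/2},\; t \cdot \mathrm{warmup\_steps}^{-3/2}\right)$$

For $t \le \mathrm{warmup\_steps}$ the second term is the smaller one, so the rate ramps up linearly in $t$; past the warmup window the first term wins and the rate decays as $t^{-1/2}$.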
 def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     """Applies exponential decay to the learning rate.
...
python/paddle/fluid/layers/metric.py
...
@@ -20,6 +20,7 @@ from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
 from ..param_attr import ParamAttr
+import nn

 __all__ = ['accuracy', 'auc']
...
@@ -27,17 +28,10 @@ __all__ = ['accuracy', 'auc']
 def accuracy(input, label, k=1, correct=None, total=None):
     """
     This function computes the accuracy using the input and label.
-    The output is the top_k inputs and their indices.
+    The output is the topk inputs and their indices.
     """
     helper = LayerHelper("accuracy", **locals())
-    topk_out = helper.create_tmp_variable(dtype=input.dtype)
-    topk_indices = helper.create_tmp_variable(dtype="int64")
-    helper.append_op(
-        type="top_k",
-        inputs={"X": [input]},
-        outputs={"Out": [topk_out],
-                 "Indices": [topk_indices]},
-        attrs={"k": k})
+    topk_out, topk_indices = nn.topk(input, k=k)
     acc_out = helper.create_tmp_variable(dtype="float32")
     if correct is None:
         correct = helper.create_tmp_variable(dtype="int64")
...
@@ -68,12 +62,7 @@ def auc(input, label, curve='ROC', num_thresholds=200):
     helper = LayerHelper("auc", **locals())
-    topk_out = helper.create_tmp_variable(dtype=input.dtype)
-    topk_indices = helper.create_tmp_variable(dtype="int64")
-    helper.append_op(
-        type="top_k",
-        inputs={"X": [input]},
-        outputs={"Out": [topk_out],
-                 "Indices": [topk_indices]},
-        attrs={"k": k})
+    topk_out, topk_indices = nn.topk(input, k=k)
     auc_out = helper.create_tmp_variable(dtype="float32")
     if correct is None:
         correct = helper.create_tmp_variable(dtype="int64")
...
python/paddle/fluid/layers/nn.py
...
@@ -60,6 +60,7 @@ __all__ = [
     'edit_distance',
     'l2_normalize',
     'matmul',
+    'topk',
     'warpctc',
     'sequence_reshape',
     'transpose',
...
@@ -2576,6 +2577,53 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
     return out


+def topk(input, k):
+    """
+    This operator is used to find values and indices of the k largest entries
+    for the last dimension.
+
+    If the input is a vector (rank=1), finds the k largest entries in the vector
+    and outputs their values and indices as vectors. Thus values[j] is the j-th
+    largest entry in input, and its index is indices[j].
+
+    If the input is a Tensor with higher rank, this operator computes the top k
+    entries along the last dimension.
+
+    Args:
+        input(Variable): The input variable which can be a vector or Tensor with
+            higher rank.
+        k(int): An integer value to specify the top k largest elements.
+
+    Returns:
+        values(Variable): The k largest elements along each last dimensional
+            slice.
+        indices(Variable): The indices of values within the last dimension of
+            input.
+
+    Examples:
+        .. code-block:: python
+
+          top5_values, top5_indices = layers.topk(input, k=5)
+    """
+    shape = input.shape
+    if k < 1 or k >= shape[-1]:
+        raise ValueError("k must be greater than 0 and less than %d." %
+                         (shape[-1]))
+
+    helper = LayerHelper("top_k", **locals())
+    values = helper.create_tmp_variable(dtype=input.dtype)
+    indices = helper.create_tmp_variable(dtype="int64")
+    helper.append_op(
+        type="top_k",
+        inputs={"X": [input]},
+        outputs={"Out": [values],
+                 "Indices": [indices]},
+        attrs={"k": k})
+    values.stop_gradient = True
+    indices.stop_gradient = True
+    return values, indices


 def edit_distance(input, label, normalized=True, ignored_tokens=None,
                   name=None):
     """
...
@@ -2717,15 +2765,7 @@ def ctc_greedy_decoder(input, blank, name=None):
         cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0)
     """
     helper = LayerHelper("ctc_greedy_decoder", **locals())
     # top 1 op
-    topk_out = helper.create_tmp_variable(dtype=input.dtype)
-    topk_indices = helper.create_tmp_variable(dtype="int64")
-    helper.append_op(
-        type="top_k",
-        inputs={"X": [input]},
-        outputs={"Out": [topk_out],
-                 "Indices": [topk_indices]},
-        attrs={"k": 1})
+    _, topk_indices = topk(input, k=1)

     # ctc align op
     ctc_out = helper.create_tmp_variable(dtype="int64")
...
python/paddle/fluid/parallel_executor.py
...
@@ -16,6 +16,7 @@ import core
 import multiprocessing
 import framework
 import executor
 import warnings
+import sys

 __all__ = ['ParallelExecutor']
...
@@ -62,8 +63,8 @@ class ParallelExecutor(object):
                 main_program=test_program,
                 share_vars_from=train_exe)

-            train_loss, = train_exe.run([loss.name], feed_dict=feed_dict)
-            test_loss, = test_exe.run([loss.name], feed_dict=feed_dict)
+            train_loss, = train_exe.run([loss.name], feed=feed_dict)
+            test_loss, = test_exe.run([loss.name], feed=feed_dict)
         """
         self._places = []
...
@@ -103,8 +104,8 @@ class ParallelExecutor(object):
         self.persistable_vars = [
             v.name
-            for v in filter(lambda var: \
-                var.persistable and var.type != core.VarDesc.VarType.RAW,
+            for v in filter(
+                lambda var: var.persistable and var.type != core.VarDesc.VarType.RAW,
                 main.list_vars())
         ]
...
@@ -163,7 +164,7 @@ class ParallelExecutor(object):
         Returns: fetched result list.
         """
-        if feed is None:
+        if feed is None and feed_dict is not None:
             feed = feed_dict
             print >> sys.stderr, "`feed_dict` is deprecated. Please use `feed=`"
...
python/paddle/fluid/tests/unittests/test_conv3d_op.py (diff collapsed)
python/paddle/fluid/tests/unittests/test_layers.py (diff collapsed)
python/paddle/fluid/tests/unittests/test_parallel_executor.py (diff collapsed)
python/paddle/fluid/tests/unittests/test_pool2d_op.py (diff collapsed)
python/paddle/fluid/tests/unittests/test_pool3d_op.py (diff collapsed)
python/paddle/fluid/tests/unittests/test_split_op.py (diff collapsed)
python/paddle/v2/dataset/imdb.py (diff collapsed)