Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b9a1c7e2
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b9a1c7e2
编写于
9月 27, 2020
作者:
X
xiemoyuan
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into op_error_message
上级
3a6bc9ad
6b727e08
变更
86
隐藏空白更改
内联
并排
Showing
86 changed file
with
2946 addition
and
481 deletion
+2946
-481
cmake/external/xpu.cmake
cmake/external/xpu.cmake
+1
-1
cmake/operators.cmake
cmake/operators.cmake
+4
-4
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+2
-2
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+22
-10
paddle/fluid/framework/details/broadcast_op_handle_test.h
paddle/fluid/framework/details/broadcast_op_handle_test.h
+32
-9
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+2
-1
paddle/fluid/framework/details/eager_deletion_op_handle.cc
paddle/fluid/framework/details/eager_deletion_op_handle.cc
+26
-14
paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
+69
-16
paddle/fluid/framework/details/fused_broadcast_op_handle.cc
paddle/fluid/framework/details/fused_broadcast_op_handle.cc
+10
-1
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
...fluid/framework/details/fused_broadcast_op_handle_test.cc
+4
-1
paddle/fluid/framework/details/gather_op_handle.cc
paddle/fluid/framework/details/gather_op_handle.cc
+29
-10
paddle/fluid/framework/details/gather_op_handle_test.cc
paddle/fluid/framework/details/gather_op_handle_test.cc
+23
-5
paddle/fluid/framework/details/nccl_op_handle.h
paddle/fluid/framework/details/nccl_op_handle.h
+38
-23
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+32
-17
paddle/fluid/framework/details/op_registry.h
paddle/fluid/framework/details/op_registry.h
+8
-6
paddle/fluid/framework/details/reduce_and_gather.h
paddle/fluid/framework/details/reduce_and_gather.h
+23
-5
paddle/fluid/framework/details/reduce_op_handle.cc
paddle/fluid/framework/details/reduce_op_handle.cc
+44
-13
paddle/fluid/framework/details/reduce_op_handle_test.cc
paddle/fluid/framework/details/reduce_op_handle_test.cc
+32
-10
paddle/fluid/framework/details/share_tensor_buffer_functor.cc
...le/fluid/framework/details/share_tensor_buffer_functor.cc
+4
-5
paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
...le/fluid/framework/details/sparse_all_reduce_op_handle.cc
+75
-24
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+4
-5
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+51
-0
paddle/fluid/operators/activation_op.cu
paddle/fluid/operators/activation_op.cu
+12
-0
paddle/fluid/operators/activation_op.h
paddle/fluid/operators/activation_op.h
+35
-1
paddle/fluid/operators/activation_op_xpu.cc
paddle/fluid/operators/activation_op_xpu.cc
+179
-0
paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
+162
-0
paddle/fluid/operators/elementwise/elementwise_xpu.h
paddle/fluid/operators/elementwise/elementwise_xpu.h
+113
-0
paddle/fluid/operators/matmul_op_xpu.cc
paddle/fluid/operators/matmul_op_xpu.cc
+343
-0
paddle/fluid/operators/mul_op_xpu.cc
paddle/fluid/operators/mul_op_xpu.cc
+1
-1
paddle/fluid/operators/optimizers/dpsgd_op.cc
paddle/fluid/operators/optimizers/dpsgd_op.cc
+24
-11
paddle/fluid/operators/optimizers/dpsgd_op.h
paddle/fluid/operators/optimizers/dpsgd_op.h
+10
-8
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+50
-29
paddle/fluid/operators/optimizers/rmsprop_op.cc
paddle/fluid/operators/optimizers/rmsprop_op.cc
+58
-30
paddle/fluid/operators/optimizers/rmsprop_op.h
paddle/fluid/operators/optimizers/rmsprop_op.h
+26
-11
paddle/fluid/operators/optimizers/sgd_op.cc
paddle/fluid/operators/optimizers/sgd_op.cc
+21
-13
paddle/fluid/operators/optimizers/sgd_op.cu
paddle/fluid/operators/optimizers/sgd_op.cu
+27
-9
paddle/fluid/operators/optimizers/sgd_op.h
paddle/fluid/operators/optimizers/sgd_op.h
+61
-18
paddle/fluid/platform/init_test.cc
paddle/fluid/platform/init_test.cc
+1
-0
paddle/fluid/platform/xpu_header.h
paddle/fluid/platform/xpu_header.h
+27
-0
python/paddle/__init__.py
python/paddle/__init__.py
+2
-1
python/paddle/device.py
python/paddle/device.py
+34
-1
python/paddle/fluid/dygraph/nn.py
python/paddle/fluid/dygraph/nn.py
+2
-5
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+16
-0
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
...n/paddle/fluid/tests/unittests/test_activation_nn_grad.py
+24
-0
python/paddle/fluid/tests/unittests/test_activation_op.py
python/paddle/fluid/tests/unittests/test_activation_op.py
+4
-4
python/paddle/fluid/tests/unittests/test_adamax_api.py
python/paddle/fluid/tests/unittests/test_adamax_api.py
+1
-1
python/paddle/fluid/tests/unittests/test_adamw_op.py
python/paddle/fluid/tests/unittests/test_adamw_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py
.../paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py
+2
-2
python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py
.../paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py
+2
-2
python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py
.../paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py
+2
-2
python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py
.../paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py
+2
-2
python/paddle/fluid/tests/unittests/test_addmm_op.py
python/paddle/fluid/tests/unittests/test_addmm_op.py
+3
-3
python/paddle/fluid/tests/unittests/test_arange.py
python/paddle/fluid/tests/unittests/test_arange.py
+3
-3
python/paddle/fluid/tests/unittests/test_cholesky_op.py
python/paddle/fluid/tests/unittests/test_cholesky_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_clip_op.py
python/paddle/fluid/tests/unittests/test_clip_op.py
+3
-3
python/paddle/fluid/tests/unittests/test_concat_op.py
python/paddle/fluid/tests/unittests/test_concat_op.py
+3
-3
python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py
...addle/fluid/tests/unittests/test_cosine_similarity_api.py
+8
-8
python/paddle/fluid/tests/unittests/test_cumsum_op.py
python/paddle/fluid/tests/unittests/test_cumsum_op.py
+1
-2
python/paddle/fluid/tests/unittests/test_default_dtype.py
python/paddle/fluid/tests/unittests/test_default_dtype.py
+0
-1
python/paddle/fluid/tests/unittests/test_directory_migration.py
.../paddle/fluid/tests/unittests/test_directory_migration.py
+1
-1
python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py
...fluid/tests/unittests/test_flatten_contiguous_range_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_imperative_basic.py
python/paddle/fluid/tests/unittests/test_imperative_basic.py
+2
-2
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
...le/fluid/tests/unittests/test_imperative_selected_rows.py
+1
-1
python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py
python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_jit_save_load.py
python/paddle/fluid/tests/unittests/test_jit_save_load.py
+7
-7
python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_l1_loss.py
python/paddle/fluid/tests/unittests/test_l1_loss.py
+4
-4
python/paddle/fluid/tests/unittests/test_log_softmax.py
python/paddle/fluid/tests/unittests/test_log_softmax.py
+2
-2
python/paddle/fluid/tests/unittests/test_logsumexp.py
python/paddle/fluid/tests/unittests/test_logsumexp.py
+2
-2
python/paddle/fluid/tests/unittests/test_matmul_op.py
python/paddle/fluid/tests/unittests/test_matmul_op.py
+1
-0
python/paddle/fluid/tests/unittests/test_max_op.py
python/paddle/fluid/tests/unittests/test_max_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_maximum_op.py
python/paddle/fluid/tests/unittests/test_maximum_op.py
+4
-4
python/paddle/fluid/tests/unittests/test_mean_op.py
python/paddle/fluid/tests/unittests/test_mean_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_min_op.py
python/paddle/fluid/tests/unittests/test_min_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_mul_op.py
python/paddle/fluid/tests/unittests/test_mul_op.py
+2
-52
python/paddle/fluid/tests/unittests/test_randn_op.py
python/paddle/fluid/tests/unittests/test_randn_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_retain_graph.py
python/paddle/fluid/tests/unittests/test_retain_graph.py
+2
-2
python/paddle/fluid/tests/unittests/test_transformer_api.py
python/paddle/fluid/tests/unittests/test_transformer_api.py
+20
-20
python/paddle/fluid/tests/unittests/test_zeros_like_op.py
python/paddle/fluid/tests/unittests/test_zeros_like_op.py
+1
-1
python/paddle/fluid/tests/unittests/xpu/test_activation_op.py
...on/paddle/fluid/tests/unittests/xpu/test_activation_op.py
+215
-0
python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py
...ddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py
+346
-0
python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py
python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py
+355
-0
python/paddle/fluid/tests/unittests/xpu/test_mul_op.py
python/paddle/fluid/tests/unittests/xpu/test_mul_op.py
+161
-0
python/paddle/tensor/linalg.py
python/paddle/tensor/linalg.py
+6
-12
python/paddle/tensor/math.py
python/paddle/tensor/math.py
+1
-2
tools/wlist.json
tools/wlist.json
+6
-3
未找到文件。
cmake/external/xpu.cmake
浏览文件 @
b9a1c7e2
...
@@ -4,7 +4,7 @@ endif()
...
@@ -4,7 +4,7 @@ endif()
INCLUDE
(
ExternalProject
)
INCLUDE
(
ExternalProject
)
SET
(
XPU_PROJECT
"extern_xpu"
)
SET
(
XPU_PROJECT
"extern_xpu"
)
SET
(
XPU_URL
"https://
kunlun1.su.bcebos.com
/xpu.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_URL
"https://
baidu-kunlun-public.su.bcebos.com/paddle_depence
/xpu.tar.gz"
CACHE STRING
""
FORCE
)
SET
(
XPU_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/xpu"
)
SET
(
XPU_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/xpu"
)
SET
(
XPU_DOWNLOAD_DIR
"
${
XPU_SOURCE_DIR
}
/src/
${
XPU_PROJECT
}
"
)
SET
(
XPU_DOWNLOAD_DIR
"
${
XPU_SOURCE_DIR
}
/src/
${
XPU_PROJECT
}
"
)
SET
(
XPU_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu"
)
SET
(
XPU_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/install/xpu"
)
...
...
cmake/operators.cmake
浏览文件 @
b9a1c7e2
...
@@ -62,9 +62,9 @@ function(op_library TARGET)
...
@@ -62,9 +62,9 @@ function(op_library TARGET)
endif
()
endif
()
endif
()
endif
()
if
(
WITH_XPU
)
if
(
WITH_XPU
)
string
(
REPLACE
"_op"
"_
xpu_op
"
XPU_FILE
"
${
TARGET
}
"
)
string
(
REPLACE
"_op"
"_
op_xpu
"
XPU_FILE
"
${
TARGET
}
"
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
xpu/
${
XPU_FILE
}
.cc
)
if
(
EXISTS
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
XPU_FILE
}
.cc
)
list
(
APPEND xpu_cc_srcs
xpu/
${
XPU_FILE
}
.cc
)
list
(
APPEND xpu_cc_srcs
${
XPU_FILE
}
.cc
)
endif
()
endif
()
endif
()
endif
()
else
()
else
()
...
@@ -83,7 +83,7 @@ function(op_library TARGET)
...
@@ -83,7 +83,7 @@ function(op_library TARGET)
list
(
APPEND mkldnn_cc_srcs
${
src
}
)
list
(
APPEND mkldnn_cc_srcs
${
src
}
)
elseif
(
${
src
}
MATCHES
".*
\\
.cu.cc$"
)
elseif
(
${
src
}
MATCHES
".*
\\
.cu.cc$"
)
list
(
APPEND cu_cc_srcs
${
src
}
)
list
(
APPEND cu_cc_srcs
${
src
}
)
elseif
(
WITH_XPU AND
${
src
}
MATCHES
".*_
xpu_op
.cc$"
)
elseif
(
WITH_XPU AND
${
src
}
MATCHES
".*_
op_xpu
.cc$"
)
list
(
APPEND xpu_cc_srcs
${
src
}
)
list
(
APPEND xpu_cc_srcs
${
src
}
)
elseif
(
${
src
}
MATCHES
".*
\\
.cc$"
)
elseif
(
${
src
}
MATCHES
".*
\\
.cc$"
)
list
(
APPEND cc_srcs
${
src
}
)
list
(
APPEND cc_srcs
${
src
}
)
...
...
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -76,7 +76,7 @@ void AllReduceOpHandle::AllReduceImpl(
...
@@ -76,7 +76,7 @@ void AllReduceOpHandle::AllReduceImpl(
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The NoDummyInputSize should be equal "
"The NoDummyInputSize should be equal "
"to the number of places, but got NoDummyInputSize is "
"to the number of places, but got NoDummyInputSize is "
"%d and the number of place is %d."
,
"%d and the number of place
s
is %d."
,
in_var_handles
.
size
(),
num_places
));
in_var_handles
.
size
(),
num_places
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
out_var_handles
.
size
(),
in_var_handles
.
size
(),
out_var_handles
.
size
(),
...
@@ -89,7 +89,7 @@ void AllReduceOpHandle::AllReduceImpl(
...
@@ -89,7 +89,7 @@ void AllReduceOpHandle::AllReduceImpl(
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The number of local scopes should be equal "
"The number of local scopes should be equal "
"to the number of places, but got the number of local scopes is "
"to the number of places, but got the number of local scopes is "
"%d and the number of place is %d."
,
"%d and the number of place
s
is %d."
,
in_var_handles
.
size
(),
num_places
));
in_var_handles
.
size
(),
num_places
));
std
::
vector
<
const
void
*>
lod_tensor_data
;
std
::
vector
<
const
void
*>
lod_tensor_data
;
...
...
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -31,10 +32,15 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -31,10 +32,15 @@ void BroadcastOpHandle::RunImpl() {
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
1UL
,
"The number of input should be one."
);
platform
::
errors
::
PreconditionNotMet
(
PADDLE_ENFORCE_EQ
(
"The number of inputs should be 1, but got %d."
,
out_var_handles
.
size
(),
places_
.
size
(),
in_var_handles
.
size
()));
"The number of output should equal to the number of places."
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
places_
.
size
(),
platform
::
errors
::
PreconditionNotMet
(
"The number of outputs and the number of places should "
"be equal, but got the number of outputs is %d and the "
"number of places is %d."
,
out_var_handles
.
size
(),
places_
.
size
()));
VarHandle
*
in_var_handle
=
in_var_handles
[
0
];
VarHandle
*
in_var_handle
=
in_var_handles
[
0
];
...
@@ -47,7 +53,9 @@ void BroadcastOpHandle::BroadcastOneVar(
...
@@ -47,7 +53,9 @@ void BroadcastOpHandle::BroadcastOneVar(
const
std
::
vector
<
Scope
*>
&
var_scopes
)
{
const
std
::
vector
<
Scope
*>
&
var_scopes
)
{
auto
*
in_var
=
auto
*
in_var
=
var_scopes
.
at
(
in_var_handle
.
scope_idx
())
->
FindVar
(
in_var_handle
.
name
());
var_scopes
.
at
(
in_var_handle
.
scope_idx
())
->
FindVar
(
in_var_handle
.
name
());
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scopes."
,
in_var_handle
.
name
()));
Tensor
&
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
Tensor
&
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
if
(
UNLIKELY
(
!
in_tensor
.
IsInitialized
()))
{
if
(
UNLIKELY
(
!
in_tensor
.
IsInitialized
()))
{
VLOG
(
3
)
<<
"in var "
<<
in_var_handle
.
name
()
<<
"not inited, return!"
;
VLOG
(
3
)
<<
"in var "
<<
in_var_handle
.
name
()
<<
"not inited, return!"
;
...
@@ -103,7 +111,7 @@ void BroadcastOpHandle::BroadcastOneVar(
...
@@ -103,7 +111,7 @@ void BroadcastOpHandle::BroadcastOneVar(
broadcast_calls
.
emplace_back
(
broadcast_calls
.
emplace_back
(
[
send_recv_buffer
,
numel
,
type
,
root_id
,
&
nccl_ctx
]
{
[
send_recv_buffer
,
numel
,
type
,
root_id
,
&
nccl_ctx
]
{
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclBcast
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclBcast
(
send_recv_buffer
,
numel
,
static_cast
<
ncclDataType_t
>
(
type
),
send_recv_buffer
,
numel
,
static_cast
<
ncclDataType_t
>
(
type
),
root_id
,
nccl_ctx
.
comm_
,
nccl_ctx
.
stream
()));
root_id
,
nccl_ctx
.
comm_
,
nccl_ctx
.
stream
()));
});
});
...
@@ -131,7 +139,8 @@ void BroadcastOpHandle::BroadcastOneVar(
...
@@ -131,7 +139,8 @@ void BroadcastOpHandle::BroadcastOneVar(
nccl_ctxs_
->
DevCtx
(
p
)
->
Wait
();
nccl_ctxs_
->
DevCtx
(
p
)
->
Wait
();
}
}
#else
#else
PADDLE_THROW
(
"CUDA is not enabled."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with NCLL."
));
#endif
#endif
}
}
}
}
...
@@ -154,10 +163,13 @@ void BroadcastOpHandle::InitOutputValue(
...
@@ -154,10 +163,13 @@ void BroadcastOpHandle::InitOutputValue(
auto
t_out_p
=
out_var_handle
->
place
();
auto
t_out_p
=
out_var_handle
->
place
();
auto
*
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx
())
auto
*
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx
())
->
FindVar
(
out_var_handle
->
name
());
->
FindVar
(
out_var_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scopes."
,
out_var_handle
->
name
()));
if
(
is_gpu_place
(
in_tensor
.
place
()))
{
if
(
is_gpu_place
(
in_tensor
.
place
()))
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
t_out_p
),
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
t_out_p
),
true
,
"Places of input and output must be all on GPU."
);
platform
::
errors
::
PreconditionNotMet
(
"Places of input and output must be all on GPU."
));
}
else
{
}
else
{
t_out_p
=
platform
::
CPUPlace
();
t_out_p
=
platform
::
CPUPlace
();
}
}
...
...
paddle/fluid/framework/details/broadcast_op_handle_test.h
浏览文件 @
b9a1c7e2
...
@@ -79,7 +79,8 @@ struct TestBroadcastOpHandle {
...
@@ -79,7 +79,8 @@ struct TestBroadcastOpHandle {
}
}
nccl_ctxs_
.
reset
(
new
platform
::
NCCLContextMap
(
place_list_
));
nccl_ctxs_
.
reset
(
new
platform
::
NCCLContextMap
(
place_list_
));
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with NCLL."
));
#endif
#endif
}
else
{
}
else
{
int
count
=
8
;
int
count
=
8
;
...
@@ -113,7 +114,8 @@ struct TestBroadcastOpHandle {
...
@@ -113,7 +114,8 @@ struct TestBroadcastOpHandle {
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
op_handle_
=
new
BroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
place_list_
,
nccl_ctxs_
.
get
());
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with NCLL."
));
#endif
#endif
}
else
{
}
else
{
#if defined(PADDLE_WITH_NCCL)
#if defined(PADDLE_WITH_NCCL)
...
@@ -171,7 +173,9 @@ struct TestBroadcastOpHandle {
...
@@ -171,7 +173,9 @@ struct TestBroadcastOpHandle {
float
val_scalar
=
0.0
)
{
float
val_scalar
=
0.0
)
{
auto
var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
varname
);
auto
var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
varname
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
varname
));
auto
lod_tensor
=
var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
lod_tensor
=
var
->
GetMutable
<
f
::
LoDTensor
>
();
std
::
vector
<
float
>
send_vector
(
static_cast
<
size_t
>
(
f
::
product
(
kDims
)));
std
::
vector
<
float
>
send_vector
(
static_cast
<
size_t
>
(
f
::
product
(
kDims
)));
for
(
size_t
k
=
0
;
k
<
send_vector
.
size
();
++
k
)
{
for
(
size_t
k
=
0
;
k
<
send_vector
.
size
();
++
k
)
{
...
@@ -194,7 +198,9 @@ struct TestBroadcastOpHandle {
...
@@ -194,7 +198,9 @@ struct TestBroadcastOpHandle {
}
}
auto
var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
varname
);
auto
var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
varname
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
varname
));
auto
selected_rows
=
var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
selected_rows
=
var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
value
=
selected_rows
->
mutable_value
();
auto
value
=
selected_rows
->
mutable_value
();
value
->
mutable_data
<
float
>
(
kDims
,
place_list_
[
input_scope_idx
]);
value
->
mutable_data
<
float
>
(
kDims
,
place_list_
[
input_scope_idx
]);
...
@@ -211,13 +217,24 @@ struct TestBroadcastOpHandle {
...
@@ -211,13 +217,24 @@ struct TestBroadcastOpHandle {
const
std
::
vector
<
float
>&
send_vector
,
const
std
::
vector
<
float
>&
send_vector
,
const
std
::
vector
<
int64_t
>&
rows
,
int
height
)
{
const
std
::
vector
<
int64_t
>&
rows
,
int
height
)
{
auto
var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
varname
);
auto
var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
varname
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
varname
));
auto
&
selected_rows
=
var
->
Get
<
f
::
SelectedRows
>
();
auto
&
selected_rows
=
var
->
Get
<
f
::
SelectedRows
>
();
auto
rt
=
selected_rows
.
value
();
auto
rt
=
selected_rows
.
value
();
PADDLE_ENFORCE_EQ
(
selected_rows
.
height
(),
height
,
"height is not equal."
);
PADDLE_ENFORCE_EQ
(
selected_rows
.
height
(),
height
,
platform
::
errors
::
InvalidArgument
(
"The height of SelectedRows is not equal to "
"the expected, expect %d, but got %ld."
,
height
,
selected_rows
.
height
()));
for
(
size_t
k
=
0
;
k
<
selected_rows
.
rows
().
size
();
++
k
)
{
for
(
size_t
k
=
0
;
k
<
selected_rows
.
rows
().
size
();
++
k
)
{
PADDLE_ENFORCE_EQ
(
selected_rows
.
rows
()[
k
],
rows
[
k
]);
PADDLE_ENFORCE_EQ
(
selected_rows
.
rows
()[
k
],
rows
[
k
],
platform
::
errors
::
InvalidArgument
(
"The item at position %zu of rows of SelectedRows "
"is not equal to the expected, expect %ld, but got %ld."
,
k
,
rows
[
k
],
selected_rows
.
rows
()[
k
]));
}
}
p
::
CPUPlace
cpu_place
;
p
::
CPUPlace
cpu_place
;
...
@@ -235,9 +252,15 @@ struct TestBroadcastOpHandle {
...
@@ -235,9 +252,15 @@ struct TestBroadcastOpHandle {
framework
::
Scope
*
scope
)
{
framework
::
Scope
*
scope
)
{
p
::
CPUPlace
cpu_place
;
p
::
CPUPlace
cpu_place
;
auto
var
=
scope
->
FindVar
(
varname
);
auto
var
=
scope
->
FindVar
(
varname
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
varname
));
auto
tensor
=
var
->
Get
<
f
::
LoDTensor
>
();
auto
tensor
=
var
->
Get
<
f
::
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
tensor
.
lod
(),
lod
,
"lod is not equal."
);
PADDLE_ENFORCE_EQ
(
tensor
.
lod
(),
lod
,
platform
::
errors
::
InvalidArgument
(
"The LoD of tensor is not equal to "
"the expected, expect %s, but got %s."
,
lod
,
tensor
.
lod
()));
f
::
Tensor
result_tensor
;
f
::
Tensor
result_tensor
;
f
::
TensorCopySync
(
tensor
,
cpu_place
,
&
result_tensor
);
f
::
TensorCopySync
(
tensor
,
cpu_place
,
&
result_tensor
);
float
*
ct
=
result_tensor
.
mutable_data
<
float
>
(
cpu_place
);
float
*
ct
=
result_tensor
.
mutable_data
<
float
>
(
cpu_place
);
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
b9a1c7e2
...
@@ -235,7 +235,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
...
@@ -235,7 +235,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
AppendPass
(
"reduce_mode_multi_devices_pass"
).
get
();
AppendPass
(
"reduce_mode_multi_devices_pass"
).
get
();
break
;
break
;
default:
default:
PADDLE_THROW
(
"Unknown reduce strategy."
);
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Unknown reduce strategy."
));
}
}
}
}
multi_devices_pass
->
SetNotOwned
<
const
BuildStrategy
>
(
"strategy"
,
multi_devices_pass
->
SetNotOwned
<
const
BuildStrategy
>
(
"strategy"
,
...
...
paddle/fluid/framework/details/eager_deletion_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -12,11 +12,12 @@
...
@@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
#include <memory>
#include <memory>
#include <unordered_set>
#include <unordered_set>
#include <utility>
#include <utility>
#include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
...
@@ -47,15 +48,19 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
...
@@ -47,15 +48,19 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
if
(
dynamic_cast
<
StreamGarbageCollector
*>
(
gc_
))
{
if
(
dynamic_cast
<
StreamGarbageCollector
*>
(
gc_
))
{
platform
::
CUDADeviceGuard
guard
(
platform
::
CUDADeviceGuard
guard
(
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
);
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
);
PADDLE_ENFORCE
(
cudaEventCreateWithFlags
(
&
event_
,
cudaEventDisableTiming
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
PADDLE_ENFORCE_NOT_NULL
(
event_
);
cudaEventCreateWithFlags
(
&
event_
,
cudaEventDisableTiming
));
PADDLE_ENFORCE_NOT_NULL
(
event_
,
platform
::
errors
::
InvalidArgument
(
"The cuda envet created is NULL."
));
}
}
}
}
#endif
#endif
PADDLE_ENFORCE_NE
(
vars
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
PADDLE_ENFORCE_NE
(
vars
.
empty
(),
true
,
"Variable names are empty."
));
platform
::
errors
::
InvalidArgument
(
"The variables to be deleted are empty."
));
for
(
auto
*
var
:
var_infos_
)
{
for
(
auto
*
var
:
var_infos_
)
{
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
InvalidArgument
(
"The memory optimization info is NULL."
));
}
}
}
}
...
@@ -64,7 +69,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
...
@@ -64,7 +69,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
if
(
event_
)
{
if
(
event_
)
{
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
dev_ctx_
->
GetPlace
());
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
dev_ctx_
->
GetPlace
());
platform
::
CUDADeviceGuard
guard
(
gpu_place
.
device
);
platform
::
CUDADeviceGuard
guard
(
gpu_place
.
device
);
PADDLE_ENFORCE
(
cudaEventDestroy
(
event_
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventDestroy
(
event_
));
}
}
#endif
#endif
}
}
...
@@ -78,12 +83,17 @@ void EagerDeletionOpHandle::InitCUDA() {
...
@@ -78,12 +83,17 @@ void EagerDeletionOpHandle::InitCUDA() {
}
}
void
EagerDeletionOpHandle
::
CallOnce
()
{
void
EagerDeletionOpHandle
::
CallOnce
()
{
PADDLE_ENFORCE
(
vars_
.
empty
(),
"vars_ must be initialized here"
);
PADDLE_ENFORCE_EQ
(
vars_
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The variables to be deleted should be initialized here."
));
Scope
*
exec_scope
=
local_exec_scopes_
[
0
];
Scope
*
exec_scope
=
local_exec_scopes_
[
0
];
for
(
auto
*
var_info
:
var_infos_
)
{
for
(
auto
*
var_info
:
var_infos_
)
{
auto
*
var
=
exec_scope
->
FindVar
(
var_info
->
Name
());
auto
*
var
=
exec_scope
->
FindVar
(
var_info
->
Name
());
PADDLE_ENFORCE_NOT_NULL
(
var
,
"Variable %s should not be nullptr"
,
PADDLE_ENFORCE_NOT_NULL
(
var_info
->
Name
());
var
,
platform
::
errors
::
NotFound
(
"The variable(%s) to be inplaced is not found in scope."
,
var_info
->
Name
()));
vars_
.
emplace_back
(
var
);
vars_
.
emplace_back
(
var
);
}
}
}
}
...
@@ -119,8 +129,9 @@ void EagerDeletionOpHandle::RunImpl() {
...
@@ -119,8 +129,9 @@ void EagerDeletionOpHandle::RunImpl() {
garbages
.
emplace_back
(
t
.
MoveMemoryHolder
());
garbages
.
emplace_back
(
t
.
MoveMemoryHolder
());
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Type %s of %s is not supported eager deletion"
,
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
framework
::
ToTypeName
(
var
->
Type
()),
var_info
->
Name
());
"The variable(%s) of type %s is not supported in eager deletion."
,
framework
::
ToTypeName
(
var
->
Type
()),
var_info
->
Name
()));
}
}
}
}
...
@@ -137,8 +148,9 @@ void EagerDeletionOpHandle::ClearGarbages(
...
@@ -137,8 +148,9 @@ void EagerDeletionOpHandle::ClearGarbages(
auto
callback_stream
=
auto
callback_stream
=
reinterpret_cast
<
StreamGarbageCollector
*>
(
gc_
)
->
stream
();
reinterpret_cast
<
StreamGarbageCollector
*>
(
gc_
)
->
stream
();
auto
callback_func
=
[
=
]()
{
auto
callback_func
=
[
=
]()
{
PADDLE_ENFORCE
(
cudaEventRecord
(
event_
,
compute_stream
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventRecord
(
event_
,
compute_stream
));
PADDLE_ENFORCE
(
cudaStreamWaitEvent
(
callback_stream
,
event_
,
0
));
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamWaitEvent
(
callback_stream
,
event_
,
0
));
};
};
gc_
->
Add
(
std
::
move
(
*
garbages
),
callback_func
);
gc_
->
Add
(
std
::
move
(
*
garbages
),
callback_func
);
}
else
{
}
else
{
...
...
paddle/fluid/framework/details/fused_all_reduce_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -12,8 +12,10 @@
...
@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
#include <algorithm>
#include <algorithm>
#include <utility>
#include <utility>
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
...
@@ -56,10 +58,20 @@ void FusedAllReduceOpHandle::RunImpl() {
...
@@ -56,10 +58,20 @@ void FusedAllReduceOpHandle::RunImpl() {
size_t
place_num
=
places_
.
size
();
size_t
place_num
=
places_
.
size
();
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
place_num
*
num_of_all_reduce_
,
in_var_handles
.
size
(),
place_num
*
num_of_all_reduce_
,
"The NoDummyInputSize should be equal to the number of places."
);
platform
::
errors
::
PreconditionNotMet
(
"The number of input variable handles should be equal to the number "
"of places plus the number of all reduce handles, "
"but got the number of input variable handles is %d, the "
"number of places is %d, and the number of all reduce handles "
"is %d."
,
in_var_handles
.
size
(),
place_num
,
num_of_all_reduce_
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
out_var_handles
.
size
(),
in_var_handles
.
size
(),
out_var_handles
.
size
(),
"The NoDummyInputSize and NoDummyOutputSize should be equal."
);
platform
::
errors
::
PreconditionNotMet
(
"The number of input variable handles should be equal to the number "
"of output variable handles, but got the number of input variable "
"handles is %d, and the number of output variable handles is %d."
,
in_var_handles
.
size
(),
out_var_handles
.
size
()));
// Note: some gradient op doesn't have CUDAKernel, so the gradients of
// Note: some gradient op doesn't have CUDAKernel, so the gradients of
// those op are in CPUPlace, in this case, the all reduce should not be fused.
// those op are in CPUPlace, in this case, the all reduce should not be fused.
...
@@ -106,7 +118,13 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
...
@@ -106,7 +118,13 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
dtype
=
ele_dtype
;
dtype
=
ele_dtype
;
}
}
PADDLE_ENFORCE_EQ
(
ele_dtype
,
dtype
);
PADDLE_ENFORCE_EQ
(
ele_dtype
,
dtype
,
platform
::
errors
::
InvalidArgument
(
"The DataType of grad tensors of fused_all_reduce_op_handle "
"must be consistent. The current dtype is %s, but the "
"previous dtype is %s."
,
DataTypeToString
(
ele_dtype
),
DataTypeToString
(
dtype
)));
// Check whether the address space is contiguous.
// Check whether the address space is contiguous.
std
::
sort
(
std
::
sort
(
...
@@ -130,16 +148,29 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
...
@@ -130,16 +148,29 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
"input[%d] address: 0X%02x. The offset: %d"
,
"input[%d] address: 0X%02x. The offset: %d"
,
k
-
1
,
g_tensor
.
at
(
k
-
1
).
first
,
cur_address
,
g_tensor
.
at
(
k
).
first
,
k
,
k
-
1
,
g_tensor
.
at
(
k
-
1
).
first
,
cur_address
,
g_tensor
.
at
(
k
).
first
,
k
,
next_address
,
k
,
infer_next_address
,
offset
);
next_address
,
k
,
infer_next_address
,
offset
);
PADDLE_ENFORCE_EQ
(
infer_next_address
,
next_address
,
PADDLE_ENFORCE_EQ
(
"The address is not consistent."
);
infer_next_address
,
next_address
,
platform
::
errors
::
InvalidArgument
(
"The infered address of the next tensor should be equal to the "
"real address of the next tensor. But got infered address is %p "
"and real address is %p."
,
infer_next_address
,
next_address
));
}
}
}
}
if
(
!
FLAGS_skip_fused_all_reduce_check
)
{
if
(
!
FLAGS_skip_fused_all_reduce_check
)
{
for
(
size_t
scope_idx
=
0
;
scope_idx
<
place_num
;
++
scope_idx
)
{
for
(
size_t
scope_idx
=
0
;
scope_idx
<
place_num
;
++
scope_idx
)
{
for
(
size_t
j
=
1
;
j
<
num_of_all_reduce_
;
++
j
)
{
for
(
size_t
j
=
1
;
j
<
num_of_all_reduce_
;
++
j
)
{
PADDLE_ENFORCE_EQ
(
grads_tensor
.
at
(
0
).
at
(
j
).
first
,
PADDLE_ENFORCE_EQ
(
grads_tensor
.
at
(
scope_idx
).
at
(
j
).
first
);
grads_tensor
.
at
(
0
).
at
(
j
).
first
,
grads_tensor
.
at
(
scope_idx
).
at
(
j
).
first
,
platform
::
errors
::
InvalidArgument
(
"The variable name of grad tensors of "
"fused_all_reduce_op_handle "
"must be consistent. The current name is %s, but the "
"previous name is %s."
,
grads_tensor
.
at
(
0
).
at
(
j
).
first
,
grads_tensor
.
at
(
scope_idx
).
at
(
j
).
first
));
}
}
}
}
}
}
...
@@ -167,7 +198,9 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace(
...
@@ -167,7 +198,9 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace(
for
(
size_t
j
=
0
;
j
<
in_var_handles
.
size
();
j
+=
place_num
)
{
for
(
size_t
j
=
0
;
j
<
in_var_handles
.
size
();
j
+=
place_num
)
{
auto
var_name
=
in_var_handles
[
j
]
->
name
();
auto
var_name
=
in_var_handles
[
j
]
->
name
();
auto
var
=
local_scope
->
FindVar
(
var_name
);
auto
var
=
local_scope
->
FindVar
(
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"%s is not found in local scope."
,
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in local scope."
,
var_name
));
auto
&
lod_tensor
=
var
->
Get
<
LoDTensor
>
();
auto
&
lod_tensor
=
var
->
Get
<
LoDTensor
>
();
if
(
!
is_same_place
(
lod_tensor
.
place
(),
places_
.
at
(
scope_idx
)))
{
if
(
!
is_same_place
(
lod_tensor
.
place
(),
places_
.
at
(
scope_idx
)))
{
return
true
;
return
true
;
...
@@ -185,14 +218,24 @@ void FusedAllReduceOpHandle::GetGradLoDTensor(
...
@@ -185,14 +218,24 @@ void FusedAllReduceOpHandle::GetGradLoDTensor(
size_t
place_num
=
places_
.
size
();
size_t
place_num
=
places_
.
size
();
for
(
size_t
j
=
0
;
j
<
in_var_handles
.
size
();
j
+=
place_num
)
{
for
(
size_t
j
=
0
;
j
<
in_var_handles
.
size
();
j
+=
place_num
)
{
auto
var_name
=
in_var_handles
[
j
]
->
name
();
auto
var_name
=
in_var_handles
[
j
]
->
name
();
PADDLE_ENFORCE_EQ
(
var_name
,
out_var_handles
[
j
]
->
name
());
PADDLE_ENFORCE_EQ
(
var_name
,
out_var_handles
[
j
]
->
name
(),
platform
::
errors
::
InvalidArgument
(
"The name of input variable should be equal "
"to the name of output variable. But got the name of input "
"variable is %s and the name of output variable is %s."
,
var_name
,
out_var_handles
[
j
]
->
name
()));
auto
var
=
local_scope
->
FindVar
(
var_name
);
auto
var
=
local_scope
->
FindVar
(
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
"%s is not found in local scope."
,
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in local scope."
,
var_name
));
auto
&
lod_tensor
=
var
->
Get
<
LoDTensor
>
();
auto
&
lod_tensor
=
var
->
Get
<
LoDTensor
>
();
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
platform
::
is_same_place
(
lod_tensor
.
place
(),
places_
.
at
(
scope_idx
)),
platform
::
is_same_place
(
lod_tensor
.
place
(),
places_
.
at
(
scope_idx
)),
true
,
"%s(%d) is not in the right place."
,
var_name
,
scope_idx
);
true
,
platform
::
errors
::
InvalidArgument
(
"The variable '%s' at scope %d is not in the right place."
,
var_name
,
scope_idx
));
grad_tensor
->
emplace_back
(
std
::
make_pair
(
var_name
,
&
lod_tensor
));
grad_tensor
->
emplace_back
(
std
::
make_pair
(
var_name
,
&
lod_tensor
));
}
}
}
}
...
@@ -204,16 +247,26 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
...
@@ -204,16 +247,26 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
size_t
size_of_dtype
=
0
;
size_t
size_of_dtype
=
0
;
for
(
size_t
i
=
0
;
i
<
grad_tensor
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
grad_tensor
.
size
();
++
i
)
{
// Get dtype
// Get dtype
auto
ele_type
=
grad_tensor
.
at
(
i
).
second
->
type
();
auto
ele_
d
type
=
grad_tensor
.
at
(
i
).
second
->
type
();
if
(
i
==
0
)
{
if
(
i
==
0
)
{
*
dtype
=
ele_type
;
*
dtype
=
ele_
d
type
;
size_of_dtype
=
framework
::
SizeOfType
(
ele_type
);
size_of_dtype
=
framework
::
SizeOfType
(
ele_
d
type
);
}
}
PADDLE_ENFORCE_EQ
(
ele_type
,
*
dtype
);
PADDLE_ENFORCE_EQ
(
ele_dtype
,
*
dtype
,
platform
::
errors
::
InvalidArgument
(
"The DataType of grad tensors of fused_all_reduce_op_handle "
"must be consistent. The current dtype is %s, but the "
"previous dtype is %s."
,
DataTypeToString
(
ele_dtype
),
DataTypeToString
(
*
dtype
)));
// Get element number
// Get element number
int64_t
len
=
grad_tensor
.
at
(
i
).
second
->
numel
();
int64_t
len
=
grad_tensor
.
at
(
i
).
second
->
numel
();
PADDLE_ENFORCE_GT
(
len
,
0
);
PADDLE_ENFORCE_GT
(
len
,
0
,
platform
::
errors
::
InvalidArgument
(
"The size of grad tensors of fused_all_reduce_op_handle "
"must be > 0, but got %d."
,
len
));
*
numel
+=
*
numel
+=
platform
::
Alignment
(
len
*
size_of_dtype
,
places_
[
0
])
/
size_of_dtype
;
platform
::
Alignment
(
len
*
size_of_dtype
,
places_
[
0
])
/
size_of_dtype
;
}
}
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
#include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -32,7 +33,15 @@ void FusedBroadcastOpHandle::RunImpl() {
...
@@ -32,7 +33,15 @@ void FusedBroadcastOpHandle::RunImpl() {
WaitInputVarGenerated
();
WaitInputVarGenerated
();
size_t
place_num
=
places_
.
size
();
size_t
place_num
=
places_
.
size
();
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
()
*
place_num
,
out_var_handles
.
size
());
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
()
*
place_num
,
out_var_handles
.
size
(),
platform
::
errors
::
PreconditionNotMet
(
"The number of input variable handles plus the number "
"of places should be equal to the number of output variable handles, "
"but got the number of input variable handles is %d, the "
"number of places is %d, and the number of output variable handles "
"is %d."
,
in_var_handles
.
size
(),
place_num
,
out_var_handles
.
size
()));
for
(
size_t
i
=
0
;
i
<
in_var_handles
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
in_var_handles
.
size
();
++
i
)
{
BroadcastOneVar
(
BroadcastOneVar
(
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
浏览文件 @
b9a1c7e2
...
@@ -13,8 +13,10 @@
...
@@ -13,8 +13,10 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
#include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
#include <memory>
#include <memory>
#include <unordered_map>
#include <unordered_map>
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/framework/details/broadcast_op_handle_test.h"
#include "paddle/fluid/framework/details/broadcast_op_handle_test.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
...
@@ -58,7 +60,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
...
@@ -58,7 +60,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
op_handle_
=
new
FusedBroadcastOpHandle
(
op_handle_
=
new
FusedBroadcastOpHandle
(
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
nodes_
.
back
().
get
(),
local_scopes_
,
place_list_
,
nccl_ctxs_
.
get
());
#else
#else
PADDLE_THROW
(
"CUDA is not supported."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with CUDA."
));
#endif
#endif
}
else
{
}
else
{
#if defined(PADDLE_WITH_NCCL)
#if defined(PADDLE_WITH_NCCL)
...
...
paddle/fluid/framework/details/gather_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/gather_op_handle.h"
#include "paddle/fluid/framework/details/gather_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
...
@@ -32,13 +33,20 @@ void GatherOpHandle::RunImpl() {
...
@@ -32,13 +33,20 @@ void GatherOpHandle::RunImpl() {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
places_
.
size
(),
in_var_handles
.
size
(),
places_
.
size
(),
"The number of output should equal to the number of places."
);
platform
::
errors
::
InvalidArgument
(
"The number of input variables should be equal "
"to the number of places, but got the number of input variables is "
"%d and the number of places is %d."
,
in_var_handles
.
size
(),
places_
.
size
()));
VarHandle
*
out_var_handle
;
VarHandle
*
out_var_handle
;
{
{
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
1
,
PADDLE_ENFORCE_EQ
(
"The number of output should be one."
);
out_var_handles
.
size
(),
1
,
platform
::
errors
::
InvalidArgument
(
"The number of output variables should be 1, but got %d."
,
out_var_handles
.
size
()));
out_var_handle
=
out_var_handles
.
front
();
out_var_handle
=
out_var_handles
.
front
();
}
}
...
@@ -47,10 +55,14 @@ void GatherOpHandle::RunImpl() {
...
@@ -47,10 +55,14 @@ void GatherOpHandle::RunImpl() {
auto
in_0_handle
=
in_var_handles
[
0
];
auto
in_0_handle
=
in_var_handles
[
0
];
auto
pre_in_var
=
auto
pre_in_var
=
var_scopes
.
at
(
in_0_handle
->
scope_idx
())
->
FindVar
(
in_0_handle
->
name
());
var_scopes
.
at
(
in_0_handle
->
scope_idx
())
->
FindVar
(
in_0_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
pre_in_var
);
PADDLE_ENFORCE_NOT_NULL
(
pre_in_var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in the scope."
,
in_0_handle
->
name
()));
PADDLE_ENFORCE
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
(),
PADDLE_ENFORCE_EQ
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
(),
true
,
"Currently, gather_op only can gather SelectedRows."
);
platform
::
errors
::
Unimplemented
(
"Currently, gather_op only supports SelectedRows."
));
// Wait input done, this Wait is asynchronous operation
// Wait input done, this Wait is asynchronous operation
WaitInputVarGenerated
();
WaitInputVarGenerated
();
...
@@ -63,7 +75,10 @@ void GatherOpHandle::RunImpl() {
...
@@ -63,7 +75,10 @@ void GatherOpHandle::RunImpl() {
for
(
auto
*
in_handle
:
in_var_handles
)
{
for
(
auto
*
in_handle
:
in_var_handles
)
{
auto
*
in_var
=
auto
*
in_var
=
var_scopes
.
at
(
in_handle
->
scope_idx
())
->
FindVar
(
in_handle
->
name
());
var_scopes
.
at
(
in_handle
->
scope_idx
())
->
FindVar
(
in_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in the scope."
,
in_handle
->
name
()));
VariableVisitor
::
EnforceShapeAndDTypeEQ
(
*
in_var
,
*
pre_in_var
);
VariableVisitor
::
EnforceShapeAndDTypeEQ
(
*
in_var
,
*
pre_in_var
);
auto
&
in_sr_value
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
auto
&
in_sr_value
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
...
@@ -76,15 +91,19 @@ void GatherOpHandle::RunImpl() {
...
@@ -76,15 +91,19 @@ void GatherOpHandle::RunImpl() {
// NOTE: The Places of all input tensor must be all on CPU or all on GPU.
// NOTE: The Places of all input tensor must be all on CPU or all on GPU.
platform
::
Place
t_out_p
=
out_var_handle
->
place
();
platform
::
Place
t_out_p
=
out_var_handle
->
place
();
if
(
platform
::
is_gpu_place
(
pre_in_value
.
place
()))
{
if
(
platform
::
is_gpu_place
(
pre_in_value
.
place
()))
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
t_out_p
),
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
t_out_p
),
true
,
"Places of input and output must be all on GPU."
);
platform
::
errors
::
PreconditionNotMet
(
"Places of input and output must be all on GPU."
));
}
else
{
}
else
{
t_out_p
=
platform
::
CPUPlace
();
t_out_p
=
platform
::
CPUPlace
();
}
}
auto
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx
())
auto
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx
())
->
FindVar
(
out_var_handle
->
name
());
->
FindVar
(
out_var_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in the scope."
,
out_var_handle
->
name
()));
auto
out_value
=
out_var
->
GetMutable
<
framework
::
SelectedRows
>
();
auto
out_value
=
out_var
->
GetMutable
<
framework
::
SelectedRows
>
();
out_value
->
set_height
(
pre_in_value
.
height
());
out_value
->
set_height
(
pre_in_value
.
height
());
out_value
->
set_rows
(
out_rows
);
out_value
->
set_rows
(
out_rows
);
...
...
paddle/fluid/framework/details/gather_op_handle_test.cc
浏览文件 @
b9a1c7e2
...
@@ -13,8 +13,10 @@
...
@@ -13,8 +13,10 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/gather_op_handle.h"
#include "paddle/fluid/framework/details/gather_op_handle.h"
#include <memory>
#include <memory>
#include <unordered_map>
#include <unordered_map>
#include "gtest/gtest.h"
#include "gtest/gtest.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -60,7 +62,8 @@ struct TestGatherOpHandle {
...
@@ -60,7 +62,8 @@ struct TestGatherOpHandle {
ctxs_
.
emplace_back
(
new
p
::
CUDADeviceContext
(
p
));
ctxs_
.
emplace_back
(
new
p
::
CUDADeviceContext
(
p
));
}
}
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with CUDA."
));
#endif
#endif
}
else
{
}
else
{
int
count
=
8
;
int
count
=
8
;
...
@@ -141,7 +144,9 @@ struct TestGatherOpHandle {
...
@@ -141,7 +144,9 @@ struct TestGatherOpHandle {
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
++
input_scope_idx
)
{
++
input_scope_idx
)
{
auto
in_var
=
param_scopes_
.
at
(
input_scope_idx
)
->
FindVar
(
"input"
);
auto
in_var
=
param_scopes_
.
at
(
input_scope_idx
)
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in the scope."
,
"input"
));
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
value
=
in_selected_rows
->
mutable_value
();
auto
value
=
in_selected_rows
->
mutable_value
();
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
...
@@ -155,7 +160,9 @@ struct TestGatherOpHandle {
...
@@ -155,7 +160,9 @@ struct TestGatherOpHandle {
}
}
auto
out_var
=
param_scopes_
.
at
(
output_scope_idx
)
->
FindVar
(
"out"
);
auto
out_var
=
param_scopes_
.
at
(
output_scope_idx
)
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
platform
::
errors
::
NotFound
(
"The variable '%s' is not found in the scope."
,
"out"
));
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_var
=
param_scopes_
.
at
(
output_scope_idx
)
->
FindVar
(
"input"
);
auto
in_var
=
param_scopes_
.
at
(
output_scope_idx
)
->
FindVar
(
"input"
);
...
@@ -173,9 +180,19 @@ struct TestGatherOpHandle {
...
@@ -173,9 +180,19 @@ struct TestGatherOpHandle {
auto
&
out_select_rows
=
out_var
->
Get
<
f
::
SelectedRows
>
();
auto
&
out_select_rows
=
out_var
->
Get
<
f
::
SelectedRows
>
();
auto
rt
=
out_select_rows
.
value
();
auto
rt
=
out_select_rows
.
value
();
PADDLE_ENFORCE_EQ
(
out_select_rows
.
height
(),
height
,
"height is not equal."
);
PADDLE_ENFORCE_EQ
(
out_select_rows
.
height
(),
height
,
platform
::
errors
::
InvalidArgument
(
"The height of SelectedRows is not equal to "
"the expected, expect %d, but got %d."
,
height
,
out_select_rows
.
height
()));
for
(
size_t
k
=
0
;
k
<
out_select_rows
.
rows
().
size
();
++
k
)
{
for
(
size_t
k
=
0
;
k
<
out_select_rows
.
rows
().
size
();
++
k
)
{
PADDLE_ENFORCE_EQ
(
out_select_rows
.
rows
()[
k
],
rows
[
k
%
rows
.
size
()]);
PADDLE_ENFORCE_EQ
(
out_select_rows
.
rows
()[
k
],
rows
[
k
%
rows
.
size
()],
platform
::
errors
::
InvalidArgument
(
"The item at position %d of rows of SelectedRows is not equal to "
"the expected, expect %d, but got %d."
,
k
,
rows
[
k
%
rows
.
size
()],
out_select_rows
.
rows
()[
k
]));
}
}
f
::
Tensor
result_tensor
;
f
::
Tensor
result_tensor
;
...
@@ -207,6 +224,7 @@ TEST(GatherTester, TestGPUGatherTestSelectedRows) {
...
@@ -207,6 +224,7 @@ TEST(GatherTester, TestGPUGatherTestSelectedRows) {
test_op
.
TestGatherSelectedRows
(
input_scope_idx
);
test_op
.
TestGatherSelectedRows
(
input_scope_idx
);
}
}
#endif
#endif
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/details/nccl_op_handle.h
浏览文件 @
b9a1c7e2
...
@@ -46,14 +46,17 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -46,14 +46,17 @@ class NCCLOpHandleBase : public OpHandleBase {
}
}
virtual
~
NCCLOpHandleBase
()
{
virtual
~
NCCLOpHandleBase
()
{
for
(
auto
&
ev
:
inter_events_
)
{
for
(
auto
&
ev
:
inter_events_
)
{
PADDLE_ENFORCE
(
cudaEventDestroy
(
ev
.
second
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventDestroy
(
ev
.
second
));
}
}
for
(
auto
&
ev
:
exter_events_
)
{
for
(
auto
&
ev
:
exter_events_
)
{
PADDLE_ENFORCE
(
cudaEventDestroy
(
ev
.
second
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventDestroy
(
ev
.
second
));
}
}
}
}
void
SetRunEnv
(
int
run_order
,
bool
use_hierarchical_allreduce
)
{
void
SetRunEnv
(
int
run_order
,
bool
use_hierarchical_allreduce
)
{
PADDLE_ENFORCE
(
run_order
>=
0
,
"run_order must >= 0"
);
PADDLE_ENFORCE_GE
(
run_order
,
0
,
platform
::
errors
::
InvalidArgument
(
"The argument run_order must be >= 0, but got %d."
,
run_order
));
run_order_
=
run_order
;
run_order_
=
run_order
;
use_hierarchical_allreduce_
=
use_hierarchical_allreduce
;
use_hierarchical_allreduce_
=
use_hierarchical_allreduce
;
...
@@ -74,8 +77,11 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -74,8 +77,11 @@ class NCCLOpHandleBase : public OpHandleBase {
return
;
return
;
}
}
PADDLE_ENFORCE
(
places_
.
size
()
==
1
,
PADDLE_ENFORCE_EQ
(
places_
.
size
(),
1
,
"HierarchicalAllReduce run one proc with one card mode."
);
platform
::
errors
::
InvalidArgument
(
"HierarchicalAllReduce can only run "
"one proccess with one card mode, but got %d cards."
,
places_
.
size
()));
for
(
auto
&
p
:
places_
)
{
for
(
auto
&
p
:
places_
)
{
auto
ctxs
=
nccl_ctxs_
->
GetHierarchicalInterCtx
(
run_order
);
auto
ctxs
=
nccl_ctxs_
->
GetHierarchicalInterCtx
(
run_order
);
...
@@ -88,11 +94,11 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -88,11 +94,11 @@ class NCCLOpHandleBase : public OpHandleBase {
continue
;
continue
;
}
}
PADDLE_ENFORCE
(
cudaSetDevice
(
dev_id
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaSetDevice
(
dev_id
));
PADDLE_ENFORCE
(
cudaEventCreateWithFlags
(
&
inter_events_
[
dev_id
],
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventCreateWithFlags
(
cudaEventDisableTiming
));
&
inter_events_
[
dev_id
],
cudaEventDisableTiming
));
PADDLE_ENFORCE
(
cudaEventCreateWithFlags
(
&
exter_events_
[
dev_id
],
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventCreateWithFlags
(
cudaEventDisableTiming
));
&
exter_events_
[
dev_id
],
cudaEventDisableTiming
));
VLOG
(
10
)
<<
"Create events on dev_id:"
<<
dev_id
VLOG
(
10
)
<<
"Create events on dev_id:"
<<
dev_id
<<
", inter_event:"
<<
&
inter_events_
[
dev_id
]
<<
", inter_event:"
<<
&
inter_events_
[
dev_id
]
<<
", exter_event:"
<<
&
exter_events_
[
dev_id
];
<<
", exter_event:"
<<
&
exter_events_
[
dev_id
];
...
@@ -102,7 +108,10 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -102,7 +108,10 @@ class NCCLOpHandleBase : public OpHandleBase {
void
FlatNCCLAllReduce
(
platform
::
Place
place
,
const
void
*
sendbuff
,
void
FlatNCCLAllReduce
(
platform
::
Place
place
,
const
void
*
sendbuff
,
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
ncclRedOp_t
op
)
{
ncclRedOp_t
op
)
{
PADDLE_ENFORCE
(
run_order_
>=
0
,
"run_order must > 0"
);
PADDLE_ENFORCE_GE
(
run_order_
,
0
,
platform
::
errors
::
InvalidArgument
(
"The argument run_order_ must be >= 0, but got %d."
,
run_order_
));
auto
flat_nccl_ctxs
=
nccl_ctxs_
->
GetFlatCtx
(
run_order_
);
auto
flat_nccl_ctxs
=
nccl_ctxs_
->
GetFlatCtx
(
run_order_
);
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
auto
&
nccl_ctx
=
flat_nccl_ctxs
->
at
(
dev_id
);
auto
&
nccl_ctx
=
flat_nccl_ctxs
->
at
(
dev_id
);
...
@@ -113,14 +122,17 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -113,14 +122,17 @@ class NCCLOpHandleBase : public OpHandleBase {
<<
", dev_id:"
<<
dev_id
<<
", dtype:"
<<
datatype
<<
", dev_id:"
<<
dev_id
<<
", dtype:"
<<
datatype
<<
", place:"
<<
place
;
<<
", place:"
<<
place
;
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclAllReduce
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclAllReduce
(
sendbuff
,
recvbuff
,
count
,
datatype
,
op
,
comm
,
stream
));
sendbuff
,
recvbuff
,
count
,
datatype
,
op
,
comm
,
stream
));
}
}
void
NCCLAllReduce
(
platform
::
Place
place
,
const
void
*
sendbuff
,
void
NCCLAllReduce
(
platform
::
Place
place
,
const
void
*
sendbuff
,
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
ncclRedOp_t
op
)
{
ncclRedOp_t
op
)
{
PADDLE_ENFORCE
(
run_order_
>=
0
,
"run_order must > 0"
);
PADDLE_ENFORCE_GE
(
run_order_
,
0
,
platform
::
errors
::
InvalidArgument
(
"The argument run_order_ must be >= 0, but got %d."
,
run_order_
));
if
(
!
use_hierarchical_allreduce_
)
{
if
(
!
use_hierarchical_allreduce_
)
{
FlatNCCLAllReduce
(
place
,
sendbuff
,
recvbuff
,
count
,
datatype
,
op
);
FlatNCCLAllReduce
(
place
,
sendbuff
,
recvbuff
,
count
,
datatype
,
op
);
return
;
return
;
...
@@ -132,7 +144,10 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -132,7 +144,10 @@ class NCCLOpHandleBase : public OpHandleBase {
void
HierarchicalAllReduce
(
platform
::
Place
place
,
const
void
*
sendbuff
,
void
HierarchicalAllReduce
(
platform
::
Place
place
,
const
void
*
sendbuff
,
void
*
recvbuff
,
size_t
count
,
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
ncclRedOp_t
op
)
{
ncclDataType_t
datatype
,
ncclRedOp_t
op
)
{
PADDLE_ENFORCE
(
run_order_
>=
0
,
"run_order must > 0"
);
PADDLE_ENFORCE_GE
(
run_order_
,
0
,
platform
::
errors
::
InvalidArgument
(
"The argument run_order_ must be >= 0, but got %d."
,
run_order_
));
InterReduce
(
place
,
sendbuff
,
recvbuff
,
count
,
datatype
,
op
);
InterReduce
(
place
,
sendbuff
,
recvbuff
,
count
,
datatype
,
op
);
// When a trainer is not in exter allreduce ring
// When a trainer is not in exter allreduce ring
// they need not to call this.
// they need not to call this.
...
@@ -157,14 +172,13 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -157,14 +172,13 @@ class NCCLOpHandleBase : public OpHandleBase {
<<
", dtype:"
<<
datatype
<<
", place:"
<<
place
<<
", dtype:"
<<
datatype
<<
", place:"
<<
place
<<
", stream:"
<<
stream
;
<<
", stream:"
<<
stream
;
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclReduce
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclReduce
(
sendbuff
,
recvbuff
,
count
,
datatype
,
ncclSum
,
0
,
comm
,
stream
));
sendbuff
,
recvbuff
,
count
,
datatype
,
ncclSum
,
0
,
comm
,
stream
));
cudaEventRecord
(
inter_events_
.
at
(
dev_id
),
stream
);
cudaEventRecord
(
inter_events_
.
at
(
dev_id
),
stream
);
if
(
FLAGS_sync_nccl_allreduce
)
{
if
(
FLAGS_sync_nccl_allreduce
)
{
PADDLE_ENFORCE
(
cudaStreamSynchronize
(
stream
),
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamSynchronize
(
stream
));
"sync HierarchicalAllReduce inter stream error"
);
}
}
}
}
...
@@ -172,7 +186,9 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -172,7 +186,9 @@ class NCCLOpHandleBase : public OpHandleBase {
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
void
*
recvbuff
,
size_t
count
,
ncclDataType_t
datatype
,
ncclRedOp_t
op
)
{
ncclRedOp_t
op
)
{
auto
nccl_ctxs
=
nccl_ctxs_
->
GetHierarchicalExterCtx
(
run_order_
);
auto
nccl_ctxs
=
nccl_ctxs_
->
GetHierarchicalExterCtx
(
run_order_
);
PADDLE_ENFORCE
(
nccl_ctxs_
,
"can't get exter %d nccl_ctxs"
,
run_order_
);
PADDLE_ENFORCE_NOT_NULL
(
nccl_ctxs_
,
platform
::
errors
::
NotFound
(
"Can't get exter %d nccl contexts."
,
run_order_
));
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
auto
&
nccl_ctx
=
nccl_ctxs
->
at
(
dev_id
);
auto
&
nccl_ctx
=
nccl_ctxs
->
at
(
dev_id
);
auto
stream
=
nccl_ctx
.
stream
();
auto
stream
=
nccl_ctx
.
stream
();
...
@@ -185,14 +201,13 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -185,14 +201,13 @@ class NCCLOpHandleBase : public OpHandleBase {
cudaStreamWaitEvent
(
stream
,
inter_events_
.
at
(
dev_id
),
0
);
cudaStreamWaitEvent
(
stream
,
inter_events_
.
at
(
dev_id
),
0
);
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclAllReduce
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclAllReduce
(
sendbuff
,
recvbuff
,
count
,
datatype
,
op
,
comm
,
stream
));
sendbuff
,
recvbuff
,
count
,
datatype
,
op
,
comm
,
stream
));
cudaEventRecord
(
exter_events_
.
at
(
dev_id
),
stream
);
cudaEventRecord
(
exter_events_
.
at
(
dev_id
),
stream
);
if
(
FLAGS_sync_nccl_allreduce
)
{
if
(
FLAGS_sync_nccl_allreduce
)
{
PADDLE_ENFORCE
(
cudaStreamSynchronize
(
stream
),
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamSynchronize
(
stream
));
"sync HierarchicalAllReduce exter stream error"
);
}
}
}
}
...
@@ -210,8 +225,8 @@ class NCCLOpHandleBase : public OpHandleBase {
...
@@ -210,8 +225,8 @@ class NCCLOpHandleBase : public OpHandleBase {
<<
", stream:"
<<
stream
;
<<
", stream:"
<<
stream
;
cudaStreamWaitEvent
(
stream
,
exter_events_
.
at
(
dev_id
),
0
);
cudaStreamWaitEvent
(
stream
,
exter_events_
.
at
(
dev_id
),
0
);
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclBcast
(
sendbuff
,
count
,
datatype
,
0
,
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclBcast
(
comm
,
stream
));
sendbuff
,
count
,
datatype
,
0
,
comm
,
stream
));
}
}
protected:
protected:
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
b9a1c7e2
...
@@ -47,8 +47,8 @@ void OpHandleBase::InitCUDA() {
...
@@ -47,8 +47,8 @@ void OpHandleBase::InitCUDA() {
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
for
(
auto
&
p
:
dev_ctxes_
)
{
for
(
auto
&
p
:
dev_ctxes_
)
{
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
p
.
first
).
device
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
p
.
first
).
device
;
PADDLE_ENFORCE
(
cudaSetDevice
(
dev_id
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaSetDevice
(
dev_id
));
PADDLE_ENFORCE
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaEventCreateWithFlags
(
&
events_
[
dev_id
],
cudaEventDisableTiming
));
cudaEventCreateWithFlags
(
&
events_
[
dev_id
],
cudaEventDisableTiming
));
}
}
if
(
IsMultiDeviceTransfer
()
&&
dev_ctxes_
.
size
()
>
0
)
{
if
(
IsMultiDeviceTransfer
()
&&
dev_ctxes_
.
size
()
>
0
)
{
...
@@ -62,17 +62,22 @@ void OpHandleBase::InitCUDA() {
...
@@ -62,17 +62,22 @@ void OpHandleBase::InitCUDA() {
}
}
}
}
}
else
{
}
else
{
PADDLE_ENFORCE_EQ
(
dev_ctxes_
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
"%s should have only one dev_ctx."
,
Name
());
dev_ctxes_
.
size
(),
1UL
,
platform
::
errors
::
InvalidArgument
(
"Operator %s should have only one dev_ctx, but got %d."
,
Name
(),
dev_ctxes_
.
size
()));
auto
&
place
=
dev_ctxes_
.
begin
()
->
first
;
auto
&
place
=
dev_ctxes_
.
begin
()
->
first
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
for
(
auto
&
out_var
:
outputs_
)
{
for
(
auto
&
out_var
:
outputs_
)
{
auto
*
out_var_handle
=
dynamic_cast
<
VarHandle
*>
(
out_var
);
auto
*
out_var_handle
=
dynamic_cast
<
VarHandle
*>
(
out_var
);
if
(
out_var_handle
)
{
if
(
out_var_handle
)
{
PADDLE_ENFORCE
(
platform
::
is_same_place
(
place
,
out_var_handle
->
place
()),
PADDLE_ENFORCE_EQ
(
"The place of output(%s) is not consistent with the "
platform
::
is_same_place
(
place
,
out_var_handle
->
place
()),
true
,
"place of current op(%s)."
,
platform
::
errors
::
InvalidArgument
(
out_var_handle
->
Name
(),
Name
());
"The place of output(%s) is not consistent with the "
"place of current op(%s)."
,
out_var_handle
->
Name
(),
Name
()));
out_var_handle
->
SetGenerateEvent
(
events_
.
at
(
dev_id
));
out_var_handle
->
SetGenerateEvent
(
events_
.
at
(
dev_id
));
}
}
}
}
...
@@ -86,7 +91,10 @@ void OpHandleBase::Run(bool use_cuda) {
...
@@ -86,7 +91,10 @@ void OpHandleBase::Run(bool use_cuda) {
InitCUDA
();
InitCUDA
();
}
}
#else
#else
PADDLE_ENFORCE
(
!
use_cuda
);
PADDLE_ENFORCE_EQ
(
use_cuda
,
false
,
platform
::
errors
::
InvalidArgument
(
"Argument use_cuda should be false when Paddle is not "
"compiled with CUDA."
));
#endif
#endif
// skip running current op, used with inplace_addto_op_pass
// skip running current op, used with inplace_addto_op_pass
...
@@ -100,17 +108,20 @@ void OpHandleBase::Run(bool use_cuda) {
...
@@ -100,17 +108,20 @@ void OpHandleBase::Run(bool use_cuda) {
void
OpHandleBase
::
RecordWaitEventOnCtx
(
platform
::
DeviceContext
*
waited_ctx
)
{
void
OpHandleBase
::
RecordWaitEventOnCtx
(
platform
::
DeviceContext
*
waited_ctx
)
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
PADDLE_ENFORCE_NOT_NULL
(
waited_ctx
);
PADDLE_ENFORCE_NOT_NULL
(
waited_ctx
,
platform
::
errors
::
InvalidArgument
(
"Argument waited_ctx is NULL."
));
if
(
platform
::
is_cpu_place
(
waited_ctx
->
GetPlace
())
||
events_
.
empty
())
{
if
(
platform
::
is_cpu_place
(
waited_ctx
->
GetPlace
())
||
events_
.
empty
())
{
for
(
auto
&
dev_ctx
:
dev_ctxes_
)
{
for
(
auto
&
dev_ctx
:
dev_ctxes_
)
{
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx
.
second
);
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx
.
second
,
platform
::
errors
::
InvalidArgument
(
"The device context is NULL."
));
dev_ctx
.
second
->
Wait
();
dev_ctx
.
second
->
Wait
();
}
}
}
else
{
}
else
{
auto
stream
=
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
waited_ctx
)
->
stream
();
static_cast
<
platform
::
CUDADeviceContext
*>
(
waited_ctx
)
->
stream
();
for
(
auto
&
ev
:
events_
)
{
for
(
auto
&
ev
:
events_
)
{
PADDLE_ENFORCE
(
cudaStreamWaitEvent
(
stream
,
ev
.
second
,
0
));
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaStreamWaitEvent
(
stream
,
ev
.
second
,
0
));
}
}
}
}
#else
#else
...
@@ -145,10 +156,11 @@ void OpHandleBase::WaitInputVarGenerated() {
...
@@ -145,10 +156,11 @@ void OpHandleBase::WaitInputVarGenerated() {
auto
stream
=
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
dev_ctxes_
.
at
(
place
))
static_cast
<
platform
::
CUDADeviceContext
*>
(
dev_ctxes_
.
at
(
place
))
->
stream
();
->
stream
();
PADDLE_ENFORCE
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaStreamWaitEvent
(
stream
,
in_var_handle
->
GetEvent
(),
0
));
cudaStreamWaitEvent
(
stream
,
in_var_handle
->
GetEvent
(),
0
));
#else
#else
PADDLE_THROW
(
"Doesn't compile the GPU."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with CUDA."
));
#endif
#endif
}
}
// There are nothing to do when the place is CPUPlace.
// There are nothing to do when the place is CPUPlace.
...
@@ -169,10 +181,11 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) {
...
@@ -169,10 +181,11 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) {
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
dev_ctxes_
.
at
(
in_var_handle
->
place
()))
dev_ctxes_
.
at
(
in_var_handle
->
place
()))
->
stream
();
->
stream
();
PADDLE_ENFORCE
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
cudaStreamWaitEvent
(
stream
,
in_var_handle
->
GetEvent
(),
0
));
cudaStreamWaitEvent
(
stream
,
in_var_handle
->
GetEvent
(),
0
));
#else
#else
PADDLE_THROW
(
"Doesn't compile the GPU."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with CUDA."
));
#endif
#endif
}
}
// There are nothing to do when the place is CPUPlace.
// There are nothing to do when the place is CPUPlace.
...
@@ -242,7 +255,9 @@ void OpHandleBase::SetLocalExecScopes(
...
@@ -242,7 +255,9 @@ void OpHandleBase::SetLocalExecScopes(
auto
scopes
=
GetLocalScopes
();
auto
scopes
=
GetLocalScopes
();
for
(
auto
*
scope
:
scopes
)
{
for
(
auto
*
scope
:
scopes
)
{
auto
iter
=
scope_map
.
find
(
scope
);
auto
iter
=
scope_map
.
find
(
scope
);
PADDLE_ENFORCE
(
iter
!=
scope_map
.
end
(),
"Local scope not found"
);
PADDLE_ENFORCE_NE
(
iter
,
scope_map
.
end
(),
platform
::
errors
::
NotFound
(
"Local scope not found in scope map."
));
local_exec_scopes_
.
emplace_back
(
iter
->
second
);
local_exec_scopes_
.
emplace_back
(
iter
->
second
);
}
}
}
}
...
...
paddle/fluid/framework/details/op_registry.h
浏览文件 @
b9a1c7e2
...
@@ -21,6 +21,7 @@ limitations under the License. */
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/grad_op_desc_maker.h"
#include "paddle/fluid/framework/inplace_op_inference.h"
#include "paddle/fluid/framework/inplace_op_inference.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
...
@@ -186,19 +187,20 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
...
@@ -186,19 +187,20 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
void
operator
()(
const
char
*
op_type
,
OpInfo
*
info
)
const
{
void
operator
()(
const
char
*
op_type
,
OpInfo
*
info
)
const
{
PADDLE_ENFORCE_EQ
(
info
->
proto_
,
nullptr
,
PADDLE_ENFORCE_EQ
(
info
->
proto_
,
nullptr
,
platform
::
errors
::
AlreadyExists
(
platform
::
errors
::
AlreadyExists
(
"OpProto of %s has been registered"
,
op_type
));
"OpProto of %s has been registered
.
"
,
op_type
));
PADDLE_ENFORCE_EQ
(
info
->
checker_
,
nullptr
,
PADDLE_ENFORCE_EQ
(
info
->
checker_
,
nullptr
,
platform
::
errors
::
AlreadyExists
(
platform
::
errors
::
AlreadyExists
(
"OpAttrChecker of %s has been registered"
,
op_type
));
"OpAttrChecker of %s has been registered
.
"
,
op_type
));
info
->
proto_
=
new
proto
::
OpProto
;
info
->
proto_
=
new
proto
::
OpProto
;
info
->
checker_
=
new
OpAttrChecker
();
info
->
checker_
=
new
OpAttrChecker
();
T
maker
;
T
maker
;
maker
(
info
->
proto_
,
info
->
checker_
);
maker
(
info
->
proto_
,
info
->
checker_
);
info
->
proto_
->
set_type
(
op_type
);
info
->
proto_
->
set_type
(
op_type
);
PADDLE_ENFORCE
(
PADDLE_ENFORCE_EQ
(
info
->
proto_
->
IsInitialized
(),
info
->
proto_
->
IsInitialized
(),
true
,
"Fail to initialize %s's OpProto, because %s is not initialized"
,
platform
::
errors
::
PreconditionNotMet
(
op_type
,
info
->
proto_
->
InitializationErrorString
());
"Fail to initialize %s's OpProto, because %s is not initialized."
,
op_type
,
info
->
proto_
->
InitializationErrorString
()));
}
}
};
};
...
...
paddle/fluid/framework/details/reduce_and_gather.h
浏览文件 @
b9a1c7e2
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <algorithm>
#include <algorithm>
#include <map>
#include <map>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
...
@@ -32,9 +33,13 @@ struct ReduceLoDTensor {
...
@@ -32,9 +33,13 @@ struct ReduceLoDTensor {
template
<
typename
T
>
template
<
typename
T
>
void
apply
()
const
{
void
apply
()
const
{
PADDLE_ENFORCE
(
!
src_tensors_
.
empty
());
PADDLE_ENFORCE_NE
(
src_tensors_
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The number of tensors to be reduced is 0."
));
auto
&
t0
=
*
src_tensors_
[
0
];
auto
&
t0
=
*
src_tensors_
[
0
];
PADDLE_ENFORCE_NE
(
t0
.
numel
(),
0
);
PADDLE_ENFORCE_NE
(
t0
.
numel
(),
0
,
platform
::
errors
::
InvalidArgument
(
"The size of first tensor to be reduced is 0."
));
dst_tensor_
.
Resize
(
t0
.
dims
());
dst_tensor_
.
Resize
(
t0
.
dims
());
T
*
dst
=
dst_tensor_
.
mutable_data
<
T
>
(
platform
::
CPUPlace
());
T
*
dst
=
dst_tensor_
.
mutable_data
<
T
>
(
platform
::
CPUPlace
());
...
@@ -45,8 +50,19 @@ struct ReduceLoDTensor {
...
@@ -45,8 +50,19 @@ struct ReduceLoDTensor {
continue
;
continue
;
}
}
PADDLE_ENFORCE_EQ
(
t
.
dims
(),
t0
.
dims
());
PADDLE_ENFORCE_EQ
(
t
.
dims
(),
t0
.
dims
(),
PADDLE_ENFORCE_EQ
(
t
.
type
(),
t0
.
type
());
platform
::
errors
::
InvalidArgument
(
"The shape of tensors to be reduced must be "
"consistent. The shape of current tensor is %s, "
"but the shape of the first tensor is %s."
,
t
.
dims
(),
t0
.
dims
()));
PADDLE_ENFORCE_EQ
(
t
.
type
(),
t0
.
type
(),
platform
::
errors
::
InvalidArgument
(
"The type of tensors to be reduced must be "
"consistent. The type of current tensor is %s, "
"but the type of the first tensor is %s."
,
t
.
type
(),
t0
.
type
()));
std
::
transform
(
t
.
data
<
T
>
(),
t
.
data
<
T
>
()
+
t
.
numel
(),
dst
,
dst
,
std
::
transform
(
t
.
data
<
T
>
(),
t
.
data
<
T
>
()
+
t
.
numel
(),
dst
,
dst
,
[](
T
a
,
T
b
)
->
T
{
return
a
+
b
;
});
[](
T
a
,
T
b
)
->
T
{
return
a
+
b
;
});
}
}
...
@@ -88,7 +104,9 @@ struct GatherLocalSelectedRowsFunctor {
...
@@ -88,7 +104,9 @@ struct GatherLocalSelectedRowsFunctor {
in_places_
(
in_places
),
in_places_
(
in_places
),
out_place_
(
out_place
),
out_place_
(
out_place
),
dst_selected_rows_
(
dst_selected_rows
)
{
dst_selected_rows_
(
dst_selected_rows
)
{
PADDLE_ENFORCE_EQ
(
src_selected_rows
.
empty
(),
false
);
PADDLE_ENFORCE_NE
(
src_selected_rows
.
empty
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The number of selected_rows to be gathered is 0."
));
std
::
vector
<
int64_t
>
out_rows
;
std
::
vector
<
int64_t
>
out_rows
;
...
...
paddle/fluid/framework/details/reduce_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -13,7 +13,9 @@
...
@@ -13,7 +13,9 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include <memory>
#include <memory>
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
...
@@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows(
...
@@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows(
merged_dev_ctx
->
Wait
();
merged_dev_ctx
->
Wait
();
scope
->
EraseVars
(
std
::
vector
<
std
::
string
>
{
gathered_var_name
});
scope
->
EraseVars
(
std
::
vector
<
std
::
string
>
{
gathered_var_name
});
PADDLE_ENFORCE
(
client
->
Gather
(
vars
,
&
remote
,
*
merged_dev_ctx
,
scope
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE
(
remote
.
size
()
==
vars
.
size
());
client
->
Gather
(
vars
,
&
remote
,
*
merged_dev_ctx
,
scope
),
true
,
platform
::
errors
::
PreconditionNotMet
(
"Gather SelectedRows failed."
));
PADDLE_ENFORCE_EQ
(
remote
.
size
(),
vars
.
size
(),
platform
::
errors
::
PreconditionNotMet
(
"The number of remotes should be equal to the number "
"of variables to be gathered, but got the number of "
"remotes is %d and the number of variables is %d."
,
remote
.
size
(),
vars
.
size
()));
// 4. merged local selected rows.
// 4. merged local selected rows.
std
::
vector
<
const
SelectedRows
*>
all
;
std
::
vector
<
const
SelectedRows
*>
all
;
...
@@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() {
...
@@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
places_
.
size
(),
in_var_handles
.
size
(),
places_
.
size
(),
"The number of output should equal to the number of places."
);
platform
::
errors
::
InvalidArgument
(
"The number of inputs should equal to the number of places, but got "
"the number of inputs is %d and the number of places is %d."
,
in_var_handles
.
size
(),
places_
.
size
()));
VarHandle
*
out_var_handle
;
VarHandle
*
out_var_handle
;
{
{
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
out_var_handles
.
size
(),
1UL
,
"The number of output should be one."
);
platform
::
errors
::
InvalidArgument
(
"The number of output should be one, but got %d."
,
out_var_handles
.
size
()));
out_var_handle
=
out_var_handles
.
front
();
out_var_handle
=
out_var_handles
.
front
();
}
}
...
@@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() {
...
@@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() {
auto
pre_in_var
=
auto
pre_in_var
=
var_scopes
.
at
(
in_0_handle
->
scope_idx
())
->
FindVar
(
in_0_handle
->
name
());
var_scopes
.
at
(
in_0_handle
->
scope_idx
())
->
FindVar
(
in_0_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
pre_in_var
);
PADDLE_ENFORCE_NOT_NULL
(
pre_in_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
in_0_handle
->
name
()));
// NOTE: The Places of all input tensor must be all on CPU or all on GPU.
// NOTE: The Places of all input tensor must be all on CPU or all on GPU.
std
::
vector
<
platform
::
Place
>
in_places
;
// used to get dev_ctx
std
::
vector
<
platform
::
Place
>
in_places
;
// used to get dev_ctx
...
@@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() {
...
@@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() {
in_places
.
emplace_back
(
in_handle
->
place
());
in_places
.
emplace_back
(
in_handle
->
place
());
auto
in_var
=
auto
in_var
=
var_scopes
.
at
(
in_handle
->
scope_idx
())
->
FindVar
(
in_handle
->
name
());
var_scopes
.
at
(
in_handle
->
scope_idx
())
->
FindVar
(
in_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
in_handle
->
name
()));
VariableVisitor
::
EnforceShapeAndDTypeEQ
(
*
pre_in_var
,
*
in_var
);
VariableVisitor
::
EnforceShapeAndDTypeEQ
(
*
pre_in_var
,
*
in_var
);
}
}
auto
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx
())
auto
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx
())
->
FindVar
(
out_var_handle
->
name
());
->
FindVar
(
out_var_handle
->
name
());
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
out_var_handle
->
name
()));
// NOTE: The tensors' Place of input and output must be all on GPU or all on
// NOTE: The tensors' Place of input and output must be all on GPU or all on
// CPU.
// CPU.
auto
in_p
=
VariableVisitor
::
GetMutableTensor
(
pre_in_var
).
place
();
auto
in_p
=
VariableVisitor
::
GetMutableTensor
(
pre_in_var
).
place
();
platform
::
Place
t_out_p
;
platform
::
Place
t_out_p
;
if
(
platform
::
is_gpu_place
(
in_p
))
{
if
(
platform
::
is_gpu_place
(
in_p
))
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_var_handle
->
place
()),
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
out_var_handle
->
place
()),
true
,
"Places of input and output must be all on GPU."
);
platform
::
errors
::
PreconditionNotMet
(
"Places of input and output must be all on GPU."
));
t_out_p
=
out_var_handle
->
place
();
t_out_p
=
out_var_handle
->
place
();
}
else
{
}
else
{
t_out_p
=
platform
::
CPUPlace
();
t_out_p
=
platform
::
CPUPlace
();
...
@@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() {
...
@@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() {
in_selected_rows
,
in_places
,
dev_ctxes_
,
out_var_handle
,
t_out_p
,
in_selected_rows
,
in_places
,
dev_ctxes_
,
out_var_handle
,
t_out_p
,
out_var
->
GetMutable
<
framework
::
SelectedRows
>
());
out_var
->
GetMutable
<
framework
::
SelectedRows
>
());
}
else
{
}
else
{
PADDLE_THROW
(
"only support double or float when gather SelectedRows"
);
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Only support double or float when gather SelectedRows, but got "
"%s."
,
framework
::
DataTypeToString
(
in_selected_rows
[
0
]
->
value
().
type
())));
}
}
#endif
#endif
});
});
...
@@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() {
...
@@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() {
size_t
numel
=
static_cast
<
size_t
>
(
lod_tensor
.
numel
());
size_t
numel
=
static_cast
<
size_t
>
(
lod_tensor
.
numel
());
all_reduce_calls
.
emplace_back
(
all_reduce_calls
.
emplace_back
(
[
buffer
,
recvbuffer
,
type
,
numel
,
root_id
,
&
nccl_ctx
]
{
[
buffer
,
recvbuffer
,
type
,
numel
,
root_id
,
&
nccl_ctx
]
{
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclReduce
(
PADDLE_ENFORCE
_CUDA_SUCCESS
(
platform
::
dynload
::
ncclReduce
(
buffer
,
recvbuffer
,
numel
,
static_cast
<
ncclDataType_t
>
(
type
),
buffer
,
recvbuffer
,
numel
,
static_cast
<
ncclDataType_t
>
(
type
),
ncclSum
,
root_id
,
nccl_ctx
.
comm_
,
nccl_ctx
.
stream
()));
ncclSum
,
root_id
,
nccl_ctx
.
comm_
,
nccl_ctx
.
stream
()));
});
});
...
@@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() {
...
@@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() {
}
}
});
});
#else
#else
PADDLE_THROW
(
"CUDA is not enabled."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with CUDA."
));
#endif
#endif
}
else
{
}
else
{
PADDLE_THROW
(
"Place should be CPUPlace or CUDAPlace."
);
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"The place of tensor should be CPUPlace or CUDAPlace, but got %s."
,
lod_tensors
[
0
]
->
place
()));
}
}
}
}
}
}
...
...
paddle/fluid/framework/details/reduce_op_handle_test.cc
浏览文件 @
b9a1c7e2
...
@@ -13,7 +13,9 @@
...
@@ -13,7 +13,9 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include <unordered_map>
#include <unordered_map>
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
...
@@ -69,7 +71,8 @@ struct TestReduceOpHandle {
...
@@ -69,7 +71,8 @@ struct TestReduceOpHandle {
}
}
nccl_ctxs_
.
reset
(
new
platform
::
NCCLContextMap
(
gpu_list_
));
nccl_ctxs_
.
reset
(
new
platform
::
NCCLContextMap
(
gpu_list_
));
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with NCLL."
));
#endif
#endif
}
else
{
}
else
{
int
count
=
8
;
int
count
=
8
;
...
@@ -103,7 +106,8 @@ struct TestReduceOpHandle {
...
@@ -103,7 +106,8 @@ struct TestReduceOpHandle {
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
op_handle_
.
reset
(
new
ReduceOpHandle
(
nodes
.
back
().
get
(),
local_scopes_
,
gpu_list_
,
nccl_ctxs_
.
get
()));
gpu_list_
,
nccl_ctxs_
.
get
()));
#else
#else
PADDLE_THROW
(
"CUDA is not support."
);
PADDLE_THROW
(
platform
::
errors
::
PreconditionNotMet
(
"Not compiled with NCLL."
));
#endif
#endif
}
else
{
}
else
{
#if defined(PADDLE_WITH_NCCL)
#if defined(PADDLE_WITH_NCCL)
...
@@ -164,7 +168,10 @@ struct TestReduceOpHandle {
...
@@ -164,7 +168,10 @@ struct TestReduceOpHandle {
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
++
input_scope_idx
)
{
++
input_scope_idx
)
{
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
"input"
));
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_selected_rows
=
in_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
value
=
in_selected_rows
->
mutable_value
();
auto
value
=
in_selected_rows
->
mutable_value
();
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
value
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
...
@@ -178,7 +185,9 @@ struct TestReduceOpHandle {
...
@@ -178,7 +185,9 @@ struct TestReduceOpHandle {
}
}
auto
out_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"out"
);
auto
out_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
"out"
));
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
out_selected_rows
=
out_var
->
GetMutable
<
f
::
SelectedRows
>
();
auto
in_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"input"
);
auto
in_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"input"
);
...
@@ -196,9 +205,18 @@ struct TestReduceOpHandle {
...
@@ -196,9 +205,18 @@ struct TestReduceOpHandle {
auto
&
out_select_rows
=
out_var
->
Get
<
f
::
SelectedRows
>
();
auto
&
out_select_rows
=
out_var
->
Get
<
f
::
SelectedRows
>
();
auto
rt
=
out_select_rows
.
value
();
auto
rt
=
out_select_rows
.
value
();
PADDLE_ENFORCE_EQ
(
out_select_rows
.
height
(),
height
,
"height is not equal."
);
PADDLE_ENFORCE_EQ
(
out_select_rows
.
height
(),
height
,
platform
::
errors
::
InvalidArgument
(
"The height of SelectedRows is not equal to "
"the expected, expect %d, but got %d."
,
height
,
out_select_rows
.
height
()));
for
(
size_t
k
=
0
;
k
<
out_select_rows
.
rows
().
size
();
++
k
)
{
for
(
size_t
k
=
0
;
k
<
out_select_rows
.
rows
().
size
();
++
k
)
{
PADDLE_ENFORCE_EQ
(
out_select_rows
.
rows
()[
k
],
rows
[
k
%
rows
.
size
()]);
PADDLE_ENFORCE_EQ
(
out_select_rows
.
rows
()[
k
],
rows
[
k
%
rows
.
size
()],
platform
::
errors
::
InvalidArgument
(
"The item at position %d of rows of SelectedRows is not equal to "
"the expected, expect %d, but got %d."
,
k
,
rows
[
k
%
rows
.
size
()],
out_select_rows
.
rows
()[
k
]));
}
}
f
::
Tensor
result_tensor
;
f
::
Tensor
result_tensor
;
...
@@ -208,7 +226,7 @@ struct TestReduceOpHandle {
...
@@ -208,7 +226,7 @@ struct TestReduceOpHandle {
for
(
int64_t
j
=
0
;
j
<
f
::
product
(
result_tensor
.
dims
());
++
j
)
{
for
(
int64_t
j
=
0
;
j
<
f
::
product
(
result_tensor
.
dims
());
++
j
)
{
ASSERT_NEAR
(
ct
[
j
],
send_vector
[
j
%
send_vector
.
size
()],
1e-5
);
ASSERT_NEAR
(
ct
[
j
],
send_vector
[
j
%
send_vector
.
size
()],
1e-5
);
}
}
}
}
// namespace details
void
TestReduceLodTensors
(
size_t
output_scope_idx
)
{
void
TestReduceLodTensors
(
size_t
output_scope_idx
)
{
std
::
vector
<
float
>
send_vector
(
static_cast
<
size_t
>
(
f
::
product
(
kDims
)));
std
::
vector
<
float
>
send_vector
(
static_cast
<
size_t
>
(
f
::
product
(
kDims
)));
...
@@ -220,7 +238,9 @@ struct TestReduceOpHandle {
...
@@ -220,7 +238,9 @@ struct TestReduceOpHandle {
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
for
(
size_t
input_scope_idx
=
0
;
input_scope_idx
<
gpu_list_
.
size
();
++
input_scope_idx
)
{
++
input_scope_idx
)
{
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
auto
in_var
=
param_scopes_
[
input_scope_idx
]
->
FindVar
(
"input"
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
"input"
));
auto
in_lod_tensor
=
in_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
in_lod_tensor
=
in_var
->
GetMutable
<
f
::
LoDTensor
>
();
in_lod_tensor
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
in_lod_tensor
->
mutable_data
<
float
>
(
kDims
,
gpu_list_
[
input_scope_idx
]);
in_lod_tensor
->
set_lod
(
lod
);
in_lod_tensor
->
set_lod
(
lod
);
...
@@ -230,7 +250,9 @@ struct TestReduceOpHandle {
...
@@ -230,7 +250,9 @@ struct TestReduceOpHandle {
}
}
auto
out_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"out"
);
auto
out_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"out"
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
);
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
"out"
));
auto
out_lodtensor
=
out_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
out_lodtensor
=
out_var
->
GetMutable
<
f
::
LoDTensor
>
();
auto
in_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"input"
);
auto
in_var
=
param_scopes_
[
output_scope_idx
]
->
FindVar
(
"input"
);
...
@@ -254,7 +276,7 @@ struct TestReduceOpHandle {
...
@@ -254,7 +276,7 @@ struct TestReduceOpHandle {
ASSERT_NEAR
(
ct
[
j
],
send_vector
[
j
]
*
gpu_list_
.
size
(),
1e-5
);
ASSERT_NEAR
(
ct
[
j
],
send_vector
[
j
]
*
gpu_list_
.
size
(),
1e-5
);
}
}
}
}
};
};
// namespace details
TEST
(
ReduceTester
,
TestCPUReduceTestSelectedRows
)
{
TEST
(
ReduceTester
,
TestCPUReduceTestSelectedRows
)
{
TestReduceOpHandle
test_op
;
TestReduceOpHandle
test_op
;
...
...
paddle/fluid/framework/details/share_tensor_buffer_functor.cc
浏览文件 @
b9a1c7e2
...
@@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() {
...
@@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() {
auto
*
out_var
=
exec_scope_
->
FindVar
(
out_var_names_
[
i
]);
auto
*
out_var
=
exec_scope_
->
FindVar
(
out_var_names_
[
i
]);
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
in_var
,
platform
::
errors
::
NotFound
(
"The
input variable(%s)to be inplaced should not be NULL
."
,
"The
variable(%s) to be inplaced is not found in scope
."
,
in_var_infos_
[
i
]
->
Name
()));
in_var_infos_
[
i
]
->
Name
()));
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
out_var
,
out_var
,
platform
::
errors
::
NotFound
(
platform
::
errors
::
NotFound
(
"The variable(%s) to be inplaced is not found in scope."
,
"The output variable(%s) to be inplaced should not be NULL."
,
out_var_names_
[
i
]));
out_var_names_
[
i
]));
PADDLE_ENFORCE_NE
(
PADDLE_ENFORCE_NE
(
in_var
,
out_var
,
in_var
,
out_var
,
platform
::
errors
::
PreconditionNotMet
(
platform
::
errors
::
PreconditionNotMet
(
...
...
paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
浏览文件 @
b9a1c7e2
...
@@ -12,8 +12,10 @@
...
@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h"
#include <algorithm>
#include <algorithm>
#include <utility>
#include <utility>
#include "dgc/dgc.h"
#include "dgc/dgc.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
...
@@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle(
...
@@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle(
is_encoded_
(
is_encoded
),
is_encoded_
(
is_encoded
),
nranks_
(
nranks
)
{
nranks_
(
nranks
)
{
// TODO(gongwb) :polish them!
// TODO(gongwb) :polish them!
PADDLE_ENFORCE_EQ
(
is_encoded
,
true
);
PADDLE_ENFORCE_EQ
(
is_encoded
,
true
,
platform
::
errors
::
InvalidArgument
(
"The argument is_encoded is false."
));
VLOG
(
1
)
<<
"Use dgc allreduce mode"
VLOG
(
1
)
<<
"Use dgc allreduce mode"
<<
", nranks:"
<<
nranks_
;
<<
", nranks:"
<<
nranks_
;
PADDLE_ENFORCE_GT
(
local_scopes_
.
size
(),
0
);
PADDLE_ENFORCE_GT
(
local_scopes_
.
size
(),
0
,
platform
::
errors
::
PreconditionNotMet
(
"The number of local scope should be > 0, but got %zu."
,
local_scopes_
.
size
()));
auto
nranks_name
=
g_dgc_nranks
;
auto
nranks_name
=
g_dgc_nranks
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
*
local_scope
=
local_scopes_
[
i
];
auto
*
local_scope
=
local_scopes_
[
i
];
auto
nranks_var
=
local_scope
->
FindVar
(
nranks_name
);
auto
nranks_var
=
local_scope
->
FindVar
(
nranks_name
);
if
(
nranks_var
==
nullptr
)
{
PADDLE_THROW
(
"not find nranks_var:%s"
,
nranks_name
);
PADDLE_ENFORCE_NOT_NULL
(
}
nranks_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
nranks_name
));
float
*
dgc_nranks
=
nranks_var
->
GetMutable
<
LoDTensor
>
()
->
data
<
float
>
();
float
*
dgc_nranks
=
nranks_var
->
GetMutable
<
LoDTensor
>
()
->
data
<
float
>
();
*
dgc_nranks
=
nranks
;
*
dgc_nranks
=
nranks
;
...
@@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
places_
.
size
(),
in_var_handles
.
size
(),
places_
.
size
(),
"The NoDummyInputSize should be equal to the number of places."
);
platform
::
errors
::
PreconditionNotMet
(
"The number of input variables should be equal to the number of "
"places, but got the number of input variables is %zu and the the "
"number of places is %zu."
,
in_var_handles
.
size
(),
places_
.
size
()));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
(),
out_var_handles
.
size
(),
in_var_handles
.
size
(),
out_var_handles
.
size
(),
"The NoDummyInputSize and NoDummyOutputSize should be equal."
);
platform
::
errors
::
PreconditionNotMet
(
"The number of input variables should be equal to the number of "
"output variables, but got the number of input variables is %zu and "
"the the number of output variables is %zu."
,
in_var_handles
.
size
(),
out_var_handles
.
size
()));
std
::
vector
<
const
LoDTensor
*>
ins
;
std
::
vector
<
const
LoDTensor
*>
ins
;
std
::
vector
<
LoDTensor
*>
gathers
;
std
::
vector
<
LoDTensor
*>
gathers
;
...
@@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
auto
encode_var_name
=
original_name
+
g_dgc_encoded
;
auto
encode_var_name
=
original_name
+
g_dgc_encoded
;
auto
*
in_var
=
local_scope
->
FindVar
(
encode_var_name
);
auto
*
in_var
=
local_scope
->
FindVar
(
encode_var_name
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
"%s should not be null"
,
encode_var_name
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
encode_var_name
));
auto
&
in
=
in_var
->
Get
<
LoDTensor
>
();
auto
&
in
=
in_var
->
Get
<
LoDTensor
>
();
ins
.
emplace_back
(
&
in
);
ins
.
emplace_back
(
&
in
);
auto
gather_var_name
=
original_name
+
g_dgc_gather
;
auto
gather_var_name
=
original_name
+
g_dgc_gather
;
auto
*
gather_var
=
local_scope
->
FindVar
(
gather_var_name
);
auto
*
gather_var
=
local_scope
->
FindVar
(
gather_var_name
);
PADDLE_ENFORCE_NOT_NULL
(
gather_var
,
"%s should not be null"
,
PADDLE_ENFORCE_NOT_NULL
(
gather_var_name
);
gather_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
gather_var
));
auto
*
gather
=
gather_var
->
GetMutable
<
LoDTensor
>
();
auto
*
gather
=
gather_var
->
GetMutable
<
LoDTensor
>
();
gathers
.
emplace_back
(
gather
);
gathers
.
emplace_back
(
gather
);
...
@@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
}
}
}
}
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
ins
[
0
]
->
place
()));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
outs
[
0
]
->
place
()));
platform
::
is_gpu_place
(
ins
[
0
]
->
place
()),
true
,
PADDLE_ENFORCE
(
nccl_ctxs_
,
"nccl_ctxs should not be nullptr."
);
platform
::
errors
::
InvalidArgument
(
"The place of input variable should be CUDAPlace, but got %s."
,
ins
[
0
]
->
place
()));
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
outs
[
0
]
->
place
()),
true
,
platform
::
errors
::
InvalidArgument
(
"The place of input variable should be CUDAPlace, but got %s."
,
outs
[
0
]
->
place
()));
PADDLE_ENFORCE_NOT_NULL
(
nccl_ctxs_
,
platform
::
errors
::
PreconditionNotMet
(
"The nccl contexts are NULL."
));
int
dtype
=
-
1
;
int
dtype
=
-
1
;
size_t
in_numel
=
0
;
size_t
in_numel
=
0
;
size_t
out_numel
=
0
;
size_t
out_numel
=
0
;
PADDLE_ENFORCE
(
nranks_
>
1
);
PADDLE_ENFORCE_GT
(
nranks_
,
1
,
platform
::
errors
::
PreconditionNotMet
(
"The number of ranks should be > 1, but got %d."
,
nranks_
));
std
::
vector
<
std
::
function
<
void
()
>>
all_gather_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
all_gather_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
sparse_reduce_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
sparse_reduce_calls
;
...
@@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
dtype
=
(
dtype
==
-
1
)
?
platform
::
ToNCCLDataType
(
in
.
type
())
:
dtype
;
dtype
=
(
dtype
==
-
1
)
?
platform
::
ToNCCLDataType
(
in
.
type
())
:
dtype
;
in_numel
=
(
in_numel
==
0
)
?
static_cast
<
size_t
>
(
in
.
numel
())
:
in_numel
;
in_numel
=
(
in_numel
==
0
)
?
static_cast
<
size_t
>
(
in
.
numel
())
:
in_numel
;
PADDLE_ENFORCE
(
in_numel
%
2
==
0
);
PADDLE_ENFORCE_EQ
(
in_numel
%
2
,
0
,
PADDLE_ENFORCE
(
in_numel
/
2
==
static_cast
<
size_t
>
(
k
));
platform
::
errors
::
InvalidArgument
(
"The number of elements of input variable should be "
"even, but got %zu."
,
in_numel
));
PADDLE_ENFORCE_EQ
(
in_numel
/
2
,
static_cast
<
size_t
>
(
k
),
platform
::
errors
::
InvalidArgument
(
"The number of elements of input variable should be "
"even, but got %zu."
,
in_numel
));
out_numel
=
(
out_numel
==
0
)
?
static_cast
<
size_t
>
(
out
.
numel
())
:
out_numel
;
out_numel
=
(
out_numel
==
0
)
?
static_cast
<
size_t
>
(
out
.
numel
())
:
out_numel
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
int
dev_id
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
place
).
device
;
...
@@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
PADDLE_ENFORCE_EQ
(
paddle
::
communication
::
dgc
::
sparseReduce
(
PADDLE_ENFORCE_EQ
(
paddle
::
communication
::
dgc
::
sparseReduce
(
gather_buff
,
k
,
out_tensor_buf
,
gather_buff
,
k
,
out_tensor_buf
,
static_cast
<
int
>
(
out_numel
),
nranks_
,
stream
),
static_cast
<
int
>
(
out_numel
),
nranks_
,
stream
),
true
);
true
,
platform
::
errors
::
Unavailable
(
"Calling sparseReduce() failed."
));
});
});
}
}
...
@@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc(
...
@@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc(
int
SparseAllReduceOpHandle
::
GetKValue
(
const
std
::
string
&
grad_name
)
{
int
SparseAllReduceOpHandle
::
GetKValue
(
const
std
::
string
&
grad_name
)
{
auto
original_name
=
paddle
::
framework
::
GradOriginalVarName
(
grad_name
);
auto
original_name
=
paddle
::
framework
::
GradOriginalVarName
(
grad_name
);
auto
var_name
=
original_name
+
g_dgc_k
;
auto
var_name
=
original_name
+
g_dgc_k
;
PADDLE_ENFORCE
(
local_scopes_
.
size
()
>
0
);
PADDLE_ENFORCE_GT
(
local_scopes_
.
size
(),
0
,
platform
::
errors
::
PreconditionNotMet
(
"The number of local scope should be > 0, but got %zu."
,
local_scopes_
.
size
()));
auto
*
scope
=
local_exec_scopes_
[
0
];
auto
*
scope
=
local_exec_scopes_
[
0
];
auto
var
=
scope
->
FindVar
(
var_name
);
auto
var
=
scope
->
FindVar
(
var_name
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
var_name
));
auto
tensor
=
var
->
Get
<
LoDTensor
>
().
data
<
float
>
();
auto
tensor
=
var
->
Get
<
LoDTensor
>
().
data
<
float
>
();
return
*
tensor
;
return
*
tensor
;
}
}
...
@@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() {
...
@@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() {
}
}
auto
counter_name
=
g_dgc_counter_name
;
auto
counter_name
=
g_dgc_counter_name
;
auto
step_name
=
g_dgc_rampup_begin_step
;
auto
step_name
=
g_dgc_rampup_begin_step
;
PADDLE_ENFORCE
(
local_scopes_
.
size
()
>
0
);
PADDLE_ENFORCE_GT
(
local_scopes_
.
size
(),
0
,
platform
::
errors
::
PreconditionNotMet
(
"The number of local scope should be > 0, but got %zu."
,
local_scopes_
.
size
()));
auto
*
local_scope
=
local_exec_scopes_
[
0
];
auto
*
local_scope
=
local_exec_scopes_
[
0
];
auto
count_var
=
local_scope
->
FindVar
(
counter_name
);
auto
count_var
=
local_scope
->
FindVar
(
counter_name
);
auto
step_var
=
local_scope
->
FindVar
(
step_name
);
auto
step_var
=
local_scope
->
FindVar
(
step_name
);
if
(
count_var
==
nullptr
||
step_var
==
nullptr
)
{
PADDLE_THROW
(
"not find count_var:%s or step_var:%s"
,
counter_name
,
PADDLE_ENFORCE_NOT_NULL
(
step_var
);
count_var
,
platform
::
errors
::
NotFound
(
}
"Variable %s is not found in scope."
,
counter_name
));
PADDLE_ENFORCE_NOT_NULL
(
step_var
,
platform
::
errors
::
NotFound
(
"Variable %s is not found in scope."
,
step_var
));
float
count
=
*
count_var
->
Get
<
LoDTensor
>
().
data
<
float
>
();
float
count
=
*
count_var
->
Get
<
LoDTensor
>
().
data
<
float
>
();
float
step
=
*
step_var
->
Get
<
LoDTensor
>
().
data
<
float
>
();
float
step
=
*
step_var
->
Get
<
LoDTensor
>
().
data
<
float
>
();
...
...
paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
浏览文件 @
b9a1c7e2
...
@@ -127,11 +127,10 @@ void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
...
@@ -127,11 +127,10 @@ void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
"Baidu Kunlun Card is properly installed."
,
"Baidu Kunlun Card is properly installed."
,
ret
));
ret
));
ret
=
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
p
),
size
);
ret
=
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
p
),
size
);
PADDLE_ENFORCE_EQ
(
ret
,
XPU_SUCCESS
,
PADDLE_ENFORCE_EQ
(
platform
::
errors
::
External
(
ret
,
XPU_SUCCESS
,
"XPU API return wrong value[%d], please check whether "
platform
::
errors
::
External
(
"Baidu Kunlun Card is properly installed."
,
"XPU API return wrong value[%d], no enough memory"
,
ret
));
ret
));
if
(
FLAGS_init_allocated_mem
)
{
if
(
FLAGS_init_allocated_mem
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"xpu memory FLAGS_init_allocated_mem is not implemented."
));
"xpu memory FLAGS_init_allocated_mem is not implemented."
));
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
b9a1c7e2
...
@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
...
@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
}
}
};
};
// log Grad: dx = dout / x
// log Grad Grad: ddout = ddx / x; dx = -(dout / x) * (ddx / x)
template
<
typename
T
>
class
LogDoubleGradMaker
:
public
::
paddle
::
framework
::
SingleGradOpMaker
<
T
>
{
public:
using
::
paddle
::
framework
::
SingleGradOpMaker
<
T
>::
SingleGradOpMaker
;
protected:
void
Apply
(
GradOpPtr
<
T
>
op
)
const
override
{
op
->
SetType
(
"log_grad_grad"
);
op
->
SetInput
(
"X"
,
this
->
Input
(
"X"
));
// X@GRAD@GRAD: ddx
op
->
SetInput
(
"DDX"
,
this
->
OutputGrad
(
framework
::
GradVarName
(
"X"
)));
op
->
SetInput
(
"DOut"
,
this
->
Input
(
framework
::
GradVarName
(
"Out"
)));
op
->
SetAttrMap
(
this
->
Attrs
());
// X@GRAD: dx
op
->
SetOutput
(
"DX"
,
this
->
InputGrad
(
"X"
));
// Out@GRAD@GRAD: ddy
op
->
SetOutput
(
"DDOut"
,
this
->
InputGrad
(
framework
::
GradVarName
(
"Out"
)));
}
};
DECLARE_INPLACE_OP_INFERER
(
ActivationGradOpInplaceInferer
,
DECLARE_INPLACE_OP_INFERER
(
ActivationGradOpInplaceInferer
,
{
framework
::
GradVarName
(
"Out"
),
{
framework
::
GradVarName
(
"Out"
),
framework
::
GradVarName
(
"X"
)});
framework
::
GradVarName
(
"X"
)});
...
@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL(
...
@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL(
ops
::
AbsGradGradFunctor
<
int64_t
>>
);
ops
::
AbsGradGradFunctor
<
int64_t
>>
);
/* ========================================================================== */
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_OPERATOR
(
log
,
ops
::
ActivationOp
,
ops
::
LogOpMaker
,
ops
::
ActivationOpInferVarType
,
ops
::
ActivationGradOpMaker
<
ops
::
LogGradFunctor
<
float
>::
FwdDeps
(),
paddle
::
framework
::
OpDesc
>
,
ops
::
ActivationGradOpMaker
<
ops
::
LogGradFunctor
<
float
>::
FwdDeps
(),
paddle
::
imperative
::
OpBase
>
,
ops
::
ActFwdInplaceInferer
);
REGISTER_OPERATOR
(
log_grad
,
ops
::
ActivationOpGrad
,
ops
::
ActivationGradOpInplaceInferer
,
ops
::
LogDoubleGradMaker
<
paddle
::
framework
::
OpDesc
>
,
ops
::
LogDoubleGradMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OPERATOR
(
log_grad_grad
,
ops
::
ActivationOpDoubleGrad
<
ops
::
LogGradGradFunctor
<
float
>::
FwdDeps
()
>
,
ops
::
ActivationDoubleGradOpInplaceInferer
);
REGISTER_ACTIVATION_CPU_KERNEL
(
log
,
Log
,
LogFunctor
,
LogGradFunctor
);
REGISTER_OP_CPU_KERNEL
(
log_grad_grad
,
ops
::
LogDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
LogGradGradFunctor
<
float
>>
,
ops
::
LogDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
LogGradGradFunctor
<
double
>>
,
ops
::
LogDoubleGradKernel
<
plat
::
CPUDeviceContext
,
ops
::
LogGradGradFunctor
<
plat
::
float16
>>
);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION
(
leaky_relu
)
REGISTER_OP_VERSION
(
leaky_relu
)
.
AddCheckpoint
(
.
AddCheckpoint
(
...
...
paddle/fluid/operators/activation_op.cu
浏览文件 @
b9a1c7e2
...
@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL(
...
@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL(
ops
::
ActivationDoubleGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
ActivationDoubleGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
ops
::
AbsGradGradFunctor
<
int64_t
>>
);
ops
::
AbsGradGradFunctor
<
int64_t
>>
);
/* ========================================================================== */
/* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_ACTIVATION_CUDA_KERNEL
(
log
,
Log
,
LogFunctor
,
LogGradFunctor
);
REGISTER_OP_CUDA_KERNEL
(
log_grad_grad
,
ops
::
LogDoubleGradKernel
<
plat
::
CUDADeviceContext
,
ops
::
LogGradGradFunctor
<
float
>>
,
ops
::
LogDoubleGradKernel
<
plat
::
CUDADeviceContext
,
ops
::
LogGradGradFunctor
<
double
>>
,
ops
::
LogDoubleGradKernel
<
plat
::
CUDADeviceContext
,
ops
::
LogGradGradFunctor
<
plat
::
float16
>>
);
/* ========================================================================== */
paddle/fluid/operators/activation_op.h
浏览文件 @
b9a1c7e2
...
@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel
...
@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel
}
}
};
};
template
<
typename
DeviceContext
,
typename
Functor
>
class
LogDoubleGradKernel
:
public
SquareDoubleGradKernel
<
DeviceContext
,
Functor
>
{};
template
<
typename
DeviceContext
,
typename
Functor
>
template
<
typename
DeviceContext
,
typename
Functor
>
class
ELUDoubleGradKernel
class
ELUDoubleGradKernel
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
...
@@ -1852,6 +1856,37 @@ class PowGradKernel
...
@@ -1852,6 +1856,37 @@ class PowGradKernel
functor
(
*
place
,
x
,
out
,
dout
,
dx
);
functor
(
*
place
,
x
,
out
,
dout
,
dx
);
}
}
};
};
template
<
typename
T
>
struct
LogGradGradFunctor
:
public
BaseActivationFunctor
<
T
>
{
template
<
typename
Device
>
void
operator
()(
const
Device
&
dev
,
const
framework
::
Tensor
*
X
,
const
framework
::
Tensor
*
ddX
,
framework
::
Tensor
*
ddOut
,
const
framework
::
Tensor
*
dOut
,
framework
::
Tensor
*
dX
)
const
{
auto
*
d
=
dev
.
eigen_device
();
auto
ddx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddX
,
"Input"
,
"DDX"
,
"LogGradGrad"
));
auto
x
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
X
,
"Input"
,
"X"
,
"LogGradGrad"
));
// ddout = ddx / x; dx = -(dout / x) * (ddx / x)
// calculate dx first, so ddout can inplace ddx
if
(
dX
)
{
auto
dout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dOut
,
"Output"
,
"DOut"
,
"LogGradGrad"
));
auto
dx
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
dX
,
"Output"
,
"DX"
,
"LogGradGrad"
));
dx
.
device
(
*
d
)
=
dout
*
static_cast
<
T
>
(
-
1
)
*
ddx
/
(
x
*
x
);
}
if
(
ddOut
)
{
auto
ddout
=
framework
::
EigenVector
<
T
>::
Flatten
(
GET_DATA_SAFELY
(
ddOut
,
"Output"
,
"DDOut"
,
"LogGradGrad"
));
ddout
.
device
(
*
d
)
=
ddx
*
static_cast
<
T
>
(
1
)
/
x
;
}
}
static
constexpr
ActBwdOpFwdDeps
FwdDeps
()
{
return
kDepX
;
}
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
@@ -1872,7 +1907,6 @@ class PowGradKernel
...
@@ -1872,7 +1907,6 @@ class PowGradKernel
__macro(cosh, Cosh, CoshFunctor, CoshGradFunctor); \
__macro(cosh, Cosh, CoshFunctor, CoshGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log, Log, LogFunctor, LogGradFunctor); \
__macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \
__macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \
__macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \
__macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
...
...
paddle/fluid/operators/activation_op_xpu.cc
0 → 100644
浏览文件 @
b9a1c7e2
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/platform/xpu_header.h"
namespace
paddle
{
namespace
operators
{
using
paddle
::
framework
::
Tensor
;
template
<
typename
Functor
>
class
XPUActivationKernel
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
Functor
functor
;
auto
attrs
=
functor
.
GetAttrs
();
for
(
auto
&
attr
:
attrs
)
{
*
attr
.
second
=
context
.
Attr
<
float
>
(
attr
.
first
);
}
functor
(
context
);
}
};
template
<
typename
Functor
>
class
XPUActivationGradKernel
:
public
framework
::
OpKernel
<
typename
Functor
::
ELEMENT_TYPE
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
Functor
functor
;
auto
attrs
=
functor
.
GetAttrs
();
for
(
auto
&
attr
:
attrs
)
{
*
attr
.
second
=
context
.
Attr
<
float
>
(
attr
.
first
);
}
functor
(
context
);
}
};
template
<
typename
DeviceContext
,
typename
T
>
void
xpu_activation_forward
(
const
framework
::
ExecutionContext
&
ctx
,
xpu
::
Activation_t
type
)
{
const
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
const
T
*
x_data
=
x
->
data
<
T
>
();
T
*
y_data
=
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
r
=
0
;
if
(
xpu
::
Activation_t
::
ACT_POW
==
type
.
type
)
{
type
.
pow_factor
=
ctx
.
Attr
<
float
>
(
"factor"
);
}
auto
xpu_context
=
ctx
.
device_context
<
DeviceContext
>
().
x_context
();
r
=
xpu
::
activation_forward
(
xpu_context
,
type
,
x
->
numel
(),
reinterpret_cast
<
const
float
*>
(
x_data
),
reinterpret_cast
<
float
*>
(
y_data
));
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
}
template
<
typename
DeviceContext
,
typename
T
>
void
xpu_activation_backward
(
const
framework
::
ExecutionContext
&
ctx
,
xpu
::
Activation_t
type
)
{
/* TODO: relu tanh sigmoid are inplace */
const
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dOut
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dX
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
const
T
*
x_data
=
nullptr
;
const
T
*
y_data
=
nullptr
;
const
T
*
y_grad
=
nullptr
;
if
(
x
!=
nullptr
)
x_data
=
x
->
data
<
T
>
();
if
(
y
!=
nullptr
)
y_data
=
y
->
data
<
T
>
();
if
(
dOut
!=
nullptr
)
y_grad
=
dOut
->
data
<
T
>
();
T
*
x_grad
=
dX
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
xpu_context
=
ctx
.
device_context
<
DeviceContext
>
().
x_context
();
int
r
=
xpu
::
activation_backward
(
xpu_context
,
type
,
dX
->
numel
(),
reinterpret_cast
<
const
float
*>
(
x_data
),
reinterpret_cast
<
const
float
*>
(
y_data
),
reinterpret_cast
<
const
float
*>
(
y_grad
),
reinterpret_cast
<
float
*>
(
x_grad
));
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
}
template
<
typename
T
,
xpu
::
Activation_t
::
act_enum
algorithm
>
struct
XPUActivationFunc
:
public
BaseActivationFunctor
<
T
>
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
xpu_activation_forward
<
paddle
::
platform
::
XPUDeviceContext
,
T
>
(
ctx
,
algorithm
);
}
};
template
<
typename
T
,
xpu
::
Activation_t
::
act_enum
algorithm
>
struct
XPUActivationGradFunc
:
public
BaseActivationFunctor
<
T
>
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
xpu_activation_backward
<
paddle
::
platform
::
XPUDeviceContext
,
T
>
(
ctx
,
algorithm
);
}
};
template
<
typename
T
>
using
XPUReluFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
RELU
>
;
template
<
typename
T
>
using
XPUSigmoidFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
SIGMOID
>
;
template
<
typename
T
>
using
XPUTanhFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
TANH
>
;
template
<
typename
T
>
using
XPUGeluFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
GELU
>
;
template
<
typename
T
>
using
XPULogFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
LOG
>
;
template
<
typename
T
>
using
XPUSquareFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
SQUARE
>
;
template
<
typename
T
>
using
XPUSuareGradFunctor
=
XPUActivationGradFunc
<
T
,
xpu
::
Activation_t
::
SQUARE
>
;
template
<
typename
T
>
using
XPUReluGradFunctor
=
XPUActivationGradFunc
<
T
,
xpu
::
Activation_t
::
RELU
>
;
template
<
typename
T
>
using
XPUSigmoidGradFunctor
=
XPUActivationGradFunc
<
T
,
xpu
::
Activation_t
::
SIGMOID
>
;
template
<
typename
T
>
using
XPUTanhGradFunctor
=
XPUActivationGradFunc
<
T
,
xpu
::
Activation_t
::
TANH
>
;
template
<
typename
T
>
using
XPUGeluGradFunctor
=
XPUActivationGradFunc
<
T
,
xpu
::
Activation_t
::
GELU
>
;
template
<
typename
T
>
using
XPUSqrtFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
SQRT
>
;
template
<
typename
T
>
using
XPUSqrtGradFunctor
=
XPUActivationGradFunc
<
T
,
xpu
::
Activation_t
::
SQRT
>
;
template
<
typename
T
>
using
XPUACTPowFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
ACT_POW
>
;
template
<
typename
T
>
using
XPUABSFunctor
=
XPUActivationFunc
<
T
,
xpu
::
Activation_t
::
ABS
>
;
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_XPU_KERNEL(act_type, \
ops::XPUActivationKernel<ops::functor<float>>); \
REGISTER_OP_XPU_KERNEL( \
act_type##_grad, \
ops::XPUActivationGradKernel<ops::grad_functor<float>>);
REGISTER_ACTIVATION_XPU_KERNEL
(
relu
,
XPUReluFunctor
,
XPUReluGradFunctor
)
REGISTER_ACTIVATION_XPU_KERNEL
(
tanh
,
XPUTanhFunctor
,
XPUTanhGradFunctor
)
REGISTER_ACTIVATION_XPU_KERNEL
(
sigmoid
,
XPUSigmoidFunctor
,
XPUSigmoidGradFunctor
)
REGISTER_ACTIVATION_XPU_KERNEL
(
gelu
,
XPUGeluFunctor
,
XPUGeluGradFunctor
)
REGISTER_ACTIVATION_XPU_KERNEL
(
sqrt
,
XPUSqrtFunctor
,
XPUSqrtGradFunctor
)
REGISTER_ACTIVATION_XPU_KERNEL
(
square
,
XPUSquareFunctor
,
XPUSuareGradFunctor
)
REGISTER_OP_XPU_KERNEL
(
log
,
ops
::
XPUActivationKernel
<
ops
::
XPULogFunctor
<
float
>>
);
REGISTER_OP_XPU_KERNEL
(
pow
,
ops
::
XPUActivationKernel
<
ops
::
XPUACTPowFunctor
<
float
>>
);
REGISTER_OP_XPU_KERNEL
(
abs
,
ops
::
XPUActivationKernel
<
ops
::
XPUABSFunctor
<
float
>>
);
#endif // PADDLE_WITH_XPU
paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc
0 → 100644
浏览文件 @
b9a1c7e2
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
class
ElementwiseAddXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
XPUElementwise
<
T
,
XPUAddFunctor
<
T
>>
(
ctx
);
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
ElementwiseAddGradXPUKernel
:
public
ElemwiseGradKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
ElemwiseGradKernel
<
T
>::
Compute
(
ctx
);
using
Tensor
=
framework
::
Tensor
;
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
dx_dims
=
dout
->
dims
();
auto
dy_dims_untrimed
=
dout
->
dims
();
T
*
dx_data
=
NULL
;
T
*
dy_data
=
NULL
;
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
PADDLE_ENFORCE_GE
(
dx_dims
.
size
(),
dy_dims_untrimed
.
size
(),
"Rank of first input must >= rank of second input."
);
if
(
dx
!=
nullptr
)
{
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
dx_dims
=
dx
->
dims
();
dx_data
=
dx
->
data
<
T
>
();
}
if
(
dy
!=
nullptr
)
{
dy
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
dy_dims_untrimed
=
dy
->
dims
();
dy_data
=
dy
->
data
<
T
>
();
}
int
pre
,
n
,
post
,
is_common_broadcast
;
if
(
dx_dims
==
dy_dims_untrimed
)
{
pre
=
post
=
1
;
n
=
dout
->
numel
();
}
else
{
axis
=
(
axis
==
-
1
?
dx_dims
.
size
()
-
dy_dims_untrimed
.
size
()
:
axis
);
PADDLE_ENFORCE
(
axis
>=
0
&&
axis
<
dx_dims
.
size
(),
"Axis should be in range [0, dx_dims)"
);
auto
dy_dims
=
trim_trailing_singular_dims
(
dy_dims_untrimed
);
axis
=
(
dy_dims
.
size
()
==
0
)
?
dx_dims
.
size
()
:
axis
;
get_mid_dims
(
dx_dims
,
dy_dims
,
axis
,
&
pre
,
&
n
,
&
post
,
&
is_common_broadcast
);
}
int
len
=
pre
*
n
*
post
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
if
(
post
==
1
)
{
int
r
=
xpu
::
matrix_vector_add_grad
(
dev_ctx
.
x_context
(),
dout
->
data
<
T
>
(),
dout
->
data
<
T
>
(),
dout
->
data
<
T
>
(),
dout
->
data
<
T
>
(),
dx_data
,
dy_data
,
pre
,
n
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
return
;
}
if
(
dx
==
nullptr
)
{
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
dx_data
),
len
*
sizeof
(
float
)),
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU has no enough memory"
));
}
if
(
dy
==
nullptr
)
{
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
dy_data
),
len
*
sizeof
(
float
)),
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU has no enough memory"
));
}
else
{
if
(
len
!=
n
)
{
PADDLE_ENFORCE_EQ
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
dy_data
),
len
*
sizeof
(
float
)),
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU has no enough memory"
));
}
}
int
r
=
xpu
::
elementwise_add_grad
(
dev_ctx
.
x_context
(),
dout
->
data
<
T
>
()
/*x*/
,
dout
->
data
<
T
>
()
/*y*/
,
dout
->
data
<
T
>
()
/*out*/
,
dout
->
data
<
T
>
(),
dx_data
,
dy_data
,
len
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
if
((
dy
!=
nullptr
)
&&
(
len
!=
n
))
{
r
=
xpu
::
reduce_ew
(
dev_ctx
.
x_context
(),
dy_data
,
dy
->
data
<
T
>
(),
pre
,
n
,
post
,
xpu
::
ElementwiseOp
::
ASSIGN
);
PADDLE_ENFORCE_EQ
(
r
,
XPU_SUCCESS
,
platform
::
errors
::
External
(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed."
,
r
));
dev_ctx
.
Wait
();
xpu_free
(
dy_data
);
}
if
((
dx
==
nullptr
||
dy
==
nullptr
)
&&
!
(
dy
!=
nullptr
&&
len
!=
n
))
{
dev_ctx
.
Wait
();
}
if
(
dx
==
nullptr
)
{
xpu_free
(
dx_data
);
}
if
(
dy
==
nullptr
)
{
xpu_free
(
dy_data
);
}
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_XPU_KERNEL
(
elementwise_add
,
ops
::
ElementwiseAddXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
REGISTER_OP_XPU_KERNEL
(
elementwise_add_grad
,
ops
::
ElementwiseAddGradXPUKernel
<
paddle
::
platform
::
XPUDeviceContext
,
float
>
);
#endif
paddle/fluid/operators/elementwise/elementwise_xpu.h
0 → 100644
浏览文件 @
b9a1c7e2
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
struct
XPUAddFunctor
{
int
operator
()(
xpu
::
Context
*
ctx
,
const
T
*
x
,
const
T
*
y
,
T
*
z
,
int
len
)
{
return
xpu
::
elementwise_add
(
ctx
,
x
,
y
,
z
,
len
);
}
};
template
<
typename
T
>
struct
XPUMulFunctor
{
int
operator
()(
xpu
::
Context
*
ctx
,
const
T
*
x
,
const
T
*
y
,
T
*
z
,
int
len
)
{
return
xpu
::
elementwise_mul
(
ctx
,
x
,
y
,
z
,
len
);
}
};
template
<
typename
T
,
typename
Functor
>
void
XPUElementwise
(
const
framework
::
ExecutionContext
&
ctx
)
{
PADDLE_ENFORCE
(
platform
::
is_xpu_place
(
ctx
.
GetPlace
()),
"This kernel only runs on XPU device."
);
auto
x_var
=
ctx
.
InputVar
(
"X"
);
PADDLE_ENFORCE_NE
(
x_var
,
nullptr
,
platform
::
errors
::
Fatal
(
"Cannot get input Variable X"
));
PADDLE_ENFORCE
(
x_var
->
IsType
<
framework
::
LoDTensor
>
(),
"XPU only support LoDTensor"
);
auto
x
=
x_var
->
Get
<
framework
::
LoDTensor
>
();
auto
*
y
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Y"
);
auto
*
z
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
z
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
auto
x_dims
=
x
.
dims
();
auto
y_dims_untrimed
=
y
->
dims
();
PADDLE_ENFORCE_GE
(
x_dims
.
size
(),
y_dims_untrimed
.
size
(),
"Rank of first input must >= rank of second input."
);
axis
=
(
axis
==
-
1
?
x_dims
.
size
()
-
y_dims_untrimed
.
size
()
:
axis
);
PADDLE_ENFORCE
(
axis
>=
0
&&
axis
<
x_dims
.
size
(),
"Axis should be in range [0, x_dims)"
);
auto
y_dims
=
trim_trailing_singular_dims
(
y_dims_untrimed
);
axis
=
(
y_dims
.
size
()
==
0
)
?
x_dims
.
size
()
:
axis
;
int
pre
,
n
,
post
,
is_common_broadcast
;
get_mid_dims
(
x_dims
,
y_dims
,
axis
,
&
pre
,
&
n
,
&
post
,
&
is_common_broadcast
);
int
len
=
pre
*
n
*
post
;
const
T
*
x_data
=
x
.
data
<
T
>
();
const
T
*
y_data
=
y
->
data
<
T
>
();
T
*
z_data
=
z
->
data
<
T
>
();
T
*
y_broadcast
=
nullptr
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
XPUDeviceContext
>();
if
(
post
==
1
)
{
if
(
std
::
is_same
<
Functor
,
XPUAddFunctor
<
T
>>::
value
)
{
int
res
=
xpu
::
matrix_vector_add
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
z_data
,
pre
,
n
);
PADDLE_ENFORCE
(
res
==
xpu
::
Error_t
::
SUCCESS
,
"XPU kernel error! res = %d"
,
res
);
return
;
}
if
(
std
::
is_same
<
Functor
,
XPUMulFunctor
<
T
>>::
value
)
{
int
res
=
xpu
::
matrix_vector_mul
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
z_data
,
pre
,
n
);
PADDLE_ENFORCE
(
res
==
xpu
::
Error_t
::
SUCCESS
,
"XPU kernel error! res = %d"
,
res
);
return
;
}
}
if
(
pre
!=
1
||
post
!=
1
)
{
PADDLE_ENFORCE
(
xpu_malloc
(
reinterpret_cast
<
void
**>
(
&
y_broadcast
),
len
*
sizeof
(
T
))
==
XPU_SUCCESS
);
int
res
=
xpu
::
broadcast_ew
(
dev_ctx
.
x_context
(),
y_data
,
y_broadcast
,
pre
,
n
,
post
,
xpu
::
ElementwiseOp
::
ASSIGN
);
PADDLE_ENFORCE
(
res
==
xpu
::
Error_t
::
SUCCESS
,
"XPU kernel error! res = %d"
,
res
);
y_data
=
y_broadcast
;
}
Functor
functor
;
int
res
=
functor
(
dev_ctx
.
x_context
(),
x_data
,
y_data
,
z_data
,
len
);
PADDLE_ENFORCE
(
res
==
xpu
::
Error_t
::
SUCCESS
,
"XPU kernel error! res = %d"
,
res
);
if
(
pre
!=
1
||
post
!=
1
)
{
dev_ctx
.
Wait
();
xpu_free
(
y_broadcast
);
}
}
}
// namespace operators
}
// namespace paddle
#endif
paddle/fluid/operators/matmul_op_xpu.cc
0 → 100644
浏览文件 @
b9a1c7e2
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <algorithm>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
operators
{
static
framework
::
DDim
RowMatrixFromVector
(
const
framework
::
DDim
&
x_dim
)
{
if
(
x_dim
.
size
()
>
1
)
{
return
x_dim
;
}
return
framework
::
make_ddim
({
1
,
x_dim
[
0
]});
}
static
framework
::
Tensor
FoldInitDims
(
const
framework
::
Tensor
&
input
)
{
auto
output
=
input
;
auto
in_dims
=
input
.
dims
();
if
(
in_dims
.
size
()
==
3
)
{
output
.
Resize
({
in_dims
[
0
]
*
in_dims
[
1
],
in_dims
[
2
]});
}
return
output
;
}
/**
* Get column matrix shape from a vector shape. If the ran of y_dim > 1, the
* original y_dim is returned.
*/
static
framework
::
DDim
ColumnMatrixFromVector
(
const
framework
::
DDim
&
y_dim
)
{
if
(
y_dim
.
size
()
>
1
)
{
return
y_dim
;
}
return
framework
::
make_ddim
({
y_dim
[
0
],
1
});
}
static
void
ReshapeTensorIntoMatrixSequence
(
framework
::
Tensor
*
x
,
const
math
::
MatDescriptor
&
descriptor
)
{
int64_t
h
,
w
;
h
=
descriptor
.
height_
;
w
=
descriptor
.
width_
;
if
(
descriptor
.
trans_
)
{
std
::
swap
(
w
,
h
);
}
if
(
descriptor
.
batch_size_
)
{
x
->
Resize
({
descriptor
.
batch_size_
,
h
,
w
});
}
else
{
x
->
Resize
({
h
,
w
});
}
}
/**
* Reshape the x,y,out tensor to 3-D or 2-D tensor by matrix descriptor
* Out = matmul(x, y)
*
* This method will first calculate X,Y matrix sequence, and then calculate
* the out shape.
*
* Assume X = [BatchSize, H1, W1], Y = [BatchSize, H2, W2]
* The out = [BatchSize, H1, W2]
*
* If there is no batch size in `X` and `Y`, the out will be [H1, W2]
* If any of `X` and `Y` has batch size BatchSize, the out will have the
* BatchSize.
*/
static
void
ReshapeXYOutIntoMatrixSequence
(
framework
::
Tensor
*
x
,
framework
::
Tensor
*
y
,
framework
::
Tensor
*
out
,
bool
trans_x
,
bool
trans_y
)
{
auto
x_dim
=
RowMatrixFromVector
(
x
->
dims
());
auto
y_dim
=
ColumnMatrixFromVector
(
y
->
dims
());
auto
mat_dim_x
=
math
::
CreateMatrixDescriptor
(
x_dim
,
0
,
trans_x
);
auto
mat_dim_y
=
math
::
CreateMatrixDescriptor
(
y_dim
,
0
,
trans_y
);
if
(
mat_dim_x
.
batch_size_
==
0
&&
mat_dim_y
.
batch_size_
==
0
)
{
out
->
Resize
({
mat_dim_x
.
height_
,
mat_dim_y
.
width_
});
}
else
{
out
->
Resize
({
std
::
max
(
mat_dim_x
.
batch_size_
,
mat_dim_y
.
batch_size_
),
mat_dim_x
.
height_
,
mat_dim_y
.
width_
});
}
ReshapeTensorIntoMatrixSequence
(
x
,
mat_dim_x
);
ReshapeTensorIntoMatrixSequence
(
y
,
mat_dim_y
);
}
template <typename DeviceContext, typename T>
class MatMulXPUKernel : public framework::OpKernel<T> {
 public:
  // Computes Out = alpha * op(X) * op(Y) on a Baidu Kunlun (XPU) device,
  // where op() optionally transposes its operand per the "transpose_X" /
  // "transpose_Y" attributes. Rank-1 inputs are promoted to a row (X) or
  // column (Y) matrix first. Dispatches to a single fused GEMM when there is
  // no batch dimension and to a batched GEMM otherwise.
  void Compute(const framework::ExecutionContext &context) const override {
    auto *x = context.Input<framework::Tensor>("X");
    auto *y = context.Input<framework::Tensor>("Y");
    auto *out = context.Output<framework::Tensor>("Out");
    out->mutable_data<T>(context.GetPlace());
    auto mat_dim_a = math::CreateMatrixDescriptor(
        RowMatrixFromVector(x->dims()), 0, context.Attr<bool>("transpose_X"));
    auto mat_dim_b =
        math::CreateMatrixDescriptor(ColumnMatrixFromVector(y->dims()), 0,
                                     context.Attr<bool>("transpose_Y"));
    // The contracted dimension and the batch sizes of both operands must
    // agree before calling into the XPU BLAS routines.
    PADDLE_ENFORCE_EQ(
        mat_dim_a.width_, mat_dim_b.height_,
        platform::errors::InvalidArgument("Shape mistake in matmul_op"));
    PADDLE_ENFORCE_EQ(
        mat_dim_a.batch_size_, mat_dim_b.batch_size_,
        platform::errors::InvalidArgument("Shape mistake in matmul_op"));
    T alpha = static_cast<T>(context.Attr<float>("alpha"));
    auto &dev_ctx = context.template device_context<DeviceContext>();
    // Fix: was `float *`, which broke any non-float instantiation of this
    // templated kernel; the output pointer must follow T.
    T *data_c = out->data<T>();
    if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
      // No (or trivial) batch dimension: one fully-connected GEMM call.
      int r = xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_,
                            mat_dim_b.trans_, mat_dim_a.height_,
                            mat_dim_b.width_, mat_dim_a.width_, alpha,
                            x->data<T>(), y->data<T>(), 0.0f, data_c);
      PADDLE_ENFORCE_EQ(
          r, XPU_SUCCESS,
          platform::errors::External(
              "XPU API return wrong value[%d], please check whether "
              "Baidu Kunlun Card is properly installed.",
              r));
    } else {
      // batch matmul
      int r = xpu::batched_gemm_int16(
          dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_,
          mat_dim_a.batch_size_, mat_dim_a.height_, mat_dim_b.width_,
          mat_dim_a.width_, alpha, x->data<T>(), y->data<T>(), data_c,
          nullptr, nullptr);
      PADDLE_ENFORCE_EQ(
          r, XPU_SUCCESS,
          platform::errors::External(
              "XPU API return wrong value[%d], please check whether "
              "Baidu Kunlun Card is properly installed.",
              r));
    }
  }
};
// Reshape a rank-3 tensor from P x M x N to M x (P * N).
// (Warning: This requires transposing data and writes into new memory.)
// Identity op if the tensor is not of rank 3.
template <typename DeviceContext, typename T>
static framework::Tensor XPUFoldHeadAndLastDims(
    const DeviceContext &context, const framework::Tensor &input) {
  auto dims = input.dims();
  if (dims.size() != 3) {
    return input;  // nothing to fold for rank != 3
  }
  // Allocate the transposed buffer as M x P x N first.
  framework::Tensor folded;
  folded.Resize({dims[1], dims[0], dims[2]});
  folded.mutable_data<T>(context.GetPlace());
  std::vector<int> shape = {static_cast<int>(dims[0]),
                            static_cast<int>(dims[1]),
                            static_cast<int>(dims[2])};
  std::vector<int> perm = {1, 0, 2};  // swap the first two axes
  int ret = xpu::transpose(context.x_context(), input.data<T>(),
                           folded.data<T>(), shape.data(), perm.data(),
                           /*ndims=*/3);
  PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
                    platform::errors::External(
                        "XPU API return wrong value[%d], please check whether "
                        "Baidu Kunlun Card is properly installed.",
                        ret));
  // Collapse the trailing two axes: M x P x N -> M x (P * N).
  folded.Resize({dims[1], dims[0] * dims[2]});
  return folded;
}
// Using dimensional constraints on matrix multiplication, it is
// straight-forward to check the following table for when X and Y
// are both matrices.
//
// transpose_X | False    | True     | False    | True
// transpose_Y | False    | False    | True     | True
// -----------+----------+----------+----------+-----------
//        dX = | dOut Y^T | Y dOut^T | dOut Y   | Y^T dOut^T
//        dY = | X^T dOut | X dOut   | dOut^T X | dOut^T X^T
//
// When X is a vector of size K, we treat it instead as a matrix of shape
// (1, K). Similarly, when Y is a vector of size K, we treat it instead as
// a matrix of shape (K, 1).
//
// When X and Y are both 3-dimensional tensors, then the first dimension
// the batch dimension can be ignored and the exact same formulas apply
// as for two matrices.
//
// Finally, when, e.g., X is a 3-dimensional tensor but Y is a matrix, we end
// up with formulas like
//
//   dY_{ij} = \sum_{p, m} X_{pmi} dOut_{pmj}
//
// To handle this sort of scenario, we reshape X : P x M x K, dOut: P x M x N
// to X: (P * M) x K, dOut: (P * M) x N.
template <typename DeviceContext, typename T>
class MatMulGradXPUKernel : public framework::OpKernel<T> {
 public:
  // out = alpha * op(a) * op(b), executed on the XPU device. Shared helper
  // for both the dX and dY computations below.
  void MatMul(const framework::ExecutionContext &context,
              const framework::Tensor &a, bool trans_a,
              const framework::Tensor &b, bool trans_b,
              framework::Tensor *out) const {
    out->mutable_data<T>(context.GetPlace());
    auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
    auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
    // Contracted dimension and batch sizes must agree.
    PADDLE_ENFORCE_EQ(
        mat_dim_a.width_, mat_dim_b.height_,
        platform::errors::InvalidArgument("Shape mistake in matmul_grad_op"));
    PADDLE_ENFORCE_EQ(
        mat_dim_a.batch_size_, mat_dim_b.batch_size_,
        platform::errors::InvalidArgument("Shape mistake in matmul_grad_op"));
    T alpha = static_cast<T>(context.Attr<float>("alpha"));
    auto &dev_ctx = context.template device_context<DeviceContext>();
    // Fix: was `float *`, which broke any non-float instantiation of this
    // templated kernel; the output pointer must follow T.
    T *data_c = out->data<T>();
    if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
      // No (or trivial) batch dimension: one fully-connected GEMM call.
      int r = xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_,
                            mat_dim_b.trans_, mat_dim_a.height_,
                            mat_dim_b.width_, mat_dim_a.width_, alpha,
                            a.data<T>(), b.data<T>(), 0.0f, data_c);
      PADDLE_ENFORCE_EQ(
          r, XPU_SUCCESS,
          platform::errors::External(
              "XPU API return wrong value[%d], please check whether "
              "Baidu Kunlun Card is properly installed.",
              r));
    } else {
      // batch matmul
      int r = xpu::batched_gemm_int16(
          dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_,
          mat_dim_a.batch_size_, mat_dim_a.height_, mat_dim_b.width_,
          mat_dim_a.width_, alpha, a.data<T>(), b.data<T>(), data_c, nullptr,
          nullptr);
      PADDLE_ENFORCE_EQ(
          r, XPU_SUCCESS,
          platform::errors::External(
              "XPU API return wrong value[%d], please check whether "
              "Baidu Kunlun Card is properly installed.",
              r));
    }
  }

  // Computes one input gradient. When a rank-3 operand must be multiplied
  // into a rank-2 gradient, the operand is first folded to 2-D: either its
  // leading dims are merged (FoldInitDims) or its head/last dims are folded
  // via an on-device transpose (XPUFoldHeadAndLastDims).
  void CalcInputGrad(const framework::ExecutionContext &context,
                     const framework::Tensor &a, bool trans_a,
                     bool is_fold_init_dims_a, const framework::Tensor &b,
                     bool trans_b, bool is_fold_init_dims_b,
                     framework::Tensor *out) const {
    if (out == nullptr) return;  // this gradient was not requested
    bool need_combine = (a.dims().size() == 3 || b.dims().size() == 3) &&
                        out->dims().size() == 2;
    if (!need_combine) {
      MatMul(context, a, trans_a, b, trans_b, out);
    } else {
      auto &dev_ctx = context.template device_context<DeviceContext>();
      MatMul(context,
             is_fold_init_dims_a
                 ? FoldInitDims(a)
                 : XPUFoldHeadAndLastDims<DeviceContext, T>(dev_ctx, a),
             trans_a,
             is_fold_init_dims_b
                 ? FoldInitDims(b)
                 : XPUFoldHeadAndLastDims<DeviceContext, T>(dev_ctx, b),
             trans_b, out);
    }
  }

  // Computes dX and/or dY for Out = matmul(X, Y) following the table above.
  void Compute(const framework::ExecutionContext &context) const override {
    // Local copies of X/Y/dOut: they are reshaped in place below without
    // touching the original variables.
    auto x = *context.Input<framework::Tensor>("X");
    auto y = *context.Input<framework::Tensor>("Y");
    auto dout =
        *context.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto *dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
    auto *dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));
    bool transpose_x = context.Attr<bool>("transpose_X");
    bool transpose_y = context.Attr<bool>("transpose_Y");
    ReshapeXYOutIntoMatrixSequence(&x, &y, &dout, transpose_x, transpose_y);

    // Remember the original gradient shapes so they can be restored after
    // the matrix-sequence computation.
    framework::DDim dx_dims;
    if (dx) {
      dx_dims = dx->dims();
      if (dx_dims != x.dims()) {
        dx->Resize(x.dims());
      }
    }
    framework::DDim dy_dims;
    if (dy) {
      dy_dims = dy->dims();
      if (dy_dims != y.dims()) {
        dy->Resize(y.dims());
      }
    }

    if (transpose_x && transpose_y) {
      // dX = Y^T dOut^T, dY = dOut^T X^T
      CalcInputGrad(context, y, true, true, dout, true, false, dx);
      CalcInputGrad(context, dout, true, true, x, true, false, dy);
    } else if (transpose_x) {
      // dX = Y dOut^T, dY = X dOut
      CalcInputGrad(context, y, false, false, dout, true, false, dx);
      CalcInputGrad(context, x, false, false, dout, false, true, dy);
    } else if (transpose_y) {
      // dX = dOut Y, dY = dOut^T X
      CalcInputGrad(context, dout, false, false, y, false, true, dx);
      CalcInputGrad(context, dout, true, true, x, false, true, dy);
    } else {
      // dX = dOut Y^T, dY = X^T dOut
      CalcInputGrad(context, dout, false, false, y, true, false, dx);
      CalcInputGrad(context, x, true, true, dout, false, true, dy);
    }

    // Restore the gradients' original (pre-matrix-sequence) shapes.
    if (dx) {
      if (dx_dims != x.dims()) {
        dx->Resize(dx_dims);
      }
    }
    if (dy) {
      if (dy_dims != y.dims()) {
        dy->Resize(dy_dims);
      }
    }
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
// Register the XPU forward/backward matmul kernels. Only float is
// instantiated here, matching the xpu::fc_int16 / batched_gemm_int16 calls
// in the kernels above.
REGISTER_OP_XPU_KERNEL(
    matmul, ops::MatMulXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
    matmul_grad,
    ops::MatMulGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
paddle/fluid/operators/
xpu/mul_xpu_op
.cc
→
paddle/fluid/operators/
mul_op_xpu
.cc
浏览文件 @
b9a1c7e2
...
@@ -14,11 +14,11 @@ limitations under the License. */
...
@@ -14,11 +14,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/mul_op.h"
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_map>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/mul_op.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
...
paddle/fluid/operators/optimizers/dpsgd_op.cc
浏览文件 @
b9a1c7e2
...
@@ -24,32 +24,45 @@ class DpsgdOp : public framework::OperatorWithKernel {
...
@@ -24,32 +24,45 @@ class DpsgdOp : public framework::OperatorWithKernel {
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Param"
),
true
,
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Param"
),
true
,
"Input(Param) of DpsgdOp should not be null."
);
platform
::
errors
::
NotFound
(
"Input(Param) of DpsgdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Grad"
),
true
,
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Grad"
),
true
,
"Input(Grad) of DpsgdOp should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"LearningRate"
),
true
,
"Input(Grad) of DpsgdOp should not be null."
));
"Input(LearningRate) of DpsgdOp should not be null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"LearningRate"
),
true
,
platform
::
errors
::
NotFound
(
"Input(LearningRate) of DpsgdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
platform
::
errors
::
InvalidArgument
(
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
platform
::
errors
::
InvalidArgument
(
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
());
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
"Output(ParamOut) of DpsgdOp should not be null."
);
platform
::
errors
::
NotFound
(
"Output(ParamOut) of DpsgdOp should not be null."
));
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
"Learning rate should have 1 dimension"
);
platform
::
errors
::
InvalidArgument
(
"Learning rate should have 1 dimension. But Received "
"LearningRate's dims [%s]."
,
framework
::
product
(
lr_dims
)));
auto
param_dims
=
ctx
->
GetInputDim
(
"Param"
);
auto
param_dims
=
ctx
->
GetInputDim
(
"Param"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
param_dims
,
ctx
->
GetInputDim
(
"Grad"
),
param_dims
,
ctx
->
GetInputDim
(
"Grad"
),
"Param and Grad input of DpsgdOp should have same dimension"
);
platform
::
errors
::
InvalidArgument
(
"Param and Grad input of DpsgdOp should have same dimension. But "
"received Para's dim [%s] and Grad's dim [%s]."
,
param_dims
,
ctx
->
GetInputDim
(
"Grad"
)));
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dims
);
}
}
...
...
paddle/fluid/operators/optimizers/dpsgd_op.h
浏览文件 @
b9a1c7e2
...
@@ -28,17 +28,19 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
...
@@ -28,17 +28,19 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
"The Var(%s)'s type should be LoDTensor, "
platform
::
errors
::
InvalidArgument
(
"but the received is %s"
,
"The Var(%s)'s type should be LoDTensor, "
ctx
.
InputNames
(
"Param"
).
front
(),
"but the received is %s"
,
framework
::
ToTypeName
(
param_var
->
Type
()));
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
const
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
"The Var(%s)'s type should be LoDTensor, "
platform
::
errors
::
InvalidArgument
(
"but the received is %s"
,
"The Var(%s)'s type should be LoDTensor, "
ctx
.
InputNames
(
"Grad"
).
front
(),
"but the received is %s"
,
framework
::
ToTypeName
(
grad_var
->
Type
()));
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
())));
const
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
const
auto
*
learning_rate
=
ctx
.
Input
<
framework
::
Tensor
>
(
"LearningRate"
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
b9a1c7e2
...
@@ -40,43 +40,62 @@ class MomentumOp : public framework::OperatorWithKernel {
...
@@ -40,43 +40,62 @@ class MomentumOp : public framework::OperatorWithKernel {
protected:
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Param"
),
true
,
"Input(param) of Momentum should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
"Input(param) of Momentum should not be null."
));
"Input(grad) of Momentum should not be null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Grad"
),
true
,
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Velocity"
),
platform
::
errors
::
NotFound
(
"Input(velocity) of Momentum should not be null."
);
"Input(grad) of Momentum should not be null."
));
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Velocity"
),
true
,
"Input(LearningRate) of Momentum should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
"Input(velocity) of Momentum should not be null."
));
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
PADDLE_ENFORCE_EQ
(
framework
::
proto
::
VarType
::
LOD_TENSOR
,
ctx
->
HasInput
(
"LearningRate"
),
true
,
"The input var's type should be LoDTensor, but the received is %s"
,
platform
::
errors
::
NotFound
(
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
"Input(LearningRate) of Momentum should not be null."
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
"Output(ParamOut) of Momentum should not be null."
);
framework
::
proto
::
VarType
::
LOD_TENSOR
,
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"VelocityOut"
),
platform
::
errors
::
InvalidArgument
(
"Output(VelocityOut) of Momentum should not be null."
);
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(ParamOut) of Momentum should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"VelocityOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(VelocityOut) of Momentum should not be null."
));
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_NE
(
framework
::
product
(
lr_dims
),
0
,
PADDLE_ENFORCE_NE
(
framework
::
product
(
lr_dims
),
0
,
"Maybe the Input variable LearningRate has not "
platform
::
errors
::
InvalidArgument
(
"been initialized. You may need to confirm "
"Maybe the Input variable LearningRate has not "
"if you put exe.run(startup_program) "
"been initialized. You may need to confirm "
"after optimizer.minimize function."
);
"if you put exe.run(startup_program) "
"after optimizer.minimize function."
));
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
"Learning_rate should be a scalar"
);
platform
::
errors
::
InvalidArgument
(
"Learning_rate should be a scalar. But Received "
"LearningRate's dim [%s]"
,
framework
::
product
(
lr_dims
)));
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Grad"
),
param_dim
,
ctx
->
GetInputDim
(
"Grad"
),
"Param and Grad input of MomentumOp should have the same dimension."
);
platform
::
errors
::
InvalidArgument
(
"Param and Grad input of MomentumOp should have the same "
"dimension. But received Param's dim [%s] and Grad's dim [%s]."
,
param_dim
,
ctx
->
GetInputDim
(
"Grad"
)));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Velocity"
),
param_dim
,
ctx
->
GetInputDim
(
"Velocity"
),
"Param and Velocity of MomentumOp should have the same dimension."
);
platform
::
errors
::
InvalidArgument
(
"Param and Velocity of MomentumOp should have the same "
"dimension. But received Param's dim [%s] and Velocity [%s]."
,
param_dim
,
ctx
->
GetInputDim
(
"Velocity"
)));
}
}
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
...
@@ -398,10 +417,12 @@ class MomentumOpKernel : public framework::OpKernel<T> {
...
@@ -398,10 +417,12 @@ class MomentumOpKernel : public framework::OpKernel<T> {
for_range
(
functor
);
for_range
(
functor
);
}
}
}
else
{
}
else
{
PADDLE_THROW
(
PADDLE_ENFORCE_EQ
(
false
,
true
,
string
::
Sprintf
(
"MomentumOp only supports LoDTensor or SelectedRows "
platform
::
errors
::
PermissionDenied
(
"gradient, but the received Variable Type is %s"
,
"Unsupported Variable Type of Grad "
framework
::
ToTypeName
(
grad_var
->
Type
())));
"in MomentumOp. Excepted LodTensor "
"or SelectedRows, But received [%s]"
,
paddle
::
framework
::
ToTypeName
(
grad_var
->
Type
())));
}
}
}
}
};
};
...
...
paddle/fluid/operators/optimizers/rmsprop_op.cc
浏览文件 @
b9a1c7e2
...
@@ -22,47 +22,75 @@ class RmspropOp : public framework::OperatorWithKernel {
...
@@ -22,47 +22,75 @@ class RmspropOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Param"
),
true
,
"Input(Param) of RmspropOp should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"MeanSquare"
),
"Input(Param) of RmspropOp should not be null."
));
"Input(MeanSquare) of RmspropOp should not be null."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
ctx
->
HasInput
(
"MeanSquare"
),
true
,
"Input(LearningRate) of RmspropOp should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
"Input(MeanSquare) of RmspropOp should not be null."
));
"Input(Grad) of RmspropOp should not be null."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Moment"
),
ctx
->
HasInput
(
"LearningRate"
),
true
,
"Input(Moment) of RmspropOp should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
"Input(LearningRate) of RmspropOp should not be null."
));
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Grad"
),
true
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
NotFound
(
"The input var's type should be LoDTensor, but the received is %s"
,
"Input(Grad) of RmspropOp should not be null."
));
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Moment"
),
true
,
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Input(Moment) of RmspropOp should not be null."
));
"Output(param_out) of RmspropOp should not be null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MomentOut"
),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"Output(MomentOut) of RmspropOp should not be null."
);
platform
::
errors
::
InvalidArgument
(
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanSquareOut"
),
"The input var's type in RmspropOp should be "
"Output(MeanSquareOut) of RmspropOp should not be null."
);
"LoDTensor, but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(param_out) of RmspropOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"MomentOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(MomentOut) of RmspropOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"MeanSquareOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(MeanSquareOut) of RmspropOp should not be null."
));
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"centered"
))
{
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"centered"
))
{
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"MeanGradOut"
),
PADDLE_ENFORCE_EQ
(
"Output(MeanGradOut) of RmspropOp should not be null."
);
ctx
->
HasOutput
(
"MeanGradOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(MeanGradOut) of RmspropOp should not be null."
));
}
}
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Grad"
),
param_dim
,
ctx
->
GetInputDim
(
"Grad"
),
"Param and grad input of RmspropOp should have the same dimension."
);
platform
::
errors
::
InvalidArgument
(
"Param and grad input of RmspropOp should have the same dimension. "
"But received Param's dim [%s] and Grad's dim [%s]."
,
param_dim
,
ctx
->
GetInputDim
(
"Grad"
)));
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Moment"
),
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Moment"
),
"Param and Momentum input of RmspropOp "
platform
::
errors
::
InvalidArgument
(
"should have the same dimension."
);
"Param and Momentum input of RmspropOp "
"should have the same dimension. But received "
"Param's dim [%s] and Moment [%s]"
,
param_dim
,
ctx
->
GetInputDim
(
"Moment"
)));
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"MeanSquare"
),
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"MeanSquare"
),
"Param and Momentum input of RmspropOp "
platform
::
errors
::
InvalidArgument
(
"should have the same dimension."
);
"Param and Momentum input of RmspropOp "
"should have the same dimension. But received "
"Param's dim [%s] and MeanSquare [%s]"
,
param_dim
,
ctx
->
GetInputDim
(
"MeanSquare"
)));
auto
lr_dim
=
ctx
->
GetInputDim
(
"LearningRate"
);
auto
lr_dim
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dim
),
1
,
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dim
),
1
,
"Learning Rate should be a scalar."
);
platform
::
errors
::
InvalidArgument
(
"Learning Rate of RmspropOp should be a scalar. But "
"received LearningRate's dim [%s]"
,
framework
::
product
(
lr_dim
)));
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"MomentOut"
,
param_dim
);
...
...
paddle/fluid/operators/optimizers/rmsprop_op.h
浏览文件 @
b9a1c7e2
...
@@ -148,11 +148,15 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -148,11 +148,15 @@ class RmspropOpKernel : public framework::OpKernel<T> {
auto
&
mom_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"Moment"
);
auto
&
mom_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"Moment"
);
PADDLE_ENFORCE_EQ
(
&
p_tensor
,
param_out
,
PADDLE_ENFORCE_EQ
(
&
p_tensor
,
param_out
,
"Param and ParamOut must be the same Tensor"
);
platform
::
errors
::
InvalidArgument
(
"Param and ParamOut must be the same Tensor"
));
PADDLE_ENFORCE_EQ
(
&
mom_tensor
,
moment_out
,
PADDLE_ENFORCE_EQ
(
&
mom_tensor
,
moment_out
,
"Moment and MomentOut must be the same Tensor"
);
platform
::
errors
::
InvalidArgument
(
PADDLE_ENFORCE_EQ
(
&
ms_tensor
,
mean_square_out
,
"Moment and MomentOut must be the same Tensor"
));
"MeanSquare and MeanSquareOut must be the same Tensor"
);
PADDLE_ENFORCE_EQ
(
&
ms_tensor
,
mean_square_out
,
platform
::
errors
::
InvalidArgument
(
"MeanSquare and MeanSquareOut must be the same Tensor"
));
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
size_t
limit
=
static_cast
<
size_t
>
(
ms_tensor
.
numel
());
size_t
limit
=
static_cast
<
size_t
>
(
ms_tensor
.
numel
());
...
@@ -179,8 +183,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -179,8 +183,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
auto
&
mg_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"MeanGrad"
);
auto
&
mg_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"MeanGrad"
);
auto
mg
=
EigenVector
<
T
>::
Flatten
(
mg_tensor
);
auto
mg
=
EigenVector
<
T
>::
Flatten
(
mg_tensor
);
auto
*
mean_grad_out
=
ctx
.
Output
<
LoDTensor
>
(
"MeanGradOut"
);
auto
*
mean_grad_out
=
ctx
.
Output
<
LoDTensor
>
(
"MeanGradOut"
);
PADDLE_ENFORCE_EQ
(
&
mg_tensor
,
mean_grad_out
,
PADDLE_ENFORCE_EQ
(
"MeanGrad and MeanGradOut must be the same Tensor"
);
&
mg_tensor
,
mean_grad_out
,
platform
::
errors
::
InvalidArgument
(
"MeanGrad and MeanGradOut must be the same Tensor"
));
auto
mg_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_grad_out
);
auto
mg_out
=
EigenVector
<
T
>::
Flatten
(
*
mean_grad_out
);
mg_out
.
device
(
place
)
=
rho
*
mg
+
(
1
-
rho
)
*
g
;
mg_out
.
device
(
place
)
=
rho
*
mg
+
(
1
-
rho
)
*
g
;
...
@@ -198,8 +204,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -198,8 +204,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
if
(
centered
)
{
if
(
centered
)
{
auto
&
mg_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"MeanGrad"
);
auto
&
mg_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"MeanGrad"
);
auto
*
mean_grad_out
=
ctx
.
Output
<
LoDTensor
>
(
"MeanGradOut"
);
auto
*
mean_grad_out
=
ctx
.
Output
<
LoDTensor
>
(
"MeanGradOut"
);
PADDLE_ENFORCE_EQ
(
&
mg_tensor
,
mean_grad_out
,
PADDLE_ENFORCE_EQ
(
"MeanGrad and MeanGradOut must be the same Tensor"
);
&
mg_tensor
,
mean_grad_out
,
platform
::
errors
::
InvalidArgument
(
"MeanGrad and MeanGradOut must be the same Tensor"
));
for_range
(
CenteredRmspropFunctor
<
T
,
DenseRmspropGradFunctor
<
T
>>
(
for_range
(
CenteredRmspropFunctor
<
T
,
DenseRmspropGradFunctor
<
T
>>
(
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
mean_square_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
mean_square_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
...
@@ -233,8 +241,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -233,8 +241,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
if
(
centered
)
{
if
(
centered
)
{
auto
&
mg_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"MeanGrad"
);
auto
&
mg_tensor
=
*
ctx
.
Input
<
LoDTensor
>
(
"MeanGrad"
);
auto
*
mean_grad_out
=
ctx
.
Output
<
LoDTensor
>
(
"MeanGradOut"
);
auto
*
mean_grad_out
=
ctx
.
Output
<
LoDTensor
>
(
"MeanGradOut"
);
PADDLE_ENFORCE_EQ
(
&
mg_tensor
,
mean_grad_out
,
PADDLE_ENFORCE_EQ
(
"MeanGrad and MeanGradOut must be the same Tensor"
);
&
mg_tensor
,
mean_grad_out
,
platform
::
errors
::
InvalidArgument
(
"MeanGrad and MeanGradOut must be the same Tensor"
));
for_range
(
CenteredRmspropFunctor
<
T
,
SparseRmspropGradFunctor
<
T
>>
(
for_range
(
CenteredRmspropFunctor
<
T
,
SparseRmspropGradFunctor
<
T
>>
(
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
mean_square_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
mean_square_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
...
@@ -249,7 +259,12 @@ class RmspropOpKernel : public framework::OpKernel<T> {
...
@@ -249,7 +259,12 @@ class RmspropOpKernel : public framework::OpKernel<T> {
rho
,
epsilon
,
momentum
,
grad_func
));
rho
,
epsilon
,
momentum
,
grad_func
));
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"RMSProp only supports LoDTensor or SelectedRows gradient"
);
PADDLE_ENFORCE_EQ
(
false
,
true
,
platform
::
errors
::
PermissionDenied
(
"Unsupported Variable Type of Grad "
"in RmspropOp. Excepted LodTensor "
"or SelectedRows, But received [%s]"
,
paddle
::
framework
::
ToTypeName
(
grad_var
->
Type
())));
}
}
}
}
};
};
...
...
paddle/fluid/operators/optimizers/sgd_op.cc
浏览文件 @
b9a1c7e2
...
@@ -22,23 +22,31 @@ class SGDOp : public framework::OperatorWithKernel {
...
@@ -22,23 +22,31 @@ class SGDOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"Param"
),
true
,
"Input(Param) of SGDOp should not be null."
);
platform
::
errors
::
NotFound
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
"Input(Param) of SGDOp should not be null."
));
"Input(Grad) of SGDOp should not be null."
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
ctx
->
HasInput
(
"Grad"
),
true
,
"Input(LearningRate) of SGDOp should not be null."
);
platform
::
errors
::
NotFound
(
"Input(Grad) of SGDOp should not be null."
));
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"LearningRate"
),
true
,
"Output(ParamOut) of SGDOp should not be null."
);
platform
::
errors
::
NotFound
(
"Input(LearningRate) of SGDOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
platform
::
errors
::
NotFound
(
"Output(ParamOut) of SGDOp should not be null."
));
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
auto
lr_dims
=
ctx
->
GetInputDim
(
"LearningRate"
);
PADDLE_ENFORCE_NE
(
framework
::
product
(
lr_dims
),
0
,
PADDLE_ENFORCE_NE
(
framework
::
product
(
lr_dims
),
0
,
"Maybe the Input variable LearningRate has not "
platform
::
errors
::
NotFound
(
"been initialized. You may need to confirm "
"Maybe the Input variable LearningRate has not "
"if you put exe.run(startup_program) "
"been initialized. You may need to confirm "
"after optimizer.minimize function."
);
"if you put exe.run(startup_program) "
"after optimizer.minimize function."
));
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
PADDLE_ENFORCE_EQ
(
framework
::
product
(
lr_dims
),
1
,
"Learning rate should have 1 element"
);
platform
::
errors
::
InvalidArgument
(
"Learning rate should have 1 element. But received "
"LearningRate dims [%s]"
,
framework
::
product
(
lr_dims
)));
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
浏览文件 @
b9a1c7e2
...
@@ -57,11 +57,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
...
@@ -57,11 +57,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
"The Var(%s)'s type should be LoDTensor, "
platform
::
errors
::
InvalidArgument
(
"but the received is %s"
,
"The Var(%s)'s type should be LoDTensor, "
ctx
.
InputNames
(
"Param"
).
front
(),
"but the received is %s"
,
framework
::
ToTypeName
(
param_var
->
Type
()));
ctx
.
InputNames
(
"Param"
).
front
(),
paddle
::
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"ParamOut"
);
...
@@ -91,18 +92,30 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
...
@@ -91,18 +92,30 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency.
// This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution.
// It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ
(
param
,
param_out
);
PADDLE_ENFORCE_EQ
(
param
,
param_out
,
platform
::
errors
::
InvalidArgument
(
"The input tensor Param of SgdOp should be equal with ParamOut "
"if variable's type is SelectedRows."
));
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
auto
in_height
=
grad
->
height
();
auto
in_height
=
grad
->
height
();
auto
out_dims
=
param_out
->
dims
();
auto
out_dims
=
param_out
->
dims
();
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
],
platform
::
errors
::
InvalidArgument
(
"The input tensor Grad's height of SgdOp should be "
"equal with ParamOut's dims. But received Grad's "
"height [%s] and ParamOut's dims [%s]"
,
in_height
,
out_dims
[
0
]));
auto
&
in_value
=
grad
->
value
();
auto
&
in_value
=
grad
->
value
();
auto
&
in_rows
=
grad
->
rows
();
auto
&
in_rows
=
grad
->
rows
();
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
,
platform
::
errors
::
InvalidArgument
(
"The in_row_numel of SgdOp should be equal with "
"param_out's numel / in_height."
));
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
in_data
=
in_value
.
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
auto
*
out_data
=
param_out
->
data
<
T
>
();
...
@@ -118,7 +131,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
...
@@ -118,7 +131,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
out_data
,
in_row_numel
,
in_rows
.
size
());
out_data
,
in_row_numel
,
in_rows
.
size
());
}
else
{
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
PADDLE_ENFORCE_EQ
(
false
,
true
,
platform
::
errors
::
PermissionDenied
(
"Unsupported Variable Type of Grad "
"in SgdOp. Excepted LodTensor or "
"SelectedRows, But received [%s]"
,
paddle
::
framework
::
ToTypeName
(
grad_var
->
Type
())));
}
}
}
}
};
};
...
...
paddle/fluid/operators/optimizers/sgd_op.h
浏览文件 @
b9a1c7e2
...
@@ -44,8 +44,20 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
...
@@ -44,8 +44,20 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
())
{
const
auto
*
grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
);
const
auto
*
grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
"Grad"
);
auto
sz
=
param_out
->
numel
();
auto
sz
=
param_out
->
numel
();
PADDLE_ENFORCE_EQ
(
param
->
numel
(),
sz
);
PADDLE_ENFORCE_EQ
(
param
->
numel
(),
sz
,
PADDLE_ENFORCE_EQ
(
grad
->
numel
(),
sz
);
platform
::
errors
::
InvalidArgument
(
"The input tensor Param's numel of SgdOp "
"should be equal with ParamOut's numel. "
"But received Param's "
"numel = [%s], ParamOut's numel = [%s]"
,
param
->
numel
(),
sz
));
PADDLE_ENFORCE_EQ
(
grad
->
numel
(),
sz
,
platform
::
errors
::
InvalidArgument
(
"The input tensor Grad's numel of SgdOp "
"should be equal with ParamOut's numel. "
"But received Grad's "
"numel = [%s], ParamOut's numel = [%s]"
,
grad
->
numel
(),
sz
));
jit
::
sgd_attr_t
attr
(
1
,
sz
,
1
,
sz
,
1
);
jit
::
sgd_attr_t
attr
(
1
,
sz
,
1
,
sz
,
1
);
const
T
*
lr
=
learning_rate
->
data
<
T
>
();
const
T
*
lr
=
learning_rate
->
data
<
T
>
();
...
@@ -62,7 +74,11 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
...
@@ -62,7 +74,11 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency.
// This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution.
// It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ
(
param
,
param_out
);
PADDLE_ENFORCE_EQ
(
param
,
param_out
,
platform
::
errors
::
InvalidArgument
(
"The input tensor Param of SgdOp "
"should be equal with ParamOut if variable's "
"type is SelectedRows. "
));
const
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
const
auto
*
grad
=
ctx
.
Input
<
framework
::
SelectedRows
>
(
"Grad"
);
auto
&
grad_rows
=
grad
->
rows
();
auto
&
grad_rows
=
grad
->
rows
();
...
@@ -73,7 +89,13 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
...
@@ -73,7 +89,13 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
}
}
auto
out_dims
=
param_out
->
dims
();
auto
out_dims
=
param_out
->
dims
();
PADDLE_ENFORCE_EQ
(
grad
->
height
(),
out_dims
[
0
]);
PADDLE_ENFORCE_EQ
(
grad
->
height
(),
out_dims
[
0
],
platform
::
errors
::
InvalidArgument
(
"The input tensor Grad's height of SgdOp "
"should be equal with ParamOut's dims. But received Grad's "
"height [%s] and ParamOut's dims [%s]"
,
grad
->
height
(),
out_dims
[
0
]));
auto
&
grad_value
=
grad
->
value
();
auto
&
grad_value
=
grad
->
value
();
const
T
*
param_data
=
param
->
data
<
T
>
();
const
T
*
param_data
=
param
->
data
<
T
>
();
const
T
*
grad_data
=
grad_value
.
data
<
T
>
();
const
T
*
grad_data
=
grad_value
.
data
<
T
>
();
...
@@ -87,19 +109,31 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
...
@@ -87,19 +109,31 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
attr
.
grad_height
=
grad_rows
.
size
();
// note: it is not grad->height()
attr
.
grad_height
=
grad_rows
.
size
();
// note: it is not grad->height()
attr
.
grad_width
=
grad_value
.
numel
()
/
attr
.
grad_height
;
attr
.
grad_width
=
grad_value
.
numel
()
/
attr
.
grad_height
;
attr
.
selected_rows_size
=
grad_rows
.
size
();
attr
.
selected_rows_size
=
grad_rows
.
size
();
PADDLE_ENFORCE_EQ
(
attr
.
grad_width
,
attr
.
param_width
);
PADDLE_ENFORCE_EQ
(
attr
.
grad_width
,
attr
.
param_width
,
platform
::
errors
::
InvalidArgument
(
"The grad_value's numel of SgdOp "
"should be equal with param_out's numel. But received "
"grad_value's numel [%s] and param_out's numel [%s]"
,
attr
.
grad_width
,
attr
.
param_width
));
auto
sgd
=
auto
sgd
=
jit
::
KernelFuncs
<
jit
::
SgdTuple
<
T
>
,
platform
::
CPUPlace
>::
Cache
().
At
(
jit
::
KernelFuncs
<
jit
::
SgdTuple
<
T
>
,
platform
::
CPUPlace
>::
Cache
().
At
(
attr
);
attr
);
sgd
(
lr
,
param_data
,
grad_data
,
rows_data
,
out_data
,
&
attr
);
sgd
(
lr
,
param_data
,
grad_data
,
rows_data
,
out_data
,
&
attr
);
}
else
{
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Grad"
);
PADDLE_ENFORCE_EQ
(
false
,
true
,
platform
::
errors
::
PermissionDenied
(
"Unsupported Variable Type of Grad in SgdOp. Excepted "
"LodTensor or SelectedRows, But received [%s]"
,
paddle
::
framework
::
ToTypeName
(
grad_var
->
Type
())));
}
}
}
else
if
(
param_var
->
IsType
<
framework
::
SelectedRows
>
())
{
}
else
if
(
param_var
->
IsType
<
framework
::
SelectedRows
>
())
{
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
(),
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
framework
::
SelectedRows
>
(),
true
,
"when param "
platform
::
errors
::
InvalidArgument
(
"is SelectedRows, gradient should also be SelectedRows"
);
"when param is SelectedRows, "
"gradient should also be SelectedRows"
));
const
auto
&
param
=
param_var
->
Get
<
framework
::
SelectedRows
>
();
const
auto
&
param
=
param_var
->
Get
<
framework
::
SelectedRows
>
();
auto
*
param_out
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
framework
::
SelectedRows
>
(
"ParamOut"
);
const
auto
&
grad
=
grad_var
->
Get
<
framework
::
SelectedRows
>
();
const
auto
&
grad
=
grad_var
->
Get
<
framework
::
SelectedRows
>
();
...
@@ -112,27 +146,36 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
...
@@ -112,27 +146,36 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
auto
param_row_width
=
param
.
value
().
dims
()[
1
];
auto
param_row_width
=
param
.
value
().
dims
()[
1
];
auto
grad_row_width
=
grad
.
value
().
dims
()[
1
];
auto
grad_row_width
=
grad
.
value
().
dims
()[
1
];
VLOG
(
4
)
<<
" param rows: "
<<
param
.
rows
().
size
()
PADDLE_ENFORCE_EQ
(
<<
" param memory rows: "
<<
param
.
value
().
dims
()[
0
]
param_row_width
,
grad_row_width
,
<<
" grad rows: "
<<
grad
.
rows
().
size
()
platform
::
errors
::
InvalidArgument
(
<<
" grad memory rows: "
<<
grad
.
value
().
dims
()[
0
];
"The param_row in SgdOP should have the same size with grad_row. "
PADDLE_ENFORCE_EQ
(
param_row_width
,
grad_row_width
,
"But received param_row's width is [%s], and grad_row's width is "
"param_row should have the same size with grad_row"
);
"[%s]"
,
param_row_width
,
grad_row_width
));
const
auto
*
lr
=
learning_rate
->
data
<
T
>
();
const
auto
*
lr
=
learning_rate
->
data
<
T
>
();
const
auto
*
grad_data
=
grad
.
value
().
data
<
T
>
();
const
auto
*
grad_data
=
grad
.
value
().
data
<
T
>
();
auto
*
out_data
=
param_out
->
mutable_value
()
->
data
<
T
>
();
auto
*
out_data
=
param_out
->
mutable_value
()
->
data
<
T
>
();
for
(
size_t
i
=
0
;
i
<
grad
.
rows
().
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
grad
.
rows
().
size
();
i
++
)
{
int64_t
id_index
=
param_out
->
AutoGrownIndex
(
grad
.
rows
()[
i
],
false
);
int64_t
id_index
=
param_out
->
AutoGrownIndex
(
grad
.
rows
()[
i
],
false
);
PADDLE_ENFORCE_GE
(
id_index
,
static_cast
<
int64_t
>
(
0
),
PADDLE_ENFORCE_GE
(
"id should be in the table"
);
id_index
,
static_cast
<
int64_t
>
(
0
),
platform
::
errors
::
InvalidArgument
(
"The id in SgdOp should be >= 0. But recevied id_index is [%s]"
,
id_index
));
for
(
int64_t
j
=
0
;
j
<
grad_row_width
;
j
++
)
{
for
(
int64_t
j
=
0
;
j
<
grad_row_width
;
j
++
)
{
out_data
[
id_index
*
grad_row_width
+
j
]
-=
out_data
[
id_index
*
grad_row_width
+
j
]
-=
lr
[
0
]
*
grad_data
[
i
*
grad_row_width
+
j
];
lr
[
0
]
*
grad_data
[
i
*
grad_row_width
+
j
];
}
}
}
}
}
else
{
}
else
{
PADDLE_THROW
(
"Unsupported Variable Type of Parameter"
);
PADDLE_ENFORCE_EQ
(
false
,
true
,
platform
::
errors
::
PermissionDenied
(
"Unsupported Variable Type of Parameter in SgdOp. Excepted "
"LodTensor or SelectedRows, But received [%s]"
,
paddle
::
framework
::
ToTypeName
(
param_var
->
Type
())));
}
}
}
}
};
};
...
...
paddle/fluid/platform/init_test.cc
浏览文件 @
b9a1c7e2
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/xpu_info.h"
TEST
(
InitDevices
,
CPU
)
{
TEST
(
InitDevices
,
CPU
)
{
using
paddle
::
framework
::
InitDevices
;
using
paddle
::
framework
::
InitDevices
;
...
...
paddle/fluid/platform/xpu_header.h
浏览文件 @
b9a1c7e2
...
@@ -15,9 +15,36 @@
...
@@ -15,9 +15,36 @@
#pragma once
#pragma once
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
#include <string>
#include <unordered_map>
#include "paddle/fluid/platform/errors.h"
#include "xpu/api.h"
#include "xpu/api.h"
#include "xpu/runtime.h"
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/runtime_ex.h"
namespace
xpu
=
baidu
::
xpu
::
api
;
namespace
xpu
=
baidu
::
xpu
::
api
;
class
XPUActHelper
{
public:
// Convert string to activation type in xpu
static
xpu
::
Activation_t
ConvertToXpuActType
(
const
std
::
string
&
act_type_str
)
{
static
std
::
unordered_map
<
std
::
string
,
xpu
::
Activation_t
>
str2act
=
{
{
"linear"
,
xpu
::
Activation_t
::
LINEAR
},
{
"relu"
,
xpu
::
Activation_t
::
RELU
},
{
"sigmoid"
,
xpu
::
Activation_t
::
SIGMOID
},
{
"tanh"
,
xpu
::
Activation_t
::
TANH
},
{
"gelu"
,
xpu
::
Activation_t
::
GELU
},
{
"leaky_relu"
,
xpu
::
Activation_t
::
LEAKY_RELU
},
{
"sqrt"
,
xpu
::
Activation_t
::
SQRT
},
{
"square"
,
xpu
::
Activation_t
::
SQUARE
}};
auto
res
=
str2act
.
find
(
act_type_str
);
PADDLE_ENFORCE_NE
(
res
,
str2act
.
end
(),
paddle
::
platform
::
errors
::
InvalidArgument
(
"Invalid activation type(%s) in XPU"
,
act_type_str
));
return
res
->
second
;
}
};
#endif
#endif
python/paddle/__init__.py
浏览文件 @
b9a1c7e2
...
@@ -230,7 +230,6 @@ from .framework import CPUPlace #DEFINE_ALIAS
...
@@ -230,7 +230,6 @@ from .framework import CPUPlace #DEFINE_ALIAS
from
.framework
import
CUDAPlace
#DEFINE_ALIAS
from
.framework
import
CUDAPlace
#DEFINE_ALIAS
from
.framework
import
CUDAPinnedPlace
#DEFINE_ALIAS
from
.framework
import
CUDAPinnedPlace
#DEFINE_ALIAS
from
.framework
import
to_variable
#DEFINE_ALIAS
from
.framework
import
grad
#DEFINE_ALIAS
from
.framework
import
grad
#DEFINE_ALIAS
from
.framework
import
no_grad
#DEFINE_ALIAS
from
.framework
import
no_grad
#DEFINE_ALIAS
from
.framework
import
save
#DEFINE_ALIAS
from
.framework
import
save
#DEFINE_ALIAS
...
@@ -258,6 +257,8 @@ from .tensor.stat import numel #DEFINE_ALIAS
...
@@ -258,6 +257,8 @@ from .tensor.stat import numel #DEFINE_ALIAS
from
.device
import
get_cudnn_version
from
.device
import
get_cudnn_version
from
.device
import
set_device
from
.device
import
set_device
from
.device
import
get_device
from
.device
import
get_device
from
.device
import
is_compiled_with_xpu
from
.device
import
XPUPlace
# from .tensor.tensor import Tensor #DEFINE_ALIAS
# from .tensor.tensor import Tensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
# from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
...
...
python/paddle/device.py
浏览文件 @
b9a1c7e2
...
@@ -22,7 +22,9 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
...
@@ -22,7 +22,9 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
__all__
=
[
__all__
=
[
'get_cudnn_version'
,
'get_cudnn_version'
,
'set_device'
,
'set_device'
,
'get_device'
'get_device'
,
'XPUPlace'
,
'is_compiled_with_xpu'
# 'cpu_places',
# 'cpu_places',
# 'CPUPlace',
# 'CPUPlace',
# 'cuda_pinned_places',
# 'cuda_pinned_places',
...
@@ -35,6 +37,37 @@ __all__ = [
...
@@ -35,6 +37,37 @@ __all__ = [
_cudnn_version
=
None
_cudnn_version
=
None
def
is_compiled_with_xpu
():
"""
Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun
Returns (bool): whether paddle was built with WITH_XPU=ON
Examples:
.. code-block:: python
import paddle
support_xpu = paddle.device.is_compiled_with_xpu()
"""
return
core
.
is_compiled_with_xpu
()
def
XPUPlace
(
dev_id
):
"""
Return a Baidu Kunlun Place
Parameters:
dev_id(int): Baidu Kunlun device id
Examples:
.. code-block:: python
import paddle
place = paddle.device.XPUPlace(0)
"""
return
core
.
XPUPlace
(
dev_id
)
def
get_cudnn_version
():
def
get_cudnn_version
():
"""
"""
This funciton return the version of cudnn. the retuen value is int which represents the
This funciton return the version of cudnn. the retuen value is int which represents the
...
...
python/paddle/fluid/dygraph/nn.py
浏览文件 @
b9a1c7e2
...
@@ -3230,14 +3230,11 @@ class Flatten(layers.Layer):
...
@@ -3230,14 +3230,11 @@ class Flatten(layers.Layer):
.. code-block:: python
.. code-block:: python
import paddle
import paddle
from paddle import to_variable
import numpy as np
import numpy as np
paddle.disable_static()
inp_np = np.ones([5, 2, 3, 4]).astype('float32')
inp_np = np.ones([5, 2, 3, 4]).astype('float32')
inp_np = paddle.to_tensor(inp_np)
paddle.disable_static()
inp_np = to_variable(inp_np)
flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
flatten_res = flatten(inp_np)
flatten_res = flatten(inp_np)
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
b9a1c7e2
...
@@ -217,6 +217,9 @@ class OpTest(unittest.TestCase):
...
@@ -217,6 +217,9 @@ class OpTest(unittest.TestCase):
return
False
return
False
return
True
return
True
def
is_xpu_op_test
():
return
hasattr
(
cls
,
"use_xpu"
)
and
cls
.
use_xpu
==
True
def
is_mkldnn_op_test
():
def
is_mkldnn_op_test
():
return
hasattr
(
cls
,
"use_mkldnn"
)
and
cls
.
use_mkldnn
==
True
return
hasattr
(
cls
,
"use_mkldnn"
)
and
cls
.
use_mkldnn
==
True
...
@@ -239,6 +242,7 @@ class OpTest(unittest.TestCase):
...
@@ -239,6 +242,7 @@ class OpTest(unittest.TestCase):
if
cls
.
dtype
in
[
np
.
float32
,
np
.
float64
]
\
if
cls
.
dtype
in
[
np
.
float32
,
np
.
float64
]
\
and
cls
.
op_type
not
in
op_accuracy_white_list
.
NO_FP64_CHECK_GRAD_OP_LIST
\
and
cls
.
op_type
not
in
op_accuracy_white_list
.
NO_FP64_CHECK_GRAD_OP_LIST
\
and
not
hasattr
(
cls
,
'exist_fp64_check_grad'
)
\
and
not
hasattr
(
cls
,
'exist_fp64_check_grad'
)
\
and
not
is_xpu_op_test
()
\
and
not
is_mkldnn_op_test
():
and
not
is_mkldnn_op_test
():
raise
AssertionError
(
raise
AssertionError
(
"This test of %s op needs check_grad with fp64 precision."
%
"This test of %s op needs check_grad with fp64 precision."
%
...
@@ -336,6 +340,11 @@ class OpTest(unittest.TestCase):
...
@@ -336,6 +340,11 @@ class OpTest(unittest.TestCase):
self
.
attrs
[
"use_mkldnn"
]
==
True
):
self
.
attrs
[
"use_mkldnn"
]
==
True
):
self
.
__class__
.
use_mkldnn
=
True
self
.
__class__
.
use_mkldnn
=
True
if
(
hasattr
(
self
,
"use_xpu"
)
and
self
.
use_xpu
==
True
)
or
\
(
hasattr
(
self
,
"attrs"
)
and
"use_xpu"
in
self
.
attrs
and
\
self
.
attrs
[
"use_xpu"
]
==
True
):
self
.
__class__
.
use_xpu
=
True
op_proto
=
OpProtoHolder
.
instance
().
get_op_proto
(
self
.
op_type
)
op_proto
=
OpProtoHolder
.
instance
().
get_op_proto
(
self
.
op_type
)
"infer datatype from inputs and outputs for this test case"
"infer datatype from inputs and outputs for this test case"
self
.
infer_dtype_from_inputs_outputs
(
self
.
inputs
,
self
.
outputs
)
self
.
infer_dtype_from_inputs_outputs
(
self
.
inputs
,
self
.
outputs
)
...
@@ -932,6 +941,8 @@ class OpTest(unittest.TestCase):
...
@@ -932,6 +941,8 @@ class OpTest(unittest.TestCase):
need_run_ops
=
self
.
_get_need_run_ops
(
op_desc
)
need_run_ops
=
self
.
_get_need_run_ops
(
op_desc
)
res
=
{}
res
=
{}
if
hasattr
(
self
,
'attrs'
)
and
bool
(
self
.
attrs
.
get
(
'use_xpu'
,
False
)):
return
for
op_desc
,
father_op_desc
in
reversed
(
need_run_ops
):
for
op_desc
,
father_op_desc
in
reversed
(
need_run_ops
):
# The first one is the forward op
# The first one is the forward op
has_infer_inplace
=
fluid
.
core
.
has_infer_inplace
(
op_desc
.
type
())
has_infer_inplace
=
fluid
.
core
.
has_infer_inplace
(
op_desc
.
type
())
...
@@ -1203,6 +1214,11 @@ class OpTest(unittest.TestCase):
...
@@ -1203,6 +1214,11 @@ class OpTest(unittest.TestCase):
self
.
attrs
[
"use_mkldnn"
]
==
True
):
self
.
attrs
[
"use_mkldnn"
]
==
True
):
self
.
__class__
.
use_mkldnn
=
True
self
.
__class__
.
use_mkldnn
=
True
if
(
hasattr
(
self
,
"use_xpu"
)
and
self
.
use_xpu
==
True
)
or
\
(
hasattr
(
self
,
"attrs"
)
and
"use_xpu"
in
self
.
attrs
and
\
self
.
attrs
[
"use_xpu"
]
==
True
):
self
.
__class__
.
use_xpu
=
True
places
=
self
.
_get_places
()
places
=
self
.
_get_places
()
for
place
in
places
:
for
place
in
places
:
res
=
self
.
check_output_with_place
(
place
,
atol
,
no_check_set
,
res
=
self
.
check_output_with_place
(
place
,
atol
,
no_check_set
,
...
...
python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
浏览文件 @
b9a1c7e2
...
@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
...
@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
self
.
func
(
p
)
self
.
func
(
p
)
class
TestLogDoubleGradCheck
(
unittest
.
TestCase
):
@
prog_scope
()
def
func
(
self
,
place
):
shape
=
[
2
,
3
,
7
,
9
]
eps
=
1e-6
dtype
=
np
.
float64
x
=
layers
.
data
(
'x'
,
shape
,
False
,
dtype
)
x
.
persistable
=
True
y
=
layers
.
log
(
x
)
x_arr
=
np
.
random
.
uniform
(
0.1
,
1
,
shape
).
astype
(
dtype
)
gradient_checker
.
double_grad_check
(
[
x
],
y
,
x_init
=
x_arr
,
place
=
place
,
eps
=
eps
)
def
test_grad
(
self
):
places
=
[
fluid
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
places
.
append
(
fluid
.
CUDAPlace
(
0
))
for
p
in
places
:
self
.
func
(
p
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_activation_op.py
浏览文件 @
b9a1c7e2
...
@@ -228,7 +228,7 @@ class TestTanhAPI(unittest.TestCase):
...
@@ -228,7 +228,7 @@ class TestTanhAPI(unittest.TestCase):
def
test_dygraph_api
(
self
):
def
test_dygraph_api
(
self
):
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out1
=
F
.
tanh
(
x
)
out1
=
F
.
tanh
(
x
)
out2
=
paddle
.
tanh
(
x
)
out2
=
paddle
.
tanh
(
x
)
th
=
paddle
.
nn
.
Tanh
()
th
=
paddle
.
nn
.
Tanh
()
...
@@ -573,7 +573,7 @@ class TestHardShrinkAPI(unittest.TestCase):
...
@@ -573,7 +573,7 @@ class TestHardShrinkAPI(unittest.TestCase):
def
test_dygraph_api
(
self
):
def
test_dygraph_api
(
self
):
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out1
=
F
.
hardshrink
(
x
)
out1
=
F
.
hardshrink
(
x
)
hd
=
paddle
.
nn
.
Hardshrink
()
hd
=
paddle
.
nn
.
Hardshrink
()
out2
=
hd
(
x
)
out2
=
hd
(
x
)
...
@@ -639,7 +639,7 @@ class TestHardtanhAPI(unittest.TestCase):
...
@@ -639,7 +639,7 @@ class TestHardtanhAPI(unittest.TestCase):
def
test_dygraph_api
(
self
):
def
test_dygraph_api
(
self
):
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out1
=
F
.
hardtanh
(
x
)
out1
=
F
.
hardtanh
(
x
)
m
=
paddle
.
nn
.
Hardtanh
()
m
=
paddle
.
nn
.
Hardtanh
()
out2
=
m
(
x
)
out2
=
m
(
x
)
...
@@ -1063,7 +1063,7 @@ class TestLeakyReluAPI(unittest.TestCase):
...
@@ -1063,7 +1063,7 @@ class TestLeakyReluAPI(unittest.TestCase):
def
test_dygraph_api
(
self
):
def
test_dygraph_api
(
self
):
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out1
=
F
.
leaky_relu
(
x
)
out1
=
F
.
leaky_relu
(
x
)
m
=
paddle
.
nn
.
LeakyReLU
()
m
=
paddle
.
nn
.
LeakyReLU
()
out2
=
m
(
x
)
out2
=
m
(
x
)
...
...
python/paddle/fluid/tests/unittests/test_adamax_api.py
浏览文件 @
b9a1c7e2
...
@@ -25,7 +25,7 @@ class TestAdamaxAPI(unittest.TestCase):
...
@@ -25,7 +25,7 @@ class TestAdamaxAPI(unittest.TestCase):
def
test_adamax_api_dygraph
(
self
):
def
test_adamax_api_dygraph
(
self
):
paddle
.
disable_static
()
paddle
.
disable_static
()
value
=
np
.
arange
(
26
).
reshape
(
2
,
13
).
astype
(
"float32"
)
value
=
np
.
arange
(
26
).
reshape
(
2
,
13
).
astype
(
"float32"
)
a
=
paddle
.
to_
variable
(
value
)
a
=
paddle
.
to_
tensor
(
value
)
linear
=
paddle
.
nn
.
Linear
(
13
,
5
)
linear
=
paddle
.
nn
.
Linear
(
13
,
5
)
adam
=
paddle
.
optimizer
.
Adamax
(
adam
=
paddle
.
optimizer
.
Adamax
(
learning_rate
=
0.01
,
learning_rate
=
0.01
,
...
...
python/paddle/fluid/tests/unittests/test_adamw_op.py
浏览文件 @
b9a1c7e2
...
@@ -22,7 +22,7 @@ class TestAdamWOp(unittest.TestCase):
...
@@ -22,7 +22,7 @@ class TestAdamWOp(unittest.TestCase):
def
test_adamw_op_dygraph
(
self
):
def
test_adamw_op_dygraph
(
self
):
paddle
.
disable_static
()
paddle
.
disable_static
()
value
=
np
.
arange
(
26
).
reshape
(
2
,
13
).
astype
(
"float32"
)
value
=
np
.
arange
(
26
).
reshape
(
2
,
13
).
astype
(
"float32"
)
a
=
paddle
.
to_
variable
(
value
)
a
=
paddle
.
to_
tensor
(
value
)
linear
=
paddle
.
nn
.
Linear
(
13
,
5
)
linear
=
paddle
.
nn
.
Linear
(
13
,
5
)
adam
=
paddle
.
optimizer
.
AdamW
(
adam
=
paddle
.
optimizer
.
AdamW
(
learning_rate
=
0.01
,
learning_rate
=
0.01
,
...
@@ -37,7 +37,7 @@ class TestAdamWOp(unittest.TestCase):
...
@@ -37,7 +37,7 @@ class TestAdamWOp(unittest.TestCase):
def
test_adamw_op_coverage
(
self
):
def
test_adamw_op_coverage
(
self
):
paddle
.
disable_static
()
paddle
.
disable_static
()
value
=
np
.
arange
(
26
).
reshape
(
2
,
13
).
astype
(
"float32"
)
value
=
np
.
arange
(
26
).
reshape
(
2
,
13
).
astype
(
"float32"
)
a
=
paddle
.
to_
variable
(
value
)
a
=
paddle
.
to_
tensor
(
value
)
linear
=
paddle
.
nn
.
Linear
(
13
,
5
)
linear
=
paddle
.
nn
.
Linear
(
13
,
5
)
adam
=
paddle
.
optimizer
.
AdamW
(
adam
=
paddle
.
optimizer
.
AdamW
(
learning_rate
=
0.0
,
learning_rate
=
0.0
,
...
...
python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py
浏览文件 @
b9a1c7e2
...
@@ -147,7 +147,7 @@ class TestAdaptiveAvgPool2dAPI(unittest.TestCase):
...
@@ -147,7 +147,7 @@ class TestAdaptiveAvgPool2dAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out_1
=
paddle
.
nn
.
functional
.
adaptive_avg_pool2d
(
out_1
=
paddle
.
nn
.
functional
.
adaptive_avg_pool2d
(
x
=
x
,
output_size
=
[
3
,
3
])
x
=
x
,
output_size
=
[
3
,
3
])
...
@@ -245,7 +245,7 @@ class TestAdaptiveAvgPool2dClassAPI(unittest.TestCase):
...
@@ -245,7 +245,7 @@ class TestAdaptiveAvgPool2dClassAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
adaptive_avg_pool
=
paddle
.
nn
.
AdaptiveAvgPool2d
(
output_size
=
[
3
,
3
])
adaptive_avg_pool
=
paddle
.
nn
.
AdaptiveAvgPool2d
(
output_size
=
[
3
,
3
])
out_1
=
adaptive_avg_pool
(
x
=
x
)
out_1
=
adaptive_avg_pool
(
x
=
x
)
...
...
python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py
浏览文件 @
b9a1c7e2
...
@@ -162,7 +162,7 @@ class TestAdaptiveAvgPool3dAPI(unittest.TestCase):
...
@@ -162,7 +162,7 @@ class TestAdaptiveAvgPool3dAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out_1
=
paddle
.
nn
.
functional
.
adaptive_avg_pool3d
(
out_1
=
paddle
.
nn
.
functional
.
adaptive_avg_pool3d
(
x
=
x
,
output_size
=
[
3
,
3
,
3
])
x
=
x
,
output_size
=
[
3
,
3
,
3
])
...
@@ -262,7 +262,7 @@ class TestAdaptiveAvgPool3dClassAPI(unittest.TestCase):
...
@@ -262,7 +262,7 @@ class TestAdaptiveAvgPool3dClassAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
adaptive_avg_pool
=
paddle
.
nn
.
AdaptiveAvgPool3d
(
adaptive_avg_pool
=
paddle
.
nn
.
AdaptiveAvgPool3d
(
output_size
=
[
3
,
3
,
3
])
output_size
=
[
3
,
3
,
3
])
...
...
python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py
浏览文件 @
b9a1c7e2
...
@@ -147,7 +147,7 @@ class TestAdaptiveMaxPool2dAPI(unittest.TestCase):
...
@@ -147,7 +147,7 @@ class TestAdaptiveMaxPool2dAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out_1
=
paddle
.
nn
.
functional
.
adaptive_max_pool2d
(
out_1
=
paddle
.
nn
.
functional
.
adaptive_max_pool2d
(
x
=
x
,
return_indices
=
False
,
output_size
=
[
3
,
3
])
x
=
x
,
return_indices
=
False
,
output_size
=
[
3
,
3
])
...
@@ -240,7 +240,7 @@ class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase):
...
@@ -240,7 +240,7 @@ class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
adaptive_max_pool
=
paddle
.
nn
.
AdaptiveMaxPool2d
(
output_size
=
[
3
,
3
])
adaptive_max_pool
=
paddle
.
nn
.
AdaptiveMaxPool2d
(
output_size
=
[
3
,
3
])
out_1
=
adaptive_max_pool
(
x
=
x
)
out_1
=
adaptive_max_pool
(
x
=
x
)
...
...
python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py
浏览文件 @
b9a1c7e2
...
@@ -162,7 +162,7 @@ class TestAdaptiveMaxPool3dAPI(unittest.TestCase):
...
@@ -162,7 +162,7 @@ class TestAdaptiveMaxPool3dAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
out_1
=
paddle
.
nn
.
functional
.
adaptive_max_pool3d
(
out_1
=
paddle
.
nn
.
functional
.
adaptive_max_pool3d
(
x
=
x
,
output_size
=
[
3
,
3
,
3
])
x
=
x
,
output_size
=
[
3
,
3
,
3
])
...
@@ -257,7 +257,7 @@ class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase):
...
@@ -257,7 +257,7 @@ class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
if
core
.
is_compiled_with_cuda
()
else
[
False
]):
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
place
=
paddle
.
CUDAPlace
(
0
)
if
use_cuda
else
paddle
.
CPUPlace
()
paddle
.
disable_static
(
place
=
place
)
paddle
.
disable_static
(
place
=
place
)
x
=
paddle
.
to_
variable
(
self
.
x_np
)
x
=
paddle
.
to_
tensor
(
self
.
x_np
)
adaptive_max_pool
=
paddle
.
nn
.
AdaptiveMaxPool3d
(
adaptive_max_pool
=
paddle
.
nn
.
AdaptiveMaxPool3d
(
output_size
=
[
3
,
3
,
3
])
output_size
=
[
3
,
3
,
3
])
...
...
python/paddle/fluid/tests/unittests/test_addmm_op.py
浏览文件 @
b9a1c7e2
...
@@ -244,9 +244,9 @@ class TestAddMMAPI(unittest.TestCase):
...
@@ -244,9 +244,9 @@ class TestAddMMAPI(unittest.TestCase):
def test_error1():
def test_error1():
data_x_wrong = np.ones((2, 3)).astype(np.float32)
data_x_wrong = np.ones((2, 3)).astype(np.float32)
x = paddle.to_
variable
(data_x_wrong)
x = paddle.to_
tensor
(data_x_wrong)
y = paddle.to_
variable
(data_y)
y = paddle.to_
tensor
(data_y)
input = paddle.to_
variable
(data_input)
input = paddle.to_
tensor
(data_input)
out = paddle.tensor.addmm( input=input, x=x, y=y, beta=0.5, alpha=5.0 )
out = paddle.tensor.addmm( input=input, x=x, y=y, beta=0.5, alpha=5.0 )
self.assertRaises(ValueError, test_error1)
self.assertRaises(ValueError, test_error1)
'''
'''
...
...
python/paddle/fluid/tests/unittests/test_arange.py
浏览文件 @
b9a1c7e2
...
@@ -98,9 +98,9 @@ class TestArangeImperative(unittest.TestCase):
...
@@ -98,9 +98,9 @@ class TestArangeImperative(unittest.TestCase):
x2
=
paddle
.
tensor
.
arange
(
5
)
x2
=
paddle
.
tensor
.
arange
(
5
)
x3
=
paddle
.
tensor
.
creation
.
arange
(
5
)
x3
=
paddle
.
tensor
.
creation
.
arange
(
5
)
start
=
paddle
.
to_
variable
(
np
.
array
([
0
],
'float32'
))
start
=
paddle
.
to_
tensor
(
np
.
array
([
0
],
'float32'
))
end
=
paddle
.
to_
variable
(
np
.
array
([
5
],
'float32'
))
end
=
paddle
.
to_
tensor
(
np
.
array
([
5
],
'float32'
))
step
=
paddle
.
to_
variable
(
np
.
array
([
1
],
'float32'
))
step
=
paddle
.
to_
tensor
(
np
.
array
([
1
],
'float32'
))
x4
=
paddle
.
arange
(
start
,
end
,
step
,
'int64'
)
x4
=
paddle
.
arange
(
start
,
end
,
step
,
'int64'
)
paddle
.
enable_static
()
paddle
.
enable_static
()
...
...
python/paddle/fluid/tests/unittests/test_cholesky_op.py
浏览文件 @
b9a1c7e2
...
@@ -96,7 +96,7 @@ class TestDygraph(unittest.TestCase):
...
@@ -96,7 +96,7 @@ class TestDygraph(unittest.TestCase):
a
=
np
.
random
.
rand
(
3
,
3
)
a
=
np
.
random
.
rand
(
3
,
3
)
a_t
=
np
.
transpose
(
a
,
[
1
,
0
])
a_t
=
np
.
transpose
(
a
,
[
1
,
0
])
x_data
=
np
.
matmul
(
a
,
a_t
)
+
1e-03
x_data
=
np
.
matmul
(
a
,
a_t
)
+
1e-03
x
=
paddle
.
to_
variable
(
x_data
)
x
=
paddle
.
to_
tensor
(
x_data
)
out
=
paddle
.
cholesky
(
x
,
upper
=
False
)
out
=
paddle
.
cholesky
(
x
,
upper
=
False
)
...
...
python/paddle/fluid/tests/unittests/test_clip_op.py
浏览文件 @
b9a1c7e2
...
@@ -168,9 +168,9 @@ class TestClipAPI(unittest.TestCase):
...
@@ -168,9 +168,9 @@ class TestClipAPI(unittest.TestCase):
paddle
.
disable_static
(
place
)
paddle
.
disable_static
(
place
)
data_shape
=
[
1
,
9
,
9
,
4
]
data_shape
=
[
1
,
9
,
9
,
4
]
data
=
np
.
random
.
random
(
data_shape
).
astype
(
'float32'
)
data
=
np
.
random
.
random
(
data_shape
).
astype
(
'float32'
)
images
=
paddle
.
to_
variable
(
data
,
dtype
=
'float32'
)
images
=
paddle
.
to_
tensor
(
data
,
dtype
=
'float32'
)
v_min
=
paddle
.
to_
variable
(
np
.
array
([
0.2
],
dtype
=
np
.
float32
))
v_min
=
paddle
.
to_
tensor
(
np
.
array
([
0.2
],
dtype
=
np
.
float32
))
v_max
=
paddle
.
to_
variable
(
np
.
array
([
0.8
],
dtype
=
np
.
float32
))
v_max
=
paddle
.
to_
tensor
(
np
.
array
([
0.8
],
dtype
=
np
.
float32
))
out_1
=
paddle
.
clip
(
images
,
min
=
0.2
,
max
=
0.8
)
out_1
=
paddle
.
clip
(
images
,
min
=
0.2
,
max
=
0.8
)
out_2
=
paddle
.
clip
(
images
,
min
=
0.2
,
max
=
0.9
)
out_2
=
paddle
.
clip
(
images
,
min
=
0.2
,
max
=
0.9
)
...
...
python/paddle/fluid/tests/unittests/test_concat_op.py
浏览文件 @
b9a1c7e2
...
@@ -285,9 +285,9 @@ class TestConcatAPI(unittest.TestCase):
...
@@ -285,9 +285,9 @@ class TestConcatAPI(unittest.TestCase):
in2
=
np
.
array
([[
11
,
12
,
13
],
[
14
,
15
,
16
]])
in2
=
np
.
array
([[
11
,
12
,
13
],
[
14
,
15
,
16
]])
in3
=
np
.
array
([[
21
,
22
],
[
23
,
24
]])
in3
=
np
.
array
([[
21
,
22
],
[
23
,
24
]])
paddle
.
disable_static
()
paddle
.
disable_static
()
x1
=
paddle
.
to_
variable
(
in1
)
x1
=
paddle
.
to_
tensor
(
in1
)
x2
=
paddle
.
to_
variable
(
in2
)
x2
=
paddle
.
to_
tensor
(
in2
)
x3
=
paddle
.
to_
variable
(
in3
)
x3
=
paddle
.
to_
tensor
(
in3
)
out1
=
fluid
.
layers
.
concat
(
input
=
[
x1
,
x2
,
x3
],
axis
=-
1
)
out1
=
fluid
.
layers
.
concat
(
input
=
[
x1
,
x2
,
x3
],
axis
=-
1
)
out2
=
paddle
.
concat
(
x
=
[
x1
,
x2
],
axis
=
0
)
out2
=
paddle
.
concat
(
x
=
[
x1
,
x2
],
axis
=
0
)
np_out1
=
np
.
concatenate
([
in1
,
in2
,
in3
],
axis
=-
1
)
np_out1
=
np
.
concatenate
([
in1
,
in2
,
in3
],
axis
=-
1
)
...
...
python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py
浏览文件 @
b9a1c7e2
...
@@ -75,8 +75,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
...
@@ -75,8 +75,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_x2
=
np
.
random
.
rand
(
*
shape
).
astype
(
np
.
float32
)
np_x2
=
np
.
random
.
rand
(
*
shape
).
astype
(
np
.
float32
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
tesnor_x1
=
paddle
.
to_
variable
(
np_x1
)
tesnor_x1
=
paddle
.
to_
tensor
(
np_x1
)
tesnor_x2
=
paddle
.
to_
variable
(
np_x2
)
tesnor_x2
=
paddle
.
to_
tensor
(
np_x2
)
y
=
F
.
cosine_similarity
(
tesnor_x1
,
tesnor_x2
,
axis
=
axis
,
eps
=
eps
)
y
=
F
.
cosine_similarity
(
tesnor_x1
,
tesnor_x2
,
axis
=
axis
,
eps
=
eps
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
...
@@ -92,8 +92,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
...
@@ -92,8 +92,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_x2
=
np
.
random
.
rand
(
*
shape
).
astype
(
np
.
float32
)
np_x2
=
np
.
random
.
rand
(
*
shape
).
astype
(
np
.
float32
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
tesnor_x1
=
paddle
.
to_
variable
(
np_x1
)
tesnor_x1
=
paddle
.
to_
tensor
(
np_x1
)
tesnor_x2
=
paddle
.
to_
variable
(
np_x2
)
tesnor_x2
=
paddle
.
to_
tensor
(
np_x2
)
y
=
F
.
cosine_similarity
(
tesnor_x1
,
tesnor_x2
,
axis
=
axis
,
eps
=
eps
)
y
=
F
.
cosine_similarity
(
tesnor_x1
,
tesnor_x2
,
axis
=
axis
,
eps
=
eps
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
...
@@ -110,8 +110,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
...
@@ -110,8 +110,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_x2
=
np
.
random
.
rand
(
*
shape2
).
astype
(
np
.
float32
)
np_x2
=
np
.
random
.
rand
(
*
shape2
).
astype
(
np
.
float32
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
tesnor_x1
=
paddle
.
to_
variable
(
np_x1
)
tesnor_x1
=
paddle
.
to_
tensor
(
np_x1
)
tesnor_x2
=
paddle
.
to_
variable
(
np_x2
)
tesnor_x2
=
paddle
.
to_
tensor
(
np_x2
)
y
=
F
.
cosine_similarity
(
tesnor_x1
,
tesnor_x2
,
axis
=
axis
,
eps
=
eps
)
y
=
F
.
cosine_similarity
(
tesnor_x1
,
tesnor_x2
,
axis
=
axis
,
eps
=
eps
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
...
@@ -129,8 +129,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
...
@@ -129,8 +129,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
np_out
=
self
.
_get_numpy_out
(
np_x1
,
np_x2
,
axis
=
axis
,
eps
=
eps
)
cos_sim_func
=
nn
.
CosineSimilarity
(
axis
=
axis
,
eps
=
eps
)
cos_sim_func
=
nn
.
CosineSimilarity
(
axis
=
axis
,
eps
=
eps
)
tesnor_x1
=
paddle
.
to_
variable
(
np_x1
)
tesnor_x1
=
paddle
.
to_
tensor
(
np_x1
)
tesnor_x2
=
paddle
.
to_
variable
(
np_x2
)
tesnor_x2
=
paddle
.
to_
tensor
(
np_x2
)
y
=
cos_sim_func
(
tesnor_x1
,
tesnor_x2
)
y
=
cos_sim_func
(
tesnor_x1
,
tesnor_x2
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
np_out
))
...
...
python/paddle/fluid/tests/unittests/test_cumsum_op.py
浏览文件 @
b9a1c7e2
...
@@ -21,13 +21,12 @@ import paddle
...
@@ -21,13 +21,12 @@ import paddle
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid
import
compiler
,
Program
,
program_guard
from
paddle.fluid
import
compiler
,
Program
,
program_guard
from
paddle
import
to_variable
class
TestCumsumOp
(
unittest
.
TestCase
):
class
TestCumsumOp
(
unittest
.
TestCase
):
def
run_cases
(
self
):
def
run_cases
(
self
):
data_np
=
np
.
arange
(
12
).
reshape
(
3
,
4
)
data_np
=
np
.
arange
(
12
).
reshape
(
3
,
4
)
data
=
to_variable
(
data_np
)
data
=
paddle
.
to_tensor
(
data_np
)
y
=
paddle
.
cumsum
(
data
)
y
=
paddle
.
cumsum
(
data
)
z
=
np
.
cumsum
(
data_np
)
z
=
np
.
cumsum
(
data_np
)
...
...
python/paddle/fluid/tests/unittests/test_default_dtype.py
浏览文件 @
b9a1c7e2
...
@@ -20,7 +20,6 @@ import paddle
...
@@ -20,7 +20,6 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid.dygraph
import
Linear
from
paddle.fluid.dygraph
import
Linear
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
from
paddle
import
to_variable
class
TestDefaultType
(
unittest
.
TestCase
):
class
TestDefaultType
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/test_directory_migration.py
浏览文件 @
b9a1c7e2
...
@@ -36,7 +36,7 @@ class TestDirectory(unittest.TestCase):
...
@@ -36,7 +36,7 @@ class TestDirectory(unittest.TestCase):
def
test_new_directory
(
self
):
def
test_new_directory
(
self
):
new_directory
=
[
new_directory
=
[
'paddle.enable_static'
,
'paddle.disable_static'
,
'paddle.enable_static'
,
'paddle.disable_static'
,
'paddle.in_dynamic_mode'
,
'paddle.to_
variable
'
,
'paddle.grad'
,
'paddle.in_dynamic_mode'
,
'paddle.to_
tensor
'
,
'paddle.grad'
,
'paddle.no_grad'
,
'paddle.save'
,
'paddle.load'
,
'paddle.no_grad'
,
'paddle.save'
,
'paddle.load'
,
'paddle.static.save'
,
'paddle.static.load'
,
'paddle.static.save'
,
'paddle.static.load'
,
'paddle.distributed.ParallelEnv'
,
'paddle.distributed.ParallelEnv'
,
...
...
python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py
浏览文件 @
b9a1c7e2
...
@@ -195,7 +195,7 @@ class TestFlattenPython(unittest.TestCase):
...
@@ -195,7 +195,7 @@ class TestFlattenPython(unittest.TestCase):
def
test_Negative
():
def
test_Negative
():
paddle
.
disable_static
()
paddle
.
disable_static
()
img
=
paddle
.
to_
variable
(
x
)
img
=
paddle
.
to_
tensor
(
x
)
out
=
paddle
.
flatten
(
img
,
start_axis
=-
2
,
stop_axis
=-
1
)
out
=
paddle
.
flatten
(
img
,
start_axis
=-
2
,
stop_axis
=-
1
)
return
out
.
numpy
().
shape
return
out
.
numpy
().
shape
...
...
python/paddle/fluid/tests/unittests/test_imperative_basic.py
浏览文件 @
b9a1c7e2
...
@@ -211,7 +211,7 @@ class TestImperative(unittest.TestCase):
...
@@ -211,7 +211,7 @@ class TestImperative(unittest.TestCase):
paddle
.
disable_static
()
paddle
.
disable_static
()
self
.
assertTrue
(
paddle
.
in_dynamic_mode
())
self
.
assertTrue
(
paddle
.
in_dynamic_mode
())
np_inp
=
np
.
array
([[
1.0
,
2.0
],
[
3.0
,
4.0
]],
dtype
=
np
.
float32
)
np_inp
=
np
.
array
([[
1.0
,
2.0
],
[
3.0
,
4.0
]],
dtype
=
np
.
float32
)
var_inp
=
paddle
.
to_
variable
(
np_inp
)
var_inp
=
paddle
.
to_
tensor
(
np_inp
)
mlp
=
MLP
(
input_size
=
2
)
mlp
=
MLP
(
input_size
=
2
)
out
=
mlp
(
var_inp
)
out
=
mlp
(
var_inp
)
dy_out1
=
out
.
numpy
()
dy_out1
=
out
.
numpy
()
...
@@ -221,7 +221,7 @@ class TestImperative(unittest.TestCase):
...
@@ -221,7 +221,7 @@ class TestImperative(unittest.TestCase):
self
.
assertFalse
(
paddle
.
in_dynamic_mode
())
self
.
assertFalse
(
paddle
.
in_dynamic_mode
())
paddle
.
disable_static
()
paddle
.
disable_static
()
self
.
assertTrue
(
paddle
.
in_dynamic_mode
())
self
.
assertTrue
(
paddle
.
in_dynamic_mode
())
var_inp
=
paddle
.
to_
variable
(
np_inp
)
var_inp
=
paddle
.
to_
tensor
(
np_inp
)
mlp
=
MLP
(
input_size
=
2
)
mlp
=
MLP
(
input_size
=
2
)
out
=
mlp
(
var_inp
)
out
=
mlp
(
var_inp
)
dy_out2
=
out
.
numpy
()
dy_out2
=
out
.
numpy
()
...
...
python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
浏览文件 @
b9a1c7e2
...
@@ -54,7 +54,7 @@ class TestSimpleNet(unittest.TestCase):
...
@@ -54,7 +54,7 @@ class TestSimpleNet(unittest.TestCase):
# grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
# grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
input_word
=
np
.
array
([[
1
,
2
],
[
2
,
1
]]).
astype
(
'int64'
)
input_word
=
np
.
array
([[
1
,
2
],
[
2
,
1
]]).
astype
(
'int64'
)
input
=
paddle
.
to_
variable
(
input_word
)
input
=
paddle
.
to_
tensor
(
input_word
)
simplenet
=
SimpleNet
(
20
,
32
,
dtype
)
simplenet
=
SimpleNet
(
20
,
32
,
dtype
)
adam
=
SGDOptimizer
(
adam
=
SGDOptimizer
(
...
...
python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py
浏览文件 @
b9a1c7e2
...
@@ -41,7 +41,7 @@ def run_dygraph(x_np, op_str, use_gpu=True):
...
@@ -41,7 +41,7 @@ def run_dygraph(x_np, op_str, use_gpu=True):
if
use_gpu
and
fluid
.
core
.
is_compiled_with_cuda
():
if
use_gpu
and
fluid
.
core
.
is_compiled_with_cuda
():
place
=
paddle
.
CUDAPlace
(
0
)
place
=
paddle
.
CUDAPlace
(
0
)
paddle
.
disable_static
(
place
)
paddle
.
disable_static
(
place
)
x
=
paddle
.
to_
variable
(
x_np
)
x
=
paddle
.
to_
tensor
(
x_np
)
dygraph_result
=
getattr
(
paddle
.
tensor
,
op_str
)(
x
)
dygraph_result
=
getattr
(
paddle
.
tensor
,
op_str
)(
x
)
return
dygraph_result
return
dygraph_result
...
...
python/paddle/fluid/tests/unittests/test_jit_save_load.py
浏览文件 @
b9a1c7e2
...
@@ -543,9 +543,9 @@ class TestJitSaveMultiCases(unittest.TestCase):
...
@@ -543,9 +543,9 @@ class TestJitSaveMultiCases(unittest.TestCase):
loaded_layer
=
paddle
.
jit
.
load
(
model_path
)
loaded_layer
=
paddle
.
jit
.
load
(
model_path
)
loaded_layer
.
eval
()
loaded_layer
.
eval
()
# inference & compare
# inference & compare
x
=
paddle
.
to_
variable
(
np
.
random
.
random
((
1
,
784
)).
astype
(
'float32'
))
x
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
1
,
784
)).
astype
(
'float32'
))
if
with_label
:
if
with_label
:
y
=
paddle
.
to_
variable
(
np
.
random
.
random
((
1
,
1
)).
astype
(
'int64'
))
y
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
1
,
1
)).
astype
(
'int64'
))
pred
,
_
=
layer
(
x
,
y
)
pred
,
_
=
layer
(
x
,
y
)
pred
=
pred
.
numpy
()
pred
=
pred
.
numpy
()
else
:
else
:
...
@@ -677,7 +677,7 @@ class TestJitSaveMultiCases(unittest.TestCase):
...
@@ -677,7 +677,7 @@ class TestJitSaveMultiCases(unittest.TestCase):
model_path
=
"test_not_prune_output_spec_name_warning"
model_path
=
"test_not_prune_output_spec_name_warning"
configs
=
paddle
.
SaveLoadConfig
()
configs
=
paddle
.
SaveLoadConfig
()
out
=
paddle
.
to_
variable
(
np
.
random
.
random
((
1
,
1
)).
astype
(
'float'
))
out
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
1
,
1
)).
astype
(
'float'
))
configs
.
output_spec
=
[
out
]
configs
.
output_spec
=
[
out
]
paddle
.
jit
.
save
(
layer
,
model_path
,
configs
=
configs
)
paddle
.
jit
.
save
(
layer
,
model_path
,
configs
=
configs
)
...
@@ -709,7 +709,7 @@ class TestJitSaveMultiCases(unittest.TestCase):
...
@@ -709,7 +709,7 @@ class TestJitSaveMultiCases(unittest.TestCase):
model_path
=
"test_prune_to_static_after_train"
model_path
=
"test_prune_to_static_after_train"
configs
=
paddle
.
SaveLoadConfig
()
configs
=
paddle
.
SaveLoadConfig
()
out
=
paddle
.
to_
variable
(
np
.
random
.
random
((
1
,
1
)).
astype
(
'float'
))
out
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
1
,
1
)).
astype
(
'float'
))
configs
.
output_spec
=
[
out
]
configs
.
output_spec
=
[
out
]
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
paddle
.
jit
.
save
(
paddle
.
jit
.
save
(
...
@@ -730,7 +730,7 @@ class TestJitSaveLoadEmptyLayer(unittest.TestCase):
...
@@ -730,7 +730,7 @@ class TestJitSaveLoadEmptyLayer(unittest.TestCase):
def
test_save_load_empty_layer
(
self
):
def
test_save_load_empty_layer
(
self
):
layer
=
EmptyLayer
()
layer
=
EmptyLayer
()
x
=
paddle
.
to_
variable
(
np
.
random
.
random
((
10
)).
astype
(
'float32'
))
x
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
10
)).
astype
(
'float32'
))
out
=
layer
(
x
)
out
=
layer
(
x
)
paddle
.
jit
.
save
(
layer
,
self
.
model_path
)
paddle
.
jit
.
save
(
layer
,
self
.
model_path
)
load_layer
=
paddle
.
jit
.
load
(
self
.
model_path
)
load_layer
=
paddle
.
jit
.
load
(
self
.
model_path
)
...
@@ -746,8 +746,8 @@ class TestJitSaveLoadNoParamLayer(unittest.TestCase):
...
@@ -746,8 +746,8 @@ class TestJitSaveLoadNoParamLayer(unittest.TestCase):
def
test_save_load_no_param_layer
(
self
):
def
test_save_load_no_param_layer
(
self
):
layer
=
NoParamLayer
()
layer
=
NoParamLayer
()
x
=
paddle
.
to_
variable
(
np
.
random
.
random
((
5
)).
astype
(
'float32'
))
x
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
5
)).
astype
(
'float32'
))
y
=
paddle
.
to_
variable
(
np
.
random
.
random
((
5
)).
astype
(
'float32'
))
y
=
paddle
.
to_
tensor
(
np
.
random
.
random
((
5
)).
astype
(
'float32'
))
out
=
layer
(
x
,
y
)
out
=
layer
(
x
,
y
)
paddle
.
jit
.
save
(
layer
,
self
.
model_path
)
paddle
.
jit
.
save
(
layer
,
self
.
model_path
)
load_layer
=
paddle
.
jit
.
load
(
self
.
model_path
)
load_layer
=
paddle
.
jit
.
load
(
self
.
model_path
)
...
...
python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py
浏览文件 @
b9a1c7e2
...
@@ -90,7 +90,7 @@ class TestKLDivLossDygraph(unittest.TestCase):
...
@@ -90,7 +90,7 @@ class TestKLDivLossDygraph(unittest.TestCase):
with
paddle
.
fluid
.
dygraph
.
guard
():
with
paddle
.
fluid
.
dygraph
.
guard
():
kldiv_criterion
=
paddle
.
nn
.
KLDivLoss
(
reduction
)
kldiv_criterion
=
paddle
.
nn
.
KLDivLoss
(
reduction
)
pred_loss
=
kldiv_criterion
(
pred_loss
=
kldiv_criterion
(
paddle
.
to_
variable
(
x
),
paddle
.
to_variable
(
target
))
paddle
.
to_
tensor
(
x
),
paddle
.
to_tensor
(
target
))
self
.
assertTrue
(
np
.
allclose
(
pred_loss
.
numpy
(),
gt_loss
))
self
.
assertTrue
(
np
.
allclose
(
pred_loss
.
numpy
(),
gt_loss
))
def
test_kl_loss_batchmean
(
self
):
def
test_kl_loss_batchmean
(
self
):
...
...
python/paddle/fluid/tests/unittests/test_l1_loss.py
浏览文件 @
b9a1c7e2
...
@@ -26,8 +26,8 @@ class TestFunctionalL1Loss(unittest.TestCase):
...
@@ -26,8 +26,8 @@ class TestFunctionalL1Loss(unittest.TestCase):
self
.
label_np
=
np
.
random
.
random
(
size
=
(
10
,
10
,
5
)).
astype
(
np
.
float32
)
self
.
label_np
=
np
.
random
.
random
(
size
=
(
10
,
10
,
5
)).
astype
(
np
.
float32
)
def
run_imperative
(
self
):
def
run_imperative
(
self
):
input
=
paddle
.
to_
variable
(
self
.
input_np
)
input
=
paddle
.
to_
tensor
(
self
.
input_np
)
label
=
paddle
.
to_
variable
(
self
.
label_np
)
label
=
paddle
.
to_
tensor
(
self
.
label_np
)
dy_result
=
paddle
.
nn
.
functional
.
l1_loss
(
input
,
label
)
dy_result
=
paddle
.
nn
.
functional
.
l1_loss
(
input
,
label
)
expected
=
np
.
mean
(
np
.
abs
(
self
.
input_np
-
self
.
label_np
))
expected
=
np
.
mean
(
np
.
abs
(
self
.
input_np
-
self
.
label_np
))
self
.
assertTrue
(
np
.
allclose
(
dy_result
.
numpy
(),
expected
))
self
.
assertTrue
(
np
.
allclose
(
dy_result
.
numpy
(),
expected
))
...
@@ -106,8 +106,8 @@ class TestClassL1Loss(unittest.TestCase):
...
@@ -106,8 +106,8 @@ class TestClassL1Loss(unittest.TestCase):
self
.
label_np
=
np
.
random
.
random
(
size
=
(
10
,
10
,
5
)).
astype
(
np
.
float32
)
self
.
label_np
=
np
.
random
.
random
(
size
=
(
10
,
10
,
5
)).
astype
(
np
.
float32
)
def
run_imperative
(
self
):
def
run_imperative
(
self
):
input
=
paddle
.
to_
variable
(
self
.
input_np
)
input
=
paddle
.
to_
tensor
(
self
.
input_np
)
label
=
paddle
.
to_
variable
(
self
.
label_np
)
label
=
paddle
.
to_
tensor
(
self
.
label_np
)
l1_loss
=
paddle
.
nn
.
loss
.
L1Loss
()
l1_loss
=
paddle
.
nn
.
loss
.
L1Loss
()
dy_result
=
l1_loss
(
input
,
label
)
dy_result
=
l1_loss
(
input
,
label
)
expected
=
np
.
mean
(
np
.
abs
(
self
.
input_np
-
self
.
label_np
))
expected
=
np
.
mean
(
np
.
abs
(
self
.
input_np
-
self
.
label_np
))
...
...
python/paddle/fluid/tests/unittests/test_log_softmax.py
浏览文件 @
b9a1c7e2
...
@@ -96,7 +96,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase):
...
@@ -96,7 +96,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase):
# test dygrapg api
# test dygrapg api
paddle
.
disable_static
()
paddle
.
disable_static
()
x
=
paddle
.
to_
variable
(
self
.
x
)
x
=
paddle
.
to_
tensor
(
self
.
x
)
y
=
logsoftmax
(
x
)
y
=
logsoftmax
(
x
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
ref_out
))
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
ref_out
))
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -127,7 +127,7 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase):
...
@@ -127,7 +127,7 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
out
[
0
],
ref_out
))
self
.
assertTrue
(
np
.
allclose
(
out
[
0
],
ref_out
))
paddle
.
disable_static
()
paddle
.
disable_static
()
x
=
paddle
.
to_
variable
(
self
.
x
)
x
=
paddle
.
to_
tensor
(
self
.
x
)
y
=
F
.
log_softmax
(
x
,
axis
,
dtype
)
y
=
F
.
log_softmax
(
x
,
axis
,
dtype
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
ref_out
),
True
)
self
.
assertTrue
(
np
.
allclose
(
y
.
numpy
(),
ref_out
),
True
)
paddle
.
enable_static
()
paddle
.
enable_static
()
...
...
python/paddle/fluid/tests/unittests/test_logsumexp.py
浏览文件 @
b9a1c7e2
...
@@ -111,7 +111,7 @@ class TestLogsumexpAPI(unittest.TestCase):
...
@@ -111,7 +111,7 @@ class TestLogsumexpAPI(unittest.TestCase):
self
.
assertTrue
(
np
.
allclose
(
res
[
0
],
out_ref
))
self
.
assertTrue
(
np
.
allclose
(
res
[
0
],
out_ref
))
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
x
=
paddle
.
to_
variable
(
self
.
x
)
x
=
paddle
.
to_
tensor
(
self
.
x
)
out
=
paddle
.
logsumexp
(
x
,
axis
,
keepdim
)
out
=
paddle
.
logsumexp
(
x
,
axis
,
keepdim
)
self
.
assertTrue
(
np
.
allclose
(
out
.
numpy
(),
out_ref
))
self
.
assertTrue
(
np
.
allclose
(
out
.
numpy
(),
out_ref
))
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -126,7 +126,7 @@ class TestLogsumexpAPI(unittest.TestCase):
...
@@ -126,7 +126,7 @@ class TestLogsumexpAPI(unittest.TestCase):
def
test_alias
(
self
):
def
test_alias
(
self
):
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
x
=
paddle
.
to_
variable
(
self
.
x
)
x
=
paddle
.
to_
tensor
(
self
.
x
)
out1
=
paddle
.
logsumexp
(
x
)
out1
=
paddle
.
logsumexp
(
x
)
out2
=
paddle
.
tensor
.
logsumexp
(
x
)
out2
=
paddle
.
tensor
.
logsumexp
(
x
)
out3
=
paddle
.
tensor
.
math
.
logsumexp
(
x
)
out3
=
paddle
.
tensor
.
math
.
logsumexp
(
x
)
...
...
python/paddle/fluid/tests/unittests/test_matmul_op.py
浏览文件 @
b9a1c7e2
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
paddle.fluid.core
as
core
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
from
op_test
import
OpTest
from
op_test
import
OpTest
...
...
python/paddle/fluid/tests/unittests/test_max_op.py
浏览文件 @
b9a1c7e2
...
@@ -80,7 +80,7 @@ class ApiMaxTest(unittest.TestCase):
...
@@ -80,7 +80,7 @@ class ApiMaxTest(unittest.TestCase):
def
test_imperative_api
(
self
):
def
test_imperative_api
(
self
):
paddle
.
disable_static
()
paddle
.
disable_static
()
np_x
=
np
.
array
([
10
,
10
]).
astype
(
'float64'
)
np_x
=
np
.
array
([
10
,
10
]).
astype
(
'float64'
)
x
=
paddle
.
to_
variable
(
np_x
)
x
=
paddle
.
to_
tensor
(
np_x
)
z
=
paddle
.
max
(
x
,
axis
=
0
)
z
=
paddle
.
max
(
x
,
axis
=
0
)
np_z
=
z
.
numpy
()
np_z
=
z
.
numpy
()
z_expected
=
np
.
array
(
np
.
max
(
np_x
,
axis
=
0
))
z_expected
=
np
.
array
(
np
.
max
(
np_x
,
axis
=
0
))
...
...
python/paddle/fluid/tests/unittests/test_maximum_op.py
浏览文件 @
b9a1c7e2
...
@@ -61,8 +61,8 @@ class ApiMaximumTest(unittest.TestCase):
...
@@ -61,8 +61,8 @@ class ApiMaximumTest(unittest.TestCase):
def
test_dynamic_api
(
self
):
def
test_dynamic_api
(
self
):
paddle
.
disable_static
()
paddle
.
disable_static
()
np_x
=
np
.
array
([
10
,
10
]).
astype
(
'float64'
)
np_x
=
np
.
array
([
10
,
10
]).
astype
(
'float64'
)
x
=
paddle
.
to_
variable
(
self
.
input_x
)
x
=
paddle
.
to_
tensor
(
self
.
input_x
)
y
=
paddle
.
to_
variable
(
self
.
input_y
)
y
=
paddle
.
to_
tensor
(
self
.
input_y
)
z
=
paddle
.
maximum
(
x
,
y
)
z
=
paddle
.
maximum
(
x
,
y
)
np_z
=
z
.
numpy
()
np_z
=
z
.
numpy
()
z_expected
=
np
.
array
(
np
.
maximum
(
self
.
input_x
,
self
.
input_y
))
z_expected
=
np
.
array
(
np
.
maximum
(
self
.
input_x
,
self
.
input_y
))
...
@@ -73,8 +73,8 @@ class ApiMaximumTest(unittest.TestCase):
...
@@ -73,8 +73,8 @@ class ApiMaximumTest(unittest.TestCase):
np_x
=
np
.
random
.
rand
(
5
,
4
,
3
,
2
).
astype
(
"float64"
)
np_x
=
np
.
random
.
rand
(
5
,
4
,
3
,
2
).
astype
(
"float64"
)
np_y
=
np
.
random
.
rand
(
4
,
3
).
astype
(
"float64"
)
np_y
=
np
.
random
.
rand
(
4
,
3
).
astype
(
"float64"
)
x
=
paddle
.
to_
variable
(
self
.
input_x
)
x
=
paddle
.
to_
tensor
(
self
.
input_x
)
y
=
paddle
.
to_
variable
(
self
.
input_y
)
y
=
paddle
.
to_
tensor
(
self
.
input_y
)
result_1
=
paddle
.
maximum
(
x
,
y
,
axis
=
1
)
result_1
=
paddle
.
maximum
(
x
,
y
,
axis
=
1
)
result_2
=
paddle
.
maximum
(
x
,
y
,
axis
=-
2
)
result_2
=
paddle
.
maximum
(
x
,
y
,
axis
=-
2
)
self
.
assertEqual
((
result_1
.
numpy
()
==
result_2
.
numpy
()).
all
(),
True
)
self
.
assertEqual
((
result_1
.
numpy
()
==
result_2
.
numpy
()).
all
(),
True
)
python/paddle/fluid/tests/unittests/test_mean_op.py
浏览文件 @
b9a1c7e2
...
@@ -204,7 +204,7 @@ class TestMeanAPI(unittest.TestCase):
...
@@ -204,7 +204,7 @@ class TestMeanAPI(unittest.TestCase):
paddle
.
disable_static
(
self
.
place
)
paddle
.
disable_static
(
self
.
place
)
def
test_case
(
x
,
axis
=
None
,
keepdim
=
False
):
def
test_case
(
x
,
axis
=
None
,
keepdim
=
False
):
x_tensor
=
paddle
.
to_
variable
(
x
)
x_tensor
=
paddle
.
to_
tensor
(
x
)
out
=
paddle
.
mean
(
x_tensor
,
axis
,
keepdim
)
out
=
paddle
.
mean
(
x_tensor
,
axis
,
keepdim
)
if
isinstance
(
axis
,
list
):
if
isinstance
(
axis
,
list
):
axis
=
tuple
(
axis
)
axis
=
tuple
(
axis
)
...
...
python/paddle/fluid/tests/unittests/test_min_op.py
浏览文件 @
b9a1c7e2
...
@@ -80,7 +80,7 @@ class ApiMinTest(unittest.TestCase):
...
@@ -80,7 +80,7 @@ class ApiMinTest(unittest.TestCase):
def
test_imperative_api
(
self
):
def
test_imperative_api
(
self
):
paddle
.
disable_static
()
paddle
.
disable_static
()
np_x
=
np
.
array
([
10
,
10
]).
astype
(
'float64'
)
np_x
=
np
.
array
([
10
,
10
]).
astype
(
'float64'
)
x
=
paddle
.
to_
variable
(
np_x
)
x
=
paddle
.
to_
tensor
(
np_x
)
z
=
paddle
.
min
(
x
,
axis
=
0
)
z
=
paddle
.
min
(
x
,
axis
=
0
)
np_z
=
z
.
numpy
()
np_z
=
z
.
numpy
()
z_expected
=
np
.
array
(
np
.
min
(
np_x
,
axis
=
0
))
z_expected
=
np
.
array
(
np
.
min
(
np_x
,
axis
=
0
))
...
...
python/paddle/fluid/tests/unittests/test_mul_op.py
浏览文件 @
b9a1c7e2
...
@@ -18,6 +18,8 @@ import unittest
...
@@ -18,6 +18,8 @@ import unittest
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
sys
sys
.
path
.
append
(
".."
)
from
op_test
import
OpTest
from
op_test
import
OpTest
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid
import
Program
,
program_guard
from
paddle.fluid
import
Program
,
program_guard
...
@@ -175,57 +177,5 @@ class TestFP16MulOp2(TestMulOp2):
...
@@ -175,57 +177,5 @@ class TestFP16MulOp2(TestMulOp2):
no_grad_set
=
set
(
'Y'
))
no_grad_set
=
set
(
'Y'
))
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
class
TestXPUMulOp1
(
TestMulOp
):
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float32
def
test_check_output
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
1e-1
)
def
test_check_grad_normal
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
0.5
)
def
test_check_grad_ingore_x
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'Y'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'X'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
'Y'
))
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_xpu
(),
"core is not compiled with XPU"
)
class
TestXPUMulOp2
(
TestMulOp2
):
def
init_dtype_type
(
self
):
self
.
dtype
=
np
.
float32
def
test_check_output
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_output_with_place
(
place
,
atol
=
2e-1
)
def
test_check_grad_normal
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'X'
,
'Y'
],
'Out'
,
max_relative_error
=
0.9
)
def
test_check_grad_ingore_x
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'Y'
],
'Out'
,
max_relative_error
=
0.5
,
no_grad_set
=
set
(
"X"
))
def
test_check_grad_ingore_y
(
self
):
place
=
core
.
XPUPlace
(
0
)
self
.
check_grad_with_place
(
place
,
[
'X'
],
'Out'
,
max_relative_error
=
0.9
,
no_grad_set
=
set
(
'Y'
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_randn_op.py
浏览文件 @
b9a1c7e2
...
@@ -63,7 +63,7 @@ class TestRandnOpForDygraph(unittest.TestCase):
...
@@ -63,7 +63,7 @@ class TestRandnOpForDygraph(unittest.TestCase):
dim_2
=
paddle
.
fill_constant
([
1
],
"int32"
,
50
)
dim_2
=
paddle
.
fill_constant
([
1
],
"int32"
,
50
)
x3
=
paddle
.
randn
(
shape
=
[
dim_1
,
dim_2
,
784
])
x3
=
paddle
.
randn
(
shape
=
[
dim_1
,
dim_2
,
784
])
var_shape
=
paddle
.
to_
variable
(
np
.
array
(
shape
))
var_shape
=
paddle
.
to_
tensor
(
np
.
array
(
shape
))
x4
=
paddle
.
randn
(
var_shape
)
x4
=
paddle
.
randn
(
var_shape
)
for
out
in
[
x1
,
x2
,
x3
,
x4
]:
for
out
in
[
x1
,
x2
,
x3
,
x4
]:
...
...
python/paddle/fluid/tests/unittests/test_retain_graph.py
浏览文件 @
b9a1c7e2
...
@@ -105,8 +105,8 @@ class TestRetainGraph(unittest.TestCase):
...
@@ -105,8 +105,8 @@ class TestRetainGraph(unittest.TestCase):
A
=
np
.
random
.
rand
(
2
,
3
,
32
,
32
).
astype
(
'float32'
)
A
=
np
.
random
.
rand
(
2
,
3
,
32
,
32
).
astype
(
'float32'
)
B
=
np
.
random
.
rand
(
2
,
3
,
32
,
32
).
astype
(
'float32'
)
B
=
np
.
random
.
rand
(
2
,
3
,
32
,
32
).
astype
(
'float32'
)
realA
=
paddle
.
to_
variable
(
A
)
realA
=
paddle
.
to_
tensor
(
A
)
realB
=
paddle
.
to_
variable
(
B
)
realB
=
paddle
.
to_
tensor
(
B
)
fakeB
=
g
(
realA
)
fakeB
=
g
(
realA
)
optim_d
.
clear_gradients
()
optim_d
.
clear_gradients
()
...
...
python/paddle/fluid/tests/unittests/test_transformer_api.py
浏览文件 @
b9a1c7e2
...
@@ -487,24 +487,24 @@ class TestTransformer(unittest.TestCase):
...
@@ -487,24 +487,24 @@ class TestTransformer(unittest.TestCase):
dropout
=
dropout
,
dropout
=
dropout
,
weight_attr
=
[
None
],
weight_attr
=
[
None
],
bias_attr
=
[
False
])
bias_attr
=
[
False
])
src
=
paddle
.
to_
variable
(
src
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
tgt
=
paddle
.
to_
variable
(
tgt
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
=
paddle
.
to_
variable
(
src_mask
)
src_mask
=
paddle
.
to_
tensor
(
src_mask
)
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
target_length
)).
astype
(
"float32"
)
target_length
)).
astype
(
"float32"
)
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
,
memory_mask
=
paddle
.
to_
variable
(
tgt_mask
,
memory_mask
=
paddle
.
to_
tensor
(
tgt_mask
),
paddle
.
to_
variable
(
memory_mask
)
tgt_mask
),
paddle
.
to_
tensor
(
memory_mask
)
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
memory_mask
)
memory_mask
)
...
@@ -521,24 +521,24 @@ class TestTransformer(unittest.TestCase):
...
@@ -521,24 +521,24 @@ class TestTransformer(unittest.TestCase):
dropout
=
dropout
,
dropout
=
dropout
,
weight_attr
=
[
None
,
None
],
weight_attr
=
[
None
,
None
],
bias_attr
=
[
False
,
False
])
bias_attr
=
[
False
,
False
])
src
=
paddle
.
to_
variable
(
src
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
tgt
=
paddle
.
to_
variable
(
tgt
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
=
paddle
.
to_
variable
(
src_mask
)
src_mask
=
paddle
.
to_
tensor
(
src_mask
)
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
target_length
)).
astype
(
"float32"
)
target_length
)).
astype
(
"float32"
)
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
,
memory_mask
=
paddle
.
to_
variable
(
tgt_mask
,
memory_mask
=
paddle
.
to_
tensor
(
tgt_mask
),
paddle
.
to_
variable
(
memory_mask
)
tgt_mask
),
paddle
.
to_
tensor
(
memory_mask
)
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
memory_mask
)
memory_mask
)
...
@@ -555,24 +555,24 @@ class TestTransformer(unittest.TestCase):
...
@@ -555,24 +555,24 @@ class TestTransformer(unittest.TestCase):
dropout
=
dropout
,
dropout
=
dropout
,
weight_attr
=
[
None
,
None
,
None
],
weight_attr
=
[
None
,
None
,
None
],
bias_attr
=
[
False
,
False
,
True
])
bias_attr
=
[
False
,
False
,
True
])
src
=
paddle
.
to_
variable
(
src
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
tgt
=
paddle
.
to_
variable
(
tgt
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
=
paddle
.
to_
variable
(
src_mask
)
src_mask
=
paddle
.
to_
tensor
(
src_mask
)
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
target_length
)).
astype
(
"float32"
)
target_length
)).
astype
(
"float32"
)
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
,
memory_mask
=
paddle
.
to_
variable
(
tgt_mask
,
memory_mask
=
paddle
.
to_
tensor
(
tgt_mask
),
paddle
.
to_
variable
(
memory_mask
)
tgt_mask
),
paddle
.
to_
tensor
(
memory_mask
)
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
memory_mask
)
memory_mask
)
...
@@ -588,24 +588,24 @@ class TestTransformer(unittest.TestCase):
...
@@ -588,24 +588,24 @@ class TestTransformer(unittest.TestCase):
dim_feedforward
=
dim_feedforward
,
dim_feedforward
=
dim_feedforward
,
dropout
=
dropout
,
dropout
=
dropout
,
bias_attr
=
False
)
bias_attr
=
False
)
src
=
paddle
.
to_
variable
(
src
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
source_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
tgt
=
paddle
.
to_
variable
(
tgt
=
paddle
.
to_
tensor
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
np
.
random
.
rand
(
batch_size
,
target_length
,
d_model
).
astype
(
"float32"
))
"float32"
))
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
src_mask
=
np
.
zeros
((
batch_size
,
n_head
,
source_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
[
0
][
0
][
0
][
0
]
=
-
np
.
inf
src_mask
=
paddle
.
to_
variable
(
src_mask
)
src_mask
=
paddle
.
to_
tensor
(
src_mask
)
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
tgt_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
target_length
)).
astype
(
"float32"
)
target_length
)).
astype
(
"float32"
)
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
memory_mask
=
np
.
zeros
((
batch_size
,
n_head
,
target_length
,
source_length
)).
astype
(
"float32"
)
source_length
)).
astype
(
"float32"
)
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
memory_mask
[
0
][
0
][
0
][
0
]
=
-
1e9
tgt_mask
,
memory_mask
=
paddle
.
to_
variable
(
tgt_mask
,
memory_mask
=
paddle
.
to_
tensor
(
tgt_mask
),
paddle
.
to_
variable
(
memory_mask
)
tgt_mask
),
paddle
.
to_
tensor
(
memory_mask
)
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
trans_output
=
transformer
(
src
,
tgt
,
src_mask
,
tgt_mask
,
memory_mask
)
memory_mask
)
...
...
python/paddle/fluid/tests/unittests/test_zeros_like_op.py
浏览文件 @
b9a1c7e2
...
@@ -63,7 +63,7 @@ class TestZerosLikeImpeartive(unittest.TestCase):
...
@@ -63,7 +63,7 @@ class TestZerosLikeImpeartive(unittest.TestCase):
place
=
fluid
.
CUDAPlace
(
0
)
if
core
.
is_compiled_with_cuda
(
place
=
fluid
.
CUDAPlace
(
0
)
if
core
.
is_compiled_with_cuda
(
)
else
fluid
.
CPUPlace
()
)
else
fluid
.
CPUPlace
()
paddle
.
disable_static
(
place
)
paddle
.
disable_static
(
place
)
x
=
paddle
.
to_
variable
(
np
.
ones
(
shape
))
x
=
paddle
.
to_
tensor
(
np
.
ones
(
shape
))
for
dtype
in
[
np
.
bool
,
np
.
float32
,
np
.
float64
,
np
.
int32
,
np
.
int64
]:
for
dtype
in
[
np
.
bool
,
np
.
float32
,
np
.
float64
,
np
.
int32
,
np
.
int64
]:
out
=
zeros_like
(
x
,
dtype
)
out
=
zeros_like
(
x
,
dtype
)
self
.
assertEqual
((
out
.
numpy
()
==
np
.
zeros
(
shape
,
dtype
)).
all
(),
self
.
assertEqual
((
out
.
numpy
()
==
np
.
zeros
(
shape
,
dtype
)).
all
(),
...
...
python/paddle/fluid/tests/unittests/xpu/test_activation_op.py
0 → 100755
浏览文件 @
b9a1c7e2
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
sys
sys
.
path
.
append
(
".."
)
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
scipy.special
import
expit
,
erf
import
paddle
import
paddle.fluid
as
fluid
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.fluid
import
compiler
,
Program
,
program_guard
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUActivation(OpTest):
    """Base XPU activation test; defaults to the `exp` op on float32 input."""

    def setUp(self):
        self.op_type = "exp"
        self.init_dtype()
        self.init_kernel_type()
        x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
        out = np.exp(x)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}

    def init_dtype(self):
        self.dtype = np.float32

    def test_check_output(self):
        if paddle.is_compiled_with_xpu():
            xpu_place = paddle.XPUPlace(0)
            self.check_output_with_place(xpu_place, atol=1e-3)

    def init_kernel_type(self):
        # Hook for subclasses; the base test needs no extra kernel setup.
        pass
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSigmoid(TestXPUActivation):
    """XPU test for the sigmoid activation."""

    def setUp(self):
        self.op_type = "sigmoid"
        self.init_dtype()
        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
        # NumPy reference: sigmoid(x) = 1 / (1 + e^-x)
        out = 1 / (1 + np.exp(-x))
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}

    def test_check_grad(self):
        if paddle.is_compiled_with_xpu():
            xpu_place = paddle.XPUPlace(0)
            self.check_grad_with_place(
                xpu_place, ['X'], 'Out', max_relative_error=0.01)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUTanh(TestXPUActivation):
    """XPU test for the tanh activation."""

    def setUp(self):
        self.op_type = "tanh"
        self.init_dtype()
        x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
        out = np.tanh(x)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSqrt(TestXPUActivation):
    """XPU test for the sqrt activation (input kept positive)."""

    def setUp(self):
        self.op_type = "sqrt"
        self.init_dtype()
        x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
        out = np.sqrt(x)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUAbs(TestXPUActivation):
    """XPU test for the abs activation."""

    def setUp(self):
        self.op_type = "abs"
        self.init_dtype()
        x = np.random.uniform(-1, 1, [4, 25]).astype(self.dtype)
        # Because we set delta = 0.005 in calculating numeric gradient,
        # if x is too small, such as 0.002, x_neg will be -0.003
        # x_pos will be 0.007, so the numeric gradient is inaccurate.
        # we should avoid this
        x[np.abs(x) < 0.005] = 0.02
        out = np.abs(x)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPURelu(TestXPUActivation):
    """XPU test for the relu activation."""

    def setUp(self):
        self.op_type = "relu"
        self.init_dtype()
        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
        # The same reason with TestAbs
        x[np.abs(x) < 0.005] = 0.02
        out = np.maximum(x, 0)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': x}
        self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUGelu(TestXPUActivation):
    """XPU test for the (exact, non-approximate) gelu activation."""

    def setUp(self):
        self.op_type = "gelu"
        self.init_dtype()
        approximate = False
        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
        # Reference result comes from the module-level gelu() helper.
        out = gelu(x, approximate)
        self.inputs = {'X': x}
        self.outputs = {'Out': out}
        self.attrs = {"approximate": approximate, 'use_xpu': True}
def gelu(x, approximate):
    """NumPy reference implementation of GELU.

    When ``approximate`` is true, uses the tanh approximation; otherwise
    the exact erf-based formula. The result keeps the dtype of ``x``.
    """
    if approximate:
        inner = np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))
        y_ref = 0.5 * x * (1.0 + np.tanh(inner))
    else:
        y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2)))
    return y_ref.astype(x.dtype)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPULog(TestXPUActivation):
    """XPU test for the natural-log activation (input kept positive)."""

    def setUp(self):
        self.op_type = "log"
        self.init_dtype()
        x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
        out = np.log(x)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUSquare(TestXPUActivation):
    """XPU test for the square activation."""

    def setUp(self):
        self.op_type = "square"
        self.init_dtype()
        x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
        out = np.square(x)
        self.attrs = {'use_xpu': True}
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUPow(TestXPUActivation):
    """XPU test for the pow activation with a fixed exponent of 3."""

    def setUp(self):
        self.op_type = "pow"
        self.init_dtype()
        x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype)
        # 'factor' attr below must agree with the exponent used here.
        out = np.power(x, 3)
        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
        self.attrs = {'factor': 3.0, 'use_xpu': True}
        self.outputs = {'Out': out}
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py
0 → 100644
浏览文件 @
b9a1c7e2
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
sys
sys
.
path
.
append
(
".."
)
import
unittest
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
,
skip_check_grad_ci
import
paddle.fluid
as
fluid
from
paddle.fluid
import
compiler
,
Program
,
program_guard
class TestElementwiseAddOp(OpTest):
    """Base elementwise_add test: same-shape float64 operands, axis -1."""

    def init_kernel_type(self):
        self.use_mkldnn = False

    def setUp(self):
        self.op_type = "elementwise_add"
        self.init_dtype()
        self.init_input_output()
        self.init_kernel_type()
        self.init_axis()
        self.inputs = {
            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
            'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
        }
        self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.out}

    def test_check_output(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
        self.check_output(check_dygraph=(self.use_mkldnn == False))

    def test_check_grad_normal(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
        if self.dtype == np.float16:
            return
        self.check_grad(
            ['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False))

    def test_check_grad_ingore_x(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
        if self.dtype == np.float16:
            return
        self.check_grad(
            ['Y'],
            'Out',
            no_grad_set=set("X"),
            check_dygraph=(self.use_mkldnn == False))

    def test_check_grad_ingore_y(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
        if self.dtype == np.float16:
            return
        self.check_grad(
            ['X'],
            'Out',
            no_grad_set=set('Y'),
            check_dygraph=(self.use_mkldnn == False))

    def init_input_output(self):
        self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.out = np.add(self.x, self.y)

    def init_dtype(self):
        self.dtype = np.float64

    def init_axis(self):
        self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUElementwiseAddOp(OpTest):
    """elementwise_add on XPU; every check is gated on float32 + XPU build."""

    def setUp(self):
        self.op_type = "elementwise_add"
        self.init_dtype()
        self.init_input_output()
        self.init_axis()
        self.inputs = {'X': self.x, 'Y': self.y}
        self.attrs = {'axis': self.axis, 'use_mkldnn': False, 'use_xpu': True}
        self.outputs = {'Out': self.out}

    def test_check_output(self):
        if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
            self.check_output_with_place(paddle.XPUPlace(0))

    def test_check_grad_normal(self):
        if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
            self.check_grad_with_place(paddle.XPUPlace(0), ['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
        if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
            self.check_grad_with_place(paddle.XPUPlace(0), ['Y'], 'Out')

    def test_check_grad_ingore_y(self):
        if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
            self.check_grad_with_place(paddle.XPUPlace(0), ['X'], 'Out')

    def init_input_output(self):
        self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.out = np.add(self.x, self.y)

    def init_dtype(self):
        self.dtype = np.float32

    def init_axis(self):
        self.axis = -1
@skip_check_grad_ci(
    reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
    """Broadcast a 1-element y against a (2, 3, 4) x."""

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 4).astype(self.dtype)
        self.y = np.random.rand(1).astype(self.dtype)
        self.out = self.x + self.y
@skip_check_grad_ci(
    reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
    """Broadcast a (1, 1) y against a (2, 3, 4) x."""

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 4).astype(self.dtype)
        self.y = np.random.rand(1, 1).astype(self.dtype)
        self.out = self.x + self.y
class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
    """Add two 1-D vectors of length 100."""

    def init_input_output(self):
        self.x = np.random.random((100, )).astype(self.dtype)
        self.y = np.random.random((100, )).astype(self.dtype)
        self.out = np.add(self.x, self.y)
class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
    """Broadcast y (100,) along axis 0 of x (100, 2, 3)."""

    def init_input_output(self):
        self.x = np.random.rand(100, 2, 3).astype(self.dtype)
        self.y = np.random.rand(100).astype(self.dtype)
        self.out = self.x + self.y.reshape((100, 1, 1))

    def init_axis(self):
        self.axis = 0
class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
    """Broadcast y (100,) along axis 1 of x (2, 100, 3)."""

    def init_input_output(self):
        self.x = np.random.rand(2, 100, 3).astype(self.dtype)
        self.y = np.random.rand(100).astype(self.dtype)
        self.out = self.x + self.y.reshape((1, 100, 1))

    def init_axis(self):
        self.axis = 1
class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
    """Broadcast y (100,) along the last axis of x (2, 3, 100)."""

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 100).astype(self.dtype)
        self.y = np.random.rand(100).astype(self.dtype)
        self.out = self.x + self.y.reshape((1, 1, 100))
class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
    """Broadcast y (10, 12) into the middle axes of x (2, 10, 12, 3)."""

    def init_input_output(self):
        self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
        self.y = np.random.rand(10, 12).astype(self.dtype)
        self.out = self.x + self.y.reshape((1, 10, 12, 1))

    def init_axis(self):
        self.axis = 1
class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
    """Broadcast y (100, 1) from axis 0 of x (100, 2, 3, 4)."""

    def init_input_output(self):
        self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
        self.y = np.random.rand(100, 1).astype(self.dtype)
        self.out = self.x + self.y.reshape((100, 1, 1, 1))

    def init_axis(self):
        self.axis = 0
class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp):
    """NumPy-style broadcast: (10, 3, 12) + (10, 1, 12)."""

    def init_input_output(self):
        self.x = np.random.rand(10, 3, 12).astype(self.dtype)
        self.y = np.random.rand(10, 1, 12).astype(self.dtype)
        self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp):
    """NumPy-style broadcast: (2, 12, 3, 5) + (2, 12, 1, 5)."""

    def init_input_output(self):
        self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype)
        self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype)
        self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp):
    """Bidirectional broadcast: (1, 1, 20, 5) + (20, 5, 1, 1)."""

    def init_input_output(self):
        self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype)
        self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype)
        self.out = self.x + self.y
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
    """Row-wise add: y (10, 12) applied along axis 1 of x (2, 10, 12)."""

    def init_input_output(self):
        self.x = np.random.rand(2, 10, 12).astype(self.dtype)
        self.y = np.random.rand(10, 12).astype(self.dtype)
        self.out = self.x + self.y.reshape((1, 10, 12))

    def init_axis(self):
        self.axis = 1
@skip_check_grad_ci(
    reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
    """Row-wise add with a single-element y against x (100, 1)."""

    def init_input_output(self):
        self.x = np.random.rand(100, 1).astype(self.dtype)
        self.y = np.random.rand(1).astype(self.dtype)
        self.out = self.x + self.y.reshape((1, 1))

    def init_axis(self):
        self.axis = 1
class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp):
    """Per-channel add: (100, 2, 3) + (100, 1, 1)."""

    def init_input_output(self):
        self.x = np.random.rand(100, 2, 3).astype(self.dtype)
        self.y = np.random.rand(100, 1, 1).astype(self.dtype)
        self.out = self.x + self.y

    def init_axis(self):
        self.axis = -1
class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp):
    """Common broadcast case: (2, 3, 100) + (1, 1, 100)."""

    def init_input_output(self):
        self.x = np.random.rand(2, 3, 100).astype(self.dtype)
        self.y = np.random.rand(1, 1, 100).astype(self.dtype)
        self.out = self.x + self.y

    def init_axis(self):
        self.axis = -1
class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp):
    """Both operands broadcast: (10, 3, 1, 4) + (10, 1, 12, 1)."""

    def init_input_output(self):
        self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype)
        self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype)
        self.out = self.x + self.y

    def init_axis(self):
        self.axis = -1
class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp):
    """x has fewer dims than y: (10, 12) aligned at axis 2 of (2, 3, 10, 12)."""

    def init_input_output(self):
        self.x = np.random.rand(10, 12).astype(self.dtype)
        self.y = np.random.rand(2, 3, 10, 12).astype(self.dtype)
        self.out = self.x + self.y

    def init_axis(self):
        self.axis = 2
class TestElementwiseAddOpError(unittest.TestCase):
    """Verify elementwise_add rejects invalid input kinds and dtypes."""

    def test_errors(self):
        with program_guard(Program(), Program()):
            # the input of elementwise_add must be Variable.
            x1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
            y1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
            self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1)

            # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64
            # float16 only can be set on GPU place
            x2 = fluid.layers.data(
                name='x2', shape=[3, 4, 5, 6], dtype="uint8")
            y2 = fluid.layers.data(
                name='y2', shape=[3, 4, 5, 6], dtype="uint8")
            self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2)
class TestAddOp(unittest.TestCase):
    """API-level tests for paddle.add: naming, static graph, dygraph."""

    def test_name(self):
        with fluid.program_guard(fluid.Program()):
            x = fluid.data(name="x", shape=[2, 3], dtype="float32")
            y = fluid.data(name='y', shape=[2, 3], dtype='float32')
            y_1 = paddle.add(x, y, name='add_res')
            # The user-supplied name must appear in the output var name.
            self.assertEqual(('add_res' in y_1.name), True)

    def test_declarative(self):
        with fluid.program_guard(fluid.Program()):

            def gen_data():
                return {
                    "x": np.array([2, 3, 4]).astype('float32'),
                    "y": np.array([1, 5, 2]).astype('float32')
                }

            x = fluid.data(name="x", shape=[3], dtype='float32')
            y = fluid.data(name="y", shape=[3], dtype='float32')
            z = paddle.add(x, y)
            exe = fluid.Executor(fluid.CPUPlace())
            z_value = exe.run(feed=gen_data(), fetch_list=[z.name])
            z_expected = np.array([3., 8., 6.])
            self.assertEqual((z_value == z_expected).all(), True)

    def test_dygraph(self):
        with fluid.dygraph.guard():
            np_x = np.array([2, 3, 4]).astype('float64')
            np_y = np.array([1, 5, 2]).astype('float64')
            x = fluid.dygraph.to_variable(np_x)
            y = fluid.dygraph.to_variable(np_y)
            z = paddle.add(x, y)
            np_z = z.numpy()
            z_expected = np.array([3., 8., 6.])
            self.assertEqual((np_z == z_expected).all(), True)
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py
0 → 100644
浏览文件 @
b9a1c7e2
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
sys
sys
.
path
.
append
(
".."
)
import
paddle.fluid.core
as
core
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
Program
,
program_guard
def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y):
    """Build matmul-compatible shapes for X and Y.

    ``dim_X``/``dim_Y`` select 1-D, 2-D or batched 3-D operands; the
    transpose flags flip which side carries the contraction dim K.
    """
    BATCH_SIZE = 2
    M = 3
    N = 4
    K = 5
    # A transposed vector contracts over a length-1 dim.
    if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y):
        K = 1
    if dim_X == 1:
        shape_X = [M] if transpose_X else [K]
    if dim_Y == 1:
        shape_Y = [N] if transpose_Y else [K]
    if dim_X >= 2:
        shape_X = [K, M] if transpose_X else [M, K]
    if dim_X == 3:
        shape_X = [BATCH_SIZE] + shape_X
    if dim_Y >= 2:
        shape_Y = [N, K] if transpose_Y else [K, N]
    if dim_Y == 3:
        shape_Y = [BATCH_SIZE] + shape_Y
    return shape_X, shape_Y
def _swap_last_two_axes(A):
    # Transpose the trailing two axes of an N-D (N >= 2) array; used to
    # emulate matmul's transpose flags, which np.matmul lacks.
    dim = [i for i in range(len(A.shape))]
    dim[-1], dim[len(A.shape) - 2] = dim[len(A.shape) - 2], dim[-1]
    return np.transpose(A, tuple(dim))


def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
    """Reference forward implementation using np.matmul.

    Args:
        X, Y: numpy arrays (1-D vectors, 2-D matrices or batched N-D).
        transpose_X, transpose_Y: apply the matmul transpose flag to the
            corresponding operand before multiplying.

    Returns:
        np.matmul(X', Y'); a scalar result is wrapped into a shape-(1,)
        array so the function always returns a Tensor-like ndarray.
    """
    # np.matmul does not support the transpose flags, so we manually
    # transpose X and Y appropriately.
    if transpose_X:
        if X.ndim == 1:
            X = X.reshape((X.size, 1))
        elif X.ndim == 2:
            X = X.T
        else:
            X = _swap_last_two_axes(X)
    if transpose_Y:
        if Y.ndim == 1:
            Y = Y.reshape((1, Y.size))
        else:
            # For 2-D this swap is an ordinary transpose.
            Y = _swap_last_two_axes(Y)
    Out = np.matmul(X, Y)
    if not Out.shape:
        # We do not support 0-dimensional Tensors (scalars). So where
        # np.matmul outputs a scalar, we must convert to a Tensor of
        # shape (1, ) instead.
        # Everywhere else, we are compatible with np.matmul.
        # Keep the input dtype instead of hardcoding float32 (backward
        # compatible: all callers in this file pass float32 inputs).
        Out = np.array([Out], dtype=X.dtype)
    return Out
class Generator(object):
    """Mixin for matmul OpTest cases.

    Subclasses supply ``shape_X``, ``shape_Y``, ``transpose_X`` and
    ``transpose_Y``; this mixin builds the inputs and the reference output
    and runs CPU checks plus (when applicable) XPU checks.
    """

    def setUp(self):
        self.op_type = "matmul"
        X = np.random.random(self.shape_X).astype("float32")
        Y = np.random.random(self.shape_Y).astype("float32")
        Out = reference_matmul(X, Y, self.transpose_X, self.transpose_Y)
        self.inputs = {'X': X, 'Y': Y}
        self.attrs = {
            'transpose_X': self.transpose_X,
            'transpose_Y': self.transpose_Y
        }
        self.outputs = {'Out': Out}

    def _xpu_applicable(self):
        # The XPU matmul kernel only covers operands of equal rank whose
        # leading (batch) dims match.
        return (paddle.is_compiled_with_xpu() and
                len(self.inputs['X'].shape) == len(self.inputs['Y'].shape) and
                self.inputs['X'].shape[0] == self.inputs['Y'].shape[0])

    def test_check_output(self):
        self.check_output()
        if self._xpu_applicable():
            self.check_output_with_place(paddle.XPUPlace(0), atol=1e-3)

    def test_check_grad_normal(self):
        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
        if self._xpu_applicable():
            self.check_grad_with_place(
                paddle.XPUPlace(0), ['X', 'Y'], 'Out',
                max_relative_error=5e-2)

    def test_check_grad_ignore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X"))
        if self._xpu_applicable():
            self.check_grad_with_place(
                paddle.XPUPlace(0), ['Y'],
                'Out',
                max_relative_error=5e-2,
                no_grad_set=set("X"))

    def test_check_grad_ignore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y'))
        if self._xpu_applicable():
            self.check_grad_with_place(
                paddle.XPUPlace(0), ['X'],
                'Out',
                max_relative_error=5e-2,
                no_grad_set=set('Y'))
class TestMatmulOpError(unittest.TestCase):
    """Checks that fluid.layers.matmul rejects invalid inputs."""

    def test_errors(self):
        with program_guard(Program(), Program()):
            # A plain Python int is not a Variable, so matmul must raise.
            not_a_var = 12
            self.assertRaises(TypeError, fluid.layers.matmul, not_a_var,
                              not_a_var)
            # int32 operands are rejected: only float dtypes are allowed.
            int_var = fluid.layers.data(
                name='input2', shape=[10, 10], dtype="int32")
            self.assertRaises(TypeError, fluid.layers.matmul, int_var,
                              int_var)
            # float16 operands are accepted without raising.
            fp16_var = fluid.layers.data(
                name='input3', shape=[2, 2], dtype="float16")
            fluid.layers.matmul(fp16_var, fp16_var)
# Negative dimension generation
def generate_negative_dims(in_shape):
    """Enumerate every variant of ``in_shape`` with axes replaced by -1.

    One shape is produced per non-empty subset of axis indices, with the
    axes in the subset set to -1 (unknown dimension), ordered by subset
    size and then lexicographically.
    """
    from itertools import combinations

    rank = len(in_shape)
    axis_subsets = []
    for subset_size in range(1, rank + 1):
        axis_subsets.extend(combinations(range(rank), subset_size))
    return [
        [-1 if axis in subset else dim for axis, dim in enumerate(in_shape)]
        for subset in axis_subsets
    ]
# Build program with inputs sizes that contain negative numbers
def test_negative_dims_program(obj):
    """Shared test body: run matmul over every -1-dim variant of the shapes.

    For each combination of negative-dimension variants of ``obj.shape_X``
    and ``obj.shape_Y``, builds a program declaring those (partially
    unknown) static shapes, checks the inferred output shape against the
    numpy reference, then executes on CPU and verifies the values.
    """
    for shape_x in generate_negative_dims(obj.shape_X):
        for shape_y in generate_negative_dims(obj.shape_Y):
            X = np.random.random(obj.shape_X).astype("float32")
            Y = np.random.random(obj.shape_Y).astype("float32")
            Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
            with program_guard(Program(), Program()):
                x = fluid.data(name='x', shape=shape_x, dtype='float32')
                y = fluid.data(name='y', shape=shape_y, dtype='float32')
                output = fluid.layers.matmul(x, y, obj.transpose_X,
                                             obj.transpose_Y)
                obj.assertEqual(len(Ref.shape), len(output.shape))
                # Static shape inference must agree with the reference on
                # every dimension that was not left unknown (-1).
                for idx in range(len(Ref.shape)):
                    if output.shape[idx] != -1:
                        obj.assertEqual(Ref.shape[idx], output.shape[idx])
                exe = fluid.Executor(fluid.CPUPlace())
                res, = exe.run(fluid.default_main_program(),
                               feed={'x': X,
                                     'y': Y},
                               fetch_list=[output])
                # BUG FIX: the np.allclose return value was previously
                # discarded, so the numerical check never actually ran.
                obj.assertTrue(np.allclose(res, Ref, atol=1e-5))
# Generate program api cases for all negative possibilities
def api_test(dim_x, dim_y, trans_x, trans_y):
    """Register a TestCase exercising matmul with negative (unknown) dims.

    The generated class is injected into the module namespace so that
    unittest discovery picks it up.
    """
    test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
        dim_x, dim_y, trans_x, trans_y))
    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
                                                  trans_y)
    globals()[test_name] = type(test_name, (unittest.TestCase, ), {
        'shape_X': shape_x,
        'shape_Y': shape_y,
        'transpose_X': trans_x,
        'transpose_Y': trans_y,
        # Fixed typo: the key was 'test_propram'. The name keeps its
        # 'test' prefix so unittest discovery still runs the method.
        'test_program': test_negative_dims_program,
    })
# Generate operators cases for all possibilities
def inject_test(dim_x, dim_y, trans_x, trans_y):
    """Register an OpTest-based matmul operator test for one configuration."""
    case_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
        dim_x, dim_y, trans_x, trans_y))
    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
                                                  trans_y)
    case_attrs = {
        'shape_X': shape_x,
        'shape_Y': shape_y,
        'transpose_X': trans_x,
        'transpose_Y': trans_y,
    }
    # Inject into the module namespace for unittest discovery.
    globals()[case_name] = type(case_name, (Generator, OpTest), case_attrs)
# Register operator and API test classes for every combination of
# operand ranks (1-3) and transpose flags.
for dim_X in (1, 2, 3):
    for dim_Y in (1, 2, 3):
        for transpose_x_flag in (False, True):
            for transpose_y_flag in (False, True):
                inject_test(dim_X, dim_Y, transpose_x_flag, transpose_y_flag)
                api_test(dim_X, dim_Y, transpose_x_flag, transpose_y_flag)
# Test case n-dim
def generate_compatible_shapes(dim, transpose_X, transpose_Y):
    """Build n-dim operand shapes for a batched matmul test case.

    NOTE: this intentionally shadows the earlier 4-argument helper of the
    same name; registrations below this point use the n-dim variant.

    Both operands get (dim - 2) leading batch axes of size 2 followed by
    the (M, K) / (K, N) matrix axes, swapped when the corresponding
    transpose flag is set.
    """
    M, N, K = 2, 4, 3
    batch = [2] * (dim - 2)
    mat_X = [K, M] if transpose_X else [M, K]
    mat_Y = [N, K] if transpose_Y else [K, N]
    return batch + mat_X, batch + mat_Y
# # Test case n-dim
# Register 4-D (batched) matmul operator tests for every transpose combo,
# using the n-dim generate_compatible_shapes defined just above.
for dim in [4]:
    for transpose_X in [False, True]:
        for transpose_Y in [False, True]:
            case_name = (
                'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
                    dim, dim, transpose_X, transpose_Y))
            shape_X, shape_Y = generate_compatible_shapes(
                dim, transpose_X, transpose_Y)
            case_attrs = {
                'shape_X': shape_X,
                'shape_Y': shape_Y,
                'transpose_X': transpose_X,
                'transpose_Y': transpose_Y,
            }
            globals()[case_name] = type(case_name, (Generator, OpTest),
                                        case_attrs)
class API_TestMm(unittest.TestCase):
    """Tests for the paddle.mm API in static-graph and dygraph modes."""

    def test_out(self):
        with fluid.program_guard(fluid.Program()):
            x = fluid.data(name="x", shape=[2], dtype="float64")
            y = fluid.data(name='y', shape=[2], dtype='float64')
            # NOTE: an unused placeholder (name="output") was removed
            # here; it was never fed or fetched.
            result = paddle.mm(x, y)
            exe = fluid.Executor(fluid.CPUPlace())
            data1 = np.random.rand(2)
            data2 = np.random.rand(2)
            np_res = exe.run(feed={'x': data1,
                                   'y': data2},
                             fetch_list=[result])
            # mm of two length-2 vectors equals the (1, 2) x (2, 1)
            # matrix product.
            expected_result = np.matmul(
                data1.reshape(1, 2), data2.reshape(2, 1))
            self.assertTrue(
                np.allclose(np_res, expected_result, atol=1e-5),
                "two value is{}\n{}, check diff!".format(np_res,
                                                         expected_result))

    def test_dygraph_without_out(self):
        device = fluid.CPUPlace()
        with fluid.dygraph.guard(device):
            input_array1 = np.random.rand(3, 4).astype("float64")
            input_array2 = np.random.rand(4, 3).astype("float64")
            data1 = fluid.dygraph.to_variable(input_array1)
            data2 = fluid.dygraph.to_variable(input_array2)
            out = paddle.mm(data1, data2)
            expected_result = np.matmul(input_array1, input_array2)
            # Dygraph result must match the numpy reference.
            self.assertTrue(np.allclose(expected_result, out.numpy()))
class Test_API_Matmul(unittest.TestCase):
    """Dygraph smoke test for the paddle.matmul API."""

    def test_dygraph_without_out(self):
        with fluid.dygraph.guard(fluid.CPUPlace()):
            lhs_np = np.random.rand(3, 4).astype("float64")
            rhs_np = np.random.rand(4, 3).astype("float64")
            lhs = fluid.dygraph.to_variable(lhs_np)
            rhs = fluid.dygraph.to_variable(rhs_np)
            out = paddle.matmul(lhs, rhs)
            # The dygraph result must match the numpy reference product.
            self.assertTrue(
                np.allclose(np.matmul(lhs_np, rhs_np), out.numpy()))
class API_TestMmError(unittest.TestCase):
    """Static-shape validation tests for paddle.mm."""

    def test_errors(self):
        def build_mm(shape1, shape2):
            # Declare two placeholders with the given static shapes and
            # apply paddle.mm inside a fresh program pair.
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                data1 = fluid.data(
                    name="data1", shape=shape1, dtype="float32")
                data2 = fluid.data(
                    name="data2", shape=shape2, dtype="float32")
                paddle.mm(data1, data2)

        # Mismatched inner dimensions (2 vs 3) must be rejected.
        self.assertRaises(ValueError, build_mm, [10, 2], [3, 10])
        # Unknown (-1) batch dims with compatible matrix dims are accepted.
        build_mm([-1, 10, 2], [-1, 2, 10])
        # Mismatched batch dimensions (10 vs 3) must be rejected.
        self.assertRaises(ValueError, build_mm, [10, 10, 2], [3, 2, 10])
# Standard unittest entry point for direct invocation of this test file.
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/xpu/test_mul_op.py
0 → 100644
浏览文件 @
b9a1c7e2
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
import
sys
sys
.
path
.
append
(
".."
)
from
op_test
import
OpTest
import
paddle.fluid
as
fluid
from
paddle.fluid
import
Program
,
program_guard
class TestMulOp(OpTest):
    """Base OpTest for the 2-D "mul" operator (plain matrix product)."""

    def setUp(self):
        self.op_type = "mul"
        self.dtype = np.float64
        self.init_dtype_type()
        lhs = np.random.random((20, 5)).astype(self.dtype)
        rhs = np.random.random((5, 21)).astype(self.dtype)
        self.inputs = {'X': lhs, 'Y': rhs}
        # Reference output: ordinary matrix multiplication.
        self.outputs = {'Out': np.dot(lhs, rhs)}

    def init_dtype_type(self):
        # Hook for subclasses to override self.dtype.
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
class TestMulOpError(unittest.TestCase):
    """Input-validation tests for fluid.layers.mul."""

    def test_errors(self):
        with program_guard(Program(), Program()):
            # A LoDTensor is not a Variable, so mul must raise TypeError.
            lod_a = fluid.create_lod_tensor(
                np.array([[-1]]), [[1]], fluid.CPUPlace())
            lod_b = fluid.create_lod_tensor(
                np.array([[-1]]), [[1]], fluid.CPUPlace())
            self.assertRaises(TypeError, fluid.layers.mul, lod_a, lod_b)
            # int32 operands are rejected; only float32/float64 work.
            int_a = fluid.layers.data(name='x3', shape=[4], dtype="int32")
            int_b = fluid.layers.data(name='x4', shape=[4], dtype="int32")
            self.assertRaises(TypeError, fluid.layers.mul, int_a, int_b)
class TestMulOp2(OpTest):
    """OpTest for "mul" with flattened multi-dimensional operands.

    x_num_col_dims / y_num_col_dims fold the leading axes of each
    operand into the row dimension before the matrix product.
    """

    def setUp(self):
        self.op_type = "mul"
        self.dtype = np.float64
        self.init_dtype_type()
        self.inputs = {
            'X': np.random.random((3, 4, 2, 9)).astype(self.dtype),
            'Y': np.random.random((3, 6, 1, 2, 3)).astype(self.dtype)
        }
        self.attrs = {
            'x_num_col_dims': 2,
            'y_num_col_dims': 2,
        }
        # Reference: flatten X to (3*4, 2*9) and Y to (3*6, 1*2*3),
        # multiply, then restore the un-flattened output shape.
        flat = np.dot(self.inputs['X'].reshape(3 * 4, 2 * 9),
                      self.inputs['Y'].reshape(3 * 6, 1 * 2 * 3))
        self.outputs = {'Out': flat.reshape(3, 4, 1, 2, 3)}

    def init_dtype_type(self):
        # Hook for subclasses to override self.dtype.
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X'))

    def test_check_grad_ignore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUMulOp1(TestMulOp):
    """Runs the 2-D mul tests on an XPU device with float32 inputs."""

    def init_dtype_type(self):
        # XPU kernels are exercised in single precision.
        self.dtype = np.float32

    def _xpu_place(self):
        # All checks in this class run on the first XPU device.
        return paddle.XPUPlace(0)

    def test_check_output(self):
        self.check_output_with_place(self._xpu_place(), atol=1e-1)

    def test_check_grad_normal(self):
        self.check_grad_with_place(
            self._xpu_place(), ['X', 'Y'], 'Out', max_relative_error=0.5)

    def test_check_grad_ingore_x(self):
        self.check_grad_with_place(
            self._xpu_place(), ['Y'],
            'Out',
            max_relative_error=0.5,
            no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        self.check_grad_with_place(
            self._xpu_place(), ['X'],
            'Out',
            max_relative_error=0.5,
            no_grad_set=set('Y'))
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUMulOp2(TestMulOp2):
    """Runs the flattened-operand mul tests on an XPU device (float32)."""

    def init_dtype_type(self):
        # XPU kernels are exercised in single precision.
        self.dtype = np.float32

    def _xpu_place(self):
        # All checks in this class run on the first XPU device.
        return paddle.XPUPlace(0)

    def test_check_output(self):
        self.check_output_with_place(self._xpu_place(), atol=2e-1)

    def test_check_grad_normal(self):
        self.check_grad_with_place(
            self._xpu_place(), ['X', 'Y'], 'Out', max_relative_error=0.9)

    def test_check_grad_ingore_x(self):
        self.check_grad_with_place(
            self._xpu_place(), ['Y'],
            'Out',
            max_relative_error=0.5,
            no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        self.check_grad_with_place(
            self._xpu_place(), ['X'],
            'Out',
            max_relative_error=0.9,
            no_grad_set=set('Y'))
# Standard unittest entry point for direct invocation of this test file.
if __name__ == "__main__":
    unittest.main()
python/paddle/tensor/linalg.py
浏览文件 @
b9a1c7e2
...
@@ -707,20 +707,14 @@ def cross(x, y, axis=None, name=None):
...
@@ -707,20 +707,14 @@ def cross(x, y, axis=None, name=None):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
import paddle
import paddle
from paddle import to_variable
import numpy as np
paddle.disable_static()
paddle.disable_static()
data_x = np.array([[1.0, 1.0, 1.0],
x = paddle.to_tensor([[1.0, 1.0, 1.0],
[2.0, 2.0, 2.0],
[2.0, 2.0, 2.0],
[3.0, 3.0, 3.0]])
[3.0, 3.0, 3.0]])
data_y = np.array([[1.0, 1.0, 1.0],
y = paddle.to_tensor([[1.0, 1.0, 1.0],
[1.0, 1.0, 1.0],
[1.0, 1.0, 1.0],
[1.0, 1.0, 1.0]])
[1.0, 1.0, 1.0]])
x = to_variable(data_x)
y = to_variable(data_y)
z1 = paddle.cross(x, y)
z1 = paddle.cross(x, y)
print(z1.numpy())
print(z1.numpy())
# [[-1. -1. -1.]
# [[-1. -1. -1.]
...
...
python/paddle/tensor/math.py
浏览文件 @
b9a1c7e2
...
@@ -1650,12 +1650,11 @@ def cumsum(x, axis=None, dtype=None, name=None):
...
@@ -1650,12 +1650,11 @@ def cumsum(x, axis=None, dtype=None, name=None):
.. code-block:: python
.. code-block:: python
import paddle
import paddle
from paddle import to_variable
import numpy as np
import numpy as np
paddle.disable_static()
paddle.disable_static()
data_np = np.arange(12).reshape(3, 4)
data_np = np.arange(12).reshape(3, 4)
data =
to_variable
(data_np)
data =
paddle.to_tensor
(data_np)
y = paddle.cumsum(data)
y = paddle.cumsum(data)
print(y.numpy())
print(y.numpy())
...
...
tools/wlist.json
浏览文件 @
b9a1c7e2
...
@@ -251,9 +251,10 @@
...
@@ -251,9 +251,10 @@
"BilinearTensorProduct"
,
"BilinearTensorProduct"
,
"GroupNorm"
,
"GroupNorm"
,
"SpectralNorm"
,
"SpectralNorm"
,
"TreeConv"
,
"TreeConv"
],
"wlist_temp"
:[
"prroi_pool"
,
"prroi_pool"
,
"to_tensor"
,
"ChunkEvaluator"
,
"ChunkEvaluator"
,
"EditDistance"
,
"EditDistance"
,
"ErrorClipByValue"
,
"ErrorClipByValue"
,
...
@@ -406,7 +407,9 @@
...
@@ -406,7 +407,9 @@
"TransformerDecoder.prepare_incremental_cache"
,
"TransformerDecoder.prepare_incremental_cache"
,
"LinearChainCRF.forward"
,
"LinearChainCRF.forward"
,
"CRFDecoding.forward"
,
"CRFDecoding.forward"
,
"SequenceTagging.forward"
"SequenceTagging.forward"
,
"XPUPlace"
,
"is_compiled_with_xpu"
],
],
"gpu_not_white"
:[
"gpu_not_white"
:[
"deformable_conv"
,
"deformable_conv"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录