Commit b9a1c7e2 authored by xiemoyuan

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into op_error_message

@@ -4,7 +4,7 @@ endif()
 INCLUDE(ExternalProject)
 SET(XPU_PROJECT "extern_xpu")
-SET(XPU_URL "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
...
@@ -62,9 +62,9 @@ function(op_library TARGET)
       endif()
     endif()
     if(WITH_XPU)
-      string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}")
-      if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc)
-        list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc)
+      string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
+      if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
+        list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
       endif()
     endif()
   else()
@@ -83,7 +83,7 @@ function(op_library TARGET)
       list(APPEND mkldnn_cc_srcs ${src})
     elseif(${src} MATCHES ".*\\.cu.cc$")
       list(APPEND cu_cc_srcs ${src})
-    elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$")
+    elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
       list(APPEND xpu_cc_srcs ${src})
     elseif(${src} MATCHES ".*\\.cc$")
       list(APPEND cc_srcs ${src})
...
@@ -76,7 +76,7 @@ void AllReduceOpHandle::AllReduceImpl(
       platform::errors::InvalidArgument(
           "The NoDummyInputSize should be equal "
           "to the number of places, but got NoDummyInputSize is "
-          "%d and the number of place is %d.",
+          "%d and the number of places is %d.",
           in_var_handles.size(), num_places));
   PADDLE_ENFORCE_EQ(
       in_var_handles.size(), out_var_handles.size(),
@@ -89,7 +89,7 @@ void AllReduceOpHandle::AllReduceImpl(
       platform::errors::InvalidArgument(
           "The number of local scopes should be equal "
           "to the number of places, but got the number of local scopes is "
-          "%d and the number of place is %d.",
+          "%d and the number of places is %d.",
          in_var_handles.size(), num_places));

   std::vector<const void *> lod_tensor_data;
...
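The two hunks above show the convention this commit applies throughout: every bare PADDLE_ENFORCE* call gains a typed platform::errors object whose message names both the expected and the actual value. A minimal sketch of the pattern, using the PADDLE_ENFORCE_EQ macro and platform::errors::InvalidArgument exactly as they appear in the hunks above (the variable names are placeholders, not part of the diff):

// Before: condition alone; on failure only the stringified
// expression is reported.
// PADDLE_ENFORCE_EQ(in_var_handles.size(), num_places);

// After: a typed error with a formatted, self-describing message.
PADDLE_ENFORCE_EQ(
    in_var_handles.size(), num_places,
    platform::errors::InvalidArgument(
        "The number of input variable handles should be equal to the "
        "number of places, but got %d inputs and %d places.",
        in_var_handles.size(), num_places));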
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/details/broadcast_op_handle.h"
+
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -31,10 +32,15 @@ void BroadcastOpHandle::RunImpl() {
   auto out_var_handles = DynamicCast<VarHandle>(outputs_);
 
   PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL,
-                    "The number of input should be one.");
-  PADDLE_ENFORCE_EQ(
-      out_var_handles.size(), places_.size(),
-      "The number of output should equal to the number of places.");
+                    platform::errors::PreconditionNotMet(
+                        "The number of inputs should be 1, but got %d.",
+                        in_var_handles.size()));
+  PADDLE_ENFORCE_EQ(out_var_handles.size(), places_.size(),
+                    platform::errors::PreconditionNotMet(
+                        "The number of outputs and the number of places should "
+                        "be equal, but got the number of outputs is %d and the "
+                        "number of places is %d.",
+                        out_var_handles.size(), places_.size()));
 
   VarHandle *in_var_handle = in_var_handles[0];
@@ -47,7 +53,9 @@ void BroadcastOpHandle::BroadcastOneVar(
     const std::vector<Scope *> &var_scopes) {
   auto *in_var =
       var_scopes.at(in_var_handle.scope_idx())->FindVar(in_var_handle.name());
-  PADDLE_ENFORCE_NOT_NULL(in_var);
+  PADDLE_ENFORCE_NOT_NULL(
+      in_var, platform::errors::NotFound("Variable %s is not found in scopes.",
+                                         in_var_handle.name()));
   Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
   if (UNLIKELY(!in_tensor.IsInitialized())) {
     VLOG(3) << "in var " << in_var_handle.name() << "not inited, return!";
@@ -103,7 +111,7 @@ void BroadcastOpHandle::BroadcastOneVar(
       broadcast_calls.emplace_back(
           [send_recv_buffer, numel, type, root_id, &nccl_ctx] {
-            PADDLE_ENFORCE(platform::dynload::ncclBcast(
+            PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast(
                 send_recv_buffer, numel, static_cast<ncclDataType_t>(type),
                 root_id, nccl_ctx.comm_, nccl_ctx.stream()));
           });
@@ -131,7 +139,8 @@ void BroadcastOpHandle::BroadcastOneVar(
       nccl_ctxs_->DevCtx(p)->Wait();
     }
 #else
-    PADDLE_THROW("CUDA is not enabled.");
+    PADDLE_THROW(
+        platform::errors::PreconditionNotMet("Not compiled with NCCL."));
 #endif
   }
 }
@@ -154,10 +163,13 @@ void BroadcastOpHandle::InitOutputValue(
     auto t_out_p = out_var_handle->place();
     auto *out_var = var_scopes.at(out_var_handle->scope_idx())
                         ->FindVar(out_var_handle->name());
-    PADDLE_ENFORCE_NOT_NULL(out_var);
+    PADDLE_ENFORCE_NOT_NULL(out_var, platform::errors::NotFound(
+                                         "Variable %s is not found in scopes.",
+                                         out_var_handle->name()));
     if (is_gpu_place(in_tensor.place())) {
-      PADDLE_ENFORCE(platform::is_gpu_place(t_out_p),
-                     "Places of input and output must be all on GPU.");
+      PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true,
+                        platform::errors::PreconditionNotMet(
+                            "Places of input and output must be all on GPU."));
     } else {
       t_out_p = platform::CPUPlace();
     }
...
@@ -79,7 +79,8 @@ struct TestBroadcastOpHandle {
       }
       nccl_ctxs_.reset(new platform::NCCLContextMap(place_list_));
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
+          platform::errors::PreconditionNotMet("Not compiled with NCCL."));
 #endif
     } else {
       int count = 8;
@@ -113,7 +114,8 @@ struct TestBroadcastOpHandle {
       op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                          place_list_, nccl_ctxs_.get());
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
+          platform::errors::PreconditionNotMet("Not compiled with NCCL."));
 #endif
     } else {
 #if defined(PADDLE_WITH_NCCL)
@@ -171,7 +173,9 @@ struct TestBroadcastOpHandle {
                      float val_scalar = 0.0) {
     auto var = param_scopes_[input_scope_idx]->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::NotFound("Variable %s is not found in scope.",
+                                        varname));
     auto lod_tensor = var->GetMutable<f::LoDTensor>();
     std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
     for (size_t k = 0; k < send_vector.size(); ++k) {
@@ -194,7 +198,9 @@ struct TestBroadcastOpHandle {
     }
     auto var = param_scopes_[input_scope_idx]->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::NotFound("Variable %s is not found in scope.",
+                                        varname));
     auto selected_rows = var->GetMutable<f::SelectedRows>();
     auto value = selected_rows->mutable_value();
     value->mutable_data<float>(kDims, place_list_[input_scope_idx]);
@@ -211,13 +217,24 @@ struct TestBroadcastOpHandle {
                          const std::vector<float>& send_vector,
                          const std::vector<int64_t>& rows, int height) {
     auto var = param_scopes_[input_scope_idx]->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::NotFound("Variable %s is not found in scope.",
+                                        varname));
     auto& selected_rows = var->Get<f::SelectedRows>();
     auto rt = selected_rows.value();
-    PADDLE_ENFORCE_EQ(selected_rows.height(), height, "height is not equal.");
+    PADDLE_ENFORCE_EQ(selected_rows.height(), height,
+                      platform::errors::InvalidArgument(
+                          "The height of SelectedRows is not equal to "
+                          "the expected, expect %d, but got %ld.",
+                          height, selected_rows.height()));
 
     for (size_t k = 0; k < selected_rows.rows().size(); ++k) {
-      PADDLE_ENFORCE_EQ(selected_rows.rows()[k], rows[k]);
+      PADDLE_ENFORCE_EQ(
+          selected_rows.rows()[k], rows[k],
+          platform::errors::InvalidArgument(
+              "The item at position %zu of rows of SelectedRows "
+              "is not equal to the expected, expect %ld, but got %ld.",
+              k, rows[k], selected_rows.rows()[k]));
     }
 
     p::CPUPlace cpu_place;
@@ -235,9 +252,15 @@ struct TestBroadcastOpHandle {
                       framework::Scope* scope) {
     p::CPUPlace cpu_place;
     auto var = scope->FindVar(varname);
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::NotFound("Variable %s is not found in scope.",
+                                        varname));
     auto tensor = var->Get<f::LoDTensor>();
-    PADDLE_ENFORCE_EQ(tensor.lod(), lod, "lod is not equal.");
+    PADDLE_ENFORCE_EQ(tensor.lod(), lod,
+                      platform::errors::InvalidArgument(
+                          "The LoD of tensor is not equal to "
+                          "the expected, expect %s, but got %s.",
+                          lod, tensor.lod()));
     f::Tensor result_tensor;
     f::TensorCopySync(tensor, cpu_place, &result_tensor);
     float* ct = result_tensor.mutable_data<float>(cpu_place);
...
@@ -235,7 +235,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
             AppendPass("reduce_mode_multi_devices_pass").get();
         break;
       default:
-        PADDLE_THROW("Unknown reduce strategy.");
+        PADDLE_THROW(
+            platform::errors::Unimplemented("Unknown reduce strategy."));
     }
   }
   multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy",
...
@@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
+
 #include <memory>
 #include <unordered_set>
 #include <utility>
-#include "paddle/fluid/framework/details/eager_deletion_op_handle.h"
 #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/scope.h"
@@ -47,15 +48,19 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
     if (dynamic_cast<StreamGarbageCollector *>(gc_)) {
       platform::CUDADeviceGuard guard(
           BOOST_GET_CONST(platform::CUDAPlace, place).device);
-      PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
-      PADDLE_ENFORCE_NOT_NULL(event_);
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
+      PADDLE_ENFORCE_NOT_NULL(event_, platform::errors::InvalidArgument(
+                                          "The CUDA event created is NULL."));
     }
   }
 #endif
-  PADDLE_ENFORCE_NE(vars.empty(), true, platform::errors::InvalidArgument(
-                                            "Variable names are empty."));
+  PADDLE_ENFORCE_NE(vars.empty(), true,
+                    platform::errors::InvalidArgument(
+                        "The variables to be deleted are empty."));
   for (auto *var : var_infos_) {
-    PADDLE_ENFORCE_NOT_NULL(var);
+    PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument(
+                                     "The memory optimization info is NULL."));
   }
 }
@@ -64,7 +69,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
   if (event_) {
     auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx_->GetPlace());
     platform::CUDADeviceGuard guard(gpu_place.device);
-    PADDLE_ENFORCE(cudaEventDestroy(event_));
+    PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(event_));
   }
 #endif
 }
@@ -78,12 +83,17 @@ void EagerDeletionOpHandle::InitCUDA() {
 }

 void EagerDeletionOpHandle::CallOnce() {
-  PADDLE_ENFORCE(vars_.empty(), "vars_ must be initialized here");
+  PADDLE_ENFORCE_EQ(
+      vars_.empty(), true,
+      platform::errors::InvalidArgument(
+          "The variables to be deleted should be initialized here."));
   Scope *exec_scope = local_exec_scopes_[0];
   for (auto *var_info : var_infos_) {
     auto *var = exec_scope->FindVar(var_info->Name());
-    PADDLE_ENFORCE_NOT_NULL(var, "Variable %s should not be nullptr",
-                            var_info->Name());
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::NotFound(
+                 "The variable(%s) to be inplaced is not found in scope.",
+                 var_info->Name()));
     vars_.emplace_back(var);
   }
 }
@@ -119,8 +129,9 @@ void EagerDeletionOpHandle::RunImpl() {
         garbages.emplace_back(t.MoveMemoryHolder());
       }
     } else {
-      PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                   framework::ToTypeName(var->Type()), var_info->Name());
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "The variable(%s) of type %s is not supported in eager deletion.",
+          var_info->Name(), framework::ToTypeName(var->Type())));
     }
   }
@@ -137,8 +148,9 @@ void EagerDeletionOpHandle::ClearGarbages(
     auto callback_stream =
         reinterpret_cast<StreamGarbageCollector *>(gc_)->stream();
     auto callback_func = [=]() {
-      PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream));
-      PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(event_, compute_stream));
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          cudaStreamWaitEvent(callback_stream, event_, 0));
     };
     gc_->Add(std::move(*garbages), callback_func);
   } else {
...
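The CUDA-facing checks in this file follow a second recurring pattern: a raw PADDLE_ENFORCE around a call returning cudaError_t becomes PADDLE_ENFORCE_CUDA_SUCCESS, which decodes the status code into the thrown message. A hedged sketch of the idiom as used above (the local event variable is illustrative only):

cudaEvent_t event;
// On any status other than success, the macro throws with the decoded
// CUDA (or dynload'd NCCL) error string instead of a generic message.
PADDLE_ENFORCE_CUDA_SUCCESS(
    cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
// ... use the event ...
PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(event));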
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h"
+
 #include <algorithm>
 #include <utility>
+
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@@ -56,10 +58,20 @@ void FusedAllReduceOpHandle::RunImpl() {
   size_t place_num = places_.size();
   PADDLE_ENFORCE_EQ(
       in_var_handles.size(), place_num * num_of_all_reduce_,
-      "The NoDummyInputSize should be equal to the number of places.");
+      platform::errors::PreconditionNotMet(
+          "The number of input variable handles should be equal to the number "
+          "of places multiplied by the number of all reduce handles, "
+          "but got the number of input variable handles is %d, the "
+          "number of places is %d, and the number of all reduce handles "
+          "is %d.",
+          in_var_handles.size(), place_num, num_of_all_reduce_));
   PADDLE_ENFORCE_EQ(
       in_var_handles.size(), out_var_handles.size(),
-      "The NoDummyInputSize and NoDummyOutputSize should be equal.");
+      platform::errors::PreconditionNotMet(
+          "The number of input variable handles should be equal to the number "
+          "of output variable handles, but got the number of input variable "
+          "handles is %d, and the number of output variable handles is %d.",
+          in_var_handles.size(), out_var_handles.size()));

   // Note: some gradient op doesn't have CUDAKernel, so the gradients of
   // those op are in CPUPlace, in this case, the all reduce should not be fused.
@@ -106,7 +118,13 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
         dtype = ele_dtype;
       }
-      PADDLE_ENFORCE_EQ(ele_dtype, dtype);
+      PADDLE_ENFORCE_EQ(
+          ele_dtype, dtype,
+          platform::errors::InvalidArgument(
+              "The DataType of grad tensors of fused_all_reduce_op_handle "
+              "must be consistent. The current dtype is %s, but the "
+              "previous dtype is %s.",
+              DataTypeToString(ele_dtype), DataTypeToString(dtype)));

       // Check whether the address space is contiguous.
       std::sort(
@@ -130,16 +148,29 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc(
           "input[%d] address: 0X%02x. The offset: %d",
           k - 1, g_tensor.at(k - 1).first, cur_address, g_tensor.at(k).first, k,
           next_address, k, infer_next_address, offset);
-      PADDLE_ENFORCE_EQ(infer_next_address, next_address,
-                        "The address is not consistent.");
+      PADDLE_ENFORCE_EQ(
+          infer_next_address, next_address,
+          platform::errors::InvalidArgument(
+              "The inferred address of the next tensor should be equal to the "
+              "real address of the next tensor. But got inferred address is %p "
+              "and real address is %p.",
+              infer_next_address, next_address));
     }
   }

   if (!FLAGS_skip_fused_all_reduce_check) {
     for (size_t scope_idx = 0; scope_idx < place_num; ++scope_idx) {
       for (size_t j = 1; j < num_of_all_reduce_; ++j) {
-        PADDLE_ENFORCE_EQ(grads_tensor.at(0).at(j).first,
-                          grads_tensor.at(scope_idx).at(j).first);
+        PADDLE_ENFORCE_EQ(
+            grads_tensor.at(0).at(j).first,
+            grads_tensor.at(scope_idx).at(j).first,
+            platform::errors::InvalidArgument(
+                "The variable name of grad tensors of "
+                "fused_all_reduce_op_handle "
+                "must be consistent. The current name is %s, but the "
+                "previous name is %s.",
+                grads_tensor.at(0).at(j).first,
+                grads_tensor.at(scope_idx).at(j).first));
       }
     }
   }
@@ -167,7 +198,9 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace(
     for (size_t j = 0; j < in_var_handles.size(); j += place_num) {
       auto var_name = in_var_handles[j]->name();
       auto var = local_scope->FindVar(var_name);
-      PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name);
+      PADDLE_ENFORCE_NOT_NULL(
+          var, platform::errors::NotFound(
+                   "The variable '%s' is not found in local scope.", var_name));
       auto &lod_tensor = var->Get<LoDTensor>();
       if (!is_same_place(lod_tensor.place(), places_.at(scope_idx))) {
         return true;
@@ -185,14 +218,24 @@ void FusedAllReduceOpHandle::GetGradLoDTensor(
   size_t place_num = places_.size();
   for (size_t j = 0; j < in_var_handles.size(); j += place_num) {
     auto var_name = in_var_handles[j]->name();
-    PADDLE_ENFORCE_EQ(var_name, out_var_handles[j]->name());
+    PADDLE_ENFORCE_EQ(
+        var_name, out_var_handles[j]->name(),
+        platform::errors::InvalidArgument(
+            "The name of input variable should be equal "
+            "to the name of output variable. But got the name of input "
+            "variable is %s and the name of output variable is %s.",
+            var_name, out_var_handles[j]->name()));
     auto var = local_scope->FindVar(var_name);
-    PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name);
+    PADDLE_ENFORCE_NOT_NULL(
+        var, platform::errors::NotFound(
+                 "The variable '%s' is not found in local scope.", var_name));
     auto &lod_tensor = var->Get<LoDTensor>();
     PADDLE_ENFORCE_EQ(
         platform::is_same_place(lod_tensor.place(), places_.at(scope_idx)),
-        true, "%s(%d) is not in the right place.", var_name, scope_idx);
+        true, platform::errors::InvalidArgument(
+                  "The variable '%s' at scope %d is not in the right place.",
+                  var_name, scope_idx));
     grad_tensor->emplace_back(std::make_pair(var_name, &lod_tensor));
   }
 }
@@ -204,16 +247,26 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel(
   size_t size_of_dtype = 0;
   for (size_t i = 0; i < grad_tensor.size(); ++i) {
     // Get dtype
-    auto ele_type = grad_tensor.at(i).second->type();
+    auto ele_dtype = grad_tensor.at(i).second->type();
     if (i == 0) {
-      *dtype = ele_type;
-      size_of_dtype = framework::SizeOfType(ele_type);
+      *dtype = ele_dtype;
+      size_of_dtype = framework::SizeOfType(ele_dtype);
     }
-    PADDLE_ENFORCE_EQ(ele_type, *dtype);
+    PADDLE_ENFORCE_EQ(
+        ele_dtype, *dtype,
+        platform::errors::InvalidArgument(
+            "The DataType of grad tensors of fused_all_reduce_op_handle "
+            "must be consistent. The current dtype is %s, but the "
+            "previous dtype is %s.",
+            DataTypeToString(ele_dtype), DataTypeToString(*dtype)));

     // Get element number
     int64_t len = grad_tensor.at(i).second->numel();
-    PADDLE_ENFORCE_GT(len, 0);
+    PADDLE_ENFORCE_GT(
+        len, 0, platform::errors::InvalidArgument(
+                    "The size of grad tensors of fused_all_reduce_op_handle "
+                    "must be > 0, but got %d.",
+                    len));
     *numel +=
         platform::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype;
   }
...
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
+
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -32,7 +33,15 @@ void FusedBroadcastOpHandle::RunImpl() {
   WaitInputVarGenerated();

   size_t place_num = places_.size();
-  PADDLE_ENFORCE_EQ(in_var_handles.size() * place_num, out_var_handles.size());
+  PADDLE_ENFORCE_EQ(
+      in_var_handles.size() * place_num, out_var_handles.size(),
+      platform::errors::PreconditionNotMet(
+          "The number of input variable handles multiplied by the number "
+          "of places should be equal to the number of output variable handles, "
+          "but got the number of input variable handles is %d, the "
+          "number of places is %d, and the number of output variable handles "
+          "is %d.",
+          in_var_handles.size(), place_num, out_var_handles.size()));

   for (size_t i = 0; i < in_var_handles.size(); ++i) {
     BroadcastOneVar(
...
@@ -13,8 +13,10 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
+
 #include <memory>
 #include <unordered_map>
+
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/details/broadcast_op_handle_test.h"
 #include "paddle/fluid/framework/details/op_handle_base.h"
@@ -58,7 +60,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
       op_handle_ = new FusedBroadcastOpHandle(
          nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
-      PADDLE_THROW("CUDA is not supported.");
+      PADDLE_THROW(
+          platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
     } else {
 #if defined(PADDLE_WITH_NCCL)
...
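The #else branches above converge on a third pattern: unsupported-build guards now throw platform::errors::PreconditionNotMet rather than a bare string. A minimal sketch of that guard, illustrative and not part of the diff:

#ifdef PADDLE_WITH_CUDA
  // ... CUDA/NCCL code path ...
#else
  PADDLE_THROW(
      platform::errors::PreconditionNotMet("Not compiled with CUDA."));
#endif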
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/details/gather_op_handle.h"
+
 #include "paddle/fluid/framework/details/container_cast.h"
 #include "paddle/fluid/framework/details/variable_visitor.h"
@@ -32,13 +33,20 @@ void GatherOpHandle::RunImpl() {
   PADDLE_ENFORCE_EQ(
       in_var_handles.size(), places_.size(),
-      "The number of output should equal to the number of places.");
+      platform::errors::InvalidArgument(
+          "The number of input variables should be equal "
+          "to the number of places, but got the number of input variables is "
+          "%d and the number of places is %d.",
+          in_var_handles.size(), places_.size()));

   VarHandle *out_var_handle;
   {
     auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
-    PADDLE_ENFORCE_EQ(out_var_handles.size(), 1,
-                      "The number of output should be one.");
+    PADDLE_ENFORCE_EQ(
+        out_var_handles.size(), 1,
+        platform::errors::InvalidArgument(
+            "The number of output variables should be 1, but got %d.",
+            out_var_handles.size()));
     out_var_handle = out_var_handles.front();
   }
@@ -47,10 +55,14 @@ void GatherOpHandle::RunImpl() {
   auto in_0_handle = in_var_handles[0];
   auto pre_in_var =
       var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name());
-  PADDLE_ENFORCE_NOT_NULL(pre_in_var);
+  PADDLE_ENFORCE_NOT_NULL(
+      pre_in_var,
+      platform::errors::NotFound("The variable '%s' is not found in the scope.",
+                                 in_0_handle->name()));

-  PADDLE_ENFORCE(pre_in_var->IsType<framework::SelectedRows>(),
-                 "Currently, gather_op only can gather SelectedRows.");
+  PADDLE_ENFORCE_EQ(pre_in_var->IsType<framework::SelectedRows>(), true,
+                    platform::errors::Unimplemented(
+                        "Currently, gather_op only supports SelectedRows."));

   // Wait input done, this Wait is asynchronous operation
   WaitInputVarGenerated();
@@ -63,7 +75,10 @@ void GatherOpHandle::RunImpl() {
   for (auto *in_handle : in_var_handles) {
     auto *in_var =
         var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name());
-    PADDLE_ENFORCE_NOT_NULL(in_var);
+    PADDLE_ENFORCE_NOT_NULL(
+        in_var,
+        platform::errors::NotFound(
+            "The variable '%s' is not found in the scope.", in_handle->name()));
     VariableVisitor::EnforceShapeAndDTypeEQ(*in_var, *pre_in_var);

     auto &in_sr_value = in_var->Get<framework::SelectedRows>();
@@ -76,15 +91,19 @@ void GatherOpHandle::RunImpl() {
   // NOTE: The Places of all input tensor must be all on CPU or all on GPU.
   platform::Place t_out_p = out_var_handle->place();
   if (platform::is_gpu_place(pre_in_value.place())) {
-    PADDLE_ENFORCE(platform::is_gpu_place(t_out_p),
-                   "Places of input and output must be all on GPU.");
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true,
+                      platform::errors::PreconditionNotMet(
+                          "Places of input and output must be all on GPU."));
   } else {
     t_out_p = platform::CPUPlace();
   }

   auto out_var = var_scopes.at(out_var_handle->scope_idx())
                      ->FindVar(out_var_handle->name());
-  PADDLE_ENFORCE_NOT_NULL(out_var);
+  PADDLE_ENFORCE_NOT_NULL(
+      out_var,
+      platform::errors::NotFound("The variable '%s' is not found in the scope.",
+                                 out_var_handle->name()));
   auto out_value = out_var->GetMutable<framework::SelectedRows>();
   out_value->set_height(pre_in_value.height());
   out_value->set_rows(out_rows);
...
@@ -13,8 +13,10 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/details/gather_op_handle.h"
+
 #include <memory>
 #include <unordered_map>
+
 #include "gtest/gtest.h"
 
 namespace paddle {
@@ -60,7 +62,8 @@ struct TestGatherOpHandle {
         ctxs_.emplace_back(new p::CUDADeviceContext(p));
       }
 #else
-      PADDLE_THROW("CUDA is not support.");
+      PADDLE_THROW(
+          platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
     } else {
       int count = 8;
@@ -141,7 +144,9 @@ struct TestGatherOpHandle {
     for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
          ++input_scope_idx) {
       auto in_var = param_scopes_.at(input_scope_idx)->FindVar("input");
-      PADDLE_ENFORCE_NOT_NULL(in_var);
+      PADDLE_ENFORCE_NOT_NULL(
+          in_var, platform::errors::NotFound(
+                      "The variable '%s' is not found in the scope.", "input"));
       auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
       auto value = in_selected_rows->mutable_value();
       value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
@@ -155,7 +160,9 @@ struct TestGatherOpHandle {
     }
     auto out_var = param_scopes_.at(output_scope_idx)->FindVar("out");
-    PADDLE_ENFORCE_NOT_NULL(out_var);
+    PADDLE_ENFORCE_NOT_NULL(
+        out_var, platform::errors::NotFound(
+                     "The variable '%s' is not found in the scope.", "out"));
     auto out_selected_rows = out_var->GetMutable<f::SelectedRows>();

     auto in_var = param_scopes_.at(output_scope_idx)->FindVar("input");
@@ -173,9 +180,19 @@ struct TestGatherOpHandle {
     auto& out_select_rows = out_var->Get<f::SelectedRows>();
     auto rt = out_select_rows.value();

-    PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal.");
+    PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
+                      platform::errors::InvalidArgument(
+                          "The height of SelectedRows is not equal to "
+                          "the expected, expect %d, but got %d.",
+                          height, out_select_rows.height()));
+
     for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
-      PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]);
+      PADDLE_ENFORCE_EQ(
+          out_select_rows.rows()[k], rows[k % rows.size()],
+          platform::errors::InvalidArgument(
+              "The item at position %d of rows of SelectedRows is not equal to "
+              "the expected, expect %d, but got %d.",
+              k, rows[k % rows.size()], out_select_rows.rows()[k]));
     }

     f::Tensor result_tensor;
@@ -207,6 +224,7 @@ TEST(GatherTester, TestGPUGatherTestSelectedRows) {
   test_op.TestGatherSelectedRows(input_scope_idx);
 }
 #endif
+
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
@@ -46,14 +46,17 @@ class NCCLOpHandleBase : public OpHandleBase {
   }
   virtual ~NCCLOpHandleBase() {
     for (auto& ev : inter_events_) {
-      PADDLE_ENFORCE(cudaEventDestroy(ev.second));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second));
     }
     for (auto& ev : exter_events_) {
-      PADDLE_ENFORCE(cudaEventDestroy(ev.second));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second));
     }
   }
   void SetRunEnv(int run_order, bool use_hierarchical_allreduce) {
-    PADDLE_ENFORCE(run_order >= 0, "run_order must >= 0");
+    PADDLE_ENFORCE_GE(
+        run_order, 0,
+        platform::errors::InvalidArgument(
+            "The argument run_order must be >= 0, but got %d.", run_order));
     run_order_ = run_order;
     use_hierarchical_allreduce_ = use_hierarchical_allreduce;
@@ -74,8 +77,11 @@ class NCCLOpHandleBase : public OpHandleBase {
       return;
     }
-    PADDLE_ENFORCE(places_.size() == 1,
-                   "HierarchicalAllReduce run one proc with one card mode.");
+    PADDLE_ENFORCE_EQ(places_.size(), 1,
+                      platform::errors::InvalidArgument(
+                          "HierarchicalAllReduce can only run "
+                          "one process with one card mode, but got %d cards.",
+                          places_.size()));

     for (auto& p : places_) {
       auto ctxs = nccl_ctxs_->GetHierarchicalInterCtx(run_order);
@@ -88,11 +94,11 @@ class NCCLOpHandleBase : public OpHandleBase {
         continue;
       }
-      PADDLE_ENFORCE(cudaSetDevice(dev_id));
-      PADDLE_ENFORCE(cudaEventCreateWithFlags(&inter_events_[dev_id],
-                                              cudaEventDisableTiming));
-      PADDLE_ENFORCE(cudaEventCreateWithFlags(&exter_events_[dev_id],
-                                              cudaEventDisableTiming));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags(
+          &inter_events_[dev_id], cudaEventDisableTiming));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags(
+          &exter_events_[dev_id], cudaEventDisableTiming));
       VLOG(10) << "Create events on dev_id:" << dev_id
                << ", inter_event:" << &inter_events_[dev_id]
                << ", exter_event:" << &exter_events_[dev_id];
@@ -102,7 +108,10 @@ class NCCLOpHandleBase : public OpHandleBase {
   void FlatNCCLAllReduce(platform::Place place, const void* sendbuff,
                          void* recvbuff, size_t count, ncclDataType_t datatype,
                          ncclRedOp_t op) {
-    PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
+    PADDLE_ENFORCE_GE(
+        run_order_, 0,
+        platform::errors::InvalidArgument(
+            "The argument run_order_ must be >= 0, but got %d.", run_order_));
     auto flat_nccl_ctxs = nccl_ctxs_->GetFlatCtx(run_order_);
     int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
     auto& nccl_ctx = flat_nccl_ctxs->at(dev_id);
@@ -113,14 +122,17 @@ class NCCLOpHandleBase : public OpHandleBase {
             << ", dev_id:" << dev_id << ", dtype:" << datatype
             << ", place:" << place;
-    PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
         sendbuff, recvbuff, count, datatype, op, comm, stream));
   }
   void NCCLAllReduce(platform::Place place, const void* sendbuff,
                      void* recvbuff, size_t count, ncclDataType_t datatype,
                      ncclRedOp_t op) {
-    PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
+    PADDLE_ENFORCE_GE(
+        run_order_, 0,
+        platform::errors::InvalidArgument(
+            "The argument run_order_ must be >= 0, but got %d.", run_order_));
     if (!use_hierarchical_allreduce_) {
       FlatNCCLAllReduce(place, sendbuff, recvbuff, count, datatype, op);
       return;
@@ -132,7 +144,10 @@ class NCCLOpHandleBase : public OpHandleBase {
   void HierarchicalAllReduce(platform::Place place, const void* sendbuff,
                              void* recvbuff, size_t count,
                              ncclDataType_t datatype, ncclRedOp_t op) {
-    PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
+    PADDLE_ENFORCE_GE(
+        run_order_, 0,
+        platform::errors::InvalidArgument(
+            "The argument run_order_ must be >= 0, but got %d.", run_order_));
     InterReduce(place, sendbuff, recvbuff, count, datatype, op);
     // When a trainer is not in exter allreduce ring
     // they need not to call this.
@@ -157,14 +172,13 @@ class NCCLOpHandleBase : public OpHandleBase {
             << ", dtype:" << datatype << ", place:" << place
             << ", stream:" << stream;
-    PADDLE_ENFORCE(platform::dynload::ncclReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce(
         sendbuff, recvbuff, count, datatype, ncclSum, 0, comm, stream));

     cudaEventRecord(inter_events_.at(dev_id), stream);

     if (FLAGS_sync_nccl_allreduce) {
-      PADDLE_ENFORCE(cudaStreamSynchronize(stream),
-                     "sync HierarchicalAllReduce inter stream error");
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
     }
   }
@@ -172,7 +186,9 @@ class NCCLOpHandleBase : public OpHandleBase {
                    void* recvbuff, size_t count, ncclDataType_t datatype,
                    ncclRedOp_t op) {
     auto nccl_ctxs = nccl_ctxs_->GetHierarchicalExterCtx(run_order_);
-    PADDLE_ENFORCE(nccl_ctxs_, "can't get exter %d nccl_ctxs", run_order_);
+    PADDLE_ENFORCE_NOT_NULL(
+        nccl_ctxs_, platform::errors::NotFound(
+                        "Can't get exter %d nccl contexts.", run_order_));
     int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
     auto& nccl_ctx = nccl_ctxs->at(dev_id);
     auto stream = nccl_ctx.stream();
@@ -185,14 +201,13 @@ class NCCLOpHandleBase : public OpHandleBase {
     cudaStreamWaitEvent(stream, inter_events_.at(dev_id), 0);

-    PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
         sendbuff, recvbuff, count, datatype, op, comm, stream));

     cudaEventRecord(exter_events_.at(dev_id), stream);

     if (FLAGS_sync_nccl_allreduce) {
-      PADDLE_ENFORCE(cudaStreamSynchronize(stream),
-                     "sync HierarchicalAllReduce exter stream error");
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
     }
   }
@@ -210,8 +225,8 @@ class NCCLOpHandleBase : public OpHandleBase {
             << ", stream:" << stream;

     cudaStreamWaitEvent(stream, exter_events_.at(dev_id), 0);
-    PADDLE_ENFORCE(platform::dynload::ncclBcast(sendbuff, count, datatype, 0,
-                                                comm, stream));
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast(
+        sendbuff, count, datatype, 0, comm, stream));
   }

  protected:
...
@@ -47,8 +47,8 @@ void OpHandleBase::InitCUDA() {
 #ifdef PADDLE_WITH_CUDA
   for (auto &p : dev_ctxes_) {
     int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device;
-    PADDLE_ENFORCE(cudaSetDevice(dev_id));
-    PADDLE_ENFORCE(
+    PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
         cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming));
   }
   if (IsMultiDeviceTransfer() && dev_ctxes_.size() > 0) {
@@ -62,17 +62,22 @@ void OpHandleBase::InitCUDA() {
       }
     }
   } else {
-    PADDLE_ENFORCE_EQ(dev_ctxes_.size(), 1UL,
-                      "%s should have only one dev_ctx.", Name());
+    PADDLE_ENFORCE_EQ(
+        dev_ctxes_.size(), 1UL,
+        platform::errors::InvalidArgument(
+            "Operator %s should have only one dev_ctx, but got %d.", Name(),
+            dev_ctxes_.size()));
     auto &place = dev_ctxes_.begin()->first;
     int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
     for (auto &out_var : outputs_) {
       auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
       if (out_var_handle) {
-        PADDLE_ENFORCE(platform::is_same_place(place, out_var_handle->place()),
-                       "The place of output(%s) is not consistent with the "
-                       "place of current op(%s).",
-                       out_var_handle->Name(), Name());
+        PADDLE_ENFORCE_EQ(
+            platform::is_same_place(place, out_var_handle->place()), true,
+            platform::errors::InvalidArgument(
+                "The place of output(%s) is not consistent with the "
+                "place of current op(%s).",
+                out_var_handle->Name(), Name()));
         out_var_handle->SetGenerateEvent(events_.at(dev_id));
       }
     }
@@ -86,7 +91,10 @@ void OpHandleBase::Run(bool use_cuda) {
     InitCUDA();
   }
 #else
-  PADDLE_ENFORCE(!use_cuda);
+  PADDLE_ENFORCE_EQ(use_cuda, false,
+                    platform::errors::InvalidArgument(
+                        "Argument use_cuda should be false when Paddle is not "
+                        "compiled with CUDA."));
 #endif

   // skip running current op, used with inplace_addto_op_pass
@@ -100,17 +108,20 @@ void OpHandleBase::Run(bool use_cuda) {
 void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) {
 #ifdef PADDLE_WITH_CUDA
-  PADDLE_ENFORCE_NOT_NULL(waited_ctx);
+  PADDLE_ENFORCE_NOT_NULL(waited_ctx, platform::errors::InvalidArgument(
+                                          "Argument waited_ctx is NULL."));
   if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) {
     for (auto &dev_ctx : dev_ctxes_) {
-      PADDLE_ENFORCE_NOT_NULL(dev_ctx.second);
+      PADDLE_ENFORCE_NOT_NULL(
+          dev_ctx.second,
+          platform::errors::InvalidArgument("The device context is NULL."));
       dev_ctx.second->Wait();
     }
   } else {
     auto stream =
         static_cast<platform::CUDADeviceContext *>(waited_ctx)->stream();
     for (auto &ev : events_) {
-      PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0));
+      PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamWaitEvent(stream, ev.second, 0));
     }
   }
 #else
@@ -145,10 +156,11 @@ void OpHandleBase::WaitInputVarGenerated() {
         auto stream =
             static_cast<platform::CUDADeviceContext *>(dev_ctxes_.at(place))
                 ->stream();
-        PADDLE_ENFORCE(
+        PADDLE_ENFORCE_CUDA_SUCCESS(
             cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0));
 #else
-        PADDLE_THROW("Doesn't compile the GPU.");
+        PADDLE_THROW(
+            platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
       }
       // There are nothing to do when the place is CPUPlace.
@@ -169,10 +181,11 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) {
         auto stream = static_cast<platform::CUDADeviceContext *>(
                           dev_ctxes_.at(in_var_handle->place()))
                           ->stream();
-        PADDLE_ENFORCE(
+        PADDLE_ENFORCE_CUDA_SUCCESS(
             cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0));
 #else
-        PADDLE_THROW("Doesn't compile the GPU.");
+        PADDLE_THROW(
+            platform::errors::PreconditionNotMet("Not compiled with CUDA."));
 #endif
       }
       // There are nothing to do when the place is CPUPlace.
@@ -242,7 +255,9 @@ void OpHandleBase::SetLocalExecScopes(
     auto scopes = GetLocalScopes();
     for (auto *scope : scopes) {
       auto iter = scope_map.find(scope);
-      PADDLE_ENFORCE(iter != scope_map.end(), "Local scope not found");
+      PADDLE_ENFORCE_NE(
+          iter, scope_map.end(),
+          platform::errors::NotFound("Local scope not found in scope map."));
       local_exec_scopes_.emplace_back(iter->second);
     }
   }
...
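Null checks get the same treatment throughout: PADDLE_ENFORCE_NOT_NULL now always carries a typed payload, typically platform::errors::NotFound naming the missing object. A minimal sketch, with scope and var_name as stand-ins for the handles used in the hunks above:

auto *var = scope->FindVar(var_name);
// The second argument supplies the error thrown when var == nullptr;
// the old one-argument form printed only the failing expression.
PADDLE_ENFORCE_NOT_NULL(
    var, platform::errors::NotFound(
             "The variable '%s' is not found in the scope.", var_name));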
@@ -21,6 +21,7 @@ limitations under the License. */
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
+
 #include "paddle/fluid/framework/grad_op_desc_maker.h"
 #include "paddle/fluid/framework/inplace_op_inference.h"
 #include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
@@ -186,19 +187,20 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
   void operator()(const char* op_type, OpInfo* info) const {
     PADDLE_ENFORCE_EQ(info->proto_, nullptr,
                       platform::errors::AlreadyExists(
-                          "OpProto of %s has been registered", op_type));
+                          "OpProto of %s has been registered.", op_type));
     PADDLE_ENFORCE_EQ(info->checker_, nullptr,
                       platform::errors::AlreadyExists(
-                          "OpAttrChecker of %s has been registered", op_type));
+                          "OpAttrChecker of %s has been registered.", op_type));
     info->proto_ = new proto::OpProto;
     info->checker_ = new OpAttrChecker();
     T maker;
     maker(info->proto_, info->checker_);
     info->proto_->set_type(op_type);
-    PADDLE_ENFORCE(
-        info->proto_->IsInitialized(),
-        "Fail to initialize %s's OpProto, because %s is not initialized",
-        op_type, info->proto_->InitializationErrorString());
+    PADDLE_ENFORCE_EQ(
+        info->proto_->IsInitialized(), true,
+        platform::errors::PreconditionNotMet(
+            "Fail to initialize %s's OpProto, because %s is not initialized.",
+            op_type, info->proto_->InitializationErrorString()));
   }
 };
...
@@ -16,6 +16,7 @@
 #include <algorithm>
 #include <map>
 #include <vector>
+
 #include "paddle/fluid/framework/details/reduce_and_gather.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/selected_rows.h"
@@ -32,9 +33,13 @@ struct ReduceLoDTensor {
   template <typename T>
   void apply() const {
-    PADDLE_ENFORCE(!src_tensors_.empty());
+    PADDLE_ENFORCE_NE(src_tensors_.empty(), true,
+                      platform::errors::InvalidArgument(
+                          "The number of tensors to be reduced is 0."));
     auto &t0 = *src_tensors_[0];
-    PADDLE_ENFORCE_NE(t0.numel(), 0);
+    PADDLE_ENFORCE_NE(t0.numel(), 0,
+                      platform::errors::InvalidArgument(
+                          "The size of first tensor to be reduced is 0."));

     dst_tensor_.Resize(t0.dims());
     T *dst = dst_tensor_.mutable_data<T>(platform::CPUPlace());
@@ -45,8 +50,19 @@ struct ReduceLoDTensor {
         continue;
       }
-      PADDLE_ENFORCE_EQ(t.dims(), t0.dims());
-      PADDLE_ENFORCE_EQ(t.type(), t0.type());
+      PADDLE_ENFORCE_EQ(t.dims(), t0.dims(),
+                        platform::errors::InvalidArgument(
+                            "The shape of tensors to be reduced must be "
+                            "consistent. The shape of current tensor is %s, "
+                            "but the shape of the first tensor is %s.",
+                            t.dims(), t0.dims()));
+      PADDLE_ENFORCE_EQ(t.type(), t0.type(),
+                        platform::errors::InvalidArgument(
+                            "The type of tensors to be reduced must be "
+                            "consistent. The type of current tensor is %s, "
+                            "but the type of the first tensor is %s.",
+                            t.type(), t0.type()));
       std::transform(t.data<T>(), t.data<T>() + t.numel(), dst, dst,
                      [](T a, T b) -> T { return a + b; });
     }
@@ -88,7 +104,9 @@ struct GatherLocalSelectedRowsFunctor {
         in_places_(in_places),
         out_place_(out_place),
         dst_selected_rows_(dst_selected_rows) {
-    PADDLE_ENFORCE_EQ(src_selected_rows.empty(), false);
+    PADDLE_ENFORCE_NE(src_selected_rows.empty(), true,
+                      platform::errors::InvalidArgument(
+                          "The number of selected_rows to be gathered is 0."));
     std::vector<int64_t> out_rows;
...
...@@ -13,7 +13,9 @@ ...@@ -13,7 +13,9 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/reduce_op_handle.h"
#include <memory> #include <memory>
#include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/framework/details/variable_visitor.h"
...@@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows( ...@@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows(
merged_dev_ctx->Wait(); merged_dev_ctx->Wait();
scope->EraseVars(std::vector<std::string>{gathered_var_name}); scope->EraseVars(std::vector<std::string>{gathered_var_name});
PADDLE_ENFORCE(client->Gather(vars, &remote, *merged_dev_ctx, scope)); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(remote.size() == vars.size()); client->Gather(vars, &remote, *merged_dev_ctx, scope), true,
platform::errors::PreconditionNotMet("Gather SelectedRows failed."));
PADDLE_ENFORCE_EQ(remote.size(), vars.size(),
platform::errors::PreconditionNotMet(
"The number of remotes should be equal to the number "
"of variables to be gathered, but got the number of "
"remotes is %d and the number of variables is %d.",
remote.size(), vars.size()));
// 4. merged local selected rows. // 4. merged local selected rows.
std::vector<const SelectedRows *> all; std::vector<const SelectedRows *> all;
...@@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() { ...@@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
in_var_handles.size(), places_.size(), in_var_handles.size(), places_.size(),
"The number of output should equal to the number of places."); platform::errors::InvalidArgument(
"The number of inputs should equal to the number of places, but got "
"the number of inputs is %d and the number of places is %d.",
in_var_handles.size(), places_.size()));
VarHandle *out_var_handle; VarHandle *out_var_handle;
{ {
auto out_var_handles = DynamicCast<VarHandle>(outputs_); auto out_var_handles = DynamicCast<VarHandle>(outputs_);
PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL, PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL,
"The number of output should be one."); platform::errors::InvalidArgument(
"The number of output should be one, but got %d.",
out_var_handles.size()));
out_var_handle = out_var_handles.front(); out_var_handle = out_var_handles.front();
} }
...@@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() { ...@@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() {
auto pre_in_var = auto pre_in_var =
var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name()); var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name());
PADDLE_ENFORCE_NOT_NULL(pre_in_var);
PADDLE_ENFORCE_NOT_NULL(pre_in_var, platform::errors::NotFound(
"Variable %s is not found in scope.",
in_0_handle->name()));
// NOTE: The Places of all input tensor must be all on CPU or all on GPU. // NOTE: The Places of all input tensor must be all on CPU or all on GPU.
std::vector<platform::Place> in_places; // used to get dev_ctx std::vector<platform::Place> in_places; // used to get dev_ctx
...@@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() { ...@@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() {
in_places.emplace_back(in_handle->place()); in_places.emplace_back(in_handle->place());
auto in_var = auto in_var =
var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name()); var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name());
PADDLE_ENFORCE_NOT_NULL(in_var);
PADDLE_ENFORCE_NOT_NULL(
in_var, platform::errors::NotFound("Variable %s is not found in scope.",
in_handle->name()));
VariableVisitor::EnforceShapeAndDTypeEQ(*pre_in_var, *in_var); VariableVisitor::EnforceShapeAndDTypeEQ(*pre_in_var, *in_var);
} }
auto out_var = var_scopes.at(out_var_handle->scope_idx()) auto out_var = var_scopes.at(out_var_handle->scope_idx())
->FindVar(out_var_handle->name()); ->FindVar(out_var_handle->name());
PADDLE_ENFORCE_NOT_NULL(out_var);
PADDLE_ENFORCE_NOT_NULL(
out_var, platform::errors::NotFound("Variable %s is not found in scope.",
out_var_handle->name()));
// NOTE: The tensors' Place of input and output must be all on GPU or all on // NOTE: The tensors' Place of input and output must be all on GPU or all on
// CPU. // CPU.
auto in_p = VariableVisitor::GetMutableTensor(pre_in_var).place(); auto in_p = VariableVisitor::GetMutableTensor(pre_in_var).place();
platform::Place t_out_p; platform::Place t_out_p;
if (platform::is_gpu_place(in_p)) { if (platform::is_gpu_place(in_p)) {
PADDLE_ENFORCE(platform::is_gpu_place(out_var_handle->place()), PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_var_handle->place()), true,
"Places of input and output must be all on GPU."); platform::errors::PreconditionNotMet(
"Places of input and output must be all on GPU."));
t_out_p = out_var_handle->place(); t_out_p = out_var_handle->place();
} else { } else {
t_out_p = platform::CPUPlace(); t_out_p = platform::CPUPlace();
...@@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() { ...@@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() {
in_selected_rows, in_places, dev_ctxes_, out_var_handle, t_out_p, in_selected_rows, in_places, dev_ctxes_, out_var_handle, t_out_p,
out_var->GetMutable<framework::SelectedRows>()); out_var->GetMutable<framework::SelectedRows>());
} else { } else {
PADDLE_THROW("only support double or float when gather SelectedRows"); PADDLE_THROW(platform::errors::Unimplemented(
"Only support double or float when gather SelectedRows, but got "
"%s.",
framework::DataTypeToString(in_selected_rows[0]->value().type())));
} }
#endif #endif
}); });
...@@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() { ...@@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() {
size_t numel = static_cast<size_t>(lod_tensor.numel()); size_t numel = static_cast<size_t>(lod_tensor.numel());
all_reduce_calls.emplace_back( all_reduce_calls.emplace_back(
[buffer, recvbuffer, type, numel, root_id, &nccl_ctx] { [buffer, recvbuffer, type, numel, root_id, &nccl_ctx] {
PADDLE_ENFORCE(platform::dynload::ncclReduce( PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce(
buffer, recvbuffer, numel, static_cast<ncclDataType_t>(type), buffer, recvbuffer, numel, static_cast<ncclDataType_t>(type),
ncclSum, root_id, nccl_ctx.comm_, nccl_ctx.stream())); ncclSum, root_id, nccl_ctx.comm_, nccl_ctx.stream()));
}); });
...@@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() { ...@@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() {
} }
}); });
#else #else
PADDLE_THROW("CUDA is not enabled."); PADDLE_THROW(
platform::errors::PreconditionNotMet("Not compiled with CUDA."));
#endif #endif
} else { } else {
PADDLE_THROW("Place should be CPUPlace or CUDAPlace."); PADDLE_THROW(platform::errors::InvalidArgument(
"The place of tensor should be CPUPlace or CUDAPlace, but got %s.",
lod_tensors[0]->place()));
} }
} }
} }
......
...@@ -13,7 +13,9 @@ ...@@ -13,7 +13,9 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/reduce_op_handle.h"
#include <unordered_map> #include <unordered_map>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
...@@ -69,7 +71,8 @@ struct TestReduceOpHandle { ...@@ -69,7 +71,8 @@ struct TestReduceOpHandle {
} }
nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_)); nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_));
#else #else
PADDLE_THROW("CUDA is not support."); PADDLE_THROW(
platform::errors::PreconditionNotMet("Not compiled with NCLL."));
#endif #endif
} else { } else {
int count = 8; int count = 8;
...@@ -103,7 +106,8 @@ struct TestReduceOpHandle { ...@@ -103,7 +106,8 @@ struct TestReduceOpHandle {
op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_, op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
gpu_list_, nccl_ctxs_.get())); gpu_list_, nccl_ctxs_.get()));
#else #else
PADDLE_THROW("CUDA is not support."); PADDLE_THROW(
platform::errors::PreconditionNotMet("Not compiled with NCLL."));
#endif #endif
} else { } else {
#if defined(PADDLE_WITH_NCCL) #if defined(PADDLE_WITH_NCCL)
...@@ -164,7 +168,10 @@ struct TestReduceOpHandle { ...@@ -164,7 +168,10 @@ struct TestReduceOpHandle {
for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
++input_scope_idx) { ++input_scope_idx) {
auto in_var = param_scopes_[input_scope_idx]->FindVar("input"); auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
PADDLE_ENFORCE_NOT_NULL(in_var);
PADDLE_ENFORCE_NOT_NULL(
in_var, platform::errors::NotFound(
"Variable %s is not found in scope.", "input"));
auto in_selected_rows = in_var->GetMutable<f::SelectedRows>(); auto in_selected_rows = in_var->GetMutable<f::SelectedRows>();
auto value = in_selected_rows->mutable_value(); auto value = in_selected_rows->mutable_value();
value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]); value->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
...@@ -178,7 +185,9 @@ struct TestReduceOpHandle { ...@@ -178,7 +185,9 @@ struct TestReduceOpHandle {
} }
auto out_var = param_scopes_[output_scope_idx]->FindVar("out"); auto out_var = param_scopes_[output_scope_idx]->FindVar("out");
PADDLE_ENFORCE_NOT_NULL(out_var); PADDLE_ENFORCE_NOT_NULL(out_var,
platform::errors::NotFound(
"Variable %s is not found in scope.", "out"));
auto out_selected_rows = out_var->GetMutable<f::SelectedRows>(); auto out_selected_rows = out_var->GetMutable<f::SelectedRows>();
auto in_var = param_scopes_[output_scope_idx]->FindVar("input"); auto in_var = param_scopes_[output_scope_idx]->FindVar("input");
...@@ -196,9 +205,18 @@ struct TestReduceOpHandle { ...@@ -196,9 +205,18 @@ struct TestReduceOpHandle {
auto &out_select_rows = out_var->Get<f::SelectedRows>(); auto &out_select_rows = out_var->Get<f::SelectedRows>();
auto rt = out_select_rows.value(); auto rt = out_select_rows.value();
PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal."); PADDLE_ENFORCE_EQ(out_select_rows.height(), height,
platform::errors::InvalidArgument(
"The height of SelectedRows is not equal to "
"the expected, expect %d, but got %d.",
height, out_select_rows.height()));
for (size_t k = 0; k < out_select_rows.rows().size(); ++k) { for (size_t k = 0; k < out_select_rows.rows().size(); ++k) {
PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]); PADDLE_ENFORCE_EQ(
out_select_rows.rows()[k], rows[k % rows.size()],
platform::errors::InvalidArgument(
"The item at position %d of rows of SelectedRows is not equal to "
"the expected, expect %d, but got %d.",
k, rows[k % rows.size()], out_select_rows.rows()[k]));
} }
f::Tensor result_tensor; f::Tensor result_tensor;
...@@ -208,7 +226,7 @@ struct TestReduceOpHandle { ...@@ -208,7 +226,7 @@ struct TestReduceOpHandle {
for (int64_t j = 0; j < f::product(result_tensor.dims()); ++j) { for (int64_t j = 0; j < f::product(result_tensor.dims()); ++j) {
ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5); ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5);
} }
} }
void TestReduceLodTensors(size_t output_scope_idx) { void TestReduceLodTensors(size_t output_scope_idx) {
std::vector<float> send_vector(static_cast<size_t>(f::product(kDims))); std::vector<float> send_vector(static_cast<size_t>(f::product(kDims)));
...@@ -220,7 +238,9 @@ struct TestReduceOpHandle { ...@@ -220,7 +238,9 @@ struct TestReduceOpHandle {
for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size();
++input_scope_idx) { ++input_scope_idx) {
auto in_var = param_scopes_[input_scope_idx]->FindVar("input"); auto in_var = param_scopes_[input_scope_idx]->FindVar("input");
PADDLE_ENFORCE_NOT_NULL(in_var); PADDLE_ENFORCE_NOT_NULL(
in_var, platform::errors::NotFound(
"Variable %s is not found in scope.", "input"));
auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>(); auto in_lod_tensor = in_var->GetMutable<f::LoDTensor>();
in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]); in_lod_tensor->mutable_data<float>(kDims, gpu_list_[input_scope_idx]);
in_lod_tensor->set_lod(lod); in_lod_tensor->set_lod(lod);
...@@ -230,7 +250,9 @@ struct TestReduceOpHandle { ...@@ -230,7 +250,9 @@ struct TestReduceOpHandle {
} }
auto out_var = param_scopes_[output_scope_idx]->FindVar("out"); auto out_var = param_scopes_[output_scope_idx]->FindVar("out");
PADDLE_ENFORCE_NOT_NULL(out_var); PADDLE_ENFORCE_NOT_NULL(out_var,
platform::errors::NotFound(
"Variable %s is not found in scope.", "out"));
auto out_lodtensor = out_var->GetMutable<f::LoDTensor>(); auto out_lodtensor = out_var->GetMutable<f::LoDTensor>();
auto in_var = param_scopes_[output_scope_idx]->FindVar("input"); auto in_var = param_scopes_[output_scope_idx]->FindVar("input");
...@@ -254,7 +276,7 @@ struct TestReduceOpHandle { ...@@ -254,7 +276,7 @@ struct TestReduceOpHandle {
ASSERT_NEAR(ct[j], send_vector[j] * gpu_list_.size(), 1e-5); ASSERT_NEAR(ct[j], send_vector[j] * gpu_list_.size(), 1e-5);
} }
} }
}; };
TEST(ReduceTester, TestCPUReduceTestSelectedRows) { TEST(ReduceTester, TestCPUReduceTestSelectedRows) {
TestReduceOpHandle test_op; TestReduceOpHandle test_op;
......
...@@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() { ...@@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() {
auto *out_var = exec_scope_->FindVar(out_var_names_[i]); auto *out_var = exec_scope_->FindVar(out_var_names_[i]);
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
in_var, platform::errors::NotFound( in_var, platform::errors::NotFound(
"The input variable(%s)to be inplaced should not be NULL.", "The variable(%s) to be inplaced is not found in scope.",
in_var_infos_[i]->Name())); in_var_infos_[i]->Name()));
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
out_var, out_var, platform::errors::NotFound(
platform::errors::NotFound( "The variable(%s) to be inplaced is not found in scope.",
"The output variable(%s) to be inplaced should not be NULL.", out_var_names_[i]));
out_var_names_[i]));
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NE(
in_var, out_var, in_var, out_var,
platform::errors::PreconditionNotMet( platform::errors::PreconditionNotMet(
......
...@@ -12,8 +12,10 @@ ...@@ -12,8 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h" #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h"
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
#include "dgc/dgc.h" #include "dgc/dgc.h"
#include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/reduce_and_gather.h"
...@@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle( ...@@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle(
is_encoded_(is_encoded), is_encoded_(is_encoded),
nranks_(nranks) { nranks_(nranks) {
// TODO(gongwb): polish them! // TODO(gongwb): polish them!
PADDLE_ENFORCE_EQ(is_encoded, true); PADDLE_ENFORCE_EQ(is_encoded, true, platform::errors::InvalidArgument(
"The argument is_encoded is false."));
VLOG(1) << "Use dgc allreduce mode" VLOG(1) << "Use dgc allreduce mode"
<< ", nranks:" << nranks_; << ", nranks:" << nranks_;
PADDLE_ENFORCE_GT(local_scopes_.size(), 0); PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
platform::errors::PreconditionNotMet(
"The number of local scope should be > 0, but got %zu.",
local_scopes_.size()));
auto nranks_name = g_dgc_nranks; auto nranks_name = g_dgc_nranks;
for (size_t i = 0; i < local_scopes_.size(); ++i) { for (size_t i = 0; i < local_scopes_.size(); ++i) {
auto *local_scope = local_scopes_[i]; auto *local_scope = local_scopes_[i];
auto nranks_var = local_scope->FindVar(nranks_name); auto nranks_var = local_scope->FindVar(nranks_name);
if (nranks_var == nullptr) {
PADDLE_THROW("not find nranks_var:%s", nranks_name); PADDLE_ENFORCE_NOT_NULL(
} nranks_var, platform::errors::NotFound(
"Variable %s is not found in scope.", nranks_name));
float *dgc_nranks = nranks_var->GetMutable<LoDTensor>()->data<float>(); float *dgc_nranks = nranks_var->GetMutable<LoDTensor>()->data<float>();
*dgc_nranks = nranks; *dgc_nranks = nranks;
...@@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() { ...@@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
auto out_var_handles = DynamicCast<VarHandle>(this->Outputs()); auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
in_var_handles.size(), places_.size(), in_var_handles.size(), places_.size(),
"The NoDummyInputSize should be equal to the number of places."); platform::errors::PreconditionNotMet(
"The number of input variables should be equal to the number of "
"places, but got the number of input variables is %zu and the the "
"number of places is %zu.",
in_var_handles.size(), places_.size()));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
in_var_handles.size(), out_var_handles.size(), in_var_handles.size(), out_var_handles.size(),
"The NoDummyInputSize and NoDummyOutputSize should be equal."); platform::errors::PreconditionNotMet(
"The number of input variables should be equal to the number of "
"output variables, but got the number of input variables is %zu and "
"the the number of output variables is %zu.",
in_var_handles.size(), out_var_handles.size()));
std::vector<const LoDTensor *> ins; std::vector<const LoDTensor *> ins;
std::vector<LoDTensor *> gathers; std::vector<LoDTensor *> gathers;
...@@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() { ...@@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
auto encode_var_name = original_name + g_dgc_encoded; auto encode_var_name = original_name + g_dgc_encoded;
auto *in_var = local_scope->FindVar(encode_var_name); auto *in_var = local_scope->FindVar(encode_var_name);
PADDLE_ENFORCE_NOT_NULL(in_var, "%s should not be null", encode_var_name); PADDLE_ENFORCE_NOT_NULL(
in_var, platform::errors::NotFound("Variable %s is not found in scope.",
encode_var_name));
auto &in = in_var->Get<LoDTensor>(); auto &in = in_var->Get<LoDTensor>();
ins.emplace_back(&in); ins.emplace_back(&in);
auto gather_var_name = original_name + g_dgc_gather; auto gather_var_name = original_name + g_dgc_gather;
auto *gather_var = local_scope->FindVar(gather_var_name); auto *gather_var = local_scope->FindVar(gather_var_name);
PADDLE_ENFORCE_NOT_NULL(gather_var, "%s should not be null", PADDLE_ENFORCE_NOT_NULL(
gather_var_name); gather_var, platform::errors::NotFound(
"Variable %s is not found in scope.", gather_var));
auto *gather = gather_var->GetMutable<LoDTensor>(); auto *gather = gather_var->GetMutable<LoDTensor>();
gathers.emplace_back(gather); gathers.emplace_back(gather);
...@@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() { ...@@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
} }
} }
PADDLE_ENFORCE(platform::is_gpu_place(ins[0]->place())); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(platform::is_gpu_place(outs[0]->place())); platform::is_gpu_place(ins[0]->place()), true,
PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr."); platform::errors::InvalidArgument(
"The place of input variable should be CUDAPlace, but got %s.",
ins[0]->place()));
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(outs[0]->place()), true,
platform::errors::InvalidArgument(
"The place of input variable should be CUDAPlace, but got %s.",
outs[0]->place()));
PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, platform::errors::PreconditionNotMet(
"The nccl contexts are NULL."));
int dtype = -1; int dtype = -1;
size_t in_numel = 0; size_t in_numel = 0;
size_t out_numel = 0; size_t out_numel = 0;
PADDLE_ENFORCE(nranks_ > 1); PADDLE_ENFORCE_GT(
nranks_, 1,
platform::errors::PreconditionNotMet(
"The number of ranks should be > 1, but got %d.", nranks_));
std::vector<std::function<void()>> all_gather_calls; std::vector<std::function<void()>> all_gather_calls;
std::vector<std::function<void()>> sparse_reduce_calls; std::vector<std::function<void()>> sparse_reduce_calls;
...@@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() { ...@@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype; dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype;
in_numel = (in_numel == 0) ? static_cast<size_t>(in.numel()) : in_numel; in_numel = (in_numel == 0) ? static_cast<size_t>(in.numel()) : in_numel;
PADDLE_ENFORCE(in_numel % 2 == 0); PADDLE_ENFORCE_EQ(in_numel % 2, 0,
PADDLE_ENFORCE(in_numel / 2 == static_cast<size_t>(k)); platform::errors::InvalidArgument(
"The number of elements of input variable should be "
"even, but got %zu.",
in_numel));
PADDLE_ENFORCE_EQ(in_numel / 2, static_cast<size_t>(k),
platform::errors::InvalidArgument(
"The number of elements of input variable should be "
"even, but got %zu.",
in_numel));
out_numel = (out_numel == 0) ? static_cast<size_t>(out.numel()) : out_numel; out_numel = (out_numel == 0) ? static_cast<size_t>(out.numel()) : out_numel;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
...@@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() { ...@@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
PADDLE_ENFORCE_EQ(paddle::communication::dgc::sparseReduce( PADDLE_ENFORCE_EQ(paddle::communication::dgc::sparseReduce(
gather_buff, k, out_tensor_buf, gather_buff, k, out_tensor_buf,
static_cast<int>(out_numel), nranks_, stream), static_cast<int>(out_numel), nranks_, stream),
true); true, platform::errors::Unavailable(
"Calling sparseReduce() failed."));
}); });
} }
...@@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc( ...@@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc(
int SparseAllReduceOpHandle::GetKValue(const std::string &grad_name) { int SparseAllReduceOpHandle::GetKValue(const std::string &grad_name) {
auto original_name = paddle::framework::GradOriginalVarName(grad_name); auto original_name = paddle::framework::GradOriginalVarName(grad_name);
auto var_name = original_name + g_dgc_k; auto var_name = original_name + g_dgc_k;
PADDLE_ENFORCE(local_scopes_.size() > 0); PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
platform::errors::PreconditionNotMet(
"The number of local scope should be > 0, but got %zu.",
local_scopes_.size()));
auto *scope = local_exec_scopes_[0]; auto *scope = local_exec_scopes_[0];
auto var = scope->FindVar(var_name); auto var = scope->FindVar(var_name);
PADDLE_ENFORCE_NOT_NULL(var); PADDLE_ENFORCE_NOT_NULL(
var, platform::errors::NotFound("Variable %s is not found in scope.",
var_name));
auto tensor = var->Get<LoDTensor>().data<float>(); auto tensor = var->Get<LoDTensor>().data<float>();
return *tensor; return *tensor;
} }
...@@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() { ...@@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() {
} }
auto counter_name = g_dgc_counter_name; auto counter_name = g_dgc_counter_name;
auto step_name = g_dgc_rampup_begin_step; auto step_name = g_dgc_rampup_begin_step;
PADDLE_ENFORCE(local_scopes_.size() > 0);
PADDLE_ENFORCE_GT(local_scopes_.size(), 0,
platform::errors::PreconditionNotMet(
"The number of local scope should be > 0, but got %zu.",
local_scopes_.size()));
auto *local_scope = local_exec_scopes_[0]; auto *local_scope = local_exec_scopes_[0];
auto count_var = local_scope->FindVar(counter_name); auto count_var = local_scope->FindVar(counter_name);
auto step_var = local_scope->FindVar(step_name); auto step_var = local_scope->FindVar(step_name);
if (count_var == nullptr || step_var == nullptr) {
PADDLE_THROW("not find count_var:%s or step_var:%s", counter_name, PADDLE_ENFORCE_NOT_NULL(
step_var); count_var, platform::errors::NotFound(
} "Variable %s is not found in scope.", counter_name));
PADDLE_ENFORCE_NOT_NULL(
step_var, platform::errors::NotFound("Variable %s is not found in scope.",
step_name));
float count = *count_var->Get<LoDTensor>().data<float>(); float count = *count_var->Get<LoDTensor>().data<float>();
float step = *step_var->Get<LoDTensor>().data<float>(); float step = *step_var->Get<LoDTensor>().data<float>();
......
...@@ -127,11 +127,10 @@ void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) { ...@@ -127,11 +127,10 @@ void *Alloc<platform::XPUPlace>(const platform::XPUPlace &place, size_t size) {
"Baidu Kunlun Card is properly installed.", "Baidu Kunlun Card is properly installed.",
ret)); ret));
ret = xpu_malloc(reinterpret_cast<void **>(&p), size); ret = xpu_malloc(reinterpret_cast<void **>(&p), size);
PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, PADDLE_ENFORCE_EQ(
platform::errors::External( ret, XPU_SUCCESS,
"XPU API return wrong value[%d], please check whether " platform::errors::External(
"Baidu Kunlun Card is properly installed.", "XPU API return wrong value[%d], no enough memory", ret));
ret));
if (FLAGS_init_allocated_mem) { if (FLAGS_init_allocated_mem) {
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(platform::errors::Unimplemented(
"xpu memory FLAGS_init_allocated_mem is not implemented.")); "xpu memory FLAGS_init_allocated_mem is not implemented."));
......
...@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> { ...@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
} }
}; };
// log Grad: dx = dout / x
// log Grad Grad: ddout = ddx / x; dx = -(dout / x) * (ddx / x)
template <typename T>
class LogDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("log_grad_grad");
op->SetInput("X", this->Input("X"));
// X@GRAD@GRAD: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
op->SetAttrMap(this->Attrs());
// X@GRAD: dx
op->SetOutput("DX", this->InputGrad("X"));
// Out@GRAD@GRAD: ddout
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
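Written out as equations (with dout the incoming first-order gradient and ddx the incoming gradient of dx), the maker's comment corresponds to:

\[ y = \log x \quad\Rightarrow\quad dx = \frac{\partial y}{\partial x}\,dout = \frac{dout}{x} \]

\[ ddout = \frac{\partial (dx)}{\partial (dout)}\,ddx = \frac{ddx}{x}, \qquad dx_{\text{new}} = \frac{\partial (dx)}{\partial x}\,ddx = -\frac{dout}{x^{2}}\,ddx = -\frac{dout}{x}\cdot\frac{ddx}{x} \]

which is exactly what LogGradGradFunctor computes further down.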
DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer, DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer,
{framework::GradVarName("Out"), {framework::GradVarName("Out"),
framework::GradVarName("X")}); framework::GradVarName("X")});
...@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL( ...@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL(
ops::AbsGradGradFunctor<int64_t>>); ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */ /* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_OPERATOR(
log, ops::ActivationOp, ops::LogOpMaker, ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
REGISTER_OPERATOR(log_grad, ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::LogDoubleGradMaker<paddle::framework::OpDesc>,
ops::LogDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
log_grad_grad,
ops::ActivationOpDoubleGrad<ops::LogGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
REGISTER_ACTIVATION_CPU_KERNEL(log, Log, LogFunctor, LogGradFunctor);
REGISTER_OP_CPU_KERNEL(
log_grad_grad, ops::LogDoubleGradKernel<plat::CPUDeviceContext,
ops::LogGradGradFunctor<float>>,
ops::LogDoubleGradKernel<plat::CPUDeviceContext,
ops::LogGradGradFunctor<double>>,
ops::LogDoubleGradKernel<plat::CPUDeviceContext,
ops::LogGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/ /* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu) REGISTER_OP_VERSION(leaky_relu)
.AddCheckpoint( .AddCheckpoint(
......
...@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL(
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext, ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::AbsGradGradFunctor<int64_t>>); ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */ /* ========================================================================== */
/* ========================== Log register ==================================*/
REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, LogFunctor, LogGradFunctor);
REGISTER_OP_CUDA_KERNEL(
log_grad_grad, ops::LogDoubleGradKernel<plat::CUDADeviceContext,
ops::LogGradGradFunctor<float>>,
ops::LogDoubleGradKernel<plat::CUDADeviceContext,
ops::LogGradGradFunctor<double>>,
ops::LogDoubleGradKernel<plat::CUDADeviceContext,
ops::LogGradGradFunctor<plat::float16>>);
/* ========================================================================== */
...@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel ...@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel
} }
}; };
template <typename DeviceContext, typename Functor>
class LogDoubleGradKernel
: public SquareDoubleGradKernel<DeviceContext, Functor> {};
template <typename DeviceContext, typename Functor> template <typename DeviceContext, typename Functor>
class ELUDoubleGradKernel class ELUDoubleGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> { : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
...@@ -1852,6 +1856,37 @@ class PowGradKernel ...@@ -1852,6 +1856,37 @@ class PowGradKernel
functor(*place, x, out, dout, dx); functor(*place, x, out, dout, dx);
} }
}; };
template <typename T>
struct LogGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* X,
const framework::Tensor* ddX, framework::Tensor* ddOut,
const framework::Tensor* dOut, framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "LogGradGrad"));
auto x = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "LogGradGrad"));
// ddout = ddx / x; dx = -(dout / x) * (ddx / x)
// calculate dx first, so that ddout can be computed inplace over ddx
if (dX) {
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Output", "DOut", "LogGradGrad"));
auto dx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Output", "DX", "LogGradGrad"));
dx.device(*d) = dout * static_cast<T>(-1) * ddx / (x * x);
}
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "LogGradGrad"));
ddout.device(*d) = ddx * static_cast<T>(1) / x;
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -1872,7 +1907,6 @@ class PowGradKernel ...@@ -1872,7 +1907,6 @@ class PowGradKernel
__macro(cosh, Cosh, CoshFunctor, CoshGradFunctor); \ __macro(cosh, Cosh, CoshFunctor, CoshGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \ __macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log, Log, LogFunctor, LogGradFunctor); \
__macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \ __macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \
__macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \ __macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/platform/xpu_header.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
template <typename Functor>
class XPUActivationKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &context) const override {
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(context);
}
};
template <typename Functor>
class XPUActivationGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
void Compute(const framework::ExecutionContext &context) const override {
Functor functor;
auto attrs = functor.GetAttrs();
for (auto &attr : attrs) {
*attr.second = context.Attr<float>(attr.first);
}
functor(context);
}
};
template <typename DeviceContext, typename T>
void xpu_activation_forward(const framework::ExecutionContext &ctx,
xpu::Activation_t type) {
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");
const T *x_data = x->data<T>();
T *y_data = y->mutable_data<T>(ctx.GetPlace());
int r = 0;
if (xpu::Activation_t::ACT_POW == type.type) {
type.pow_factor = ctx.Attr<float>("factor");
}
auto xpu_context = ctx.device_context<DeviceContext>().x_context();
r = xpu::activation_forward(xpu_context, type, x->numel(),
reinterpret_cast<const float *>(x_data),
reinterpret_cast<float *>(y_data));
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
template <typename DeviceContext, typename T>
void xpu_activation_backward(const framework::ExecutionContext &ctx,
xpu::Activation_t type) {
/* TODO: relu tanh sigmoid are inplace */
const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Input<Tensor>("Out");
auto *dOut = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dX = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const T *x_data = nullptr;
const T *y_data = nullptr;
const T *y_grad = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (y != nullptr) y_data = y->data<T>();
if (dOut != nullptr) y_grad = dOut->data<T>();
T *x_grad = dX->mutable_data<T>(ctx.GetPlace());
auto xpu_context = ctx.device_context<DeviceContext>().x_context();
int r = xpu::activation_backward(xpu_context, type, dX->numel(),
reinterpret_cast<const float *>(x_data),
reinterpret_cast<const float *>(y_data),
reinterpret_cast<const float *>(y_grad),
reinterpret_cast<float *>(x_grad));
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
template <typename T, xpu::Activation_t::act_enum algorithm>
struct XPUActivationFunc : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_forward<paddle::platform::XPUDeviceContext, T>(ctx,
algorithm);
}
};
template <typename T, xpu::Activation_t::act_enum algorithm>
struct XPUActivationGradFunc : public BaseActivationFunctor<T> {
void operator()(const framework::ExecutionContext &ctx) const {
xpu_activation_backward<paddle::platform::XPUDeviceContext, T>(ctx,
algorithm);
}
};
template <typename T>
using XPUReluFunctor = XPUActivationFunc<T, xpu::Activation_t::RELU>;
template <typename T>
using XPUSigmoidFunctor = XPUActivationFunc<T, xpu::Activation_t::SIGMOID>;
template <typename T>
using XPUTanhFunctor = XPUActivationFunc<T, xpu::Activation_t::TANH>;
template <typename T>
using XPUGeluFunctor = XPUActivationFunc<T, xpu::Activation_t::GELU>;
template <typename T>
using XPULogFunctor = XPUActivationFunc<T, xpu::Activation_t::LOG>;
template <typename T>
using XPUSquareFunctor = XPUActivationFunc<T, xpu::Activation_t::SQUARE>;
template <typename T>
using XPUSquareGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQUARE>;
template <typename T>
using XPUReluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::RELU>;
template <typename T>
using XPUSigmoidGradFunctor =
XPUActivationGradFunc<T, xpu::Activation_t::SIGMOID>;
template <typename T>
using XPUTanhGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::TANH>;
template <typename T>
using XPUGeluGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::GELU>;
template <typename T>
using XPUSqrtFunctor = XPUActivationFunc<T, xpu::Activation_t::SQRT>;
template <typename T>
using XPUSqrtGradFunctor = XPUActivationGradFunc<T, xpu::Activation_t::SQRT>;
template <typename T>
using XPUACTPowFunctor = XPUActivationFunc<T, xpu::Activation_t::ACT_POW>;
template <typename T>
using XPUABSFunctor = XPUActivationFunc<T, xpu::Activation_t::ABS>;
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_XPU_KERNEL(act_type, \
ops::XPUActivationKernel<ops::functor<float>>); \
REGISTER_OP_XPU_KERNEL( \
act_type##_grad, \
ops::XPUActivationGradKernel<ops::grad_functor<float>>);
REGISTER_ACTIVATION_XPU_KERNEL(relu, XPUReluFunctor, XPUReluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(tanh, XPUTanhFunctor, XPUTanhGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor,
XPUSigmoidGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor)
REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSquareGradFunctor)
REGISTER_OP_XPU_KERNEL(log,
ops::XPUActivationKernel<ops::XPULogFunctor<float>>);
REGISTER_OP_XPU_KERNEL(pow,
ops::XPUActivationKernel<ops::XPUACTPowFunctor<float>>);
REGISTER_OP_XPU_KERNEL(abs,
ops::XPUActivationKernel<ops::XPUABSFunctor<float>>);
#endif // PADDLE_WITH_XPU
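All of the kernels above are the same thin shim: instantiate a functor, copy the float attributes in, and forward the execution context, so every op-specific detail lives in the functor's operator(). A standalone sketch of that static-dispatch pattern (Ctx, Kernel, and ReluFunctor are illustrative mocks, not the Paddle classes):

#include <cstdio>

struct Ctx {  // stand-in for framework::ExecutionContext
  float x;
};

// Generic kernel: mirrors XPUActivationKernel<Functor>::Compute.
template <typename Functor>
struct Kernel {
  void Compute(const Ctx& ctx) const {
    Functor f;
    f(ctx);  // all op-specific behavior is in the functor
  }
};

struct ReluFunctor {
  void operator()(const Ctx& ctx) const {
    std::printf("relu(%g) = %g\n", ctx.x, ctx.x > 0.f ? ctx.x : 0.f);
  }
};

int main() {
  Kernel<ReluFunctor> k;
  k.Compute(Ctx{-2.5f});  // relu(-2.5) = 0
  k.Compute(Ctx{3.0f});   // relu(3) = 3
  return 0;
}

A new activation then costs one using-alias plus one REGISTER_ACTIVATION_XPU_KERNEL line, as the registrations above show.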
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_xpu.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ElementwiseAddXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
XPUElementwise<T, XPUAddFunctor<T>>(ctx);
}
};
template <typename DeviceContext, typename T>
class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
ElemwiseGradKernel<T>::Compute(ctx);
using Tensor = framework::Tensor;
auto *dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
auto dx_dims = dout->dims();
auto dy_dims_untrimed = dout->dims();
T *dx_data = NULL;
T *dy_data = NULL;
int axis = ctx.Attr<int>("axis");
PADDLE_ENFORCE_GE(dx_dims.size(), dy_dims_untrimed.size(),
"Rank of first input must >= rank of second input.");
if (dx != nullptr) {
dx->mutable_data<T>(ctx.GetPlace());
dx_dims = dx->dims();
dx_data = dx->data<T>();
}
if (dy != nullptr) {
dy->mutable_data<T>(ctx.GetPlace());
dy_dims_untrimed = dy->dims();
dy_data = dy->data<T>();
}
int pre, n, post, is_common_broadcast;
if (dx_dims == dy_dims_untrimed) {
pre = post = 1;
n = dout->numel();
} else {
axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < dx_dims.size(),
"Axis should be in range [0, dx_dims)");
auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed);
axis = (dy_dims.size() == 0) ? dx_dims.size() : axis;
get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post,
&is_common_broadcast);
}
int len = pre * n * post;
auto &dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
if (post == 1) {
int r = xpu::matrix_vector_add_grad(
dev_ctx.x_context(), dout->data<T>(), dout->data<T>(),
dout->data<T>(), dout->data<T>(), dx_data, dy_data, pre, n);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
return;
}
if (dx == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dx_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
}
if (dy == nullptr) {
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void **>(&dy_data), len * sizeof(float)),
XPU_SUCCESS, platform::errors::External("XPU has no enough memory"));
} else {
if (len != n) {
PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast<void **>(&dy_data),
len * sizeof(float)),
XPU_SUCCESS, platform::errors::External(
"XPU has no enough memory"));
}
}
int r = xpu::elementwise_add_grad(
dev_ctx.x_context(), dout->data<T>() /*x*/, dout->data<T>() /*y*/,
dout->data<T>() /*out*/, dout->data<T>(), dx_data, dy_data, len);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
if ((dy != nullptr) && (len != n)) {
r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data<T>(), pre, n,
post, xpu::ElementwiseOp::ASSIGN);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
dev_ctx.Wait();
xpu_free(dy_data);
}
if ((dx == nullptr || dy == nullptr) && !(dy != nullptr && len != n)) {
dev_ctx.Wait();
}
if (dx == nullptr) {
xpu_free(dx_data);
}
if (dy == nullptr) {
xpu_free(dy_data);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
elementwise_add,
ops::ElementwiseAddXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(elementwise_add_grad,
ops::ElementwiseAddGradXPUKernel<
paddle::platform::XPUDeviceContext, float>);
#endif
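The grad kernel above is the usual broadcast-add calculus. With z = x + y and y broadcast so that each y_p is added at every (pre, post) position:

\[ z_{ipj} = x_{ipj} + y_{p} \quad\Rightarrow\quad \frac{\partial L}{\partial x_{ipj}} = dout_{ipj}, \qquad \frac{\partial L}{\partial y_{p}} = \sum_{i,j} dout_{ipj} \]

so dx is a plain copy of dout, while dy needs the reduce_ew sum over the pre and post dimensions whenever len != n.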
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace operators {
template <typename T>
struct XPUAddFunctor {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) {
return xpu::elementwise_add(ctx, x, y, z, len);
}
};
template <typename T>
struct XPUMulFunctor {
int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) {
return xpu::elementwise_mul(ctx, x, y, z, len);
}
};
template <typename T, typename Functor>
void XPUElementwise(const framework::ExecutionContext& ctx) {
PADDLE_ENFORCE(platform::is_xpu_place(ctx.GetPlace()),
"This kernel only runs on XPU device.");
auto x_var = ctx.InputVar("X");
PADDLE_ENFORCE_NE(x_var, nullptr,
platform::errors::Fatal("Cannot get input Variable X"));
PADDLE_ENFORCE(x_var->IsType<framework::LoDTensor>(),
"XPU only support LoDTensor");
auto x = x_var->Get<framework::LoDTensor>();
auto* y = ctx.Input<framework::LoDTensor>("Y");
auto* z = ctx.Output<framework::LoDTensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
int axis = ctx.Attr<int>("axis");
auto x_dims = x.dims();
auto y_dims_untrimed = y->dims();
PADDLE_ENFORCE_GE(x_dims.size(), y_dims_untrimed.size(),
"Rank of first input must >= rank of second input.");
axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis);
PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(),
"Axis should be in range [0, x_dims)");
auto y_dims = trim_trailing_singular_dims(y_dims_untrimed);
axis = (y_dims.size() == 0) ? x_dims.size() : axis;
int pre, n, post, is_common_broadcast;
get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post, &is_common_broadcast);
int len = pre * n * post;
const T* x_data = x.data<T>();
const T* y_data = y->data<T>();
T* z_data = z->data<T>();
T* y_broadcast = nullptr;
auto& dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
if (post == 1) {
if (std::is_same<Functor, XPUAddFunctor<T>>::value) {
int res = xpu::matrix_vector_add(dev_ctx.x_context(), x_data, y_data,
z_data, pre, n);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
return;
}
if (std::is_same<Functor, XPUMulFunctor<T>>::value) {
int res = xpu::matrix_vector_mul(dev_ctx.x_context(), x_data, y_data,
z_data, pre, n);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
return;
}
}
if (pre != 1 || post != 1) {
PADDLE_ENFORCE(xpu_malloc(reinterpret_cast<void**>(&y_broadcast),
len * sizeof(T)) == XPU_SUCCESS);
int res = xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre,
n, post, xpu::ElementwiseOp::ASSIGN);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
y_data = y_broadcast;
}
Functor functor;
int res = functor(dev_ctx.x_context(), x_data, y_data, z_data, len);
PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d",
res);
if (pre != 1 || post != 1) {
dev_ctx.Wait();
xpu_free(y_broadcast);
}
}
} // namespace operators
} // namespace paddle
#endif
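Both XPU files above lean on factoring the shapes into (pre, n, post): n collapses y's dims, while pre and post are the products of x's dims before and after the block that y occupies. A self-contained sketch of that factorization, assuming y_dims has already been trimmed of trailing 1s and aligned at axis (this mirrors what Paddle's get_mid_dims produces for the non-common-broadcast case; it is an illustrative reimplementation, not the library function):

#include <cstdio>
#include <vector>

void GetMidDims(const std::vector<int>& x_dims, const std::vector<int>& y_dims,
                int axis, int* pre, int* n, int* post) {
  *pre = *n = *post = 1;
  for (int i = 0; i < axis; ++i) *pre *= x_dims[i];  // dims before y's block
  for (int d : y_dims) *n *= d;                      // y's own block
  for (size_t i = axis + y_dims.size(); i < x_dims.size(); ++i)
    *post *= x_dims[i];                              // dims after y's block
}

int main() {
  int pre, n, post;
  // x: [2, 3, 4, 5], y: [3, 4] aligned at axis 1 -> pre=2, n=12, post=5,
  // so len = pre * n * post = 120 = x's element count.
  GetMidDims({2, 3, 4, 5}, {3, 4}, /*axis=*/1, &pre, &n, &post);
  std::printf("pre=%d n=%d post=%d\n", pre, n, post);
  return 0;
}

post == 1 is the fast path (matrix_vector_add / matrix_vector_mul); otherwise y is first materialized to len elements with broadcast_ew.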
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <algorithm>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
namespace paddle {
namespace operators {
static framework::DDim RowMatrixFromVector(const framework::DDim &x_dim) {
if (x_dim.size() > 1) {
return x_dim;
}
return framework::make_ddim({1, x_dim[0]});
}
static framework::Tensor FoldInitDims(const framework::Tensor &input) {
auto output = input;
auto in_dims = input.dims();
if (in_dims.size() == 3) {
output.Resize({in_dims[0] * in_dims[1], in_dims[2]});
}
return output;
}
/**
* Get column matrix shape from a vector shape. If the rank of y_dim > 1, the
* original y_dim is returned.
*/
static framework::DDim ColumnMatrixFromVector(const framework::DDim &y_dim) {
if (y_dim.size() > 1) {
return y_dim;
}
return framework::make_ddim({y_dim[0], 1});
}
static void ReshapeTensorIntoMatrixSequence(
framework::Tensor *x, const math::MatDescriptor &descriptor) {
int64_t h, w;
h = descriptor.height_;
w = descriptor.width_;
if (descriptor.trans_) {
std::swap(w, h);
}
if (descriptor.batch_size_) {
x->Resize({descriptor.batch_size_, h, w});
} else {
x->Resize({h, w});
}
}
/**
* Reshape the x,y,out tensor to 3-D or 2-D tensor by matrix descriptor
* Out = matmul(x, y)
*
* This method will first calculate the X, Y matrix sequence, and then calculate
* the out shape.
*
* Assume X = [BatchSize, H1, W1], Y = [BatchSize, H2, W2]
* The out = [BatchSize, H1, W2]
*
* If there is no batch size in `X` and `Y`, the out will be [H1, W2]
* If any of `X` and `Y` has batch size BatchSize, the out will have the
* BatchSize.
*/
static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x,
framework::Tensor *y,
framework::Tensor *out, bool trans_x,
bool trans_y) {
auto x_dim = RowMatrixFromVector(x->dims());
auto y_dim = ColumnMatrixFromVector(y->dims());
auto mat_dim_x = math::CreateMatrixDescriptor(x_dim, 0, trans_x);
auto mat_dim_y = math::CreateMatrixDescriptor(y_dim, 0, trans_y);
if (mat_dim_x.batch_size_ == 0 && mat_dim_y.batch_size_ == 0) {
out->Resize({mat_dim_x.height_, mat_dim_y.width_});
} else {
out->Resize({std::max(mat_dim_x.batch_size_, mat_dim_y.batch_size_),
mat_dim_x.height_, mat_dim_y.width_});
}
ReshapeTensorIntoMatrixSequence(x, mat_dim_x);
ReshapeTensorIntoMatrixSequence(y, mat_dim_y);
}
template <typename DeviceContext, typename T>
class MatMulXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto *x = context.Input<framework::Tensor>("X");
auto *y = context.Input<framework::Tensor>("Y");
auto *out = context.Output<framework::Tensor>("Out");
out->mutable_data<T>(context.GetPlace());
auto mat_dim_a = math::CreateMatrixDescriptor(
RowMatrixFromVector(x->dims()), 0, context.Attr<bool>("transpose_X"));
auto mat_dim_b =
math::CreateMatrixDescriptor(ColumnMatrixFromVector(y->dims()), 0,
context.Attr<bool>("transpose_Y"));
PADDLE_ENFORCE_EQ(
mat_dim_a.width_, mat_dim_b.height_,
platform::errors::InvalidArgument("Shape mistake in matmul_op"));
PADDLE_ENFORCE_EQ(
mat_dim_a.batch_size_, mat_dim_b.batch_size_,
platform::errors::InvalidArgument("Shape mistake in matmul_op"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
auto &dev_ctx = context.template device_context<DeviceContext>();
float *data_c = out->data<T>();
if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
int r =
xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_,
mat_dim_a.height_, mat_dim_b.width_, mat_dim_a.width_,
alpha, x->data<T>(), y->data<T>(), 0.0f, data_c);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
} else {
// batch matmul
int r = xpu::batched_gemm_int16(dev_ctx.x_context(), mat_dim_a.trans_,
mat_dim_b.trans_, mat_dim_a.batch_size_,
mat_dim_a.height_, mat_dim_b.width_,
mat_dim_a.width_, alpha, x->data<T>(),
y->data<T>(), data_c, nullptr, nullptr);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
}
};
// Reshape a rank-3 tensor from P x M x N to M x (P * N).
// (Warning: This requires transposing data and writes into new memory.)
// Identity op if the tensor is not of rank 3.
template <typename DeviceContext, typename T>
static framework::Tensor XPUFoldHeadAndLastDims(
const DeviceContext &context, const framework::Tensor &input) {
auto in_dims = input.dims();
if (in_dims.size() != 3) {
return input;
}
framework::Tensor output;
output.Resize({in_dims[1], in_dims[0], in_dims[2]});
output.mutable_data<T>(context.GetPlace());
std::vector<int> in_shape_host = {static_cast<int>(in_dims[0]),
static_cast<int>(in_dims[1]),
static_cast<int>(in_dims[2])};
std::vector<int> axis_host = {1, 0, 2};
int r = xpu::transpose(context.x_context(), input.data<T>(), output.data<T>(),
in_shape_host.data(), axis_host.data(), /*ndims=*/3);
PADDLE_ENFORCE_EQ(r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
output.Resize({in_dims[1], in_dims[0] * in_dims[2]});
return output;
}
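The fold above is a pure index permutation: element (p, m, n) of a P x M x N buffer moves to (m, p, n), after which the same memory is read as M x (P*N). A self-contained sketch of that mapping on a flat row-major array (illustrative; the kernel delegates the identical {1, 0, 2} permutation to xpu::transpose):

#include <cstdio>
#include <vector>

std::vector<float> FoldHeadAndLastDims(const std::vector<float>& in, int P,
                                       int M, int N) {
  std::vector<float> out(in.size());
  for (int p = 0; p < P; ++p)
    for (int m = 0; m < M; ++m)
      for (int n = 0; n < N; ++n)
        out[(m * P + p) * N + n] = in[(p * M + m) * N + n];
  return out;  // reinterpret as M rows of length P * N
}

int main() {
  std::vector<float> in(12);  // P=2, M=3, N=2, values 0..11
  for (int i = 0; i < 12; ++i) in[i] = static_cast<float>(i);
  auto out = FoldHeadAndLastDims(in, 2, 3, 2);
  for (int m = 0; m < 3; ++m) {  // print the folded 3 x 4 view
    for (int k = 0; k < 4; ++k) std::printf("%4.0f", out[m * 4 + k]);
    std::printf("\n");
  }
  return 0;
}

Row m of the folded view interleaves the m-th rows of every batch, which is what makes the 2-D GEMM formulas below applicable to a rank-3 operand.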
// Using dimensional constraints on matrix multiplication, it is
// straightforward to check the following table for when X and Y
// are both matrices.
//
// transpose_X | False | True | False | True
// transpose_Y | False | False | True | True
// -----------+----------+----------+----------+-----------
// dX = | dOut Y^T | Y dOut^T | dOut Y | Y^T dOut^T
// dY = | X^T dOut | X dOut | dOut^T X | dOut^T X^T
//
// When X is a vector of size K, we treat it instead as a matrix of shape
// (1, K). Similarly, when Y is a vector of size K, we treat it instead as
// a matrix of shape (K, 1).
//
// When X and Y are both 3-dimensional tensors, the first dimension,
// the batch dimension, can be ignored and the exact same formulas apply
// as for two matrices.
//
// Finally, when, e.g., X is a 3-dimensional tensor but Y is a matrix, we end
// up with formulas like
//
// dY_{ij} = \sum_{p, m} X_{pmi} dOut_{pmj}
//
// To handle this sort of scenario, we reshape X : P x M x K, dOut: P x M x N
// to X: (P * M) x K, dOut: (P * M) x N.
template <typename DeviceContext, typename T>
class MatMulGradXPUKernel : public framework::OpKernel<T> {
public:
void MatMul(const framework::ExecutionContext &context,
const framework::Tensor &a, bool trans_a,
const framework::Tensor &b, bool trans_b,
framework::Tensor *out) const {
out->mutable_data<T>(context.GetPlace());
auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
PADDLE_ENFORCE_EQ(
mat_dim_a.width_, mat_dim_b.height_,
platform::errors::InvalidArgument("Shape mistake in matmul_grad_op"));
PADDLE_ENFORCE_EQ(
mat_dim_a.batch_size_, mat_dim_b.batch_size_,
platform::errors::InvalidArgument("Shape mistake in matmul_grad_op"));
T alpha = static_cast<T>(context.Attr<float>("alpha"));
auto &dev_ctx = context.template device_context<DeviceContext>();
float *data_c = out->data<T>();
if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) {
int r =
xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_,
mat_dim_a.height_, mat_dim_b.width_, mat_dim_a.width_,
alpha, a.data<T>(), b.data<T>(), 0.0f, data_c);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
} else {
// batch matmul
int r = xpu::batched_gemm_int16(dev_ctx.x_context(), mat_dim_a.trans_,
mat_dim_b.trans_, mat_dim_a.batch_size_,
mat_dim_a.height_, mat_dim_b.width_,
mat_dim_a.width_, alpha, a.data<T>(),
b.data<T>(), data_c, nullptr, nullptr);
PADDLE_ENFORCE_EQ(
r, XPU_SUCCESS,
platform::errors::External(
"XPU API return wrong value[%d], please check whether "
"Baidu Kunlun Card is properly installed.",
r));
}
}
void CalcInputGrad(const framework::ExecutionContext &context,
const framework::Tensor &a, bool trans_a,
bool is_fold_init_dims_a, const framework::Tensor &b,
bool trans_b, bool is_fold_init_dims_b,
framework::Tensor *out) const {
if (out == nullptr) return;
bool need_combine = (a.dims().size() == 3 || b.dims().size() == 3) &&
out->dims().size() == 2;
if (!need_combine) {
MatMul(context, a, trans_a, b, trans_b, out);
} else {
auto &dev_ctx = context.template device_context<DeviceContext>();
MatMul(
context, is_fold_init_dims_a
? FoldInitDims(a)
: XPUFoldHeadAndLastDims<DeviceContext, T>(dev_ctx, a),
trans_a, is_fold_init_dims_b
? FoldInitDims(b)
: XPUFoldHeadAndLastDims<DeviceContext, T>(dev_ctx, b),
trans_b, out);
}
}
void Compute(const framework::ExecutionContext &context) const override {
auto x = *context.Input<framework::Tensor>("X");
auto y = *context.Input<framework::Tensor>("Y");
auto dout =
*context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto *dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto *dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));
bool transpose_x = context.Attr<bool>("transpose_X");
bool transpose_y = context.Attr<bool>("transpose_Y");
ReshapeXYOutIntoMatrixSequence(&x, &y, &dout, transpose_x, transpose_y);
framework::DDim dx_dims;
if (dx) {
dx_dims = dx->dims();
if (dx_dims != x.dims()) {
dx->Resize(x.dims());
}
}
framework::DDim dy_dims;
if (dy) {
dy_dims = dy->dims();
if (dy_dims != y.dims()) {
dy->Resize(y.dims());
}
}
if (transpose_x && transpose_y) {
CalcInputGrad(context, y, true, true, dout, true, false, dx);
CalcInputGrad(context, dout, true, true, x, true, false, dy);
} else if (transpose_x) {
CalcInputGrad(context, y, false, false, dout, true, false, dx);
CalcInputGrad(context, x, false, false, dout, false, true, dy);
} else if (transpose_y) {
CalcInputGrad(context, dout, false, false, y, false, true, dx);
CalcInputGrad(context, dout, true, true, x, false, true, dy);
} else {
CalcInputGrad(context, dout, false, false, y, true, false, dx);
CalcInputGrad(context, x, true, true, dout, false, true, dy);
}
if (dx) {
if (dx_dims != x.dims()) {
dx->Resize(dx_dims);
}
}
if (dy) {
if (dy_dims != y.dims()) {
dy->Resize(dy_dims);
}
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
matmul, ops::MatMulXPUKernel<paddle::platform::XPUDeviceContext, float>);
REGISTER_OP_XPU_KERNEL(
matmul_grad,
ops::MatMulGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
...@@ -14,11 +14,11 @@ limitations under the License. */ ...@@ -14,11 +14,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/mul_op.h"
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/operators/mul_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -24,32 +24,45 @@ class DpsgdOp : public framework::OperatorWithKernel { ...@@ -24,32 +24,45 @@ class DpsgdOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true, PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
"Input(Param) of DpsgdOp should not be null."); platform::errors::NotFound(
"Input(Param) of DpsgdOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true, PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true,
"Input(Grad) of DpsgdOp should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true, "Input(Grad) of DpsgdOp should not be null."));
"Input(LearningRate) of DpsgdOp should not be null."); PADDLE_ENFORCE_EQ(
ctx->HasInput("LearningRate"), true,
platform::errors::NotFound(
"Input(LearningRate) of DpsgdOp should not be null."));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ctx->GetInputsVarType("Param").front(), ctx->GetInputsVarType("Param").front(),
framework::proto::VarType::LOD_TENSOR, framework::proto::VarType::LOD_TENSOR,
"The input var's type should be LoDTensor, but the received is %s", platform::errors::InvalidArgument(
ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); "The input var's type should be LoDTensor, but the received is %s",
ctx->GetInputsVarType("Param").front()));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
ctx->GetInputsVarType("Grad").front(), ctx->GetInputsVarType("Grad").front(),
framework::proto::VarType::LOD_TENSOR, framework::proto::VarType::LOD_TENSOR,
"The input var's type should be LoDTensor, but the received is %s", platform::errors::InvalidArgument(
ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front()); "The input var's type should be LoDTensor, but the received is %s",
ctx->GetInputsVarType("Grad").front()));
PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true, PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
"Output(ParamOut) of DpsgdOp should not be null."); platform::errors::NotFound(
"Output(ParamOut) of DpsgdOp should not be null."));
auto lr_dims = ctx->GetInputDim("LearningRate"); auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning rate should have 1 dimension"); platform::errors::InvalidArgument(
"Learning rate should have 1 dimension. But Received "
"LearningRate's dims [%s].",
framework::product(lr_dims)));
auto param_dims = ctx->GetInputDim("Param"); auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"), param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of DpsgdOp should have same dimension"); platform::errors::InvalidArgument(
"Param and Grad input of DpsgdOp should have same dimension. But "
"received Para's dim [%s] and Grad's dim [%s].",
param_dims, ctx->GetInputDim("Grad")));
ctx->SetOutputDim("ParamOut", param_dims); ctx->SetOutputDim("ParamOut", param_dims);
} }
......
...@@ -28,17 +28,19 @@ class DpsgdOpKernel : public framework::OpKernel<T> { ...@@ -28,17 +28,19 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
const auto *param_var = ctx.InputVar("Param"); const auto *param_var = ctx.InputVar("Param");
PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true, PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
"The Var(%s)'s type should be LoDTensor, " platform::errors::InvalidArgument(
"but the received is %s", "The Var(%s)'s type should be LoDTensor, "
ctx.InputNames("Param").front(), "but the received is %s",
framework::ToTypeName(param_var->Type())); ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type())));
const auto *grad_var = ctx.InputVar("Grad"); const auto *grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(), true, PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(), true,
"The Var(%s)'s type should be LoDTensor, " platform::errors::InvalidArgument(
"but the received is %s", "The Var(%s)'s type should be LoDTensor, "
ctx.InputNames("Grad").front(), "but the received is %s",
framework::ToTypeName(grad_var->Type())); ctx.InputNames("Grad").front(),
framework::ToTypeName(grad_var->Type())));
const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate"); const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
......
...@@ -40,43 +40,62 @@ class MomentumOp : public framework::OperatorWithKernel { ...@@ -40,43 +40,62 @@ class MomentumOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"), PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
"Input(param) of Momentum should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE(ctx->HasInput("Grad"), "Input(param) of Momentum should not be null."));
"Input(grad) of Momentum should not be null."); PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true,
PADDLE_ENFORCE(ctx->HasInput("Velocity"), platform::errors::NotFound(
"Input(velocity) of Momentum should not be null."); "Input(grad) of Momentum should not be null."));
PADDLE_ENFORCE(ctx->HasInput("LearningRate"), PADDLE_ENFORCE_EQ(ctx->HasInput("Velocity"), true,
"Input(LearningRate) of Momentum should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE( "Input(velocity) of Momentum should not be null."));
ctx->GetInputsVarType("Param").front() == PADDLE_ENFORCE_EQ(
framework::proto::VarType::LOD_TENSOR, ctx->HasInput("LearningRate"), true,
"The input var's type should be LoDTensor, but the received is %s", platform::errors::NotFound(
ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); "Input(LearningRate) of Momentum should not be null."));
PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), ctx->GetInputsVarType("Param").front(),
"Output(ParamOut) of Momentum should not be null."); framework::proto::VarType::LOD_TENSOR,
PADDLE_ENFORCE(ctx->HasOutput("VelocityOut"), platform::errors::InvalidArgument(
"Output(VelocityOut) of Momentum should not be null."); "The input var's type should be LoDTensor, but the received is %s",
ctx->GetInputsVarType("Param").front()));
PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
platform::errors::NotFound(
"Output(ParamOut) of Momentum should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("VelocityOut"), true,
platform::errors::NotFound(
"Output(VelocityOut) of Momentum should not be null."));
auto lr_dims = ctx->GetInputDim("LearningRate"); auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, PADDLE_ENFORCE_NE(framework::product(lr_dims), 0,
"Maybe the Input variable LearningRate has not " platform::errors::InvalidArgument(
"been initialized. You may need to confirm " "Maybe the Input variable LearningRate has not "
"if you put exe.run(startup_program) " "been initialized. You may need to confirm "
"after optimizer.minimize function."); "if you put exe.run(startup_program) "
"after optimizer.minimize function."));
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning_rate should be a scalar"); platform::errors::InvalidArgument(
"Learning_rate should be a scalar. But Received "
"LearningRate's dim [%s]",
framework::product(lr_dims)));
auto param_dim = ctx->GetInputDim("Param"); auto param_dim = ctx->GetInputDim("Param");
if (ctx->GetInputsVarType("Grad")[0] == if (ctx->GetInputsVarType("Grad")[0] ==
framework::proto::VarType::LOD_TENSOR) { framework::proto::VarType::LOD_TENSOR) {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Grad"), param_dim, ctx->GetInputDim("Grad"),
"Param and Grad input of MomentumOp should have the same dimension."); platform::errors::InvalidArgument(
"Param and Grad input of MomentumOp should have the same "
"dimension. But received Param's dim [%s] and Grad's dim [%s].",
param_dim, ctx->GetInputDim("Grad")));
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Velocity"), param_dim, ctx->GetInputDim("Velocity"),
"Param and Velocity of MomentumOp should have the same dimension."); platform::errors::InvalidArgument(
"Param and Velocity of MomentumOp should have the same "
"dimension. But received Param's dim [%s] and Velocity [%s].",
param_dim, ctx->GetInputDim("Velocity")));
} }
ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("ParamOut", param_dim);
...@@ -398,10 +417,12 @@ class MomentumOpKernel : public framework::OpKernel<T> { ...@@ -398,10 +417,12 @@ class MomentumOpKernel : public framework::OpKernel<T> {
for_range(functor); for_range(functor);
} }
} else { } else {
PADDLE_THROW( PADDLE_ENFORCE_EQ(false, true,
string::Sprintf("MomentumOp only supports LoDTensor or SelectedRows " platform::errors::PermissionDenied(
"gradient, but the received Variable Type is %s", "Unsupported Variable Type of Grad "
framework::ToTypeName(grad_var->Type()))); "in MomentumOp. Expected LoDTensor "
"or SelectedRows, but received [%s]",
paddle::framework::ToTypeName(grad_var->Type())));
} }
} }
}; };
......
...@@ -22,47 +22,75 @@ class RmspropOp : public framework::OperatorWithKernel { ...@@ -22,47 +22,75 @@ class RmspropOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"), PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
"Input(Param) of RmspropOp should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE(ctx->HasInput("MeanSquare"), "Input(Param) of RmspropOp should not be null."));
"Input(MeanSquare) of RmspropOp should not be null."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(ctx->HasInput("LearningRate"), ctx->HasInput("MeanSquare"), true,
"Input(LearningRate) of RmspropOp should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE(ctx->HasInput("Grad"), "Input(MeanSquare) of RmspropOp should not be null."));
"Input(Grad) of RmspropOp should not be null."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(ctx->HasInput("Moment"), ctx->HasInput("LearningRate"), true,
"Input(Moment) of RmspropOp should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE( "Input(LearningRate) of RmspropOp should not be null."));
ctx->GetInputsVarType("Param").front() == PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true,
framework::proto::VarType::LOD_TENSOR, platform::errors::NotFound(
"The input var's type should be LoDTensor, but the received is %s", "Input(Grad) of RmspropOp should not be null."));
ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); PADDLE_ENFORCE_EQ(ctx->HasInput("Moment"), true,
platform::errors::NotFound(
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), "Input(Moment) of RmspropOp should not be null."));
"Output(param_out) of RmspropOp should not be null."); PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), framework::proto::VarType::LOD_TENSOR,
"Output(MomentOut) of RmspropOp should not be null."); platform::errors::InvalidArgument(
PADDLE_ENFORCE(ctx->HasOutput("MeanSquareOut"), "The input var's type in RmspropOp should be "
"Output(MeanSquareOut) of RmspropOp should not be null."); "LoDTensor, but the received is %s",
ctx->GetInputsVarType("Param").front()));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("ParamOut"), true,
platform::errors::NotFound(
"Output(param_out) of RmspropOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("MomentOut"), true,
platform::errors::NotFound(
"Output(MomentOut) of RmspropOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("MeanSquareOut"), true,
platform::errors::NotFound(
"Output(MeanSquareOut) of RmspropOp should not be null."));
if (ctx->Attrs().Get<bool>("centered")) { if (ctx->Attrs().Get<bool>("centered")) {
PADDLE_ENFORCE(ctx->HasOutput("MeanGradOut"), PADDLE_ENFORCE_EQ(
"Output(MeanGradOut) of RmspropOp should not be null."); ctx->HasOutput("MeanGradOut"), true,
platform::errors::NotFound(
"Output(MeanGradOut) of RmspropOp should not be null."));
} }
auto param_dim = ctx->GetInputDim("Param"); auto param_dim = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Grad"), param_dim, ctx->GetInputDim("Grad"),
"Param and grad input of RmspropOp should have the same dimension."); platform::errors::InvalidArgument(
"Param and grad input of RmspropOp should have the same dimension. "
"But received Param's dim [%s] and Grad's dim [%s].",
param_dim, ctx->GetInputDim("Grad")));
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Moment"), PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Moment"),
"Param and Momentum input of RmspropOp " platform::errors::InvalidArgument(
"should have the same dimension."); "Param and Momentum input of RmspropOp "
"should have the same dimension. But received "
"Param's dim [%s] and Moment [%s]",
param_dim, ctx->GetInputDim("Moment")));
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("MeanSquare"), PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("MeanSquare"),
"Param and Momentum input of RmspropOp " platform::errors::InvalidArgument(
"should have the same dimension."); "Param and Momentum input of RmspropOp "
"should have the same dimension. But received "
"Param's dim [%s] and MeanSquare [%s]",
param_dim, ctx->GetInputDim("MeanSquare")));
auto lr_dim = ctx->GetInputDim("LearningRate"); auto lr_dim = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1, PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1,
"Learning Rate should be a scalar."); platform::errors::InvalidArgument(
"Learning Rate of RmspropOp should be a scalar. But "
"received LearningRate's dim [%s]",
framework::product(lr_dim)));
ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("ParamOut", param_dim);
ctx->SetOutputDim("MomentOut", param_dim); ctx->SetOutputDim("MomentOut", param_dim);
......
...@@ -148,11 +148,15 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -148,11 +148,15 @@ class RmspropOpKernel : public framework::OpKernel<T> {
auto &mom_tensor = *ctx.Input<LoDTensor>("Moment"); auto &mom_tensor = *ctx.Input<LoDTensor>("Moment");
PADDLE_ENFORCE_EQ(&p_tensor, param_out, PADDLE_ENFORCE_EQ(&p_tensor, param_out,
"Param and ParamOut must be the same Tensor"); platform::errors::InvalidArgument(
"Param and ParamOut must be the same Tensor"));
PADDLE_ENFORCE_EQ(&mom_tensor, moment_out, PADDLE_ENFORCE_EQ(&mom_tensor, moment_out,
"Moment and MomentOut must be the same Tensor"); platform::errors::InvalidArgument(
PADDLE_ENFORCE_EQ(&ms_tensor, mean_square_out, "Moment and MomentOut must be the same Tensor"));
"MeanSquare and MeanSquareOut must be the same Tensor"); PADDLE_ENFORCE_EQ(
&ms_tensor, mean_square_out,
platform::errors::InvalidArgument(
"MeanSquare and MeanSquareOut must be the same Tensor"));
auto &dev_ctx = ctx.template device_context<DeviceContext>(); auto &dev_ctx = ctx.template device_context<DeviceContext>();
size_t limit = static_cast<size_t>(ms_tensor.numel()); size_t limit = static_cast<size_t>(ms_tensor.numel());
...@@ -179,8 +183,10 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -179,8 +183,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad"); auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
auto mg = EigenVector<T>::Flatten(mg_tensor); auto mg = EigenVector<T>::Flatten(mg_tensor);
auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut"); auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out, PADDLE_ENFORCE_EQ(
"MeanGrad and MeanGradOut must be the same Tensor"); &mg_tensor, mean_grad_out,
platform::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
auto mg_out = EigenVector<T>::Flatten(*mean_grad_out); auto mg_out = EigenVector<T>::Flatten(*mean_grad_out);
mg_out.device(place) = rho * mg + (1 - rho) * g; mg_out.device(place) = rho * mg + (1 - rho) * g;
...@@ -198,8 +204,10 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -198,8 +204,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
if (centered) { if (centered) {
auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad"); auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut"); auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out, PADDLE_ENFORCE_EQ(
"MeanGrad and MeanGradOut must be the same Tensor"); &mg_tensor, mean_grad_out,
platform::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
for_range(CenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>( for_range(CenteredRmspropFunctor<T, DenseRmspropGradFunctor<T>>(
param_out->mutable_data<T>(ctx.GetPlace()), param_out->mutable_data<T>(ctx.GetPlace()),
mean_square_out->mutable_data<T>(ctx.GetPlace()), mean_square_out->mutable_data<T>(ctx.GetPlace()),
...@@ -233,8 +241,10 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -233,8 +241,10 @@ class RmspropOpKernel : public framework::OpKernel<T> {
if (centered) { if (centered) {
auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad"); auto &mg_tensor = *ctx.Input<LoDTensor>("MeanGrad");
auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut"); auto *mean_grad_out = ctx.Output<LoDTensor>("MeanGradOut");
PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out, PADDLE_ENFORCE_EQ(
"MeanGrad and MeanGradOut must be the same Tensor"); &mg_tensor, mean_grad_out,
platform::errors::InvalidArgument(
"MeanGrad and MeanGradOut must be the same Tensor"));
for_range(CenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>( for_range(CenteredRmspropFunctor<T, SparseRmspropGradFunctor<T>>(
param_out->mutable_data<T>(ctx.GetPlace()), param_out->mutable_data<T>(ctx.GetPlace()),
mean_square_out->mutable_data<T>(ctx.GetPlace()), mean_square_out->mutable_data<T>(ctx.GetPlace()),
...@@ -249,7 +259,12 @@ class RmspropOpKernel : public framework::OpKernel<T> { ...@@ -249,7 +259,12 @@ class RmspropOpKernel : public framework::OpKernel<T> {
rho, epsilon, momentum, grad_func)); rho, epsilon, momentum, grad_func));
} }
} else { } else {
PADDLE_THROW("RMSProp only supports LoDTensor or SelectedRows gradient"); PADDLE_ENFORCE_EQ(false, true,
platform::errors::PermissionDenied(
"Unsupported Variable Type of Grad "
"in RmspropOp. Excepted LodTensor "
"or SelectedRows, But received [%s]",
paddle::framework::ToTypeName(grad_var->Type())));
} }
} }
}; };
......
...@@ -22,23 +22,31 @@ class SGDOp : public framework::OperatorWithKernel { ...@@ -22,23 +22,31 @@ class SGDOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"), PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
"Input(Param) of SGDOp should not be null."); platform::errors::NotFound(
PADDLE_ENFORCE(ctx->HasInput("Grad"), "Input(Param) of SGDOp should not be null."));
"Input(Grad) of SGDOp should not be null."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE(ctx->HasInput("LearningRate"), ctx->HasInput("Grad"), true,
"Input(LearningRate) of SGDOp should not be null."); platform::errors::NotFound("Input(Grad) of SGDOp should not be null."));
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true,
"Output(ParamOut) of SGDOp should not be null."); platform::errors::NotFound(
"Input(LearningRate) of SGDOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
platform::errors::NotFound(
"Output(ParamOut) of SGDOp should not be null."));
auto lr_dims = ctx->GetInputDim("LearningRate"); auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, PADDLE_ENFORCE_NE(framework::product(lr_dims), 0,
"Maybe the Input variable LearningRate has not " platform::errors::NotFound(
"been initialized. You may need to confirm " "Maybe the Input variable LearningRate has not "
"if you put exe.run(startup_program) " "been initialized. You may need to confirm "
"after optimizer.minimize function."); "if you put exe.run(startup_program) "
"after optimizer.minimize function."));
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning rate should have 1 element"); platform::errors::InvalidArgument(
"Learning rate should have 1 element. But received "
"LearningRate dims [%s]",
framework::product(lr_dims)));
auto param_dim = ctx->GetInputDim("Param"); auto param_dim = ctx->GetInputDim("Param");
if (ctx->GetInputsVarType("Grad")[0] == if (ctx->GetInputsVarType("Grad")[0] ==
framework::proto::VarType::LOD_TENSOR) { framework::proto::VarType::LOD_TENSOR) {
......
...@@ -57,11 +57,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T> ...@@ -57,11 +57,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const auto* param_var = ctx.InputVar("Param"); const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(), PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
"The Var(%s)'s type should be LoDTensor, " platform::errors::InvalidArgument(
"but the received is %s", "The Var(%s)'s type should be LoDTensor, "
ctx.InputNames("Param").front(), "but the received is %s",
framework::ToTypeName(param_var->Type())); ctx.InputNames("Param").front(),
paddle::framework::ToTypeName(param_var->Type())));
auto* param = ctx.Input<framework::Tensor>("Param"); auto* param = ctx.Input<framework::Tensor>("Param");
auto* param_out = ctx.Output<framework::Tensor>("ParamOut"); auto* param_out = ctx.Output<framework::Tensor>("ParamOut");
...@@ -91,18 +92,30 @@ class SGDOpKernel<platform::CUDADeviceContext, T> ...@@ -91,18 +92,30 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
// TODO(qijun): In Sparse SGD operator, in-place update is enforced. // TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency. // This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution. // It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ(param, param_out); PADDLE_ENFORCE_EQ(
param, param_out,
platform::errors::InvalidArgument(
"The input tensor Param of SgdOp should be equal with ParamOut "
"if variable's type is SelectedRows."));
auto* grad = ctx.Input<framework::SelectedRows>("Grad"); auto* grad = ctx.Input<framework::SelectedRows>("Grad");
auto in_height = grad->height(); auto in_height = grad->height();
auto out_dims = param_out->dims(); auto out_dims = param_out->dims();
PADDLE_ENFORCE_EQ(in_height, out_dims[0]); PADDLE_ENFORCE_EQ(in_height, out_dims[0],
platform::errors::InvalidArgument(
"The input tensor Grad's height of SgdOp should be "
"equal with ParamOut's dims. But received Grad's "
"height [%s] and ParamOut's dims [%s]",
in_height, out_dims[0]));
auto& in_value = grad->value(); auto& in_value = grad->value();
auto& in_rows = grad->rows(); auto& in_rows = grad->rows();
int64_t in_row_numel = in_value.numel() / in_rows.size(); int64_t in_row_numel = in_value.numel() / in_rows.size();
PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height,
platform::errors::InvalidArgument(
"The in_row_numel of SgdOp should be equal with "
"param_out's numel / in_height."));
auto* in_data = in_value.data<T>(); auto* in_data = in_value.data<T>();
auto* out_data = param_out->data<T>(); auto* out_data = param_out->data<T>();
...@@ -118,7 +131,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T> ...@@ -118,7 +131,12 @@ class SGDOpKernel<platform::CUDADeviceContext, T>
out_data, in_row_numel, in_rows.size()); out_data, in_row_numel, in_rows.size());
} else { } else {
PADDLE_THROW("Unsupported Variable Type of Grad"); PADDLE_ENFORCE_EQ(false, true,
platform::errors::PermissionDenied(
"Unsupported Variable Type of Grad "
"in SgdOp. Excepted LodTensor or "
"SelectedRows, But received [%s]",
paddle::framework::ToTypeName(grad_var->Type())));
} }
} }
}; };
......
...@@ -44,8 +44,20 @@ class SGDOpKernel<platform::CPUDeviceContext, T> ...@@ -44,8 +44,20 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
if (grad_var->IsType<framework::LoDTensor>()) { if (grad_var->IsType<framework::LoDTensor>()) {
const auto *grad = ctx.Input<framework::Tensor>("Grad"); const auto *grad = ctx.Input<framework::Tensor>("Grad");
auto sz = param_out->numel(); auto sz = param_out->numel();
PADDLE_ENFORCE_EQ(param->numel(), sz); PADDLE_ENFORCE_EQ(param->numel(), sz,
PADDLE_ENFORCE_EQ(grad->numel(), sz); platform::errors::InvalidArgument(
"The input tensor Param's numel of SgdOp "
"should be equal with ParamOut's numel. "
"But received Param's "
"numel = [%s], ParamOut's numel = [%s]",
param->numel(), sz));
PADDLE_ENFORCE_EQ(grad->numel(), sz,
platform::errors::InvalidArgument(
"The input tensor Grad's numel of SgdOp "
"should be equal with ParamOut's numel. "
"But received Grad's "
"numel = [%s], ParamOut's numel = [%s]",
grad->numel(), sz));
jit::sgd_attr_t attr(1, sz, 1, sz, 1); jit::sgd_attr_t attr(1, sz, 1, sz, 1);
const T *lr = learning_rate->data<T>(); const T *lr = learning_rate->data<T>();
...@@ -62,7 +74,11 @@ class SGDOpKernel<platform::CPUDeviceContext, T> ...@@ -62,7 +74,11 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
// TODO(qijun): In Sparse SGD operator, in-place update is enforced. // TODO(qijun): In Sparse SGD operator, in-place update is enforced.
// This manual optimization brings difficulty to track data dependency. // This manual optimization brings difficulty to track data dependency.
// It's better to find a more elegant solution. // It's better to find a more elegant solution.
PADDLE_ENFORCE_EQ(param, param_out); PADDLE_ENFORCE_EQ(param, param_out,
platform::errors::InvalidArgument(
"The input tensor Param of SgdOp "
"should be equal with ParamOut if variable's "
"type is SelectedRows. "));
const auto *grad = ctx.Input<framework::SelectedRows>("Grad"); const auto *grad = ctx.Input<framework::SelectedRows>("Grad");
auto &grad_rows = grad->rows(); auto &grad_rows = grad->rows();
...@@ -73,7 +89,13 @@ class SGDOpKernel<platform::CPUDeviceContext, T> ...@@ -73,7 +89,13 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
} }
auto out_dims = param_out->dims(); auto out_dims = param_out->dims();
PADDLE_ENFORCE_EQ(grad->height(), out_dims[0]); PADDLE_ENFORCE_EQ(
grad->height(), out_dims[0],
platform::errors::InvalidArgument(
"The input tensor Grad's height of SgdOp "
"should be equal with ParamOut's dims. But received Grad's "
"height [%s] and ParamOut's dims [%s]",
grad->height(), out_dims[0]));
auto &grad_value = grad->value(); auto &grad_value = grad->value();
const T *param_data = param->data<T>(); const T *param_data = param->data<T>();
const T *grad_data = grad_value.data<T>(); const T *grad_data = grad_value.data<T>();
...@@ -87,19 +109,31 @@ class SGDOpKernel<platform::CPUDeviceContext, T> ...@@ -87,19 +109,31 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
attr.grad_height = grad_rows.size(); // note: it is not grad->height() attr.grad_height = grad_rows.size(); // note: it is not grad->height()
attr.grad_width = grad_value.numel() / attr.grad_height; attr.grad_width = grad_value.numel() / attr.grad_height;
attr.selected_rows_size = grad_rows.size(); attr.selected_rows_size = grad_rows.size();
PADDLE_ENFORCE_EQ(attr.grad_width, attr.param_width); PADDLE_ENFORCE_EQ(
attr.grad_width, attr.param_width,
platform::errors::InvalidArgument(
"The grad_value's numel of SgdOp "
"should be equal with param_out's numel. But received "
"grad_value's numel [%s] and param_out's numel [%s]",
attr.grad_width, attr.param_width));
auto sgd = auto sgd =
jit::KernelFuncs<jit::SgdTuple<T>, platform::CPUPlace>::Cache().At( jit::KernelFuncs<jit::SgdTuple<T>, platform::CPUPlace>::Cache().At(
attr); attr);
sgd(lr, param_data, grad_data, rows_data, out_data, &attr); sgd(lr, param_data, grad_data, rows_data, out_data, &attr);
} else { } else {
PADDLE_THROW("Unsupported Variable Type of Grad"); PADDLE_ENFORCE_EQ(
false, true,
platform::errors::PermissionDenied(
"Unsupported Variable Type of Grad in SgdOp. Excepted "
"LodTensor or SelectedRows, But received [%s]",
paddle::framework::ToTypeName(grad_var->Type())));
} }
} else if (param_var->IsType<framework::SelectedRows>()) { } else if (param_var->IsType<framework::SelectedRows>()) {
PADDLE_ENFORCE(grad_var->IsType<framework::SelectedRows>(), PADDLE_ENFORCE_EQ(grad_var->IsType<framework::SelectedRows>(), true,
"when param " platform::errors::InvalidArgument(
"is SelectedRows, gradient should also be SelectedRows"); "when param is SelectedRows, "
"gradient should also be SelectedRows"));
const auto &param = param_var->Get<framework::SelectedRows>(); const auto &param = param_var->Get<framework::SelectedRows>();
auto *param_out = ctx.Output<framework::SelectedRows>("ParamOut"); auto *param_out = ctx.Output<framework::SelectedRows>("ParamOut");
const auto &grad = grad_var->Get<framework::SelectedRows>(); const auto &grad = grad_var->Get<framework::SelectedRows>();
...@@ -112,27 +146,36 @@ class SGDOpKernel<platform::CPUDeviceContext, T> ...@@ -112,27 +146,36 @@ class SGDOpKernel<platform::CPUDeviceContext, T>
auto param_row_width = param.value().dims()[1]; auto param_row_width = param.value().dims()[1];
auto grad_row_width = grad.value().dims()[1]; auto grad_row_width = grad.value().dims()[1];
VLOG(4) << " param rows: " << param.rows().size() PADDLE_ENFORCE_EQ(
<< " param memory rows: " << param.value().dims()[0] param_row_width, grad_row_width,
<< " grad rows: " << grad.rows().size() platform::errors::InvalidArgument(
<< " grad memory rows: " << grad.value().dims()[0]; "The param_row in SgdOP should have the same size with grad_row. "
PADDLE_ENFORCE_EQ(param_row_width, grad_row_width, "But received param_row's width is [%s], and grad_row's width is "
"param_row should have the same size with grad_row"); "[%s]",
param_row_width, grad_row_width));
const auto *lr = learning_rate->data<T>(); const auto *lr = learning_rate->data<T>();
const auto *grad_data = grad.value().data<T>(); const auto *grad_data = grad.value().data<T>();
auto *out_data = param_out->mutable_value()->data<T>(); auto *out_data = param_out->mutable_value()->data<T>();
for (size_t i = 0; i < grad.rows().size(); i++) { for (size_t i = 0; i < grad.rows().size(); i++) {
int64_t id_index = param_out->AutoGrownIndex(grad.rows()[i], false); int64_t id_index = param_out->AutoGrownIndex(grad.rows()[i], false);
PADDLE_ENFORCE_GE(id_index, static_cast<int64_t>(0), PADDLE_ENFORCE_GE(
"id should be in the table"); id_index, static_cast<int64_t>(0),
platform::errors::InvalidArgument(
"The id in SgdOp should be >= 0. But recevied id_index is [%s]",
id_index));
for (int64_t j = 0; j < grad_row_width; j++) { for (int64_t j = 0; j < grad_row_width; j++) {
out_data[id_index * grad_row_width + j] -= out_data[id_index * grad_row_width + j] -=
lr[0] * grad_data[i * grad_row_width + j]; lr[0] * grad_data[i * grad_row_width + j];
} }
} }
} else { } else {
PADDLE_THROW("Unsupported Variable Type of Parameter"); PADDLE_ENFORCE_EQ(
false, true,
platform::errors::PermissionDenied(
"Unsupported Variable Type of Parameter in SgdOp. Excepted "
"LodTensor or SelectedRows, But received [%s]",
paddle::framework::ToTypeName(param_var->Type())));
} }
} }
}; };
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/xpu_info.h"
TEST(InitDevices, CPU) { TEST(InitDevices, CPU) {
using paddle::framework::InitDevices; using paddle::framework::InitDevices;
......
...@@ -15,9 +15,36 @@ ...@@ -15,9 +15,36 @@
#pragma once #pragma once
#ifdef PADDLE_WITH_XPU #ifdef PADDLE_WITH_XPU
#include <string>
#include <unordered_map>
#include "paddle/fluid/platform/errors.h"
#include "xpu/api.h" #include "xpu/api.h"
#include "xpu/runtime.h" #include "xpu/runtime.h"
#include "xpu/runtime_ex.h" #include "xpu/runtime_ex.h"
namespace xpu = baidu::xpu::api; namespace xpu = baidu::xpu::api;
class XPUActHelper {
public:
// Convert string to activation type in xpu
static xpu::Activation_t ConvertToXpuActType(
const std::string& act_type_str) {
static std::unordered_map<std::string, xpu::Activation_t> str2act = {
{"linear", xpu::Activation_t::LINEAR},
{"relu", xpu::Activation_t::RELU},
{"sigmoid", xpu::Activation_t::SIGMOID},
{"tanh", xpu::Activation_t::TANH},
{"gelu", xpu::Activation_t::GELU},
{"leaky_relu", xpu::Activation_t::LEAKY_RELU},
{"sqrt", xpu::Activation_t::SQRT},
{"square", xpu::Activation_t::SQUARE}};
auto res = str2act.find(act_type_str);
PADDLE_ENFORCE_NE(res, str2act.end(),
paddle::platform::errors::InvalidArgument(
"Invalid activation type(%s) in XPU", act_type_str));
return res->second;
}
};
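// A minimal usage sketch (hypothetical call site, not part of this
// diff): an op kernel can map its activation-type attribute string
// before launching the XPU kernel, e.g.
//   xpu::Activation_t act = XPUActHelper::ConvertToXpuActType("relu");
// An unmapped string such as "swish" trips the PADDLE_ENFORCE_NE above.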
#endif #endif
...@@ -230,7 +230,6 @@ from .framework import CPUPlace #DEFINE_ALIAS ...@@ -230,7 +230,6 @@ from .framework import CPUPlace #DEFINE_ALIAS
from .framework import CUDAPlace #DEFINE_ALIAS from .framework import CUDAPlace #DEFINE_ALIAS
from .framework import CUDAPinnedPlace #DEFINE_ALIAS from .framework import CUDAPinnedPlace #DEFINE_ALIAS
from .framework import to_variable #DEFINE_ALIAS
from .framework import grad #DEFINE_ALIAS from .framework import grad #DEFINE_ALIAS
from .framework import no_grad #DEFINE_ALIAS from .framework import no_grad #DEFINE_ALIAS
from .framework import save #DEFINE_ALIAS from .framework import save #DEFINE_ALIAS
...@@ -258,6 +257,8 @@ from .tensor.stat import numel #DEFINE_ALIAS ...@@ -258,6 +257,8 @@ from .tensor.stat import numel #DEFINE_ALIAS
from .device import get_cudnn_version from .device import get_cudnn_version
from .device import set_device from .device import set_device
from .device import get_device from .device import get_device
from .device import is_compiled_with_xpu
from .device import XPUPlace
# from .tensor.tensor import Tensor #DEFINE_ALIAS # from .tensor.tensor import Tensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensor #DEFINE_ALIAS # from .tensor.tensor import LoDTensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS
......
...@@ -22,7 +22,9 @@ from paddle.fluid.dygraph.parallel import ParallelEnv ...@@ -22,7 +22,9 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
__all__ = [ __all__ = [
'get_cudnn_version', 'get_cudnn_version',
'set_device', 'set_device',
'get_device' 'get_device',
'XPUPlace',
'is_compiled_with_xpu'
# 'cpu_places', # 'cpu_places',
# 'CPUPlace', # 'CPUPlace',
# 'cuda_pinned_places', # 'cuda_pinned_places',
...@@ -35,6 +37,37 @@ __all__ = [ ...@@ -35,6 +37,37 @@ __all__ = [
_cudnn_version = None _cudnn_version = None
def is_compiled_with_xpu():
"""
Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun.
Returns (bool): whether paddle was built with WITH_XPU=ON.
Examples:
.. code-block:: python
import paddle
support_xpu = paddle.device.is_compiled_with_xpu()
"""
return core.is_compiled_with_xpu()
def XPUPlace(dev_id):
"""
Return a Baidu Kunlun Place.
Parameters:
dev_id(int): Baidu Kunlun device id
Examples:
.. code-block:: python
import paddle
place = paddle.device.XPUPlace(0)
"""
return core.XPUPlace(dev_id)
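# A hedged usage sketch combining the two helpers above (illustrative
# only, not part of this diff): pick an XPU place when available and
# fall back to CPU otherwise.
#
#     import paddle
#     place = (paddle.device.XPUPlace(0)
#              if paddle.device.is_compiled_with_xpu()
#              else paddle.CPUPlace())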
def get_cudnn_version(): def get_cudnn_version():
""" """
This function returns the version of cuDNN. The return value is an int which represents the This function returns the version of cuDNN. The return value is an int which represents the
......
...@@ -3230,14 +3230,11 @@ class Flatten(layers.Layer): ...@@ -3230,14 +3230,11 @@ class Flatten(layers.Layer):
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle import to_variable
import numpy as np import numpy as np
paddle.disable_static()
inp_np = np.ones([5, 2, 3, 4]).astype('float32') inp_np = np.ones([5, 2, 3, 4]).astype('float32')
inp_np = paddle.to_tensor(inp_np)
paddle.disable_static()
inp_np = to_variable(inp_np)
flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2) flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
flatten_res = flatten(inp_np) flatten_res = flatten(inp_np)
......
...@@ -217,6 +217,9 @@ class OpTest(unittest.TestCase): ...@@ -217,6 +217,9 @@ class OpTest(unittest.TestCase):
return False return False
return True return True
def is_xpu_op_test():
return hasattr(cls, "use_xpu") and cls.use_xpu == True
def is_mkldnn_op_test(): def is_mkldnn_op_test():
return hasattr(cls, "use_mkldnn") and cls.use_mkldnn == True return hasattr(cls, "use_mkldnn") and cls.use_mkldnn == True
...@@ -239,6 +242,7 @@ class OpTest(unittest.TestCase): ...@@ -239,6 +242,7 @@ class OpTest(unittest.TestCase):
if cls.dtype in [np.float32, np.float64] \ if cls.dtype in [np.float32, np.float64] \
and cls.op_type not in op_accuracy_white_list.NO_FP64_CHECK_GRAD_OP_LIST \ and cls.op_type not in op_accuracy_white_list.NO_FP64_CHECK_GRAD_OP_LIST \
and not hasattr(cls, 'exist_fp64_check_grad') \ and not hasattr(cls, 'exist_fp64_check_grad') \
and not is_xpu_op_test() \
and not is_mkldnn_op_test(): and not is_mkldnn_op_test():
raise AssertionError( raise AssertionError(
"This test of %s op needs check_grad with fp64 precision." % "This test of %s op needs check_grad with fp64 precision." %
...@@ -336,6 +340,11 @@ class OpTest(unittest.TestCase): ...@@ -336,6 +340,11 @@ class OpTest(unittest.TestCase):
self.attrs["use_mkldnn"] == True): self.attrs["use_mkldnn"] == True):
self.__class__.use_mkldnn = True self.__class__.use_mkldnn = True
if (hasattr(self, "use_xpu") and self.use_xpu == True) or \
(hasattr(self, "attrs") and "use_xpu" in self.attrs and \
self.attrs["use_xpu"] == True):
self.__class__.use_xpu = True
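# Recording use_xpu on the test class (mirroring use_mkldnn) lets
# class-level hooks such as is_xpu_op_test() detect XPU runs.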
op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) op_proto = OpProtoHolder.instance().get_op_proto(self.op_type)
"infer datatype from inputs and outputs for this test case" "infer datatype from inputs and outputs for this test case"
self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs) self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
...@@ -932,6 +941,8 @@ class OpTest(unittest.TestCase): ...@@ -932,6 +941,8 @@ class OpTest(unittest.TestCase):
need_run_ops = self._get_need_run_ops(op_desc) need_run_ops = self._get_need_run_ops(op_desc)
res = {} res = {}
if hasattr(self, 'attrs') and bool(self.attrs.get('use_xpu', False)):
return
for op_desc, father_op_desc in reversed(need_run_ops): for op_desc, father_op_desc in reversed(need_run_ops):
# The first one is the forward op # The first one is the forward op
has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type()) has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type())
...@@ -1203,6 +1214,11 @@ class OpTest(unittest.TestCase): ...@@ -1203,6 +1214,11 @@ class OpTest(unittest.TestCase):
self.attrs["use_mkldnn"] == True): self.attrs["use_mkldnn"] == True):
self.__class__.use_mkldnn = True self.__class__.use_mkldnn = True
if (hasattr(self, "use_xpu") and self.use_xpu == True) or \
(hasattr(self, "attrs") and "use_xpu" in self.attrs and \
self.attrs["use_xpu"] == True):
self.__class__.use_xpu = True
places = self._get_places() places = self._get_places()
for place in places: for place in places:
res = self.check_output_with_place(place, atol, no_check_set, res = self.check_output_with_place(place, atol, no_check_set,
......
...@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase): ...@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
self.func(p) self.func(p)
class TestLogDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
eps = 1e-6
dtype = np.float64
x = layers.data('x', shape, False, dtype)
x.persistable = True
y = layers.log(x)
x_arr = np.random.uniform(0.1, 1, shape).astype(dtype)
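# x is sampled in [0.1, 1) so that log(x) and its first and second
# derivatives (1/x and -1/x^2) stay finite during the double-grad check.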
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -228,7 +228,7 @@ class TestTanhAPI(unittest.TestCase): ...@@ -228,7 +228,7 @@ class TestTanhAPI(unittest.TestCase):
def test_dygraph_api(self): def test_dygraph_api(self):
paddle.disable_static(self.place) paddle.disable_static(self.place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out1 = F.tanh(x) out1 = F.tanh(x)
out2 = paddle.tanh(x) out2 = paddle.tanh(x)
th = paddle.nn.Tanh() th = paddle.nn.Tanh()
...@@ -573,7 +573,7 @@ class TestHardShrinkAPI(unittest.TestCase): ...@@ -573,7 +573,7 @@ class TestHardShrinkAPI(unittest.TestCase):
def test_dygraph_api(self): def test_dygraph_api(self):
paddle.disable_static(self.place) paddle.disable_static(self.place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out1 = F.hardshrink(x) out1 = F.hardshrink(x)
hd = paddle.nn.Hardshrink() hd = paddle.nn.Hardshrink()
out2 = hd(x) out2 = hd(x)
...@@ -639,7 +639,7 @@ class TestHardtanhAPI(unittest.TestCase): ...@@ -639,7 +639,7 @@ class TestHardtanhAPI(unittest.TestCase):
def test_dygraph_api(self): def test_dygraph_api(self):
paddle.disable_static(self.place) paddle.disable_static(self.place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out1 = F.hardtanh(x) out1 = F.hardtanh(x)
m = paddle.nn.Hardtanh() m = paddle.nn.Hardtanh()
out2 = m(x) out2 = m(x)
...@@ -1063,7 +1063,7 @@ class TestLeakyReluAPI(unittest.TestCase): ...@@ -1063,7 +1063,7 @@ class TestLeakyReluAPI(unittest.TestCase):
def test_dygraph_api(self): def test_dygraph_api(self):
paddle.disable_static(self.place) paddle.disable_static(self.place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out1 = F.leaky_relu(x) out1 = F.leaky_relu(x)
m = paddle.nn.LeakyReLU() m = paddle.nn.LeakyReLU()
out2 = m(x) out2 = m(x)
......
...@@ -25,7 +25,7 @@ class TestAdamaxAPI(unittest.TestCase): ...@@ -25,7 +25,7 @@ class TestAdamaxAPI(unittest.TestCase):
def test_adamax_api_dygraph(self): def test_adamax_api_dygraph(self):
paddle.disable_static() paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32") value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_variable(value) a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5) linear = paddle.nn.Linear(13, 5)
adam = paddle.optimizer.Adamax( adam = paddle.optimizer.Adamax(
learning_rate=0.01, learning_rate=0.01,
......
...@@ -22,7 +22,7 @@ class TestAdamWOp(unittest.TestCase): ...@@ -22,7 +22,7 @@ class TestAdamWOp(unittest.TestCase):
def test_adamw_op_dygraph(self): def test_adamw_op_dygraph(self):
paddle.disable_static() paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32") value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_variable(value) a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5) linear = paddle.nn.Linear(13, 5)
adam = paddle.optimizer.AdamW( adam = paddle.optimizer.AdamW(
learning_rate=0.01, learning_rate=0.01,
...@@ -37,7 +37,7 @@ class TestAdamWOp(unittest.TestCase): ...@@ -37,7 +37,7 @@ class TestAdamWOp(unittest.TestCase):
def test_adamw_op_coverage(self): def test_adamw_op_coverage(self):
paddle.disable_static() paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32") value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_variable(value) a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5) linear = paddle.nn.Linear(13, 5)
adam = paddle.optimizer.AdamW( adam = paddle.optimizer.AdamW(
learning_rate=0.0, learning_rate=0.0,
......
...@@ -147,7 +147,7 @@ class TestAdaptiveAvgPool2dAPI(unittest.TestCase): ...@@ -147,7 +147,7 @@ class TestAdaptiveAvgPool2dAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out_1 = paddle.nn.functional.adaptive_avg_pool2d( out_1 = paddle.nn.functional.adaptive_avg_pool2d(
x=x, output_size=[3, 3]) x=x, output_size=[3, 3])
...@@ -245,7 +245,7 @@ class TestAdaptiveAvgPool2dClassAPI(unittest.TestCase): ...@@ -245,7 +245,7 @@ class TestAdaptiveAvgPool2dClassAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3]) adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3])
out_1 = adaptive_avg_pool(x=x) out_1 = adaptive_avg_pool(x=x)
......
...@@ -162,7 +162,7 @@ class TestAdaptiveAvgPool3dAPI(unittest.TestCase): ...@@ -162,7 +162,7 @@ class TestAdaptiveAvgPool3dAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out_1 = paddle.nn.functional.adaptive_avg_pool3d( out_1 = paddle.nn.functional.adaptive_avg_pool3d(
x=x, output_size=[3, 3, 3]) x=x, output_size=[3, 3, 3])
...@@ -262,7 +262,7 @@ class TestAdaptiveAvgPool3dClassAPI(unittest.TestCase): ...@@ -262,7 +262,7 @@ class TestAdaptiveAvgPool3dClassAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(
output_size=[3, 3, 3]) output_size=[3, 3, 3])
......
...@@ -147,7 +147,7 @@ class TestAdaptiveMaxPool2dAPI(unittest.TestCase): ...@@ -147,7 +147,7 @@ class TestAdaptiveMaxPool2dAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out_1 = paddle.nn.functional.adaptive_max_pool2d( out_1 = paddle.nn.functional.adaptive_max_pool2d(
x=x, return_indices=False, output_size=[3, 3]) x=x, return_indices=False, output_size=[3, 3])
...@@ -240,7 +240,7 @@ class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase): ...@@ -240,7 +240,7 @@ class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3]) adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3])
out_1 = adaptive_max_pool(x=x) out_1 = adaptive_max_pool(x=x)
......
...@@ -162,7 +162,7 @@ class TestAdaptiveMaxPool3dAPI(unittest.TestCase): ...@@ -162,7 +162,7 @@ class TestAdaptiveMaxPool3dAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
out_1 = paddle.nn.functional.adaptive_max_pool3d( out_1 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[3, 3, 3]) x=x, output_size=[3, 3, 3])
...@@ -257,7 +257,7 @@ class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase): ...@@ -257,7 +257,7 @@ class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]): if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place) paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np) x = paddle.to_tensor(self.x_np)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[3, 3, 3]) output_size=[3, 3, 3])
......
...@@ -244,9 +244,9 @@ class TestAddMMAPI(unittest.TestCase): ...@@ -244,9 +244,9 @@ class TestAddMMAPI(unittest.TestCase):
def test_error1(): def test_error1():
data_x_wrong = np.ones((2, 3)).astype(np.float32) data_x_wrong = np.ones((2, 3)).astype(np.float32)
x = paddle.to_variable(data_x_wrong) x = paddle.to_tensor(data_x_wrong)
y = paddle.to_variable(data_y) y = paddle.to_tensor(data_y)
input = paddle.to_variable(data_input) input = paddle.to_tensor(data_input)
out = paddle.tensor.addmm( input=input, x=x, y=y, beta=0.5, alpha=5.0 ) out = paddle.tensor.addmm( input=input, x=x, y=y, beta=0.5, alpha=5.0 )
self.assertRaises(ValueError, test_error1) self.assertRaises(ValueError, test_error1)
''' '''
......
...@@ -98,9 +98,9 @@ class TestArangeImperative(unittest.TestCase): ...@@ -98,9 +98,9 @@ class TestArangeImperative(unittest.TestCase):
x2 = paddle.tensor.arange(5) x2 = paddle.tensor.arange(5)
x3 = paddle.tensor.creation.arange(5) x3 = paddle.tensor.creation.arange(5)
start = paddle.to_variable(np.array([0], 'float32')) start = paddle.to_tensor(np.array([0], 'float32'))
end = paddle.to_variable(np.array([5], 'float32')) end = paddle.to_tensor(np.array([5], 'float32'))
step = paddle.to_variable(np.array([1], 'float32')) step = paddle.to_tensor(np.array([1], 'float32'))
x4 = paddle.arange(start, end, step, 'int64') x4 = paddle.arange(start, end, step, 'int64')
paddle.enable_static() paddle.enable_static()
......
...@@ -96,7 +96,7 @@ class TestDygraph(unittest.TestCase): ...@@ -96,7 +96,7 @@ class TestDygraph(unittest.TestCase):
a = np.random.rand(3, 3) a = np.random.rand(3, 3)
a_t = np.transpose(a, [1, 0]) a_t = np.transpose(a, [1, 0])
x_data = np.matmul(a, a_t) + 1e-03 x_data = np.matmul(a, a_t) + 1e-03
x = paddle.to_variable(x_data) x = paddle.to_tensor(x_data)
out = paddle.cholesky(x, upper=False) out = paddle.cholesky(x, upper=False)
......
...@@ -168,9 +168,9 @@ class TestClipAPI(unittest.TestCase): ...@@ -168,9 +168,9 @@ class TestClipAPI(unittest.TestCase):
paddle.disable_static(place) paddle.disable_static(place)
data_shape = [1, 9, 9, 4] data_shape = [1, 9, 9, 4]
data = np.random.random(data_shape).astype('float32') data = np.random.random(data_shape).astype('float32')
images = paddle.to_variable(data, dtype='float32') images = paddle.to_tensor(data, dtype='float32')
v_min = paddle.to_variable(np.array([0.2], dtype=np.float32)) v_min = paddle.to_tensor(np.array([0.2], dtype=np.float32))
v_max = paddle.to_variable(np.array([0.8], dtype=np.float32)) v_max = paddle.to_tensor(np.array([0.8], dtype=np.float32))
out_1 = paddle.clip(images, min=0.2, max=0.8) out_1 = paddle.clip(images, min=0.2, max=0.8)
out_2 = paddle.clip(images, min=0.2, max=0.9) out_2 = paddle.clip(images, min=0.2, max=0.9)
......
...@@ -285,9 +285,9 @@ class TestConcatAPI(unittest.TestCase): ...@@ -285,9 +285,9 @@ class TestConcatAPI(unittest.TestCase):
in2 = np.array([[11, 12, 13], [14, 15, 16]]) in2 = np.array([[11, 12, 13], [14, 15, 16]])
in3 = np.array([[21, 22], [23, 24]]) in3 = np.array([[21, 22], [23, 24]])
paddle.disable_static() paddle.disable_static()
x1 = paddle.to_variable(in1) x1 = paddle.to_tensor(in1)
x2 = paddle.to_variable(in2) x2 = paddle.to_tensor(in2)
x3 = paddle.to_variable(in3) x3 = paddle.to_tensor(in3)
out1 = fluid.layers.concat(input=[x1, x2, x3], axis=-1) out1 = fluid.layers.concat(input=[x1, x2, x3], axis=-1)
out2 = paddle.concat(x=[x1, x2], axis=0) out2 = paddle.concat(x=[x1, x2], axis=0)
np_out1 = np.concatenate([in1, in2, in3], axis=-1) np_out1 = np.concatenate([in1, in2, in3], axis=-1)
......
...@@ -75,8 +75,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): ...@@ -75,8 +75,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_x2 = np.random.rand(*shape).astype(np.float32) np_x2 = np.random.rand(*shape).astype(np.float32)
np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps)
tensor_x1 = paddle.to_variable(np_x1) tensor_x1 = paddle.to_tensor(np_x1)
tensor_x2 = paddle.to_variable(np_x2) tensor_x2 = paddle.to_tensor(np_x2)
y = F.cosine_similarity(tensor_x1, tensor_x2, axis=axis, eps=eps) y = F.cosine_similarity(tensor_x1, tensor_x2, axis=axis, eps=eps)
self.assertTrue(np.allclose(y.numpy(), np_out)) self.assertTrue(np.allclose(y.numpy(), np_out))
...@@ -92,8 +92,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): ...@@ -92,8 +92,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_x2 = np.random.rand(*shape).astype(np.float32) np_x2 = np.random.rand(*shape).astype(np.float32)
np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps)
tensor_x1 = paddle.to_variable(np_x1) tensor_x1 = paddle.to_tensor(np_x1)
tensor_x2 = paddle.to_variable(np_x2) tensor_x2 = paddle.to_tensor(np_x2)
y = F.cosine_similarity(tensor_x1, tensor_x2, axis=axis, eps=eps) y = F.cosine_similarity(tensor_x1, tensor_x2, axis=axis, eps=eps)
self.assertTrue(np.allclose(y.numpy(), np_out)) self.assertTrue(np.allclose(y.numpy(), np_out))
...@@ -110,8 +110,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): ...@@ -110,8 +110,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_x2 = np.random.rand(*shape2).astype(np.float32) np_x2 = np.random.rand(*shape2).astype(np.float32)
np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps)
tensor_x1 = paddle.to_variable(np_x1) tensor_x1 = paddle.to_tensor(np_x1)
tensor_x2 = paddle.to_variable(np_x2) tensor_x2 = paddle.to_tensor(np_x2)
y = F.cosine_similarity(tensor_x1, tensor_x2, axis=axis, eps=eps) y = F.cosine_similarity(tensor_x1, tensor_x2, axis=axis, eps=eps)
self.assertTrue(np.allclose(y.numpy(), np_out)) self.assertTrue(np.allclose(y.numpy(), np_out))
...@@ -129,8 +129,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): ...@@ -129,8 +129,8 @@ class TestCosineSimilarityAPI(unittest.TestCase):
np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps)
cos_sim_func = nn.CosineSimilarity(axis=axis, eps=eps) cos_sim_func = nn.CosineSimilarity(axis=axis, eps=eps)
tensor_x1 = paddle.to_variable(np_x1) tensor_x1 = paddle.to_tensor(np_x1)
tensor_x2 = paddle.to_variable(np_x2) tensor_x2 = paddle.to_tensor(np_x2)
y = cos_sim_func(tensor_x1, tensor_x2) y = cos_sim_func(tensor_x1, tensor_x2)
self.assertTrue(np.allclose(y.numpy(), np_out)) self.assertTrue(np.allclose(y.numpy(), np_out))
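The `_get_numpy_out` helper these cases compare against is not part of the excerpt; a plausible numpy reference, assuming it clamps the denominator with `eps` the way `F.cosine_similarity` documents (hypothetical reconstruction, not the repository's helper):

import numpy as np

def cosine_similarity_ref(x1, x2, axis=1, eps=1e-8):
    # cos(theta) = <x1, x2> / max(|x1| * |x2|, eps), reduced along `axis`.
    w12 = np.sum(x1 * x2, axis=axis)
    w1 = np.sqrt(np.sum(x1 * x1, axis=axis))
    w2 = np.sqrt(np.sum(x2 * x2, axis=axis))
    return w12 / np.maximum(w1 * w2, eps)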
......
...@@ -21,13 +21,12 @@ import paddle ...@@ -21,13 +21,12 @@ import paddle
import paddle.fluid.core as core import paddle.fluid.core as core
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard from paddle.fluid import compiler, Program, program_guard
from paddle import to_variable
class TestCumsumOp(unittest.TestCase): class TestCumsumOp(unittest.TestCase):
def run_cases(self): def run_cases(self):
data_np = np.arange(12).reshape(3, 4) data_np = np.arange(12).reshape(3, 4)
data = to_variable(data_np) data = paddle.to_tensor(data_np)
y = paddle.cumsum(data) y = paddle.cumsum(data)
z = np.cumsum(data_np) z = np.cumsum(data_np)
......
...@@ -20,7 +20,6 @@ import paddle ...@@ -20,7 +20,6 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear from paddle.fluid.dygraph import Linear
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle import to_variable
class TestDefaultType(unittest.TestCase): class TestDefaultType(unittest.TestCase):
......
...@@ -36,7 +36,7 @@ class TestDirectory(unittest.TestCase): ...@@ -36,7 +36,7 @@ class TestDirectory(unittest.TestCase):
def test_new_directory(self): def test_new_directory(self):
new_directory = [ new_directory = [
'paddle.enable_static', 'paddle.disable_static', 'paddle.enable_static', 'paddle.disable_static',
'paddle.in_dynamic_mode', 'paddle.to_variable', 'paddle.grad', 'paddle.in_dynamic_mode', 'paddle.to_tensor', 'paddle.grad',
'paddle.no_grad', 'paddle.save', 'paddle.load', 'paddle.no_grad', 'paddle.save', 'paddle.load',
'paddle.static.save', 'paddle.static.load', 'paddle.static.save', 'paddle.static.load',
'paddle.distributed.ParallelEnv', 'paddle.distributed.ParallelEnv',
......
...@@ -195,7 +195,7 @@ class TestFlattenPython(unittest.TestCase): ...@@ -195,7 +195,7 @@ class TestFlattenPython(unittest.TestCase):
def test_Negative(): def test_Negative():
paddle.disable_static() paddle.disable_static()
img = paddle.to_variable(x) img = paddle.to_tensor(x)
out = paddle.flatten(img, start_axis=-2, stop_axis=-1) out = paddle.flatten(img, start_axis=-2, stop_axis=-1)
return out.numpy().shape return out.numpy().shape
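With negative axes, `start_axis=-2, stop_axis=-1` merges only the last two dimensions. A sketch of the expected shape (editorial; the `x` in this hunk is defined outside the excerpt):

import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.zeros((2, 3, 4, 5), dtype='float32'))
out = paddle.flatten(x, start_axis=-2, stop_axis=-1)
print(out.numpy().shape)  # (2, 3, 20)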
......
...@@ -211,7 +211,7 @@ class TestImperative(unittest.TestCase): ...@@ -211,7 +211,7 @@ class TestImperative(unittest.TestCase):
paddle.disable_static() paddle.disable_static()
self.assertTrue(paddle.in_dynamic_mode()) self.assertTrue(paddle.in_dynamic_mode())
np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
var_inp = paddle.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
mlp = MLP(input_size=2) mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out1 = out.numpy() dy_out1 = out.numpy()
...@@ -221,7 +221,7 @@ class TestImperative(unittest.TestCase): ...@@ -221,7 +221,7 @@ class TestImperative(unittest.TestCase):
self.assertFalse(paddle.in_dynamic_mode()) self.assertFalse(paddle.in_dynamic_mode())
paddle.disable_static() paddle.disable_static()
self.assertTrue(paddle.in_dynamic_mode()) self.assertTrue(paddle.in_dynamic_mode())
var_inp = paddle.to_variable(np_inp) var_inp = paddle.to_tensor(np_inp)
mlp = MLP(input_size=2) mlp = MLP(input_size=2)
out = mlp(var_inp) out = mlp(var_inp)
dy_out2 = out.numpy() dy_out2 = out.numpy()
......
...@@ -54,7 +54,7 @@ class TestSimpleNet(unittest.TestCase): ...@@ -54,7 +54,7 @@ class TestSimpleNet(unittest.TestCase):
# grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0) # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)
input_word = np.array([[1, 2], [2, 1]]).astype('int64') input_word = np.array([[1, 2], [2, 1]]).astype('int64')
input = paddle.to_variable(input_word) input = paddle.to_tensor(input_word)
simplenet = SimpleNet(20, 32, dtype) simplenet = SimpleNet(20, 32, dtype)
adam = SGDOptimizer( adam = SGDOptimizer(
......
...@@ -41,7 +41,7 @@ def run_dygraph(x_np, op_str, use_gpu=True): ...@@ -41,7 +41,7 @@ def run_dygraph(x_np, op_str, use_gpu=True):
if use_gpu and fluid.core.is_compiled_with_cuda(): if use_gpu and fluid.core.is_compiled_with_cuda():
place = paddle.CUDAPlace(0) place = paddle.CUDAPlace(0)
paddle.disable_static(place) paddle.disable_static(place)
x = paddle.to_variable(x_np) x = paddle.to_tensor(x_np)
dygraph_result = getattr(paddle.tensor, op_str)(x) dygraph_result = getattr(paddle.tensor, op_str)(x)
return dygraph_result return dygraph_result
......
...@@ -543,9 +543,9 @@ class TestJitSaveMultiCases(unittest.TestCase): ...@@ -543,9 +543,9 @@ class TestJitSaveMultiCases(unittest.TestCase):
loaded_layer = paddle.jit.load(model_path) loaded_layer = paddle.jit.load(model_path)
loaded_layer.eval() loaded_layer.eval()
# inference & compare # inference & compare
x = paddle.to_variable(np.random.random((1, 784)).astype('float32')) x = paddle.to_tensor(np.random.random((1, 784)).astype('float32'))
if with_label: if with_label:
y = paddle.to_variable(np.random.random((1, 1)).astype('int64')) y = paddle.to_tensor(np.random.random((1, 1)).astype('int64'))
pred, _ = layer(x, y) pred, _ = layer(x, y)
pred = pred.numpy() pred = pred.numpy()
else: else:
...@@ -677,7 +677,7 @@ class TestJitSaveMultiCases(unittest.TestCase): ...@@ -677,7 +677,7 @@ class TestJitSaveMultiCases(unittest.TestCase):
model_path = "test_not_prune_output_spec_name_warning" model_path = "test_not_prune_output_spec_name_warning"
configs = paddle.SaveLoadConfig() configs = paddle.SaveLoadConfig()
out = paddle.to_variable(np.random.random((1, 1)).astype('float')) out = paddle.to_tensor(np.random.random((1, 1)).astype('float'))
configs.output_spec = [out] configs.output_spec = [out]
paddle.jit.save(layer, model_path, configs=configs) paddle.jit.save(layer, model_path, configs=configs)
...@@ -709,7 +709,7 @@ class TestJitSaveMultiCases(unittest.TestCase): ...@@ -709,7 +709,7 @@ class TestJitSaveMultiCases(unittest.TestCase):
model_path = "test_prune_to_static_after_train" model_path = "test_prune_to_static_after_train"
configs = paddle.SaveLoadConfig() configs = paddle.SaveLoadConfig()
out = paddle.to_variable(np.random.random((1, 1)).astype('float')) out = paddle.to_tensor(np.random.random((1, 1)).astype('float'))
configs.output_spec = [out] configs.output_spec = [out]
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
paddle.jit.save( paddle.jit.save(
...@@ -730,7 +730,7 @@ class TestJitSaveLoadEmptyLayer(unittest.TestCase): ...@@ -730,7 +730,7 @@ class TestJitSaveLoadEmptyLayer(unittest.TestCase):
def test_save_load_empty_layer(self): def test_save_load_empty_layer(self):
layer = EmptyLayer() layer = EmptyLayer()
x = paddle.to_variable(np.random.random((10)).astype('float32')) x = paddle.to_tensor(np.random.random((10)).astype('float32'))
out = layer(x) out = layer(x)
paddle.jit.save(layer, self.model_path) paddle.jit.save(layer, self.model_path)
load_layer = paddle.jit.load(self.model_path) load_layer = paddle.jit.load(self.model_path)
...@@ -746,8 +746,8 @@ class TestJitSaveLoadNoParamLayer(unittest.TestCase): ...@@ -746,8 +746,8 @@ class TestJitSaveLoadNoParamLayer(unittest.TestCase):
def test_save_load_no_param_layer(self): def test_save_load_no_param_layer(self):
layer = NoParamLayer() layer = NoParamLayer()
x = paddle.to_variable(np.random.random((5)).astype('float32')) x = paddle.to_tensor(np.random.random((5)).astype('float32'))
y = paddle.to_variable(np.random.random((5)).astype('float32')) y = paddle.to_tensor(np.random.random((5)).astype('float32'))
out = layer(x, y) out = layer(x, y)
paddle.jit.save(layer, self.model_path) paddle.jit.save(layer, self.model_path)
load_layer = paddle.jit.load(self.model_path) load_layer = paddle.jit.load(self.model_path)
......
...@@ -90,7 +90,7 @@ class TestKLDivLossDygraph(unittest.TestCase): ...@@ -90,7 +90,7 @@ class TestKLDivLossDygraph(unittest.TestCase):
with paddle.fluid.dygraph.guard(): with paddle.fluid.dygraph.guard():
kldiv_criterion = paddle.nn.KLDivLoss(reduction) kldiv_criterion = paddle.nn.KLDivLoss(reduction)
pred_loss = kldiv_criterion( pred_loss = kldiv_criterion(
paddle.to_variable(x), paddle.to_variable(target)) paddle.to_tensor(x), paddle.to_tensor(target))
self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss)) self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss))
def test_kl_loss_batchmean(self): def test_kl_loss_batchmean(self):
......
...@@ -26,8 +26,8 @@ class TestFunctionalL1Loss(unittest.TestCase): ...@@ -26,8 +26,8 @@ class TestFunctionalL1Loss(unittest.TestCase):
self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32) self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32)
def run_imperative(self): def run_imperative(self):
input = paddle.to_variable(self.input_np) input = paddle.to_tensor(self.input_np)
label = paddle.to_variable(self.label_np) label = paddle.to_tensor(self.label_np)
dy_result = paddle.nn.functional.l1_loss(input, label) dy_result = paddle.nn.functional.l1_loss(input, label)
expected = np.mean(np.abs(self.input_np - self.label_np)) expected = np.mean(np.abs(self.input_np - self.label_np))
self.assertTrue(np.allclose(dy_result.numpy(), expected)) self.assertTrue(np.allclose(dy_result.numpy(), expected))
...@@ -106,8 +106,8 @@ class TestClassL1Loss(unittest.TestCase): ...@@ -106,8 +106,8 @@ class TestClassL1Loss(unittest.TestCase):
self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32) self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32)
def run_imperative(self): def run_imperative(self):
input = paddle.to_variable(self.input_np) input = paddle.to_tensor(self.input_np)
label = paddle.to_variable(self.label_np) label = paddle.to_tensor(self.label_np)
l1_loss = paddle.nn.loss.L1Loss() l1_loss = paddle.nn.loss.L1Loss()
dy_result = l1_loss(input, label) dy_result = l1_loss(input, label)
expected = np.mean(np.abs(self.input_np - self.label_np)) expected = np.mean(np.abs(self.input_np - self.label_np))
......
...@@ -96,7 +96,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase): ...@@ -96,7 +96,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase):
# test dygraph api # test dygraph api
paddle.disable_static() paddle.disable_static()
x = paddle.to_variable(self.x) x = paddle.to_tensor(self.x)
y = logsoftmax(x) y = logsoftmax(x)
self.assertTrue(np.allclose(y.numpy(), ref_out)) self.assertTrue(np.allclose(y.numpy(), ref_out))
paddle.enable_static() paddle.enable_static()
...@@ -127,7 +127,7 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase): ...@@ -127,7 +127,7 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase):
self.assertTrue(np.allclose(out[0], ref_out)) self.assertTrue(np.allclose(out[0], ref_out))
paddle.disable_static() paddle.disable_static()
x = paddle.to_variable(self.x) x = paddle.to_tensor(self.x)
y = F.log_softmax(x, axis, dtype) y = F.log_softmax(x, axis, dtype)
self.assertTrue(np.allclose(y.numpy(), ref_out)) self.assertTrue(np.allclose(y.numpy(), ref_out))
paddle.enable_static() paddle.enable_static()
......
...@@ -111,7 +111,7 @@ class TestLogsumexpAPI(unittest.TestCase): ...@@ -111,7 +111,7 @@ class TestLogsumexpAPI(unittest.TestCase):
self.assertTrue(np.allclose(res[0], out_ref)) self.assertTrue(np.allclose(res[0], out_ref))
paddle.disable_static(self.place) paddle.disable_static(self.place)
x = paddle.to_variable(self.x) x = paddle.to_tensor(self.x)
out = paddle.logsumexp(x, axis, keepdim) out = paddle.logsumexp(x, axis, keepdim)
self.assertTrue(np.allclose(out.numpy(), out_ref)) self.assertTrue(np.allclose(out.numpy(), out_ref))
paddle.enable_static() paddle.enable_static()
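For reference, logsumexp(x) = log(sum(exp(x))), and the stable evaluation shifts by the maximum first; scipy's `scipy.special.logsumexp` computes the same quantity. A numpy sketch of the reduction the test presumably checks against (editorial):

import numpy as np

def logsumexp_ref(x, axis=None, keepdim=False):
    # Shift by the max so exp() cannot overflow; the shift cancels in the log.
    m = np.max(x, axis=axis, keepdims=True)
    out = m + np.log(np.sum(np.exp(x - m), axis=axis, keepdims=True))
    return out if keepdim else np.squeeze(out, axis=axis)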
...@@ -126,7 +126,7 @@ class TestLogsumexpAPI(unittest.TestCase): ...@@ -126,7 +126,7 @@ class TestLogsumexpAPI(unittest.TestCase):
def test_alias(self): def test_alias(self):
paddle.disable_static(self.place) paddle.disable_static(self.place)
x = paddle.to_variable(self.x) x = paddle.to_tensor(self.x)
out1 = paddle.logsumexp(x) out1 = paddle.logsumexp(x)
out2 = paddle.tensor.logsumexp(x) out2 = paddle.tensor.logsumexp(x)
out3 = paddle.tensor.math.logsumexp(x) out3 = paddle.tensor.math.logsumexp(x)
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import print_function from __future__ import print_function
import paddle.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
......
...@@ -80,7 +80,7 @@ class ApiMaxTest(unittest.TestCase): ...@@ -80,7 +80,7 @@ class ApiMaxTest(unittest.TestCase):
def test_imperative_api(self): def test_imperative_api(self):
paddle.disable_static() paddle.disable_static()
np_x = np.array([10, 10]).astype('float64') np_x = np.array([10, 10]).astype('float64')
x = paddle.to_variable(np_x) x = paddle.to_tensor(np_x)
z = paddle.max(x, axis=0) z = paddle.max(x, axis=0)
np_z = z.numpy() np_z = z.numpy()
z_expected = np.array(np.max(np_x, axis=0)) z_expected = np.array(np.max(np_x, axis=0))
......
...@@ -61,8 +61,8 @@ class ApiMaximumTest(unittest.TestCase): ...@@ -61,8 +61,8 @@ class ApiMaximumTest(unittest.TestCase):
def test_dynamic_api(self): def test_dynamic_api(self):
paddle.disable_static() paddle.disable_static()
np_x = np.array([10, 10]).astype('float64') np_x = np.array([10, 10]).astype('float64')
x = paddle.to_variable(self.input_x) x = paddle.to_tensor(self.input_x)
y = paddle.to_variable(self.input_y) y = paddle.to_tensor(self.input_y)
z = paddle.maximum(x, y) z = paddle.maximum(x, y)
np_z = z.numpy() np_z = z.numpy()
z_expected = np.array(np.maximum(self.input_x, self.input_y)) z_expected = np.array(np.maximum(self.input_x, self.input_y))
...@@ -73,8 +73,8 @@ class ApiMaximumTest(unittest.TestCase): ...@@ -73,8 +73,8 @@ class ApiMaximumTest(unittest.TestCase):
np_x = np.random.rand(5, 4, 3, 2).astype("float64") np_x = np.random.rand(5, 4, 3, 2).astype("float64")
np_y = np.random.rand(4, 3).astype("float64") np_y = np.random.rand(4, 3).astype("float64")
x = paddle.to_variable(self.input_x) x = paddle.to_tensor(self.input_x)
y = paddle.to_variable(self.input_y) y = paddle.to_tensor(self.input_y)
result_1 = paddle.maximum(x, y, axis=1) result_1 = paddle.maximum(x, y, axis=1)
result_2 = paddle.maximum(x, y, axis=-2) result_2 = paddle.maximum(x, y, axis=-2)
self.assertEqual((result_1.numpy() == result_2.numpy()).all(), True) self.assertEqual((result_1.numpy() == result_2.numpy()).all(), True)
...@@ -204,7 +204,7 @@ class TestMeanAPI(unittest.TestCase): ...@@ -204,7 +204,7 @@ class TestMeanAPI(unittest.TestCase):
paddle.disable_static(self.place) paddle.disable_static(self.place)
def test_case(x, axis=None, keepdim=False): def test_case(x, axis=None, keepdim=False):
x_tensor = paddle.to_variable(x) x_tensor = paddle.to_tensor(x)
out = paddle.mean(x_tensor, axis, keepdim) out = paddle.mean(x_tensor, axis, keepdim)
if isinstance(axis, list): if isinstance(axis, list):
axis = tuple(axis) axis = tuple(axis)
......
...@@ -80,7 +80,7 @@ class ApiMinTest(unittest.TestCase): ...@@ -80,7 +80,7 @@ class ApiMinTest(unittest.TestCase):
def test_imperative_api(self): def test_imperative_api(self):
paddle.disable_static() paddle.disable_static()
np_x = np.array([10, 10]).astype('float64') np_x = np.array([10, 10]).astype('float64')
x = paddle.to_variable(np_x) x = paddle.to_tensor(np_x)
z = paddle.min(x, axis=0) z = paddle.min(x, axis=0)
np_z = z.numpy() np_z = z.numpy()
z_expected = np.array(np.min(np_x, axis=0)) z_expected = np.array(np.min(np_x, axis=0))
......
...@@ -18,6 +18,8 @@ import unittest ...@@ -18,6 +18,8 @@ import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid.core as core import paddle.fluid.core as core
import sys
sys.path.append("..")
from op_test import OpTest from op_test import OpTest
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import Program, program_guard from paddle.fluid import Program, program_guard
...@@ -175,57 +177,5 @@ class TestFP16MulOp2(TestMulOp2): ...@@ -175,57 +177,5 @@ class TestFP16MulOp2(TestMulOp2):
no_grad_set=set('Y')) no_grad_set=set('Y'))
@unittest.skipIf(not core.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUMulOp1(TestMulOp):
def init_dtype_type(self):
self.dtype = np.float32
def test_check_output(self):
place = core.XPUPlace(0)
self.check_output_with_place(place, atol=1e-1)
def test_check_grad_normal(self):
place = core.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=0.5)
def test_check_grad_ingore_x(self):
place = core.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
place = core.XPUPlace(0)
self.check_grad_with_place(
place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
@unittest.skipIf(not core.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUMulOp2(TestMulOp2):
def init_dtype_type(self):
self.dtype = np.float32
def test_check_output(self):
place = core.XPUPlace(0)
self.check_output_with_place(place, atol=2e-1)
def test_check_grad_normal(self):
place = core.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=0.9)
def test_check_grad_ingore_x(self):
place = core.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
def test_check_grad_ingore_y(self):
place = core.XPUPlace(0)
self.check_grad_with_place(
place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y'))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -63,7 +63,7 @@ class TestRandnOpForDygraph(unittest.TestCase): ...@@ -63,7 +63,7 @@ class TestRandnOpForDygraph(unittest.TestCase):
dim_2 = paddle.fill_constant([1], "int32", 50) dim_2 = paddle.fill_constant([1], "int32", 50)
x3 = paddle.randn(shape=[dim_1, dim_2, 784]) x3 = paddle.randn(shape=[dim_1, dim_2, 784])
var_shape = paddle.to_variable(np.array(shape)) var_shape = paddle.to_tensor(np.array(shape))
x4 = paddle.randn(var_shape) x4 = paddle.randn(var_shape)
for out in [x1, x2, x3, x4]: for out in [x1, x2, x3, x4]:
......
...@@ -105,8 +105,8 @@ class TestRetainGraph(unittest.TestCase): ...@@ -105,8 +105,8 @@ class TestRetainGraph(unittest.TestCase):
A = np.random.rand(2, 3, 32, 32).astype('float32') A = np.random.rand(2, 3, 32, 32).astype('float32')
B = np.random.rand(2, 3, 32, 32).astype('float32') B = np.random.rand(2, 3, 32, 32).astype('float32')
realA = paddle.to_variable(A) realA = paddle.to_tensor(A)
realB = paddle.to_variable(B) realB = paddle.to_tensor(B)
fakeB = g(realA) fakeB = g(realA)
optim_d.clear_gradients() optim_d.clear_gradients()
......
...@@ -487,24 +487,24 @@ class TestTransformer(unittest.TestCase): ...@@ -487,24 +487,24 @@ class TestTransformer(unittest.TestCase):
dropout=dropout, dropout=dropout,
weight_attr=[None], weight_attr=[None],
bias_attr=[False]) bias_attr=[False])
src = paddle.to_variable( src = paddle.to_tensor(
np.random.rand(batch_size, source_length, d_model).astype( np.random.rand(batch_size, source_length, d_model).astype(
"float32")) "float32"))
tgt = paddle.to_variable( tgt = paddle.to_tensor(
np.random.rand(batch_size, target_length, d_model).astype( np.random.rand(batch_size, target_length, d_model).astype(
"float32")) "float32"))
src_mask = np.zeros((batch_size, n_head, source_length, src_mask = np.zeros((batch_size, n_head, source_length,
source_length)).astype("float32") source_length)).astype("float32")
src_mask[0][0][0][0] = -np.inf src_mask[0][0][0][0] = -np.inf
src_mask = paddle.to_variable(src_mask) src_mask = paddle.to_tensor(src_mask)
tgt_mask = np.zeros((batch_size, n_head, target_length, tgt_mask = np.zeros((batch_size, n_head, target_length,
target_length)).astype("float32") target_length)).astype("float32")
tgt_mask[0][0][0][0] = -1e9 tgt_mask[0][0][0][0] = -1e9
memory_mask = np.zeros((batch_size, n_head, target_length, memory_mask = np.zeros((batch_size, n_head, target_length,
source_length)).astype("float32") source_length)).astype("float32")
memory_mask[0][0][0][0] = -1e9 memory_mask[0][0][0][0] = -1e9
tgt_mask, memory_mask = paddle.to_variable( tgt_mask, memory_mask = paddle.to_tensor(
tgt_mask), paddle.to_variable(memory_mask) tgt_mask), paddle.to_tensor(memory_mask)
trans_output = transformer(src, tgt, src_mask, tgt_mask, trans_output = transformer(src, tgt, src_mask, tgt_mask,
memory_mask) memory_mask)
...@@ -521,24 +521,24 @@ class TestTransformer(unittest.TestCase): ...@@ -521,24 +521,24 @@ class TestTransformer(unittest.TestCase):
dropout=dropout, dropout=dropout,
weight_attr=[None, None], weight_attr=[None, None],
bias_attr=[False, False]) bias_attr=[False, False])
src = paddle.to_variable( src = paddle.to_tensor(
np.random.rand(batch_size, source_length, d_model).astype( np.random.rand(batch_size, source_length, d_model).astype(
"float32")) "float32"))
tgt = paddle.to_variable( tgt = paddle.to_tensor(
np.random.rand(batch_size, target_length, d_model).astype( np.random.rand(batch_size, target_length, d_model).astype(
"float32")) "float32"))
src_mask = np.zeros((batch_size, n_head, source_length, src_mask = np.zeros((batch_size, n_head, source_length,
source_length)).astype("float32") source_length)).astype("float32")
src_mask[0][0][0][0] = -np.inf src_mask[0][0][0][0] = -np.inf
src_mask = paddle.to_variable(src_mask) src_mask = paddle.to_tensor(src_mask)
tgt_mask = np.zeros((batch_size, n_head, target_length, tgt_mask = np.zeros((batch_size, n_head, target_length,
target_length)).astype("float32") target_length)).astype("float32")
tgt_mask[0][0][0][0] = -1e9 tgt_mask[0][0][0][0] = -1e9
memory_mask = np.zeros((batch_size, n_head, target_length, memory_mask = np.zeros((batch_size, n_head, target_length,
source_length)).astype("float32") source_length)).astype("float32")
memory_mask[0][0][0][0] = -1e9 memory_mask[0][0][0][0] = -1e9
tgt_mask, memory_mask = paddle.to_variable( tgt_mask, memory_mask = paddle.to_tensor(
tgt_mask), paddle.to_variable(memory_mask) tgt_mask), paddle.to_tensor(memory_mask)
trans_output = transformer(src, tgt, src_mask, tgt_mask, trans_output = transformer(src, tgt, src_mask, tgt_mask,
memory_mask) memory_mask)
...@@ -555,24 +555,24 @@ class TestTransformer(unittest.TestCase): ...@@ -555,24 +555,24 @@ class TestTransformer(unittest.TestCase):
dropout=dropout, dropout=dropout,
weight_attr=[None, None, None], weight_attr=[None, None, None],
bias_attr=[False, False, True]) bias_attr=[False, False, True])
src = paddle.to_variable( src = paddle.to_tensor(
np.random.rand(batch_size, source_length, d_model).astype( np.random.rand(batch_size, source_length, d_model).astype(
"float32")) "float32"))
tgt = paddle.to_variable( tgt = paddle.to_tensor(
np.random.rand(batch_size, target_length, d_model).astype( np.random.rand(batch_size, target_length, d_model).astype(
"float32")) "float32"))
src_mask = np.zeros((batch_size, n_head, source_length, src_mask = np.zeros((batch_size, n_head, source_length,
source_length)).astype("float32") source_length)).astype("float32")
src_mask[0][0][0][0] = -np.inf src_mask[0][0][0][0] = -np.inf
src_mask = paddle.to_variable(src_mask) src_mask = paddle.to_tensor(src_mask)
tgt_mask = np.zeros((batch_size, n_head, target_length, tgt_mask = np.zeros((batch_size, n_head, target_length,
target_length)).astype("float32") target_length)).astype("float32")
tgt_mask[0][0][0][0] = -1e9 tgt_mask[0][0][0][0] = -1e9
memory_mask = np.zeros((batch_size, n_head, target_length, memory_mask = np.zeros((batch_size, n_head, target_length,
source_length)).astype("float32") source_length)).astype("float32")
memory_mask[0][0][0][0] = -1e9 memory_mask[0][0][0][0] = -1e9
tgt_mask, memory_mask = paddle.to_variable( tgt_mask, memory_mask = paddle.to_tensor(
tgt_mask), paddle.to_variable(memory_mask) tgt_mask), paddle.to_tensor(memory_mask)
trans_output = transformer(src, tgt, src_mask, tgt_mask, trans_output = transformer(src, tgt, src_mask, tgt_mask,
memory_mask) memory_mask)
...@@ -588,24 +588,24 @@ class TestTransformer(unittest.TestCase): ...@@ -588,24 +588,24 @@ class TestTransformer(unittest.TestCase):
dim_feedforward=dim_feedforward, dim_feedforward=dim_feedforward,
dropout=dropout, dropout=dropout,
bias_attr=False) bias_attr=False)
src = paddle.to_variable( src = paddle.to_tensor(
np.random.rand(batch_size, source_length, d_model).astype( np.random.rand(batch_size, source_length, d_model).astype(
"float32")) "float32"))
tgt = paddle.to_variable( tgt = paddle.to_tensor(
np.random.rand(batch_size, target_length, d_model).astype( np.random.rand(batch_size, target_length, d_model).astype(
"float32")) "float32"))
src_mask = np.zeros((batch_size, n_head, source_length, src_mask = np.zeros((batch_size, n_head, source_length,
source_length)).astype("float32") source_length)).astype("float32")
src_mask[0][0][0][0] = -np.inf src_mask[0][0][0][0] = -np.inf
src_mask = paddle.to_variable(src_mask) src_mask = paddle.to_tensor(src_mask)
tgt_mask = np.zeros((batch_size, n_head, target_length, tgt_mask = np.zeros((batch_size, n_head, target_length,
target_length)).astype("float32") target_length)).astype("float32")
tgt_mask[0][0][0][0] = -1e9 tgt_mask[0][0][0][0] = -1e9
memory_mask = np.zeros((batch_size, n_head, target_length, memory_mask = np.zeros((batch_size, n_head, target_length,
source_length)).astype("float32") source_length)).astype("float32")
memory_mask[0][0][0][0] = -1e9 memory_mask[0][0][0][0] = -1e9
tgt_mask, memory_mask = paddle.to_variable( tgt_mask, memory_mask = paddle.to_tensor(
tgt_mask), paddle.to_variable(memory_mask) tgt_mask), paddle.to_tensor(memory_mask)
trans_output = transformer(src, tgt, src_mask, tgt_mask, trans_output = transformer(src, tgt, src_mask, tgt_mask,
memory_mask) memory_mask)
......
...@@ -63,7 +63,7 @@ class TestZerosLikeImpeartive(unittest.TestCase): ...@@ -63,7 +63,7 @@ class TestZerosLikeImpeartive(unittest.TestCase):
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace() ) else fluid.CPUPlace()
paddle.disable_static(place) paddle.disable_static(place)
x = paddle.to_variable(np.ones(shape)) x = paddle.to_tensor(np.ones(shape))
for dtype in [np.bool, np.float32, np.float64, np.int32, np.int64]: for dtype in [np.bool, np.float32, np.float64, np.int32, np.int64]:
out = zeros_like(x, dtype) out = zeros_like(x, dtype)
self.assertEqual((out.numpy() == np.zeros(shape, dtype)).all(), self.assertEqual((out.numpy() == np.zeros(shape, dtype)).all(),
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
from scipy.special import expit, erf
import paddle
import paddle.fluid as fluid
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.fluid import compiler, Program, program_guard
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUActivation(OpTest):
def setUp(self):
self.op_type = "exp"
self.init_dtype()
self.init_kernel_type()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.exp(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=1e-3)
def init_kernel_type(self):
pass
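# Editorial note: each activation case below only overrides setUp with its
# op_type and a numpy reference output; test_check_output is inherited from
# TestXPUActivation, so every op is validated on XPUPlace(0) against numpy
# with atol=1e-3.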
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSigmoid(TestXPUActivation):
def setUp(self):
self.op_type = "sigmoid"
self.init_dtype()
x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
out = 1 / (1 + np.exp(-x))
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'], 'Out', max_relative_error=0.01)
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUTanh(TestXPUActivation):
def setUp(self):
self.op_type = "tanh"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.tanh(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSqrt(TestXPUActivation):
def setUp(self):
self.op_type = "sqrt"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.sqrt(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUAbs(TestXPUActivation):
def setUp(self):
self.op_type = "abs"
self.init_dtype()
x = np.random.uniform(-1, 1, [4, 25]).astype(self.dtype)
# Because we set delta = 0.005 in calculating numeric gradient,
# if x is too small, such as 0.002, x_neg will be -0.003
# x_pos will be 0.007, so the numeric gradient is inaccurate.
# we should avoid this
x[np.abs(x) < 0.005] = 0.02
out = np.abs(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPURelu(TestXPUActivation):
def setUp(self):
self.op_type = "relu"
self.init_dtype()
x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
# The same reason with TestAbs
x[np.abs(x) < 0.005] = 0.02
out = np.maximum(x, 0)
self.attrs = {'use_xpu': True}
self.inputs = {'X': x}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUGelu(TestXPUActivation):
def setUp(self):
self.op_type = "gelu"
self.init_dtype()
approximate = False
x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
out = gelu(x, approximate)
self.inputs = {'X': x}
self.outputs = {'Out': out}
self.attrs = {"approximate": approximate, 'use_xpu': True}
def gelu(x, approximate):
if approximate:
y_ref = 0.5 * x * (1.0 + np.tanh(
np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3))))
else:
y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2)))
return y_ref.astype(x.dtype)
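# Editorial note: the two branches above are the exact GELU,
# 0.5 * x * (1 + erf(x / sqrt(2))), and the tanh approximation of
# Hendrycks & Gimpel; they agree to within about 1e-3, e.g.
#
#   x = np.linspace(-3, 3, 7).astype("float32")
#   assert np.allclose(gelu(x, True), gelu(x, False), atol=1e-3)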
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPULog(TestXPUActivation):
def setUp(self):
self.op_type = "log"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.log(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSquare(TestXPUActivation):
def setUp(self):
self.op_type = "square"
self.init_dtype()
x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype)
out = np.square(x)
self.attrs = {'use_xpu': True}
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUPow(TestXPUActivation):
def setUp(self):
self.op_type = "pow"
self.init_dtype()
x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype)
out = np.power(x, 3)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.attrs = {'factor': 3.0, 'use_xpu': True}
self.outputs = {'Out': out}
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
from op_test import OpTest, skip_check_grad_ci
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
class TestElementwiseAddOp(OpTest):
def init_kernel_type(self):
self.use_mkldnn = False
def setUp(self):
self.op_type = "elementwise_add"
self.init_dtype()
self.init_input_output()
self.init_kernel_type()
self.init_axis()
self.inputs = {
'X': OpTest.np_dtype_to_fluid_dtype(self.x),
'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
}
self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
self.outputs = {'Out': self.out}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_normal(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_x(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['Y'],
'Out',
no_grad_set=set("X"),
check_dygraph=(self.use_mkldnn == False))
def test_check_grad_ingore_y(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
if self.dtype == np.float16:
return
self.check_grad(
['X'],
'Out',
no_grad_set=set('Y'),
check_dygraph=(self.use_mkldnn == False))
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_dtype(self):
self.dtype = np.float64
def init_axis(self):
self.axis = -1
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUElementwiseAddOp(OpTest):
def setUp(self):
self.op_type = "elementwise_add"
self.init_dtype()
self.init_input_output()
self.init_axis()
self.inputs = {'X': self.x, 'Y': self.y}
self.attrs = {'axis': self.axis, 'use_mkldnn': False, 'use_xpu': True}
self.outputs = {'Out': self.out}
def test_check_output(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad_normal(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X', 'Y'], 'Out')
def test_check_grad_ingore_x(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['Y'], 'Out')
def test_check_grad_ingore_y(self):
if self.dtype == np.float32 and paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out')
def init_input_output(self):
self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
self.out = np.add(self.x, self.y)
def init_dtype(self):
self.dtype = np.float32
def init_axis(self):
self.axis = -1
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_scalar(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.y = np.random.rand(1).astype(self.dtype)
self.out = self.x + self.y
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1,1) to test broadcast.")
class TestElementwiseAddOp_scalar2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 4).astype(self.dtype)
self.y = np.random.rand(1, 1).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_Vector(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.random((100, )).astype(self.dtype)
self.y = np.random.random((100, )).astype(self.dtype)
self.out = np.add(self.x, self.y)
class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(100, 1, 1)
def init_axis(self):
self.axis = 0
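# Editorial note on the `axis` attribute: axis=k left-aligns y's shape with
# dimension k of x, equivalent to numpy broadcasting after an explicit
# reshape. For the case above, with x of shape (100, 2, 3):
#
#   x + y.reshape(100, 1, 1)   # same as elementwise_add(x, y, axis=0)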
class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 100, 3).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 100, 1)
def init_axis(self):
self.axis = 1
class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 100).astype(self.dtype)
self.y = np.random.rand(100).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 1, 100)
class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype)
self.y = np.random.rand(10, 12).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 10, 12, 1)
def init_axis(self):
self.axis = 1
class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype)
self.y = np.random.rand(100, 1).astype(self.dtype)
self.out = self.x + self.y.reshape(100, 1, 1, 1)
def init_axis(self):
self.axis = 0
class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(10, 3, 12).astype(self.dtype)
self.y = np.random.rand(10, 1, 12).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype)
self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype)
self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype)
self.out = self.x + self.y
class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 10, 12).astype(self.dtype)
self.y = np.random.rand(10, 12).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 10, 12)
def init_axis(self):
self.axis = 1
@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast.")
class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 1).astype(self.dtype)
self.y = np.random.rand(1).astype(self.dtype)
self.out = self.x + self.y.reshape(1, 1)
def init_axis(self):
self.axis = 1
class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(100, 2, 3).astype(self.dtype)
self.y = np.random.rand(100, 1, 1).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = -1
class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(2, 3, 100).astype(self.dtype)
self.y = np.random.rand(1, 1, 100).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = -1
class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype)
self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = -1
class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.rand(10, 12).astype(self.dtype)
self.y = np.random.rand(2, 3, 10, 12).astype(self.dtype)
self.out = self.x + self.y
def init_axis(self):
self.axis = 2
class TestElementwiseAddOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# the input of elementwise_add must be Variable.
x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
y1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1)
# the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64
# float16 only can be set on GPU place
x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="uint8")
y2 = fluid.layers.data(name='y2', shape=[3, 4, 5, 6], dtype="uint8")
self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2)
class TestAddOp(unittest.TestCase):
def test_name(self):
with fluid.program_guard(fluid.Program()):
x = fluid.data(name="x", shape=[2, 3], dtype="float32")
y = fluid.data(name='y', shape=[2, 3], dtype='float32')
y_1 = paddle.add(x, y, name='add_res')
self.assertEqual(('add_res' in y_1.name), True)
def test_declarative(self):
with fluid.program_guard(fluid.Program()):
def gen_data():
return {
"x": np.array([2, 3, 4]).astype('float32'),
"y": np.array([1, 5, 2]).astype('float32')
}
x = fluid.data(name="x", shape=[3], dtype='float32')
y = fluid.data(name="y", shape=[3], dtype='float32')
z = paddle.add(x, y)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
z_value = exe.run(feed=gen_data(), fetch_list=[z.name])
z_expected = np.array([3., 8., 6.])
self.assertEqual((z_value == z_expected).all(), True)
def test_dygraph(self):
with fluid.dygraph.guard():
np_x = np.array([2, 3, 4]).astype('float64')
np_y = np.array([1, 5, 2]).astype('float64')
x = fluid.dygraph.to_variable(np_x)
y = fluid.dygraph.to_variable(np_y)
z = paddle.add(x, y)
np_z = z.numpy()
z_expected = np.array([3., 8., 6.])
self.assertEqual((np_z == z_expected).all(), True)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
sys.path.append("..")
import paddle.fluid.core as core
import unittest
import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y):
BATCH_SIZE = 2
M = 3
N = 4
K = 5
if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y):
K = 1
if dim_X == 1:
if transpose_X:
shape_X = [M]
else:
shape_X = [K]
if dim_Y == 1:
if transpose_Y:
shape_Y = [N]
else:
shape_Y = [K]
if dim_X >= 2:
if transpose_X:
shape_X = [K, M]
else:
shape_X = [M, K]
if dim_X == 3:
shape_X = [BATCH_SIZE] + shape_X
if dim_Y >= 2:
if transpose_Y:
shape_Y = [N, K]
else:
shape_Y = [K, N]
if dim_Y == 3:
shape_Y = [BATCH_SIZE] + shape_Y
return shape_X, shape_Y
def reference_matmul(X, Y, transpose_X=False, transpose_Y=False):
"""Reference forward implementation using np.matmul."""
# np.matmul does not support the transpose flags, so we manually
# transpose X and Y appropriately.
if transpose_X:
if X.ndim == 1:
X = X.reshape((X.size, 1))
elif X.ndim == 2:
X = X.T
else:
dim = [i for i in range(len(X.shape))]
dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1]
X = np.transpose(X, tuple(dim))
if transpose_Y:
if Y.ndim == 1:
Y = Y.reshape((1, Y.size))
else:
dim = [i for i in range(len(Y.shape))]
dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1]
Y = np.transpose(Y, tuple(dim))
Out = np.matmul(X, Y)
if not Out.shape:
# We do not support 0-dimensional Tensors (scalars). So where
# np.matmul outputs a scalar, we must convert to a Tensor of
# shape (1, ) instead.
# Everywhere else, we are compatible with np.matmul.
Out = np.array([Out], dtype="float32")
return Out
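# Quick sanity check for the helper above (editorial, illustrative only):
#
#   X = np.random.rand(2, 3, 5).astype("float32")
#   Y = np.random.rand(2, 4, 5).astype("float32")
#   reference_matmul(X, Y, transpose_Y=True).shape   # (2, 3, 4)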
class Generator(object):
def setUp(self):
self.op_type = "matmul"
X = np.random.random(self.shape_X).astype("float32")
Y = np.random.random(self.shape_Y).astype("float32")
Out = reference_matmul(X, Y, self.transpose_X, self.transpose_Y)
self.inputs = {'X': X, 'Y': Y}
self.attrs = {
'transpose_X': self.transpose_X,
'transpose_Y': self.transpose_Y
}
self.outputs = {'Out': Out}
def test_check_output(self):
self.check_output()
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_output_with_place(place, atol=1e-3)
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X', 'Y'], 'Out', max_relative_error=5e-2)
def test_check_grad_ignore_x(self):
self.check_grad(
['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X"))
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['Y'],
'Out',
max_relative_error=5e-2,
no_grad_set=set("X"))
def test_check_grad_ignore_y(self):
self.check_grad(
['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y'))
if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len(
self.inputs['Y'].shape) and self.inputs['X'].shape[
0] == self.inputs['Y'].shape[0]:
place = paddle.XPUPlace(0)
self.check_grad_with_place(
place, ['X'],
'Out',
max_relative_error=5e-2,
no_grad_set=set('Y'))
class TestMatmulOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# The inputs type of matmul_op must be Variable.
input1 = 12
self.assertRaises(TypeError, fluid.layers.matmul, input1, input1)
# The inputs dtype of matmul_op must be float32, float64.
input2 = fluid.layers.data(
name='input2', shape=[10, 10], dtype="int32")
self.assertRaises(TypeError, fluid.layers.matmul, input2, input2)
input3 = fluid.layers.data(
name='input3', shape=[2, 2], dtype="float16")
fluid.layers.matmul(input3, input3)
# Negative dimension generation
def generate_negative_dims(in_shape):
from itertools import combinations
size = len(in_shape)
indexs = list()
shapes = list()
for i in range(size):
indexs.extend(list(combinations([j for j in range(size)], i + 1)))
for idx in indexs:
shapes.append(
[in_shape[i] if i not in idx else -1 for i in range(size)])
return shapes
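# For example (editorial): generate_negative_dims([2, 3]) returns every way
# of masking dimensions with -1, i.e. [[-1, 3], [2, -1], [-1, -1]], so the
# program below is built and checked once per partially-unknown shape.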
# Build program with inputs sizes that contain negative numbers
def test_negative_dims_program(obj):
for shape_x in generate_negative_dims(obj.shape_X):
for shape_y in generate_negative_dims(obj.shape_Y):
X = np.random.random(obj.shape_X).astype("float32")
Y = np.random.random(obj.shape_Y).astype("float32")
Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
with program_guard(Program(), Program()):
x = fluid.data(name='x', shape=shape_x, dtype='float32')
y = fluid.data(name='y', shape=shape_y, dtype='float32')
output = fluid.layers.matmul(x, y, obj.transpose_X,
obj.transpose_Y)
obj.assertEqual(len(Ref.shape), len(output.shape))
for idx in range(len(Ref.shape)):
if output.shape[idx] != -1:
obj.assertEqual(Ref.shape[idx], output.shape[idx])
exe = fluid.Executor(fluid.CPUPlace())
res, = exe.run(fluid.default_main_program(),
feed={'x': X,
'y': Y},
fetch_list=[output])
np.allclose(res, Ref, atol=1e-5)
# Generate program api cases for all negative possibilities
def api_test(dim_x, dim_y, trans_x, trans_y):
test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim_x, dim_y, trans_x, trans_y))
shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
trans_y)
globals()[test_name] = type(test_name, (unittest.TestCase, ), {
'shape_X': shape_x,
'shape_Y': shape_y,
'transpose_X': trans_x,
'transpose_Y': trans_y,
'test_program': test_negative_dims_program,
})
# Generate operators cases for all possibilities
def inject_test(dim_x, dim_y, trans_x, trans_y):
test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim_x, dim_y, trans_x, trans_y))
shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
trans_y)
globals()[test_name] = type(test_name, (Generator, OpTest), {
'shape_X': shape_x,
'shape_Y': shape_y,
'transpose_X': trans_x,
'transpose_Y': trans_y,
})
for dim_X in (1, 2, 3):
for dim_Y in (1, 2, 3):
for transpose_x in (False, True):
for transpose_y in (False, True):
inject_test(dim_X, dim_Y, transpose_x, transpose_y)
api_test(dim_X, dim_Y, transpose_x, transpose_y)
# Test case n-dim
def generate_compatible_shapes(dim, transpose_X, transpose_Y):
M = 2
N = 4
K = 3
shape_X = [2 for _ in range(dim - 2)]
shape_Y = [2 for _ in range(dim - 2)]
if transpose_X:
shape_X += [K, M]
else:
shape_X += [M, K]
if transpose_Y:
shape_Y += [N, K]
else:
shape_Y += [K, N]
return shape_X, shape_Y
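# e.g. (editorial): generate_compatible_shapes(4, False, True) yields
# shape_X = [2, 2, 2, 3] and shape_Y = [2, 2, 4, 3]; with transpose_Y the
# batched matmul contracts K = 3 and produces a (2, 2, 2, 4) output.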
# # Test case n-dim
for dim in [4]:
for transpose_X in [False, True]:
for transpose_Y in [False, True]:
test_name = (
'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
dim, dim, transpose_X, transpose_Y))
shape_X, shape_Y = generate_compatible_shapes(dim, transpose_X,
transpose_Y)
globals()[test_name] = type(test_name, (Generator, OpTest), {
'shape_X': shape_X,
'shape_Y': shape_Y,
'transpose_X': transpose_X,
'transpose_Y': transpose_Y,
})
class API_TestMm(unittest.TestCase):
def test_out(self):
with fluid.program_guard(fluid.Program()):
x = fluid.data(name="x", shape=[2], dtype="float64")
y = fluid.data(name='y', shape=[2], dtype='float64')
res = fluid.data(name="output", shape=[1], dtype="float64")
result = paddle.mm(x, y)
exe = fluid.Executor(fluid.CPUPlace())
data1 = np.random.rand(2)
data2 = np.random.rand(2)
np_res = exe.run(feed={'x': data1, 'y': data2}, fetch_list=[result])
expected_result = np.matmul(
data1.reshape(1, 2), data2.reshape(2, 1))
self.assertTrue(
np.allclose(
np_res, expected_result, atol=1e-5),
"two value is\
{}\n{}, check diff!".format(np_res, expected_result))
def test_dygraph_without_out(self):
device = fluid.CPUPlace()
with fluid.dygraph.guard(device):
input_array1 = np.random.rand(3, 4).astype("float64")
input_array2 = np.random.rand(4, 3).astype("float64")
data1 = fluid.dygraph.to_variable(input_array1)
data2 = fluid.dygraph.to_variable(input_array2)
out = paddle.mm(data1, data2)
expected_result = np.matmul(input_array1, input_array2)
self.assertTrue(np.allclose(expected_result, out.numpy()))
class Test_API_Matmul(unittest.TestCase):
def test_dygraph_without_out(self):
device = fluid.CPUPlace()
with fluid.dygraph.guard(device):
input_array1 = np.random.rand(3, 4).astype("float64")
input_array2 = np.random.rand(4, 3).astype("float64")
data1 = fluid.dygraph.to_variable(input_array1)
data2 = fluid.dygraph.to_variable(input_array2)
out = paddle.matmul(data1, data2)
expected_result = np.matmul(input_array1, input_array2)
self.assertTrue(np.allclose(expected_result, out.numpy()))
class API_TestMmError(unittest.TestCase):
def test_errors(self):
def test_error1():
with fluid.program_guard(fluid.Program(), fluid.Program()):
data1 = fluid.data(name="data1", shape=[10, 2], dtype="float32")
data2 = fluid.data(name="data2", shape=[3, 10], dtype="float32")
paddle.mm(data1, data2)
self.assertRaises(ValueError, test_error1)
def test_error2():
with fluid.program_guard(fluid.Program(), fluid.Program()):
data1 = fluid.data(
name="data1", shape=[-1, 10, 2], dtype="float32")
data2 = fluid.data(
name="data2", shape=[-1, 2, 10], dtype="float32")
paddle.mm(data1, data2)
test_error2()
def test_error3():
with fluid.program_guard(fluid.Program(), fluid.Program()):
data1 = fluid.data(
name="data1", shape=[10, 10, 2], dtype="float32")
data2 = fluid.data(
name="data2", shape=[3, 2, 10], dtype="float32")
paddle.mm(data1, data2)
self.assertRaises(ValueError, test_error3)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import sys
sys.path.append("..")
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
class TestMulOp(OpTest):
    def setUp(self):
        self.op_type = "mul"
        self.dtype = np.float64
        self.init_dtype_type()
        self.inputs = {
            'X': np.random.random((20, 5)).astype(self.dtype),
            'Y': np.random.random((5, 21)).astype(self.dtype)
        }
        self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}

    def init_dtype_type(self):
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))


class TestMulOpError(unittest.TestCase):
    def test_errors(self):
        with program_guard(Program(), Program()):
            # The input type of mul_op must be Variable.
            x1 = fluid.create_lod_tensor(
                np.array([[-1]]), [[1]], fluid.CPUPlace())
            x2 = fluid.create_lod_tensor(
                np.array([[-1]]), [[1]], fluid.CPUPlace())
            self.assertRaises(TypeError, fluid.layers.mul, x1, x2)
            # The input dtype of mul_op must be float32 or float64.
            x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32")
            x4 = fluid.layers.data(name='x4', shape=[4], dtype="int32")
            self.assertRaises(TypeError, fluid.layers.mul, x3, x4)


class TestMulOp2(OpTest):
    def setUp(self):
        self.op_type = "mul"
        self.dtype = np.float64
        self.init_dtype_type()
        self.inputs = {
            'X': np.random.random((3, 4, 2, 9)).astype(self.dtype),
            'Y': np.random.random((3, 6, 1, 2, 3)).astype(self.dtype)
        }
        self.attrs = {
            'x_num_col_dims': 2,
            'y_num_col_dims': 2,
        }
        # Flatten X to (3*4, 2*9) and Y to (3*6, 1*2*3), multiply, then
        # restore X's leading dims followed by Y's trailing dims.
        result = np.dot(self.inputs['X'].reshape(3 * 4, 2 * 9),
                        self.inputs['Y'].reshape(3 * 6, 1 * 2 * 3))
        result = result.reshape(3, 4, 1, 2, 3)
        self.outputs = {'Out': result}

    def init_dtype_type(self):
        pass

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X', 'Y'], 'Out')

    def test_check_grad_ingore_x(self):
        self.check_grad(
            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X'))

    def test_check_grad_ignore_y(self):
        self.check_grad(
            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
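# Editor's sketch (not part of the original file): TestMulOp2's expected
# output above encodes the mul op's flattening rule -- X is collapsed to a
# 2-D matrix at x_num_col_dims, Y at y_num_col_dims, the matrices are
# multiplied, and the product is reshaped to X's leading dims followed by
# Y's trailing dims. The hypothetical helper below restates that rule with
# numpy only, e.g. (3, 4, 2, 9) x (3, 6, 1, 2, 3) -> (3, 4, 1, 2, 3).
def _mul_reference(x, y, x_num_col_dims, y_num_col_dims):
    x2d = x.reshape(int(np.prod(x.shape[:x_num_col_dims])), -1)  # (12, 18)
    y2d = y.reshape(int(np.prod(y.shape[:y_num_col_dims])), -1)  # (18, 6)
    out = np.dot(x2d, y2d)                                       # (12, 6)
    return out.reshape(*x.shape[:x_num_col_dims], *y.shape[y_num_col_dims:])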
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUMulOp1(TestMulOp):
    def init_dtype_type(self):
        self.dtype = np.float32

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, atol=1e-1)

    def test_check_grad_normal(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X', 'Y'], 'Out', max_relative_error=0.5)

    def test_check_grad_ingore_x(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))


@unittest.skipIf(not paddle.is_compiled_with_xpu(),
                 "core is not compiled with XPU")
class TestXPUMulOp2(TestMulOp2):
    def init_dtype_type(self):
        self.dtype = np.float32

    def test_check_output(self):
        place = paddle.XPUPlace(0)
        self.check_output_with_place(place, atol=2e-1)

    def test_check_grad_normal(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X', 'Y'], 'Out', max_relative_error=0.9)

    def test_check_grad_ingore_x(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))

    def test_check_grad_ingore_y(self):
        place = paddle.XPUPlace(0)
        self.check_grad_with_place(
            place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y'))


if __name__ == "__main__":
    unittest.main()
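(Editor's note.) The XPU variants reuse the CPU test bodies but run them via
check_output_with_place / check_grad_with_place with much looser tolerances
(atol up to 2e-1, max_relative_error up to 0.9), since the float32 XPU
kernels are compared against a float64 CPU reference. A rough sketch of the
kind of relative-error metric such a threshold bounds (an assumed formula
for illustration, not necessarily OpTest's exact one):

    import numpy as np

    def max_relative_error(actual, expected, eps=1e-6):
        # Largest elementwise |actual - expected|, scaled by |expected|
        # (guarded by eps to avoid dividing by zero).
        diff = np.abs(actual - expected)
        return float(np.max(diff / np.maximum(np.abs(expected), eps)))

    a = np.array([1.00, 2.10])
    b = np.array([1.05, 2.00])
    print(max_relative_error(a, b))  # ~0.05, comfortably under a 0.5 bound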
@@ -707,20 +707,14 @@ def cross(x, y, axis=None, name=None):
     Examples:
         .. code-block:: python
             import paddle
-            from paddle import to_variable
-            import numpy as np
             paddle.disable_static()
-            data_x = np.array([[1.0, 1.0, 1.0],
-                               [2.0, 2.0, 2.0],
-                               [3.0, 3.0, 3.0]])
-            data_y = np.array([[1.0, 1.0, 1.0],
-                               [1.0, 1.0, 1.0],
-                               [1.0, 1.0, 1.0]])
-            x = to_variable(data_x)
-            y = to_variable(data_y)
+            x = paddle.to_tensor([[1.0, 1.0, 1.0],
+                                  [2.0, 2.0, 2.0],
+                                  [3.0, 3.0, 3.0]])
+            y = paddle.to_tensor([[1.0, 1.0, 1.0],
+                                  [1.0, 1.0, 1.0],
+                                  [1.0, 1.0, 1.0]])
             z1 = paddle.cross(x, y)
             print(z1.numpy())
             # [[-1. -1. -1.]
...
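(Editor's note.) The updated example's output can be cross-checked with
NumPy: with axis left unset, paddle.cross here operates along the first
dimension of length 3, which for this 3x3 case matches np.cross applied to
the column vectors (a hedged equivalence, not stated in the patch itself):

    import numpy as np

    x = np.array([[1.0, 1.0, 1.0],
                  [2.0, 2.0, 2.0],
                  [3.0, 3.0, 3.0]])
    y = np.ones((3, 3))
    # Cross product of each column of x with the matching column of y.
    print(np.cross(x, y, axisa=0, axisb=0, axisc=0))
    # [[-1. -1. -1.]
    #  [ 2.  2.  2.]
    #  [-1. -1. -1.]]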
@@ -1650,12 +1650,11 @@ def cumsum(x, axis=None, dtype=None, name=None):
     .. code-block:: python
         import paddle
-        from paddle import to_variable
         import numpy as np
         paddle.disable_static()
         data_np = np.arange(12).reshape(3, 4)
-        data = to_variable(data_np)
+        data = paddle.to_tensor(data_np)
         y = paddle.cumsum(data)
         print(y.numpy())
...
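(Editor's note.) Both docstring hunks apply the same migration: the
pre-2.0 fluid.dygraph.to_variable(ndarray) idiom gives way to
paddle.to_tensor, which accepts Python lists and ndarrays directly. A
minimal sketch of the equivalence, assuming both APIs are importable as in
this codebase:

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.disable_static()
    arr = np.arange(12).reshape(3, 4)
    old_style = fluid.dygraph.to_variable(arr)  # idiom being removed
    new_style = paddle.to_tensor(arr)           # idiom used by the new docs
    print(np.array_equal(old_style.numpy(), new_style.numpy()))  # True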
@@ -251,9 +251,10 @@
     "BilinearTensorProduct",
     "GroupNorm",
     "SpectralNorm",
-    "TreeConv",
+    "TreeConv"
+],
+"wlist_temp":[
     "prroi_pool",
+    "to_tensor",
     "ChunkEvaluator",
     "EditDistance",
     "ErrorClipByValue",
...
@@ -406,7 +407,9 @@
     "TransformerDecoder.prepare_incremental_cache",
     "LinearChainCRF.forward",
     "CRFDecoding.forward",
-    "SequenceTagging.forward"
+    "SequenceTagging.forward",
+    "XPUPlace",
+    "is_compiled_with_xpu"
 ],
 "gpu_not_white":[
     "deformable_conv",
...
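(Editor's note.) These hunks split the whitelist, opening a "wlist_temp"
bucket, and register to_tensor, XPUPlace, and is_compiled_with_xpu so that
the docstring-sample checker skips them (their examples cannot run on
CPU-only CI). A hypothetical sketch of how a checker might consume such a
file; the wlist.json filename and bucket names are assumed from the hunk,
not confirmed by it:

    import json

    def collect_skippable(path="wlist.json"):
        # Union the buckets whose samples should not be executed.
        with open(path) as f:
            wlist = json.load(f)
        skip = set()
        for key in ("wlist_temp", "gpu_not_white"):
            skip.update(wlist.get(key, []))
        return skip

    # e.g. "XPUPlace" in collect_skippable() -> sample skipped on CPU-only CI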