diff --git a/cmake/phi.cmake b/cmake/phi.cmake index ebb686d8ad0f31917e64161d6f7d2ecd4644fadd..1c4dd723b9b71ffaab33599eb13ee8235393a097 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -118,7 +118,7 @@ function(kernel_library TARGET) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP) list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) endif() if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc) @@ -151,6 +151,9 @@ function(kernel_library TARGET) file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc ) + list(APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) + endif() endif() else() # TODO(chenweihang): impl compile by source later diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc index 49e517dc9b3f3271ef26dfbece46f799ef805c57..3c237b76e64b0d15ba86a20a2308e968557d6800 100644 --- a/paddle/fluid/eager/tests/task_tests/generated_test.cc +++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc @@ -35,6 +35,8 @@ PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_grad, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sigmoid, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sigmoid_grad, CPU, ALL_LAYOUT); namespace egr { diff --git a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc index b86865e2d126fbfc0b00495a6e3208932ac6de39..8524be7800bfdda4f588007f8f21548e2995b0c3 100644 --- 
a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc @@ -32,6 +32,8 @@ PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_grad, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sigmoid, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sigmoid_grad, CPU, ALL_LAYOUT); namespace egr { diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc index 17663ecf6baa35f698aca35e451de34c647d2214..4236dc55d518686b97ea47aa12e756eb39304f35 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc @@ -25,6 +25,12 @@ #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/place.h" +#include "paddle/phi/core/kernel_registry.h" + +PD_DECLARE_KERNEL(conv2d_transpose, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(batch_norm, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(gelu, CPU, ALL_LAYOUT); USE_OP_ITSELF(batch_norm); USE_OP_DEVICE_KERNEL(batch_norm, MKLDNN); diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index 46f340d681a225c03dbf4ad5cfd8f24ffd42ec0d..c2f32f5fe22311aa3df8051817d0e363373879b7 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -23,7 +23,7 @@ cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore) # cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler) # skip win32 since wget is not installed by default on windows machine. 
# skip COVERAGE_CI since the test runs slowly because of instrumentation. -if (WITH_CUDA AND WITH_TESTING AND NOT WIN32 AND NOT WITH_COVERAGE AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") +if (WITH_GPU AND WITH_TESTING AND NOT WIN32 AND NOT WITH_COVERAGE AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") add_custom_target( download_program COMMAND wget -nc https://paddle-ci.gz.bcebos.com/new_exec/lm_main_program diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 878b845211ca1ae9e92f43fcc6ac82da366264d4..62e801b76955d74f15bfd81f8da641671de7307b 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -41,6 +41,7 @@ namespace paddle { namespace framework { // NOTE(Aurelius84): Need a better strategy to determine it. static constexpr size_t kHostNumThreads = 4; +static constexpr size_t kDeviceNumThreads = 1; bool IsInterpretercoreFastGCEnabled() { return FLAGS_fast_eager_deletion_mode && FLAGS_use_stream_safe_cuda_allocator; @@ -54,8 +55,8 @@ InterpreterCore::InterpreterCore(const platform::Place& place, global_scope_(global_scope), stream_analyzer_(place) { is_build_ = false; - async_work_queue_.reset( - new interpreter::AsyncWorkQueue(kHostNumThreads, &main_thread_blocker_)); + async_work_queue_.reset(new interpreter::AsyncWorkQueue( + kHostNumThreads, kDeviceNumThreads, &main_thread_blocker_)); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (IsInterpretercoreFastGCEnabled()) { @@ -271,6 +272,10 @@ void InterpreterCore::Convert( if (FLAGS_new_executor_use_inplace) { BuildInplace(); } + + // prepare for the first time. 
+ async_work_queue_->PrepareAtomicDeps(dependecy_count_); + async_work_queue_->PrepareAtomicVarRef(vec_meta_info); } bool InterpreterCore::BuildInplaceCheckVarIsOnlyInput(size_t var_index) { @@ -388,18 +393,18 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) { : global_scope_->GetMutableScope(); auto op_with_kernel = dynamic_cast(op); { - platform::RecordEvent infershape_event( - "infer_shape", platform::TracerEventType::OperatorInner, 1, - platform::EventRole::kInnerOp); - // If it is OperatorBase, InferShape do nothing. - if (op_with_kernel != nullptr) + if (op_with_kernel != nullptr) { + platform::RecordEvent infershape_event( + "infer_shape", platform::TracerEventType::OperatorInner, 1, + platform::EventRole::kInnerOp); + // If it is OperatorBase, InferShape do nothing. op_with_kernel->Info().infer_shape_( instr_node.InnerInferShapeContext().get()); + } } - if (op_with_kernel != nullptr && - FLAGS_new_executor_use_inplace) { // TODO(xiongkun03) Does operator - // base support inplace ? + if (op_with_kernel != nullptr && FLAGS_new_executor_use_inplace) { + // TODO(xiongkun03) Does operator base support inplace ? 
for (auto& pair : instr_node.InplaceInfo()) { const auto& in = paddle::framework::details::GetTensorFromVar(pair.first); auto* out = @@ -409,6 +414,7 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) { } } } + { platform::RecordEvent compute_event( "compute", platform::TracerEventType::OperatorInner, 1, @@ -458,16 +464,24 @@ void InterpreterCore::RunInstruction(const Instruction& instr_node) { void InterpreterCore::ExecuteInstructionList( const std::vector& vec_instr) { + // NOTE(zhiqiu): get the prepared deps from std::future, and async prepare + // those for the next step + auto atomic_deps = async_work_queue_->AtomicDeps(); + auto atomic_var_ref = async_work_queue_->AtomicVarRef(); + async_work_queue_->PrepareAtomicDeps(dependecy_count_); async_work_queue_->PrepareAtomicVarRef(global_scope_->VecMetaInfo()); + unfinished_op_numer_ = vec_instr.size(); exception_holder_.Clear(); for (size_t i = 0; i < dependecy_count_.size(); ++i) { if (dependecy_count_[i] == 0) { - async_work_queue_->AddTask(vec_instr.at(i).KernelType(), - [&, i] { RunInstructionAsync(i); }); + async_work_queue_->AddTask(vec_instr.at(i).KernelType(), [ + this, i, atomic_deps = atomic_deps.get(), + atomic_var_ref = atomic_var_ref.get() + ] { RunInstructionAsync(i, atomic_deps, atomic_var_ref); }); } } @@ -490,11 +504,16 @@ void InterpreterCore::ExecuteInstructionList( } void InterpreterCore::RunNextInstructions( - const Instruction& instr, std::queue* reserved_next_ops) { + const Instruction& instr, std::queue* reserved_next_ops, + std::vector>* atomic_deps, + std::vector>* atomic_var_ref) { + VLOG(4) << "atomic 1:" << atomic_deps; auto& next_instr = instr.NextInstructions(); - auto& atomic_deps = async_work_queue_->AtomicDeps(); - auto IsReady = [&](size_t next_id) { - return atomic_deps[next_id]->fetch_sub(1, std::memory_order_relaxed) == 1; + + auto IsReady = [atomic_deps](size_t next_id) { + VLOG(4) << "atomic:" << atomic_deps << " " << &(*atomic_deps)[next_id] + << " " << 
next_id; + return (*atomic_deps)[next_id].fetch_sub(1, std::memory_order_relaxed) == 1; }; if (instr.KernelType() == OpFuncType::kQueueAsync) { @@ -503,7 +522,9 @@ void InterpreterCore::RunNextInstructions( if (IsReady(next_id)) { async_work_queue_->AddTask( vec_instruction_[next_id].KernelType(), - [&, next_id] { RunInstructionAsync(next_id); }); + [this, next_id, atomic_deps, atomic_var_ref]() { + RunInstructionAsync(next_id, atomic_deps, atomic_var_ref); + }); } } // keep all async_ops running in current thread @@ -523,7 +544,9 @@ void InterpreterCore::RunNextInstructions( if (IsReady(next_id)) { async_work_queue_->AddTask( vec_instruction_[next_id].KernelType(), - [&, next_id] { RunInstructionAsync(next_id); }); + [this, next_id, atomic_deps, atomic_var_ref] { + RunInstructionAsync(next_id, atomic_deps, atomic_var_ref); + }); } } auto direct_run_ops = interpreter::merge_vector(next_instr.SyncRunIds(), @@ -539,14 +562,18 @@ void InterpreterCore::RunNextInstructions( // move rest ops into other threads async_work_queue_->AddTask( vec_instruction_[next_id].KernelType(), - [&, next_id] { RunInstructionAsync(next_id); }); + [this, next_id, atomic_deps, atomic_var_ref] { + RunInstructionAsync(next_id, atomic_deps, atomic_var_ref); + }); } } if (first_op != 0) reserved_next_ops->push(first_op); } } -void InterpreterCore::RunInstructionAsync(size_t instr_id) { +void InterpreterCore::RunInstructionAsync( + size_t instr_id, std::vector>* atomic_deps, + std::vector>* atomic_var_ref) { std::queue ready_ops; ready_ops.push(instr_id); while (!ready_ops.empty()) { @@ -571,7 +598,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) RecordStreamForGC(instr_node); #endif - CheckGC(instr_node); + CheckGC(instr_node, atomic_var_ref); } catch (platform::EnforceNotMet& ex) { framework::InsertCallStackInfo(op->Type(), op->Attrs(), &ex); exception_holder_.Catch(std::make_exception_ptr(std::move(ex))); @@ -605,7 
+632,7 @@ void InterpreterCore::RunInstructionAsync(size_t instr_id) { interpreter::RecordEvent(instr_node, place_); - RunNextInstructions(instr_node, &ready_ops); + RunNextInstructions(instr_node, &ready_ops, atomic_deps, atomic_var_ref); } } @@ -703,17 +730,19 @@ void InterpreterCore::RecordStreamForGC(const Instruction& instr) { } #endif -void InterpreterCore::CheckGC(const Instruction& instr) { +void InterpreterCore::CheckGC( + const Instruction& instr, + std::vector>* atomic_var_ref) { size_t instr_id = instr.Id(); auto& var_scope = *global_scope_; - auto& atomic_var_ref = async_work_queue_->AtomicVarRef(); for (auto var_id : instr.GCCheckVars()) { VLOG(4) << "GC " << global_scope_->GetNameById(var_id) << " " << var_scope.VarDesc(var_id); - + VLOG(4) << "atomic:" << atomic_var_ref << " " << &(*atomic_var_ref)[var_id] + << " " << var_id; bool is_ready = - atomic_var_ref[var_id]->fetch_sub(1, std::memory_order_relaxed) == 1; + (*atomic_var_ref)[var_id].fetch_sub(1, std::memory_order_relaxed) == 1; // ignore all persistable var while GC if (var_scope.VarDesc(var_id) && var_scope.VarDesc(var_id)->Persistable()) { continue; diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h index 51734abbb1bf82345b525e6df3f766f99921b8b1..c1ade85e1384c0e1f6fe3f3d6480b606e8a24391 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.h +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -76,11 +76,16 @@ class InterpreterCore { void RecordStreamForGC(const Instruction& instr); #endif - void CheckGC(const Instruction& instr); + void CheckGC(const Instruction& instr, + std::vector>* atomic_var_ref); - void RunInstructionAsync(size_t instr_id); + void RunInstructionAsync(size_t instr_id, + std::vector>* atomic_deps, + std::vector>* atomic_var_ref); void RunNextInstructions(const Instruction& instr_id, - std::queue* reserved_next_ops); + std::queue* reserved_next_ops, + std::vector>* atomic_deps, 
+ std::vector>* atomic_var_ref); void BuildSkipShareLoDInfo(); diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.cc b/paddle/fluid/framework/new_executor/interpretercore_util.cc index d595af58257d4f6e0f6bd1fd009ab78e181f96f7..a045d6c7f4a65fdda83578f319b75788d0f68f95 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.cc +++ b/paddle/fluid/framework/new_executor/interpretercore_util.cc @@ -44,32 +44,37 @@ void AsyncWorkQueue::AddTask(const OpFuncType& op_func_type, using VariableIdMap = std::map>; -AtomicVectorSizeT& AsyncWorkQueue::PrepareAtomicDeps( +void AsyncWorkQueue::PrepareAtomicDeps( const std::vector& dependecy_count) { - if (atomic_deps_.size() != dependecy_count.size()) { - atomic_deps_.clear(); - std::generate_n(std::back_inserter(atomic_deps_), dependecy_count.size(), - [] { return std::make_unique>(0); }); - } - - for (size_t i = 0; i < dependecy_count.size(); ++i) { - atomic_deps_[i]->store(dependecy_count[i]); - } - return atomic_deps_; + VLOG(4) << "PrepareAtomicDeps"; + auto p = std::make_shared< + std::promise>>>>(); + atomic_deps_ = p->get_future(); + queue_group_->AddTask(2, [&dependecy_count, p] { + auto* op_deps = + new std::vector>(dependecy_count.size()); + for (size_t i = 0; i < dependecy_count.size(); ++i) { + (*op_deps)[i] = dependecy_count[i]; + } + VLOG(4) << "AtomicDeps:" << op_deps << " " << (*op_deps).size(); + p->set_value(std::unique_ptr>>(op_deps)); + }); } -AtomicVectorSizeT& AsyncWorkQueue::PrepareAtomicVarRef( +void AsyncWorkQueue::PrepareAtomicVarRef( const std::vector& vec_meta_info) { - if (atomic_var_ref_.size() != vec_meta_info.size()) { - atomic_var_ref_.clear(); - std::generate_n(std::back_inserter(atomic_var_ref_), vec_meta_info.size(), - [] { return std::make_unique>(0); }); - } - - for (size_t i = 0; i < vec_meta_info.size(); ++i) { - atomic_var_ref_[i]->store(vec_meta_info[i].var_ref_count_); - } - return atomic_var_ref_; + VLOG(4) << "PrepareAtomicVarRef"; + auto p = 
std::make_shared< + std::promise>>>>(); + atomic_var_ref_ = p->get_future(); + queue_group_->AddTask(2, [&vec_meta_info, p] { + auto* var_ref = new std::vector>(vec_meta_info.size()); + for (size_t i = 0; i < vec_meta_info.size(); ++i) { + (*var_ref)[i] = vec_meta_info[i].var_ref_count_; + } + VLOG(4) << "AtomicVarRef:" << var_ref << " " << (*var_ref).size(); + p->set_value(std::unique_ptr>>(var_ref)); + }); } bool var_can_be_deleted(const std::string& name, const BlockDesc& block) { diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.h b/paddle/fluid/framework/new_executor/interpretercore_util.h index 81c05df62ec41970bcfbefe5e001527e777051ed..044a9ea368cbc506ce4a30bb82562177263786f9 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.h +++ b/paddle/fluid/framework/new_executor/interpretercore_util.h @@ -50,11 +50,13 @@ namespace framework { namespace interpreter { -using AtomicVectorSizeT = std::vector>>; +using AtomicVectorSizeT = + std::future>>>; class AsyncWorkQueue { public: - AsyncWorkQueue(size_t host_num_threads, EventsWaiter* waiter) + AsyncWorkQueue(size_t host_num_threads, size_t deivce_num_threads, + EventsWaiter* waiter) : host_num_thread_(host_num_threads) { std::vector group_options; // for execute host Kernel @@ -66,6 +68,13 @@ class AsyncWorkQueue { /*events_waiter*/ waiter); // for launch device Kernel group_options.emplace_back(/*name*/ "DeviceKernelLaunch", + /*num_threads*/ deivce_num_threads, + /*allow_spinning*/ true, + /*track_task*/ false, + /*detached*/ true, + /*events_waiter*/ waiter); + // for prepare deps and others + group_options.emplace_back(/*name*/ "Prepare", /*num_threads*/ 1, /*allow_spinning*/ true, /*track_task*/ false, @@ -74,10 +83,8 @@ class AsyncWorkQueue { queue_group_ = CreateWorkQueueGroup(group_options); } - AtomicVectorSizeT& PrepareAtomicDeps( - const std::vector& dependecy_count); - AtomicVectorSizeT& PrepareAtomicVarRef( - const std::vector& vec_meta_info); + void 
PrepareAtomicDeps(const std::vector& dependecy_count); + void PrepareAtomicVarRef(const std::vector& vec_meta_info); // void WaitEmpty() { queue_group_->WaitQueueGroupEmpty(); } @@ -85,8 +92,12 @@ class AsyncWorkQueue { void Cancel() { queue_group_->Cancel(); } - AtomicVectorSizeT& AtomicDeps() { return atomic_deps_; } - AtomicVectorSizeT& AtomicVarRef() { return atomic_var_ref_; } + std::unique_ptr>> AtomicDeps() { + return atomic_deps_.get(); + } + std::unique_ptr>> AtomicVarRef() { + return atomic_var_ref_.get(); + } private: size_t host_num_thread_; diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 7fe1852f7396cb8cebe4b83f4cc80a8023421351..8d5058a586b9e2324bc204bc89d1f3bf0c53bd45 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -20,45 +20,65 @@ // #include "gperftools/profiler.h" #include "paddle/fluid/framework/new_executor/standalone_executor.h" +#include "paddle/phi/core/kernel_registry.h" USE_OP_ITSELF(fill_constant); -USE_OP(uniform_random); +USE_OP_ITSELF(uniform_random); USE_OP(lookup_table); -USE_OP(transpose2); +USE_OP_ITSELF(transpose2); USE_OP_ITSELF(reshape2); -USE_OP(split); -USE_OP(slice); -USE_OP(concat); -USE_OP(matmul); +USE_OP_ITSELF(split); +USE_OP_ITSELF(slice); +USE_OP_ITSELF(concat); +USE_OP_ITSELF(matmul); USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(sigmoid); USE_OP_ITSELF(tanh); -USE_OP(elementwise_mul); +USE_OP_ITSELF(elementwise_mul); USE_OP(softmax_with_cross_entropy); USE_OP_ITSELF(reduce_mean); USE_OP_ITSELF(reduce_sum); USE_OP_ITSELF(reduce_sum_grad); USE_OP_ITSELF(reduce_mean_grad); USE_OP_ITSELF(reshape2_grad); -USE_OP(softmax_with_cross_entropy_grad); +USE_OP_ITSELF(softmax_with_cross_entropy_grad); USE_OP_ITSELF(elementwise_add_grad); -USE_OP(matmul_grad); -USE_OP(square); -USE_OP(transpose2_grad); +USE_OP_ITSELF(matmul_grad); 
+USE_OP_ITSELF(square); +USE_OP_ITSELF(transpose2_grad); USE_OP(concat_grad); USE_OP_ITSELF(elementwise_mul_grad); USE_OP_ITSELF(sigmoid_grad); USE_OP_ITSELF(tanh_grad); USE_OP(sum); -USE_OP(slice_grad); -USE_OP(lookup_table_grad); +USE_OP_ITSELF(slice_grad); +USE_OP_ITSELF(lookup_table_grad); USE_OP(sqrt); USE_OP(elementwise_max); USE_OP_ITSELF(elementwise_div); -USE_OP(sgd); +USE_OP_ITSELF(sgd); USE_OP(squared_l2_norm); -USE_OP(memcpy_h2d); -USE_OP(memcpy_d2h); +USE_OP_ITSELF(memcpy_h2d); +USE_OP_ITSELF(memcpy_d2h); + +PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(uniform_random_raw, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(transpose, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(reshape, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add_raw, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(reshape_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(transpose_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sgd, GPU, ALL_LAYOUT); + DECLARE_double(eager_delete_tensor_gb); namespace paddle { diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 42fbeb5d29ce4ac3a1498704b1fff88570c9c092..15777c287b42241350193583a21f3796febf5f02 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1122,7 +1122,15 @@ static void CheckTensorNANOrInf(const std::string& op_type, bool OperatorWithKernel::SupportsMKLDNN( const proto::VarType::Type data_type) const { - auto& op_kernels = OperatorWithKernel::AllOpKernels().at(type_); + auto op_kernel_iter = OperatorWithKernel::AllOpKernels().find(type_); + if (op_kernel_iter == 
OperatorWithKernel::AllOpKernels().end()) { + VLOG(6) << "Warning: " << type_ << " don't find its MKLDNN Kernel in Fluid " + "Registered Kernels. And We don't " + "search its kernels in phi lib, " + "SupportsMKLDNN() return false."; + return false; + } + auto& op_kernels = op_kernel_iter->second; return std::any_of(op_kernels.begin(), op_kernels.end(), [data_type](OpKernelMap::const_reference kern_pair) { return platform::is_cpu_place(kern_pair.first.place_) && diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index db4f6761bcec9d970863bd7f38b110ad719271ca..1669fba1327e5c08053735036b1946d35f2e8e49 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -144,6 +144,9 @@ class Scope : public ScopeBase { void Rename(const std::string& origin_name, const std::string& new_name) const; + // Return the number of variables in scope + size_t Size() { return vars_.size(); } + // Rename variable to a new name and return the new name std::string Rename(const std::string& origin_name) const; diff --git a/paddle/fluid/imperative/tests/test_tracer.cc b/paddle/fluid/imperative/tests/test_tracer.cc index f754c6fdd0ee7742f0e544baad0225502c172848..75876e07fb5c78fb6ec6949489efac9fcf618a69 100644 --- a/paddle/fluid/imperative/tests/test_tracer.cc +++ b/paddle/fluid/imperative/tests/test_tracer.cc @@ -32,6 +32,8 @@ PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add_grad, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sum, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(sum_grad, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 845d0ed073b32cc136ec6b9d76c9e3073d7b051a..8f7b62a2c9d27b699688d53ff1545b80a8f4400c 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -1496,6 +1496,9 
@@ REGISTER_ACTIVATION_OP(hard_sigmoid, HardSigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); REGISTER_ACTIVATION_OP(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); +REGISTER_ACTIVATION_OP(log2, Log2, Log2Functor, Log2GradFunctor); +REGISTER_ACTIVATION_OP(log10, Log10, Log10Functor, Log10GradFunctor); +REGISTER_ACTIVATION_OP(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); /* ========================== sigmoid register ============================= */ @@ -1867,15 +1870,6 @@ REGISTER_OPERATOR( ops::ActivationOpDoubleGrad::FwdDeps()>, ops::ActivationDoubleGradOpInplaceInferer); -REGISTER_ACTIVATION_CPU_KERNEL(log, Log, LogFunctor, LogGradFunctor); - -REGISTER_OP_CPU_KERNEL( - log_grad_grad, ops::LogDoubleGradKernel>, - ops::LogDoubleGradKernel>, - ops::LogDoubleGradKernel>); /* ========================================================================== */ /* ========================== register checkpoint ===========================*/ diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index f1984af6e15eac6682bd341f470727b899e82f3a..7db5675c16b2d3cc7df7176a40e2010355ebd37c 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -281,6 +281,11 @@ USE_PHI_DOUBLE_GRAD_FUNCTOR(Sigmoid) USE_PHI_TRIPLE_GRAD_FUNCTOR(Sigmoid) USE_PHI_FUNCTOR(LogSigmoid) USE_PHI_FUNCTOR(HardSigmoid) +USE_PHI_FUNCTOR(Log) +USE_PHI_DOUBLE_GRAD_FUNCTOR(Log) +USE_PHI_FUNCTOR(Log2) +USE_PHI_FUNCTOR(Log10) +USE_PHI_FUNCTOR(Log1p) template using ELUGradNegativeAlphaFunctor = phi::funcs::ELUGradNegativeAlphaFunctor; @@ -448,88 +453,6 @@ struct ReciprocalGradFunctor : public BaseActivationFunctor { } }; -// log(x) = natural logarithm of x -template -struct LogFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.log(); - } -}; - -template -struct LogGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, 
X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * (static_cast(1) / x); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -// log2(x) = logarithm to the base 2 of the elements of x -template -struct Log2Functor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.log() / static_cast(log(2)); - } -}; - -// the gradient of log2(x) is 1/(x*ln(2)) -template -struct Log2GradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * static_cast(1) / (x * static_cast(log(2))); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -// log10(x) = logarithm to the base 10 of the elements of x -template -struct Log10Functor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = x.log() / static_cast(log(10)); - } -}; - -// the gradient of log10(x) is 1/(x*ln(10)) -template -struct Log10GradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * static_cast(1) / (x * static_cast(log(10))); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -// log1p(x) = natural logarithm of x+1 -template -struct Log1pFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out) const { - out.device(d) = (static_cast(1) + x).log(); - } -}; - -template -struct Log1pGradFunctor : public BaseActivationFunctor { - template - void operator()(Device d, X x, Out out, dOut dout, dX dx) const { - dx.device(d) = dout * (static_cast(1) / (x + static_cast(1))); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - // square(x) = x^2 template struct SquareFunctor : public BaseActivationFunctor { @@ -1197,37 +1120,6 @@ 
class SquareDoubleGradKernel } }; -template -class LogDoubleGradKernel - : public SquareDoubleGradKernel {}; - -template -class ELUDoubleGradKernel - : public framework::OpKernel { - public: - using T = typename Functor::ELEMENT_TYPE; - void Compute(const framework::ExecutionContext& ctx) const override { - const framework::Tensor *X, *ddX, *dOut; - X = ddX = dOut = nullptr; - framework::Tensor *dX, *ddOut; - dX = ddOut = nullptr; - - ExtractDoubleGradTensorWithInputDOut(ctx, &X, &ddX, &dX, &dOut, &ddOut); - - if (dX) dX->mutable_data(X->dims(), ctx.GetPlace()); - if (ddOut) ddOut->mutable_data(ctx.GetPlace()); - - auto& place = ctx.template device_context(); - - Functor functor; - auto attrs = functor.GetAttrs(); - for (auto& attr : attrs) { - *attr.second = ctx.Attr(attr.first); - } - functor(place, X, ddX, ddOut, dOut, dX); - } -}; - template class CELUDoubleGradKernel : public framework::OpKernel { @@ -1522,36 +1414,6 @@ class LogitGradKernel : public framework::OpKernel { } }; -template -struct LogGradGradFunctor : public BaseActivationFunctor { - template - void operator()(const Device& dev, const framework::Tensor* X, - const framework::Tensor* ddX, framework::Tensor* ddOut, - const framework::Tensor* dOut, framework::Tensor* dX) const { - auto* d = dev.eigen_device(); - auto ddx = framework::EigenVector::Flatten( - GET_DATA_SAFELY(ddX, "Input", "DDX", "LogGradGrad")); - auto x = framework::EigenVector::Flatten( - GET_DATA_SAFELY(X, "Input", "X", "LogGradGrad")); - // ddout = ddx / x; dx = -(dout / x) * (ddx / x) - // calculate dx first, so ddout can inplace ddx - if (dX) { - auto dout = framework::EigenVector::Flatten( - GET_DATA_SAFELY(dOut, "Output", "DOut", "LogGradGrad")); - auto dx = framework::EigenVector::Flatten( - GET_DATA_SAFELY(dX, "Output", "DX", "LogGradGrad")); - dx.device(*d) = dout * static_cast(-1) * ddx / (x * x); - } - if (ddOut) { - auto ddout = framework::EigenVector::Flatten( - GET_DATA_SAFELY(ddOut, "Output", "DDOut", "LogGradGrad")); 
- ddout.device(*d) = ddx * static_cast(1) / x; - } - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - } // namespace operators } // namespace paddle @@ -1560,9 +1422,6 @@ struct LogGradGradFunctor : public BaseActivationFunctor { __macro(floor, Floor, FloorFunctor, ZeroGradFunctor); \ __macro(round, Round, RoundFunctor, ZeroGradFunctor); \ __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ - __macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \ - __macro(log2, Log2, Log2Functor, Log2GradFunctor); \ - __macro(log10, Log10, Log10Functor, Log10GradFunctor); \ __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(stanh, STanh, STanhFunctor, STanhGradFunctor); \ __macro(softplus, Softplus, SoftplusFunctor, SoftplusGradFunctor); \ diff --git a/paddle/fluid/operators/activation_op.kps b/paddle/fluid/operators/activation_op.kps index 7c1b288080162e2a5bf847a795fc640ab5e5e4e1..bb08cee5bcde929f71415e5844a30c4d47f37b0a 100644 --- a/paddle/fluid/operators/activation_op.kps +++ b/paddle/fluid/operators/activation_op.kps @@ -131,27 +131,6 @@ struct CudaExpm1GradFunctor : public BaseActivationFunctor { } }; -template -struct CudaLogFunctor : public BaseActivationFunctor { - using MPType = typename details::MPTypeTrait::Type; - - // log(x) = log(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(log(x)); - } -}; - -template -struct CudaLogGradFunctor : public BaseActivationFunctor { - // dx = dout / x - __device__ __forceinline__ T operator()(const T dout, const T x) const { - return dout / x; - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - template struct CudaSquareFunctor : public BaseActivationFunctor { // square(x) = x * x @@ -220,78 +199,6 @@ struct CudaRsqrtGradFunctor : public BaseActivationFunctor { } }; -template -struct CudaLog1pFunctor : public BaseActivationFunctor 
{ - using MPType = typename details::MPTypeTrait::Type; - MPType one = static_cast(1.0f); - - // log1p(x) = log(1 + x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(log(one + x)); - } -}; - -template -struct CudaLog1pGradFunctor : public BaseActivationFunctor { - T one = static_cast(1.0f); - - // dx = dout / (1 + x) - __device__ __forceinline__ T operator()(const T dout, const T x) const { - return dout / (one + x); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -template -struct CudaLog2Functor : public BaseActivationFunctor { - using MPType = typename details::MPTypeTrait::Type; - - // log2(x) = log2(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(log2(x)); - } -}; - -template -struct CudaLog2GradFunctor : public BaseActivationFunctor { - using MPType = typename details::MPTypeTrait::Type; - T log_two = static_cast(log(static_cast(2.0f))); - - // dx = dout / (x * log(2)) - __device__ __forceinline__ T operator()(const T dout, const T x) const { - return dout / (x * log_two); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - -template -struct CudaLog10Functor : public BaseActivationFunctor { - using MPType = typename details::MPTypeTrait::Type; - - // log10(x) = log10(x) - __device__ __forceinline__ T operator()(const T arg_x) const { - MPType x = static_cast(arg_x); - return static_cast(log10(x)); - } -}; - -template -struct CudaLog10GradFunctor : public BaseActivationFunctor { - using MPType = typename details::MPTypeTrait::Type; - T log_ten = static_cast(log(static_cast(10.0f))); - - // dx = dout / (x * log(10)) - __device__ __forceinline__ T operator()(const T dout, const T x) const { - return dout / (x * log_ten); - } - - static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } -}; - template struct 
CudaSoftReluFunctor : public BaseActivationFunctor { using MPType = typename details::MPTypeTrait::Type; @@ -773,6 +680,10 @@ USE_PHI_FUNCTOR(CudaELU) USE_PHI_FUNCTOR(CudaSigmoid) USE_PHI_FUNCTOR(CudaLogSigmoid) USE_PHI_FUNCTOR(CudaHardSigmoid) +USE_PHI_FUNCTOR(CudaLog) +USE_PHI_FUNCTOR(CudaLog2) +USE_PHI_FUNCTOR(CudaLog10) +USE_PHI_FUNCTOR(CudaLog1p) template using CudaELUGradNegativeAlphaFunctor = @@ -975,18 +886,6 @@ REGISTER_OP_CUDA_KERNEL( ops::CudaExpm1GradFunctor>); /* ========================================================================== */ -/* ========================== Log register ==================================*/ -REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, CudaLogFunctor, CudaLogGradFunctor); - -REGISTER_OP_CUDA_KERNEL( - log_grad_grad, ops::LogDoubleGradKernel>, - ops::LogDoubleGradKernel>, - ops::LogDoubleGradKernel>); -/* ========================================================================== */ - #define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \ __macro(softshrink, SoftShrink, CudaSoftShrinkFunctor, \ CudaSoftShrinkGradFunctor); \ @@ -995,9 +894,6 @@ REGISTER_OP_CUDA_KERNEL( __macro(round, Round, CudaRoundFunctor, CudaZeroGradFunctor); \ __macro(reciprocal, Reciprocal, CudaReciprocalFunctor, \ CudaReciprocalGradFunctor); \ - __macro(log1p, Log1p, CudaLog1pFunctor, CudaLog1pGradFunctor); \ - __macro(log2, Log2, CudaLog2Functor, CudaLog2GradFunctor); \ - __macro(log10, Log10, CudaLog10Functor, CudaLog10GradFunctor); \ __macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \ __macro(stanh, STanh, CudaSTanhFunctor, CudaSTanhGradFunctor); \ __macro(softplus, Softplus, CudaSoftplusFunctor, CudaSoftplusGradFunctor); \ diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc index f699dac7976c5aa6745ca5d08079699e3cc0a63c..57e7cbb74079ed44a3f5554cda00243dc51f3a31 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc +++ 
b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/fill_constant_batch_size_like_op.h" #include "paddle/fluid/operators/batch_size_like.h" namespace paddle { @@ -23,9 +22,13 @@ class FillConstantBatchSizeLikeOp : public BatchSizeLikeOp { using BatchSizeLikeOp::BatchSizeLikeOp; framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - return framework::OpKernelType( + framework::OpKernelType kernel_type = framework::OpKernelType( static_cast(ctx.Attr("dtype")), ctx.device_context()); + if (ctx.Attr("force_cpu")) { + kernel_type.place_ = platform::CPUPlace(); + } + return kernel_type; } }; @@ -64,15 +67,3 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, ops::FillConstantBatchSizeLikeOpMaker, ops::BatchSizeLikeNoNeedBufferVarsInferer); -REGISTER_OP_CPU_KERNEL( - fill_constant_batch_size_like, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.cu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op.cu.cc deleted file mode 100644 index de06aeb01e4dda4e8ca4b4e70ca2c3ad6aa4b5dc..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.cu.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/fill_constant_batch_size_like_op.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL( - fill_constant_batch_size_like, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h deleted file mode 100644 index 31471c6b622684ac2134366bd23b8919ba1f93e5..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include -#include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -template -class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto data_type = - static_cast(ctx.Attr("dtype")); - auto float_value = ctx.Attr("value"); - auto str_value = ctx.Attr("str_value"); - auto force_cpu = ctx.Attr("force_cpu"); - - auto *out = ctx.Output("Out"); - auto *in = ctx.Input("Input"); - if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { - // set the correct batch size for the LoDTensor. - auto odims = out->dims(); - int output_dim_idx = ctx.Attr("output_dim_idx"); - odims[output_dim_idx] = static_cast(in->lod().back().size()) - 1; - out->mutable_data(odims, ctx.GetPlace()); - } - - T value; - if (str_value.empty()) { - value = static_cast(float_value); - } else { - std::stringstream convert_stream(str_value); - if (std::is_same::value) { - int64_t tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } else { - double tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } - } - - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); - if (cpu_place) { - auto &dev_ctx = *pool.Get(platform::CPUPlace()); - phi::funcs::SetConstant functor; - out->mutable_data(platform::CPUPlace(), - framework::TransToPhiDataType(data_type)); - functor(reinterpret_cast(dev_ctx), - out, static_cast(value)); - } -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (!cpu_place) { - auto &dev_ctx = *pool.Get(ctx.GetPlace()); - phi::funcs::SetConstant functor; - out->mutable_data(ctx.GetPlace(), - framework::TransToPhiDataType(data_type)); - functor(reinterpret_cast(dev_ctx), - out, 
static_cast(value)); - } -#endif - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/squeeze_op.cc b/paddle/fluid/operators/squeeze_op.cc index b3403a960a128dae86395c2d7feabf2c07461c03..ff378396b188fbd55c8228dbba6226cb799204dc 100644 --- a/paddle/fluid/operators/squeeze_op.cc +++ b/paddle/fluid/operators/squeeze_op.cc @@ -19,7 +19,9 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/unary.h" namespace paddle { namespace operators { @@ -113,13 +115,13 @@ class SqueezeOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); - //#ifdef PADDLE_WITH_MKLDNN + // #ifdef PADDLE_WITH_MKLDNN // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { // return framework::OpKernelType(input_data_type, ctx.GetPlace(), // framework::DataLayout::kMKLDNN, // framework::LibraryType::kMKLDNN); // } - //#endif + // #endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -140,13 +142,13 @@ class SqueezeGradOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); - //#ifdef PADDLE_WITH_MKLDNN + // #ifdef PADDLE_WITH_MKLDNN // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { // return framework::OpKernelType(input_data_type, ctx.GetPlace(), // framework::DataLayout::kMKLDNN, // framework::LibraryType::kMKLDNN); // } - //#endif + // #endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -201,53 +203,18 @@ class SqueezeOpMaker : public framework::OpProtoAndCheckerMaker { class Squeeze2Op : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - - void InferShape(framework::InferShapeContext *ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), 
"Input", "X", "Squeeze2"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Squeeze2"); - - const auto &x_dims = ctx->GetInputDim("X"); - // Check input tensor dims (<6) Eigen limit. - PADDLE_ENFORCE_LE(x_dims.size(), 6, - platform::errors::InvalidArgument( - "The dimensions of Input(X) " - "should be in the range of [1, 6] (Eigen limit)." - "But received X's dimensions = %d, X's shape = [%s].", - x_dims.size(), x_dims)); - - const auto &axes = ctx->Attrs().Get>("axes"); - - auto out_dims = GetOutputShape(axes, x_dims, false); - ctx->SetOutputDim("Out", out_dims); - if (x_dims[0] == out_dims[0]) { - // Only pass LoD when the first dimension of output and Input(X) - // are the same. - ctx->ShareLoD("X", "Out"); - } - - if (!ctx->HasOutput("XShape")) return; - - std::vector xshape_dims(x_dims.size() + 1); - xshape_dims[0] = 0; - for (int i = 0; i < x_dims.size(); ++i) { - xshape_dims[i + 1] = x_dims[i]; - } - ctx->SetOutputDim("XShape", phi::make_ddim(xshape_dims)); - ctx->ShareLoD("X", /*->*/ "XShape"); - } - framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); - //#ifdef PADDLE_WITH_MKLDNN + // #ifdef PADDLE_WITH_MKLDNN // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { // return framework::OpKernelType(input_data_type, ctx.GetPlace(), // framework::DataLayout::kMKLDNN, // framework::LibraryType::kMKLDNN); // } - //#endif + // #endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -287,13 +254,13 @@ class Squeeze2GradOp : public framework::OperatorWithKernel { auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); - //#ifdef PADDLE_WITH_MKLDNN + // #ifdef PADDLE_WITH_MKLDNN // if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { // return framework::OpKernelType(input_data_type, ctx.GetPlace(), // framework::DataLayout::kMKLDNN, // 
framework::LibraryType::kMKLDNN); // } - //#endif + // #endif return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -365,6 +332,10 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(SqueezeGradNoNeedBufferVarsInferer, "X"); } // namespace paddle namespace ops = paddle::operators; + +DECLARE_INFER_SHAPE_FUNCTOR(squeeze2, SqueezeInferShapeFunctor, + PD_INFER_META(phi::SqueezeInferMeta)); + REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker, ops::SqueezeGradOpMaker, ops::SqueezeGradOpMaker); @@ -376,7 +347,7 @@ REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp, REGISTER_OPERATOR(squeeze2, ops::Squeeze2Op, ops::Squeeze2OpMaker, ops::Squeeze2GradOpMaker, ops::Squeeze2GradOpMaker, - ops::SqueezeInplaceInferer); + ops::SqueezeInplaceInferer, SqueezeInferShapeFunctor); REGISTER_OPERATOR(squeeze2_grad, ops::Squeeze2GradOp, ops::Squeeze2DoubleGradOpMaker, ops::Squeeze2DoubleGradOpMaker, @@ -411,34 +382,3 @@ REGISTER_OP_CPU_KERNEL( paddle::platform::complex>, ops::SqueezeGradKernel); - -REGISTER_OP_CPU_KERNEL( - squeeze2, ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel>, - ops::Squeeze2Kernel>, - ops::Squeeze2Kernel); - -REGISTER_OP_CPU_KERNEL( - squeeze2_grad, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel>, - ops::Squeeze2GradKernel>, - ops::Squeeze2GradKernel); diff --git a/paddle/fluid/operators/squeeze_op.cu.cc b/paddle/fluid/operators/squeeze_op.cu.cc index 8d7c0e5b4ff0e1351a3ebfccb70e33c36115d4de..19aa12cb55e2feb1bc7bb2070b165d923befb9a5 100644 --- a/paddle/fluid/operators/squeeze_op.cu.cc +++ b/paddle/fluid/operators/squeeze_op.cu.cc @@ -46,33 +46,3 @@ REGISTER_OP_CUDA_KERNEL( paddle::platform::complex>, ops::SqueezeGradKernel>); 
-REGISTER_OP_CUDA_KERNEL( - squeeze2, ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel, - ops::Squeeze2Kernel>, - ops::Squeeze2Kernel>); -REGISTER_OP_CUDA_KERNEL( - squeeze2_grad, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel>, - ops::Squeeze2GradKernel>); diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index 6389c5b2680138930a9b6408da6128c8a5eefcd6..445e8cd468bf3d55215fb4e756a849500a3fbb2d 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -18,7 +18,9 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/unary.h" namespace paddle { namespace operators { @@ -251,19 +253,6 @@ class UnsqueezeDoubleGradOpMaker : public framework::SingleGradOpMaker { class Unsqueeze2Op : public UnsqueezeOp { public: using UnsqueezeOp::UnsqueezeOp; - void InferShape(framework::InferShapeContext *ctx) const override { - UnsqueezeOp::InferShape(ctx); - const auto &x_dims = ctx->GetInputDim("X"); - - if (!ctx->HasOutput("XShape")) return; - std::vector xshape_dims(x_dims.size() + 1); - xshape_dims[0] = 0; - for (int i = 0; i < x_dims.size(); ++i) { - xshape_dims[i + 1] = x_dims[i]; - } - ctx->SetOutputDim("XShape", phi::make_ddim(xshape_dims)); - ctx->ShareLoD("X", /*->*/ "XShape"); - } }; class Unsqueeze2OpMaker : public UnsqueezeOpMaker { @@ -339,10 +328,14 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(UnsqueezeGradOpNoNeedBufferVarInferer, "X"); } // namespace operators } // namespace paddle 
+DECLARE_INFER_SHAPE_FUNCTOR(unsqueeze2, Unsqueeze2InferShapeFunctor, + PD_INFER_META(phi::UnsqueezeInferMeta)); + namespace ops = paddle::operators; REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker, ops::UnsqueezeGradOpMaker, ops::UnsqueezeGradOpMaker); + REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp, ops::UnsqueezeDoubleGradOpMaker, ops::UnsqueezeDoubleGradOpMaker, @@ -351,7 +344,8 @@ REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp, REGISTER_OPERATOR(unsqueeze2, ops::Unsqueeze2Op, ops::Unsqueeze2OpMaker, ops::Unsqueeze2GradOpMaker, ops::Unsqueeze2GradOpMaker, - ops::UnsqueezeInplaceInferer); + Unsqueeze2InferShapeFunctor, ops::UnsqueezeInplaceInferer); + REGISTER_OPERATOR(unsqueeze2_grad, ops::Unsqueeze2GradOp, ops::Unsqueeze2DoubleGradOpMaker, ops::Unsqueeze2DoubleGradOpMaker, @@ -388,34 +382,3 @@ REGISTER_OP_CPU_KERNEL( paddle::platform::complex>, ops::UnsqueezeGradKernel); -REGISTER_OP_CPU_KERNEL( - unsqueeze2, ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel>, - ops::UnsqueezeKernel>, - ops::UnsqueezeKernel); -REGISTER_OP_CPU_KERNEL( - unsqueeze2_grad, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel>, - ops::Unsqueeze2GradKernel>, - ops::Unsqueeze2GradKernel); diff --git a/paddle/fluid/operators/unsqueeze_op.cu.cc b/paddle/fluid/operators/unsqueeze_op.cu.cc index 2dcc4d2152a5c82a8f344b96084e70ba4df25bdd..f20ddb5c881e41fbecbb48d884004c1554e1fdb5 100644 --- a/paddle/fluid/operators/unsqueeze_op.cu.cc +++ b/paddle/fluid/operators/unsqueeze_op.cu.cc @@ -50,37 +50,3 @@ REGISTER_OP_CUDA_KERNEL( paddle::platform::complex>, ops::UnsqueezeGradKernel>); 
-REGISTER_OP_CUDA_KERNEL( - unsqueeze2, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel>, - ops::UnsqueezeKernel>); -REGISTER_OP_CUDA_KERNEL( - unsqueeze2_grad, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel>, - ops::Unsqueeze2GradKernel>); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index bbaa7e3dd6471587c82d271ef881276818dd1b79..dcfad030a689c278b72a0061cfb170762d1a3156 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1761,6 +1761,7 @@ All parameter, weight, gradient are variables in Paddle. out (core.Variable|None): the found variable or None. )DOC", py::return_value_policy::reference) + .def("size", &Scope::Size) .def("erase", &Scope::EraseVars, py::arg("names"), R"DOC( Find variable named :code:`name` in the current scope or @@ -2857,6 +2858,9 @@ All parameter, weight, gradient are variables in Paddle. 
.def("run", [](StandaloneExecutor &self, std::vector feed_names, std::vector fetch_names) { + platform::RecordEvent record_event( + "StandaloneExecutor:run", + platform::TracerEventType::UserDefined, 1); paddle::framework::FetchList ret; { pybind11::gil_scoped_release release; diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index 946230cb169d20db56a46399552b629348c4783f..613a2f9960a6ffd2ca4a02f20710018fcc00eaed 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -42,6 +42,10 @@ const std::unordered_set deprecated_op_names({"diag", "flatten_grad", "isinf", "isnan", + "unsqueeze", + "unsqueeze_grad", + "squeeze", + "squeeze_grad", "isfinite", "matmul", "matmul_grad", diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 80503dd2430927223dedd80d8e44c08473536997..e44032285ac1af0e8e1930583b360b818c7455a4 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/funcs/unfold_functor.h" +#include "paddle/phi/kernels/funcs/unsqueeze.h" namespace phi { @@ -1497,6 +1498,40 @@ void SplitInferMeta(const MetaTensor& x, } } +void SqueezeInferMeta(const MetaTensor& x, + const std::vector& axes, + MetaTensor* xshape, + MetaTensor* out) { + const auto& x_dims = x.dims(); + // Check input tensor dims (<6) Eigen limit. + PADDLE_ENFORCE_LE(x_dims.size(), + 6, + phi::errors::InvalidArgument( + "The dimensions of Input(X) " + "should be in the range of [1, 6] (Eigen limit)." + "But received X's dimensions = %d, X's shape = [%s].", + x_dims.size(), + x_dims)); + + auto out_dims = funcs::GetOutputSqueezeShape(axes, x_dims, false); + out->set_dims(out_dims); + if (x_dims[0] == out_dims[0]) { + // Only pass LoD when the first dimension of output and Input(X) + // are the same. 
+ out->share_lod(x); + } + + std::vector xshape_dims(x_dims.size() + 1); + xshape_dims[0] = 0; + for (int i = 0; i < x_dims.size(); ++i) { + xshape_dims[i + 1] = x_dims[i]; + } + xshape->set_dims(phi::make_ddim(xshape_dims)); + xshape->share_lod(x); + xshape->set_dtype(x.dtype()); + out->set_dtype(x.dtype()); +} + /* Why not use SumRawInferMeta directly? Because we need make InferMetaFunction's args follow the design of api.yaml */ @@ -1982,6 +2017,41 @@ void UnfoldInferMeta(const MetaTensor& x, out->set_dims(phi::make_ddim(out_dims)); } +void UnsqueezeInferMeta(const MetaTensor& x, + const ScalarArray& axes, + MetaTensor* xshape, + MetaTensor* out) { + const auto& x_dims = x.dims(); + // Validity Check: input tensor dims (<6). + PADDLE_ENFORCE_LE(x_dims.size(), + 6, + phi::errors::InvalidArgument( + "Invalid " + "dimensions, the rank of Input(X) " + "should be in the range of [1, 6] (Eigen limit)")); + if (!axes.GetData().empty()) { + std::vector tmp; + tmp.reserve(axes.GetData().size()); + std::for_each(axes.GetData().begin(), + axes.GetData().end(), + [&tmp](const int64_t& t) { tmp.push_back(t); }); + auto out_dims = funcs::GetUnsqueezeShape(tmp, x_dims); + out->set_dims(out_dims); + if (x_dims[0] == out_dims[0]) { + out->share_lod(x); + } + } + std::vector xshape_dims(x_dims.size() + 1); + xshape_dims[0] = 0; + for (int i = 0; i < x_dims.size(); ++i) { + xshape_dims[i + 1] = x_dims[i]; + } + xshape->set_dims(phi::make_ddim(xshape_dims)); + xshape->share_lod(x); + out->set_dtype(x.dtype()); + xshape->set_dtype(x.dtype()); +} + void OneHotRawInferMeta(const MetaTensor& x, int32_t depth, DataType dtype, @@ -1992,7 +2062,6 @@ void OneHotRawInferMeta(const MetaTensor& x, x_dims.size(), 1, phi::errors::InvalidArgument("Rank of Input(X) should be at least 1.")); - auto out_dims_vec = phi::vectorize(x_dims); out_dims_vec.push_back(depth); auto out_dims = phi::make_ddim(out_dims_vec); diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index 
0322a18fc3153b996e03aace0f705f1a776ad99f..f623f14a709adb09bf23e60c1e5ce98759238bd9 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -229,6 +229,11 @@ void SplitInferMeta(const MetaTensor& x_meta, std::vector out, MetaConfig config = MetaConfig()); +void SqueezeInferMeta(const MetaTensor& x, + const std::vector& axes, + MetaTensor* xshape, + MetaTensor* out); + void SumInferMeta(const MetaTensor& x, const std::vector& axis, DataType dtype, @@ -290,6 +295,11 @@ void UnfoldInferMeta(const MetaTensor& x, MetaTensor* out, MetaConfig config = MetaConfig()); +void UnsqueezeInferMeta(const MetaTensor& x, + const ScalarArray& axes, + MetaTensor* xshape, + MetaTensor* out); + void OneHotRawInferMeta(const MetaTensor& x, int32_t depth, DataType dtype, diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index d140912aa783047ba021be171805adff071bf22b..59540dbaefdd81ace1ca232a1c54ba68fe953562 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -62,3 +62,6 @@ register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS $ add_subdirectory(sparse) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) + +# 5. 
kernel autotune +add_subdirectory(autotune) diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h index 241a80d85ead2d7bb6cd63105feb345c62a29a62..6ad28f348f22fc3c8bc6ba7e1ce2bb4d431971be 100644 --- a/paddle/phi/kernels/activation_grad_kernel.h +++ b/paddle/phi/kernels/activation_grad_kernel.h @@ -135,6 +135,14 @@ void SigmoidTripleGradKernel(const Context& dev_ctx, DenseTensor* d_dout, DenseTensor* d_ddx); +template +void LogDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& dout, + const DenseTensor& ddx, + DenseTensor* dx, + DenseTensor* ddout); + DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Cos); DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Tan); DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Acos); @@ -149,6 +157,10 @@ DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Atanh); DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink); DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Silu); DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid); +DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log); +DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log2); +DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log10); +DECLARE_ACTIVATION_GRAD_KERNEL_DEPX(Log1p); DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu); DECLARE_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh); diff --git a/paddle/phi/kernels/activation_kernel.h b/paddle/phi/kernels/activation_kernel.h index dbc63a636edb188e4640fdd02895868034f1dd80..785d1089f06e8eece211a6d5978e1e0b1b67ba7f 100644 --- a/paddle/phi/kernels/activation_kernel.h +++ b/paddle/phi/kernels/activation_kernel.h @@ -56,6 +56,10 @@ DECLARE_ACTIVATION_KERNEL(TanhShrink) DECLARE_ACTIVATION_KERNEL(Silu) DECLARE_ACTIVATION_KERNEL(Sigmoid) DECLARE_ACTIVATION_KERNEL(LogSigmoid) +DECLARE_ACTIVATION_KERNEL(Log) +DECLARE_ACTIVATION_KERNEL(Log2) +DECLARE_ACTIVATION_KERNEL(Log10) +DECLARE_ACTIVATION_KERNEL(Log1p) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(LeakyRelu, alpha) DECLARE_ACTIVATION_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, threshold) diff --git a/paddle/phi/kernels/autotune/CMakeLists.txt 
b/paddle/phi/kernels/autotune/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c7bb30d2d767cfc712fc19152f35bb406a89eac9
--- /dev/null
+++ b/paddle/phi/kernels/autotune/CMakeLists.txt
@@ -0,0 +1,5 @@
+if (WITH_GPU)
+  nv_test(gpu_timer_test SRCS gpu_timer_test.cu DEPS gtest)
+elseif (WITH_ROCM)
+  hip_test(gpu_timer_test SRCS gpu_timer_test.cu DEPS gtest)
+endif()
diff --git a/paddle/phi/kernels/autotune/gpu_timer.h b/paddle/phi/kernels/autotune/gpu_timer.h
new file mode 100644
index 0000000000000000000000000000000000000000..87eca2613a7b5290341b448e6910ddbbcc833325
--- /dev/null
+++ b/paddle/phi/kernels/autotune/gpu_timer.h
@@ -0,0 +1,101 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/backends/gpu/gpu_decls.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
+#ifdef PADDLE_WITH_CUDA
+#include <cuda_runtime.h>
+#endif
+#ifdef PADDLE_WITH_HIP
+#include <hip/hip_runtime.h>
+#endif
+
+namespace phi {
+
+// Measures the time between Start() and Stop() on a stream using a pair of
+// CUDA/HIP events owned by the object.
+class GpuTimer {
+ public:
+  // Creates the start/stop events. The handles are null-initialized, so the
+  // checks below fire when event creation did not produce a handle.
+  GpuTimer() {
+#ifdef PADDLE_WITH_HIP
+    hipEventCreate(&start_);
+    hipEventCreate(&stop_);
+#else
+    cudaEventCreate(&start_);
+    cudaEventCreate(&stop_);
+#endif
+    PADDLE_ENFORCE_NOT_NULL(
+        start_, phi::errors::PreconditionNotMet("Start Event is not ready."));
+    PADDLE_ENFORCE_NOT_NULL(
+        stop_, phi::errors::PreconditionNotMet("Stop Event is not ready."));
+  }
+
+  // The timer owns its events; a copy would destroy the same events twice.
+  GpuTimer(const GpuTimer&) = delete;
+  GpuTimer& operator=(const GpuTimer&) = delete;
+
+  // Destroys both events.
+  ~GpuTimer() {
+#ifdef PADDLE_WITH_HIP
+    hipEventDestroy(start_);
+    hipEventDestroy(stop_);
+#else
+    cudaEventDestroy(start_);
+    cudaEventDestroy(stop_);
+#endif
+  }
+
+  // Records the start event on `stream`.
+  void Start(gpuStream_t stream) {
+#ifdef PADDLE_WITH_HIP
+    hipEventRecord(start_, stream);
+#else
+    cudaEventRecord(start_, stream);
+#endif
+  }
+
+  // Records the stop event on `stream`.
+  void Stop(gpuStream_t stream) {
+#ifdef PADDLE_WITH_HIP
+    hipEventRecord(stop_, stream);
+#else
+    cudaEventRecord(stop_, stream);
+#endif
+  }
+
+  // Waits for the stop event, then returns the time measured between the
+  // start and stop events (milliseconds).
+  float ElapsedTime() {
+    float milliseconds = 0;
+#ifdef PADDLE_WITH_HIP
+    hipEventSynchronize(stop_);
+    hipEventElapsedTime(&milliseconds, start_, stop_);
+#else
+    cudaEventSynchronize(stop_);
+    cudaEventElapsedTime(&milliseconds, start_, stop_);
+#endif
+    return milliseconds;
+  }
+
+ private:
+  gpuEvent_t start_ = nullptr;
+  gpuEvent_t stop_ = nullptr;
+};
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/autotune/gpu_timer_test.cu b/paddle/phi/kernels/autotune/gpu_timer_test.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b6eb345885f30e2c0ab2406b65bbe5f2d01f944e
--- /dev/null
+++ b/paddle/phi/kernels/autotune/gpu_timer_test.cu
@@ -0,0 +1,117 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <functional>
+#include "glog/logging.h"
+#include "paddle/phi/kernels/autotune/gpu_timer.h"
+#include "paddle/phi/kernels/funcs/aligned_vector.h"
+
+template <typename T, int VecSize>  // y[i] = x[i] + y[i], VecSize per step
+__global__ void VecSum(T *x, T *y, int N) {
+#ifdef __HIPCC__
+  int idx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
+#else
+  int idx = blockDim.x * blockIdx.x + threadIdx.x;
+#endif
+  using LoadT = phi::AlignedVector<T, VecSize>;
+  for (int i = idx * VecSize; i < N; i += blockDim.x * gridDim.x * VecSize) {
+    LoadT x_vec;
+    LoadT y_vec;
+    phi::Load<T, VecSize>(&x[i], &x_vec);
+    phi::Load<T, VecSize>(&y[i], &y_vec);
+#pragma unroll
+    for (int j = 0; j < VecSize; j++) {
+      y_vec[j] = x_vec[j] + y_vec[j];
+    }
+    phi::Store<T, VecSize>(y_vec, &y[i]);
+  }
+}
+
+template <int VecSize, int Threads, size_t Blocks>  // one launch configuration
+void Algo(float *d_in, float *d_out, size_t N) {
+#ifdef __HIPCC__
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(VecSum<float, VecSize>),
+                     dim3(Blocks),
+                     dim3(Threads),
+                     0,
+                     0,
+                     d_in,
+                     d_out,
+                     N);
+#else
+  VecSum<float, VecSize><<<Blocks, Threads>>>(d_in, d_out, N);
+#endif
+}
+
+TEST(GpuTimer, Sum) {
+  float *in1, *in2, *out;
+  float *d_in1, *d_in2;
+  size_t N = 1 << 20;
+  size_t size = sizeof(float) * N;
+#ifdef __HIPCC__
+  hipMalloc(reinterpret_cast<void **>(&d_in1), size);
+  hipMalloc(reinterpret_cast<void **>(&d_in2), size);
+#else
+  cudaMalloc(reinterpret_cast<void **>(&d_in1), size);
+  cudaMalloc(reinterpret_cast<void **>(&d_in2), size);
+#endif
+  in1 = reinterpret_cast<float *>(malloc(size));
+  in2 = reinterpret_cast<float *>(malloc(size));
+  out = reinterpret_cast<float *>(malloc(size));
+  for (size_t i = 0; i < N; i++) {
+    in1[i] = 1.0f;
+    in2[i] = 2.0f;
+  }
+
+#ifdef __HIPCC__
+  hipMemcpy(d_in1, in1, size, hipMemcpyHostToDevice);
+  hipMemcpy(d_in2, in2, size, hipMemcpyHostToDevice);
+#else
+  cudaMemcpy(d_in1, in1, size, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_in2, in2, size, cudaMemcpyHostToDevice);
+#endif
+
+  using Functor = std::function<void(float *, float *, size_t)>;
+  Functor algo0 = Algo<4, 256, 1024>;
+  Functor algo1 = Algo<1, 256, 1024>;
+  Functor algo2 = Algo<1, 256, 8>;
+
+  std::vector<Functor> algos = {algo0, algo1, algo2};
+
+  for (size_t j = 0; j < algos.size(); ++j) {  // index type matches size()
+    auto algo = algos[j];
+    phi::GpuTimer timer;
+    timer.Start(0);
+    algo(d_in1, d_in2, N);
+    timer.Stop(0);
+    VLOG(3) << "alog: " << j << " cost: " << timer.ElapsedTime() << "ms";
+  }
+
+#ifdef __HIPCC__
+  hipMemcpy(out, d_in2, size, hipMemcpyDeviceToHost);
+#else
+  cudaMemcpy(out, d_in2, size, cudaMemcpyDeviceToHost);
+#endif
+  free(in1);
+  free(in2);
+  free(out);
+#ifdef __HIPCC__
+  hipFree(d_in1);
+  hipFree(d_in2);
+#else
+  cudaFree(d_in1);
+  cudaFree(d_in2);
+#endif
+}
diff --git a/paddle/phi/kernels/cpu/activation_grad_kernel.cc b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
index c582261596221f4db8bd03599386082cee909096..0776e570e9cd35be4a0f7d10c8fb4dd40aa07171 100644
--- a/paddle/phi/kernels/cpu/activation_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -121,6 +121,10 @@ DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, AtanhGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, TanhShrinkGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, SiluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, LogSigmoidGradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, LogGradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, Log2GradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log10, Log10GradFunctor);
+DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPX(Log1p, Log1pGradFunctor);
 
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Relu, ReluGradFunctor);
 DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DEPOUT(Tanh, TanhGradFunctor);
@@ -233,3 +237,8 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_sigmoid_grad, HardSigmoidGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(logsigmoid_grad, LogSigmoidGradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log2_grad, Log2GradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log10_grad, Log10GradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log1p_grad, Log1pGradKernel) +PD_REGISTER_ACTIVATION_DOUBLE_GRAD_KERNEL(log_double_grad, LogDoubleGradKernel) diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc index 1d7b77ea4445f494105d4c23516f31f349847089..c8709261d2cb0b162306b0c52d5dfefd6f09ef52 100644 --- a/paddle/phi/kernels/cpu/activation_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_kernel.cc @@ -74,6 +74,10 @@ DEFINE_CPU_ACTIVATION_KERNEL(TanhShrink, TanhShrinkFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Silu, SiluFunctor) DEFINE_CPU_ACTIVATION_KERNEL(Sigmoid, SigmoidFunctor) DEFINE_CPU_ACTIVATION_KERNEL(LogSigmoid, LogSigmoidFunctor) +DEFINE_CPU_ACTIVATION_KERNEL(Log, LogFunctor) +DEFINE_CPU_ACTIVATION_KERNEL(Log2, Log2Functor) +DEFINE_CPU_ACTIVATION_KERNEL(Log10, Log10Functor) +DEFINE_CPU_ACTIVATION_KERNEL(Log1p, Log1pFunctor) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, LeakyReluFunctor, alpha) DEFINE_CPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, @@ -118,3 +122,7 @@ PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel) PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel) +PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel) +PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel) +PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel) +PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel) diff --git a/paddle/phi/kernels/cpu/squeeze_grad_kernel.cc b/paddle/phi/kernels/cpu/squeeze_grad_kernel.cc new file mode 
100644
index 0000000000000000000000000000000000000000..5f605e6c2504baec2276ed9288c72f466f3c40f0
--- /dev/null
+++ b/paddle/phi/kernels/cpu/squeeze_grad_kernel.cc
@@ -0,0 +1,34 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/squeeze_grad_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(squeeze_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SqueezeGradKernel,
+                   float,
+                   double,
+                   phi::dtype::bfloat16,
+                   bool,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}  // empty registration body
diff --git a/paddle/phi/kernels/cpu/squeeze_kernel.cc b/paddle/phi/kernels/cpu/squeeze_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7d5a6ca4e884e80a20e9583be4aa9f48f9484793
--- /dev/null
+++ b/paddle/phi/kernels/cpu/squeeze_kernel.cc
@@ -0,0 +1,34 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/squeeze_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/squeeze_kernel_impl.h"
+
+PD_REGISTER_KERNEL(squeeze,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::SqueezeKernel,
+                   float,
+                   double,
+                   phi::dtype::bfloat16,
+                   bool,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}  // empty registration body
diff --git a/paddle/phi/kernels/cpu/unsqueeze_grad_kernel.cc b/paddle/phi/kernels/cpu/unsqueeze_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0cbccac4734a768db143387bdbfa48fd0ca148f3
--- /dev/null
+++ b/paddle/phi/kernels/cpu/unsqueeze_grad_kernel.cc
@@ -0,0 +1,35 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/unsqueeze_grad_kernel.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(unsqueeze_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::UnsqueezeGradKernel,
+                   phi::dtype::bfloat16,
+                   bool,
+                   int,
+                   int16_t,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>,
+                   float,
+                   double) {}  // empty registration body
diff --git a/paddle/phi/kernels/cpu/unsqueeze_kernel.cc b/paddle/phi/kernels/cpu/unsqueeze_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0152a31f80ba80280af137dcbc544172bf89c430
--- /dev/null
+++ b/paddle/phi/kernels/cpu/unsqueeze_kernel.cc
@@ -0,0 +1,35 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/phi/kernels/unsqueeze_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/unsqueeze_kernel_impl.h" + +PD_REGISTER_KERNEL(unsqueeze, + CPU, + ALL_LAYOUT, + phi::UnsqueezeKernel, + float, + double, + phi::dtype::bfloat16, + bool, + int, + int16_t, + uint8_t, + int8_t, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/full_kernel.cc b/paddle/phi/kernels/full_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..9622bff5c255aef470cbd50c9e8496e39bf7d02b --- /dev/null +++ b/paddle/phi/kernels/full_kernel.cc @@ -0,0 +1,65 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/full_kernel.h" + +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void FullBatchSizeLikeKernel(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& shape, + const Scalar& val, + DataType dtype, + int x_batch_size_dim, + int out_batch_size_dim, + DenseTensor* out) { + if (x.lod().size() && x_batch_size_dim == 0) { + // set the correct batch size for the LoDTensor. 
+ auto odims = out->dims(); + odims[out_batch_size_dim] = static_cast(x.lod().back().size()) - 1; + FullKernel(dev_ctx, phi::vectorize(odims), val, dtype, out); + } + FullLikeKernel(dev_ctx, x, val, dtype, out); +} + +} // namespace phi + +PD_REGISTER_KERNEL(full_batch_size_like, + CPU, + ALL_LAYOUT, + phi::FullBatchSizeLikeKernel, + float, + double, + int, + int64_t, + bool) { + kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); +} +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_REGISTER_KERNEL(full_batch_size_like, + GPU, + ALL_LAYOUT, + phi::FullBatchSizeLikeKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::float16) { + kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND); +} +#endif diff --git a/paddle/phi/kernels/full_kernel.h b/paddle/phi/kernels/full_kernel.h index 41fc96b6db1fae5eb54b24923b68b4491c158d93..df82e651a0b268cba49d64dff6307d41a377fe0a 100644 --- a/paddle/phi/kernels/full_kernel.h +++ b/paddle/phi/kernels/full_kernel.h @@ -14,6 +14,8 @@ #pragma once +#include + #include "paddle/phi/common/scalar.h" #include "paddle/phi/common/scalar_array.h" #include "paddle/phi/core/dense_tensor.h" @@ -37,6 +39,18 @@ void FullLikeKernel(const Context& dev_ctx, DataType dtype, DenseTensor* out); +// In order to be compatible with fill_constant_batch_size_like op +// that are still used in the 2.x APIs +template +void FullBatchSizeLikeKernel(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& shape, + const Scalar& val, + DataType dtype, + int x_batch_size_dim, + int out_batch_size_dim, + DenseTensor* out); + template void Full(const Context& dev_ctx, const ScalarArray& shape, diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index 6c5ffbd06e3a435d9568a6c4717d8ce83b5aec00..6e536bd00a4a12fc2e4034920c344993f54f447b 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -1223,6 +1223,133 @@ 
struct HardSigmoidGradFunctor : public BaseActivationFunctor { } }; +// log(x) = natural logarithm of x +template +struct LogFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log(); + } +}; + +template +struct LogGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (static_cast(1) / x); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +// log2(x) = logarithm to the base 2 of the elements of x +template +struct Log2Functor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log() / static_cast(log(2)); + } +}; + +// the gradient of log2(x) is 1/(x*ln(2)) +template +struct Log2GradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(1) / (x * static_cast(log(2))); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +// log10(x) = logarithm to the base 10 of the elements of x +template +struct Log10Functor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.log() / static_cast(log(10)); + } +}; + +// the gradient of log10(x) is 1/(x*ln(10)) +template +struct Log10GradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * static_cast(1) / (x * static_cast(log(10))); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +// log1p(x) = natural logarithm of x+1 +template +struct Log1pFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = (static_cast(1) + x).log(); + } +}; + +template +struct Log1pGradFunctor : 
public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * (static_cast(1) / (x + static_cast(1))); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct LogGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, + const DenseTensor* X, + const DenseTensor* ddX, + DenseTensor* ddOut, + const DenseTensor* dOut, + DenseTensor* dX) const { + auto* d = dev.eigen_device(); + auto ddx = EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "LogGradGrad")); + auto x = EigenVector::Flatten( + GET_DATA_SAFELY(X, "Input", "X", "LogGradGrad")); + // ddout = ddx / x; dx = -(dout / x) * (ddx / x) + // calculate dx first, so ddout can inplace ddx + if (dX) { + auto dout = EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Output", "DOut", "LogGradGrad")); + auto dx = EigenVector::Flatten( + GET_DATA_SAFELY(dX, "Output", "DX", "LogGradGrad")); + dx.device(*d) = dout * static_cast(-1) * ddx / (x * x); + } + if (ddOut) { + auto ddout = EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "LogGradGrad")); + ddout.device(*d) = ddx * static_cast(1) / x; + } + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + #if defined(__NVCC__) || defined(__HIPCC__) || defined(__xpu__) template struct CudaReluFunctor : public BaseActivationFunctor { @@ -1970,6 +2097,99 @@ struct CudaHardSigmoidGradFunctor : public BaseActivationFunctor { } }; +template +struct CudaLogFunctor : public BaseActivationFunctor { + using MPType = typename phi::dtype::MPTypeTrait::Type; + + // log(x) = log(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(log(x)); + } +}; + +template +struct CudaLogGradFunctor : public BaseActivationFunctor { + // dx = dout / x + __device__ __forceinline__ T operator()(const T dout, const T x) 
const { + return dout / x; + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct CudaLog1pFunctor : public BaseActivationFunctor { + using MPType = typename phi::dtype::MPTypeTrait::Type; + MPType one = static_cast(1.0f); + + // log1p(x) = log(1 + x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(log(one + x)); + } +}; + +template +struct CudaLog1pGradFunctor : public BaseActivationFunctor { + T one = static_cast(1.0f); + + // dx = dout / (1 + x) + __device__ __forceinline__ T operator()(const T dout, const T x) const { + return dout / (one + x); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct CudaLog2Functor : public BaseActivationFunctor { + using MPType = typename phi::dtype::MPTypeTrait::Type; + + // log2(x) = log2(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(log2(x)); + } +}; + +template +struct CudaLog2GradFunctor : public BaseActivationFunctor { + using MPType = typename phi::dtype::MPTypeTrait::Type; + T log_two = static_cast(log(static_cast(2.0f))); + + // dx = dout / (x * log(2)) + __device__ __forceinline__ T operator()(const T dout, const T x) const { + return dout / (x * log_two); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + +template +struct CudaLog10Functor : public BaseActivationFunctor { + using MPType = typename phi::dtype::MPTypeTrait::Type; + + // log10(x) = log10(x) + __device__ __forceinline__ T operator()(const T arg_x) const { + MPType x = static_cast(arg_x); + return static_cast(log10(x)); + } +}; + +template +struct CudaLog10GradFunctor : public BaseActivationFunctor { + using MPType = typename phi::dtype::MPTypeTrait::Type; + T log_ten = static_cast(log(static_cast(10.0f))); + + // dx = dout / (x * log(10)) + __device__ 
__forceinline__ T operator()(const T dout, const T x) const { + return dout / (x * log_ten); + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; } +}; + #endif } // namespace funcs diff --git a/paddle/phi/kernels/funcs/unsqueeze.h b/paddle/phi/kernels/funcs/unsqueeze.h index 7b8a81471ef769dc5ddf18889f60813641d86d22..2d77c809bf9c9fd16d6c92d99ad686dae7642203 100644 --- a/paddle/phi/kernels/funcs/unsqueeze.h +++ b/paddle/phi/kernels/funcs/unsqueeze.h @@ -21,6 +21,118 @@ namespace phi { namespace funcs { +inline DDim GetOutputSqueezeShape(const std::vector squeeze_dims, + const DDim& in_dims, + bool is_runtime) { + size_t num_squeeze_dims = squeeze_dims.size(); + std::vector should_squeeze(in_dims.size(), false); + + // Mark dimensions need to be squeezed. + if (num_squeeze_dims == 0) { + for (int i = 0; i < in_dims.size(); ++i) { + if (in_dims[i] == 1) { + should_squeeze[i] = true; + } + } + } else { + for (size_t i = 0; i < num_squeeze_dims; ++i) { + int current = squeeze_dims[i] < 0 ? 
squeeze_dims[i] + in_dims.size() + : squeeze_dims[i]; + + PADDLE_ENFORCE_GE( + current, + 0, + phi::errors::InvalidArgument( + "Each axis in Attr(axes) should be in the range of [%d, %d]" + "But current axis is:%d, input tensor's shape = [%s].", + -in_dims.size(), + in_dims.size() - 1, + current, + in_dims)); + PADDLE_ENFORCE_LT( + current, + in_dims.size(), + phi::errors::InvalidArgument( + "Each axis in Attr(axes) should be in the range of [%d, %d]" + "But current axis is:%d, input tensor's shape = [%s].", + -in_dims.size(), + in_dims.size() - 1, + current, + in_dims)); + + if (!should_squeeze[current]) { + if (is_runtime) { + // At run time, dim of 1 is allowed to squeeze + if (in_dims[current] == 1) { + should_squeeze[current] = true; + } + } else { + // At compile time, dim of -1 or 1 is allowed to squeeze + if (in_dims[current] == 1 || in_dims[current] == -1) { + should_squeeze[current] = true; + } + } + } + } + } + // Make output dimensions + std::vector output_shape; + for (int i = 0; i < in_dims.size(); ++i) { + if (!should_squeeze[i]) { + output_shape.push_back(in_dims[i]); + } + } + return phi::make_ddim(output_shape); +} + +inline DDim GetUnsqueezeShape(const std::vector unsqz_dims, + const DDim& in_dims) { + int output_size = in_dims.size() + static_cast(unsqz_dims.size()); + int cur_output_size = in_dims.size(); + std::vector output_shape(output_size, 0); + + // Validity Check: rank range. + PADDLE_ENFORCE_LE( + output_size, + 6, + phi::errors::InvalidArgument("The output " + "tensor's rank should be less than 6.")); + + for (int axis : unsqz_dims) { + int cur = axis < 0 ? 
axis + cur_output_size + 1 : axis; + // Vaildity Check: the axis bound + PADDLE_ENFORCE_GE( + cur, + 0, + phi::errors::InvalidArgument("The insert dimension value should " + "not be less than 0")); + PADDLE_ENFORCE_LE(cur, + cur_output_size, + phi::errors::InvalidArgument( + "The insert dimension value shoule not be larger " + "than the dimension size of input tensor")); + // Move old axis, and insert new axis + for (int i = cur_output_size; i >= cur; --i) { + if (output_shape[i] == 1) { + // Move axis + output_shape[i + 1] = 1; + output_shape[i] = 0; + } + } + output_shape[cur] = 1; + // Add the output size. + cur_output_size++; + } + + // Make output shape + for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) { + if (output_shape[out_idx] == 0) { + output_shape[out_idx] = in_dims[in_idx++]; + } + } + + return phi::make_ddim(output_shape); +} inline const DenseTensor Unsqueeze(const DenseTensor& x, int axis = 0) { // don't copy data, only change the dims diff --git a/paddle/phi/kernels/gpu/activation_grad_kernel.cu b/paddle/phi/kernels/gpu/activation_grad_kernel.cu index c912d0c4686ff3fee88925f4d7121f38f24a5485..3cc41555a898b041921aecea309eb6d97d58a0ba 100644 --- a/paddle/phi/kernels/gpu/activation_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_grad_kernel.cu @@ -177,6 +177,10 @@ DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Atanh, CudaAtanhGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(TanhShrink, CudaTanhShrinkGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Silu, CudaSiluGradFunctor); DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(LogSigmoid, CudaLogSigmoidGradFunctor); +DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log, CudaLogGradFunctor); +DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log2, CudaLog2GradFunctor); +DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log10, CudaLog10GradFunctor); +DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DEPX(Log1p, CudaLog1pGradFunctor); DEFINE_GPU_ACT_GRAD_KERNEL_WITH_ONE_ATTRS_DEPX(LeakyRelu, CudaLeakyReluGradFunctor, @@ -300,3 +304,14 @@ 
PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_double_grad, SigmoidDoubleGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(sigmoid_triple_grad, SigmoidTripleGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_sigmoid_grad, HardSigmoidGradKernel) PD_REGISTER_ACTIVATION_GRAD_KERNEL(logsigmoid_grad, LogSigmoidGradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log_grad, LogGradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log2_grad, Log2GradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log10_grad, Log10GradKernel) +PD_REGISTER_ACTIVATION_GRAD_KERNEL(log1p_grad, Log1pGradKernel) +PD_REGISTER_KERNEL(log_double_grad, + GPU, + ALL_LAYOUT, + phi::LogDoubleGradKernel, + float, + double, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/gpu/activation_kernel.cu b/paddle/phi/kernels/gpu/activation_kernel.cu index 6b598c764debb059072ba3ae3ac90e6985479133..fb4e2e07b21cbed384248fd8400a04a6e1965320 100644 --- a/paddle/phi/kernels/gpu/activation_kernel.cu +++ b/paddle/phi/kernels/gpu/activation_kernel.cu @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" -#include "paddle/phi/kernels/impl/activation_grad_impl.h" +#include "paddle/phi/kernels/impl/activation_impl.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" @@ -93,6 +93,10 @@ DEFINE_GPU_ACTIVATION_KERNEL(TanhShrink, CudaTanhShrinkFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Silu, CudaSiluFunctor) DEFINE_GPU_ACTIVATION_KERNEL(Sigmoid, CudaSigmoidFunctor) DEFINE_GPU_ACTIVATION_KERNEL(LogSigmoid, CudaLogSigmoidFunctor) +DEFINE_GPU_ACTIVATION_KERNEL(Log, CudaLogFunctor) +DEFINE_GPU_ACTIVATION_KERNEL(Log2, CudaLog2Functor) +DEFINE_GPU_ACTIVATION_KERNEL(Log10, CudaLog10Functor) +DEFINE_GPU_ACTIVATION_KERNEL(Log1p, CudaLog1pFunctor) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(LeakyRelu, CudaLeakyReluFunctor, alpha) DEFINE_GPU_ACT_KERNEL_WITH_ONE_ATTRS(ThresholdedRelu, @@ -164,3 +168,7 @@ PD_REGISTER_ACTIVATION_KERNEL(silu, SiluKernel) PD_REGISTER_ACTIVATION_KERNEL(sigmoid, SigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(logsigmoid, LogSigmoidKernel) PD_REGISTER_ACTIVATION_KERNEL(hard_sigmoid, HardSigmoidKernel) +PD_REGISTER_ACTIVATION_KERNEL(log, LogKernel) +PD_REGISTER_ACTIVATION_KERNEL(log2, Log2Kernel) +PD_REGISTER_ACTIVATION_KERNEL(log10, Log10Kernel) +PD_REGISTER_ACTIVATION_KERNEL(log1p, Log1pKernel) diff --git a/paddle/phi/kernels/gpu/squeeze_grad_kernel.cu b/paddle/phi/kernels/gpu/squeeze_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..c5a243f45bd9769b3f6a052248e979dad10e0a4f --- /dev/null +++ b/paddle/phi/kernels/gpu/squeeze_grad_kernel.cu @@ -0,0 +1,35 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/squeeze_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(squeeze_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SqueezeGradKernel,
+                   float,
+                   double,
+                   phi::dtype::bfloat16,
+                   phi::dtype::float16,
+                   bool,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}  // empty registration body
diff --git a/paddle/phi/kernels/gpu/squeeze_kernel.cu b/paddle/phi/kernels/gpu/squeeze_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..ae15e210a02e72fd681d18fc6679f6e61c74e2bf
--- /dev/null
+++ b/paddle/phi/kernels/gpu/squeeze_kernel.cu
@@ -0,0 +1,35 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/squeeze_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/squeeze_kernel_impl.h"
+
+PD_REGISTER_KERNEL(squeeze,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::SqueezeKernel,
+                   float,
+                   double,
+                   phi::dtype::bfloat16,
+                   phi::dtype::float16,
+                   bool,
+                   int,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>) {}  // empty registration body
diff --git a/paddle/phi/kernels/gpu/unsqueeze_grad_kernel.cu b/paddle/phi/kernels/gpu/unsqueeze_grad_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..6c3a2066f0f2d18f35e8c2e58b9a26ae3b5624d3
--- /dev/null
+++ b/paddle/phi/kernels/gpu/unsqueeze_grad_kernel.cu
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/unsqueeze_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h"
+
+PD_REGISTER_KERNEL(unsqueeze_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::UnsqueezeGradKernel,
+                   phi::dtype::bfloat16,
+                   phi::dtype::float16,
+                   bool,
+                   int,
+                   int16_t,
+                   uint8_t,
+                   int8_t,
+                   int64_t,
+                   phi::dtype::complex<float>,
+                   phi::dtype::complex<double>,
+                   float,
+                   double) {}  // empty registration body
diff --git a/paddle/phi/kernels/gpu/unsqueeze_kernel.cu b/paddle/phi/kernels/gpu/unsqueeze_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..86b44622546375321086216ff5a8b8c5b3429f55
--- /dev/null
+++ b/paddle/phi/kernels/gpu/unsqueeze_kernel.cu
@@ -0,0 +1,36 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/phi/kernels/unsqueeze_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/unsqueeze_kernel_impl.h" + +PD_REGISTER_KERNEL(unsqueeze, + GPU, + ALL_LAYOUT, + phi::UnsqueezeKernel, + float, + double, + phi::dtype::float16, + phi::dtype::bfloat16, + bool, + int, + int16_t, + uint8_t, + int8_t, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/impl/activation_grad_impl.h b/paddle/phi/kernels/impl/activation_grad_impl.h index 7d6b6dc72ea60214ff4c9974b4ff885feecb5822..7ef8a0887c75cce8be9618593073e84089739c30 100644 --- a/paddle/phi/kernels/impl/activation_grad_impl.h +++ b/paddle/phi/kernels/impl/activation_grad_impl.h @@ -275,4 +275,22 @@ void SigmoidTripleGradKernel(const Context& dev_ctx, d_ddx); } +template +void LogDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& dout, + const DenseTensor& ddx, + DenseTensor* dx, + DenseTensor* ddout) { + if (dx) { + dx->Resize(x.dims()); + dev_ctx.template Alloc(dx); + } + if (ddout) { + dev_ctx.template Alloc(ddout); + } + funcs::LogGradGradFunctor functor; + functor(dev_ctx, &x, &ddx, ddout, &dout, dx); +} + } // namespace phi diff --git a/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h b/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..c74aa5c7243f3ccd24c2e44042ea88826637b6a5 --- /dev/null +++ b/paddle/phi/kernels/impl/squeeze_grad_kernel_impl.h @@ -0,0 +1,33 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/copy_kernel.h" + +namespace phi { +template +void SqueezeGradKernel(const Context& dev_ctx, + const DenseTensor& xshape, + const DenseTensor& dout, + const std::vector& axes, + DenseTensor* dx) { + auto xshape_dims = xshape.dims(); + auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + + dev_ctx.template Alloc(dx); + phi::Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dx); + dx->Resize(x_dims); +} +} // namespace phi diff --git a/paddle/phi/kernels/impl/squeeze_kernel_impl.h b/paddle/phi/kernels/impl/squeeze_kernel_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..d2b40824a91c99ac4f69b689b2219822e21c6aea --- /dev/null +++ b/paddle/phi/kernels/impl/squeeze_kernel_impl.h @@ -0,0 +1,34 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/unsqueeze.h" + +namespace phi { +template +void SqueezeKernel(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& axes, + DenseTensor* xshape, + DenseTensor* out) { + auto x_dims = x.dims(); + auto out_dims = funcs::GetOutputSqueezeShape(axes, x_dims, true); + + dev_ctx.template Alloc(out); + phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out); + out->Resize(out_dims); +} +} // namespace phi diff --git a/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h b/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..54b332ea4c898d10b63037375805d07f04ab2e63 --- /dev/null +++ b/paddle/phi/kernels/impl/unsqueeze_grad_kernel_impl.h @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/copy_kernel.h" + +namespace phi { +template +void UnsqueezeGradKernel(const Context& dev_ctx, + const DenseTensor& x_shape, + const DenseTensor& dout, + DenseTensor* dx) { + auto xshape_dims = x_shape.dims(); + auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + dev_ctx.template Alloc(dx); + phi::Copy(dev_ctx, dout, dev_ctx.GetPlace(), true, dx); + dx->Resize(x_dims); +} +} // namespace phi diff --git a/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h b/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..884fa26df451c680fe8352f0c0b21b2ce8a33b6c --- /dev/null +++ b/paddle/phi/kernels/impl/unsqueeze_kernel_impl.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/unsqueeze.h" + +namespace phi { +template +void UnsqueezeKernel(const Context& dev_ctx, + const DenseTensor& x, + const ScalarArray& axes, + DenseTensor* xshape, + DenseTensor* out) { + auto x_dims = x.dims(); + auto out_dims = out->dims(); + if (axes.FromTensor()) { + std::vector tmp; + tmp.reserve(axes.GetData().size()); + std::for_each(axes.GetData().begin(), + axes.GetData().end(), + [&tmp](const int64_t& t) { tmp.push_back(t); }); + out_dims = funcs::GetUnsqueezeShape(tmp, x_dims); + } + out->Resize(out_dims); + dev_ctx.template Alloc(out); + phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out); + out->Resize(out_dims); // copy will reset the dims. +} +} // namespace phi diff --git a/paddle/phi/kernels/squeeze_grad_kernel.h b/paddle/phi/kernels/squeeze_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..52b02bdbb95299b37fc63532e8210af8be6a2e28 --- /dev/null +++ b/paddle/phi/kernels/squeeze_grad_kernel.h @@ -0,0 +1,28 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void SqueezeGradKernel(const Context& dev_ctx, + const DenseTensor& xshape, + const DenseTensor& dout, + const std::vector& axes, + DenseTensor* dx); +} // namespace phi diff --git a/paddle/phi/kernels/squeeze_kernel.h b/paddle/phi/kernels/squeeze_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..22254eacfcefcd3a3dd33870c4d068c1114335b7 --- /dev/null +++ b/paddle/phi/kernels/squeeze_kernel.h @@ -0,0 +1,28 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void SqueezeKernel(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& axes, + DenseTensor* xshape, + DenseTensor* out); +} // namespace phi diff --git a/paddle/phi/kernels/unsqueeze_grad_kernel.h b/paddle/phi/kernels/unsqueeze_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..0c5afe7be6039d408e4ad0b05144cc2fbe2c11cf --- /dev/null +++ b/paddle/phi/kernels/unsqueeze_grad_kernel.h @@ -0,0 +1,27 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void UnsqueezeGradKernel(const Context& dev_ctx, + const DenseTensor& x_shape, + const DenseTensor& dout, + DenseTensor* dx); +} // namespace phi diff --git a/paddle/phi/kernels/unsqueeze_kernel.h b/paddle/phi/kernels/unsqueeze_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..8f818a1b4904223e093c18107168afde2fd599a5 --- /dev/null +++ b/paddle/phi/kernels/unsqueeze_kernel.h @@ -0,0 +1,29 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/phi/common/scalar_array.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void UnsqueezeKernel(const Context& dev_ctx, + const DenseTensor& x, + const ScalarArray& axes, + DenseTensor* xshape, + DenseTensor* out); +} // namespace phi diff --git a/paddle/phi/ops/compat/activation_sig.cc b/paddle/phi/ops/compat/activation_sig.cc index 7ae0dc45c5e1be09a31821c171b84fbb47fe1c9e..8b4884e35b608c1d60645e5d2b59131f240d8be3 100644 --- a/paddle/phi/ops/compat/activation_sig.cc +++ b/paddle/phi/ops/compat/activation_sig.cc @@ -57,6 +57,10 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(HardShrink, "hard_shrink", "threshold"); DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(TanhShrink, "tanh_shrink", ); // NOLINT DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Silu, "silu", ); // NOLINT DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(LogSigmoid, "logsigmoid", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log, "log", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log2, "log2", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log10, "log10", ); // NOLINT +DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Log1p, "log1p", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", ); // NOLINT DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", ); // NOLINT @@ -125,6 +129,12 @@ KernelSignature EluDoubleGradOpArgumentMapping( "elu_double_grad", {"X", "DOut", "DDX"}, {"alpha"}, {"DX", "DDOut"}); } +KernelSignature LogDoubleGradOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature( + "log_double_grad", {"X", "DOut", "DDX"}, {}, {"DX", "DDOut"}); +} + } // namespace phi PD_REGISTER_BASE_KERNEL_NAME(relu_grad_grad, relu_double_grad); @@ -134,6 +144,7 @@ PD_REGISTER_BASE_KERNEL_NAME(softshrink, soft_shrink); PD_REGISTER_BASE_KERNEL_NAME(softshrink_grad, soft_shrink_grad); PD_REGISTER_BASE_KERNEL_NAME(elu_grad_grad, elu_double_grad); PD_REGISTER_BASE_KERNEL_NAME(sigmoid_grad_grad, sigmoid_double_grad); +PD_REGISTER_BASE_KERNEL_NAME(log_grad_grad, log_double_grad); 
PD_REGISTER_ARG_MAPPING_FN(cos_grad, phi::CosGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(tan_grad, phi::TanGradOpArgumentMapping); @@ -181,3 +192,8 @@ PD_REGISTER_ARG_MAPPING_FN(logsigmoid_grad, phi::LogSigmoidGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(hard_sigmoid_grad, phi::HardSigmoidGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(log_grad, phi::LogGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(log_grad_grad, phi::LogDoubleGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(log2_grad, phi::Log2GradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(log10_grad, phi::Log10GradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(log1p_grad, phi::Log1pGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/fill_constant_batch_size_like_sig.cc b/paddle/phi/ops/compat/fill_constant_batch_size_like_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..444c0ec5b16fe639ee8ff36c22e38a2aa77db8fa --- /dev/null +++ b/paddle/phi/ops/compat/fill_constant_batch_size_like_sig.cc @@ -0,0 +1,43 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature FillConstantBatchSizeLikeOpArgumentMapping( + const ArgumentMappingContext& ctx) { + const auto& str_value = paddle::any_cast(ctx.Attr("str_value")); + if (str_value.empty()) { + return KernelSignature( + "full_batch_size_like", + {"Input"}, + {"shape", "value", "dtype", "input_dim_idx", "output_dim_idx"}, + {"Out"}); + } else { + return KernelSignature( + "full_batch_size_like", + {"Input"}, + {"shape", "str_value", "dtype", "input_dim_idx", "output_dim_idx"}, + {"Out"}); + } +} + +} // namespace phi + +PD_REGISTER_BASE_KERNEL_NAME(fill_constant_batch_size_like, + full_batch_size_like); + +PD_REGISTER_ARG_MAPPING_FN(fill_constant_batch_size_like, + phi::FillConstantBatchSizeLikeOpArgumentMapping); diff --git a/paddle/phi/ops/compat/squeeze_sig.cc b/paddle/phi/ops/compat/squeeze_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..276246533e89e29f1a5d38fd4f3d831a044b5535 --- /dev/null +++ b/paddle/phi/ops/compat/squeeze_sig.cc @@ -0,0 +1,36 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature SqueezeOpArgumentMapping(const ArgumentMappingContext& ctx) { + return KernelSignature("squeeze", {"X"}, {"axes"}, {"XShape", "Out"}); +} + +KernelSignature SqueezeGradOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("squeeze_grad", + {"XShape", GradVarName("Out")}, + {"axes"}, + {GradVarName("X")}); +} + +} // namespace phi +PD_REGISTER_BASE_KERNEL_NAME(squeeze2, squeeze); +PD_REGISTER_BASE_KERNEL_NAME(squeeze2_grad, squeeze_grad); +PD_REGISTER_ARG_MAPPING_FN(squeeze2, phi::SqueezeOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(squeeze2_grad, phi::SqueezeGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/unsqueeze_sig.cc b/paddle/phi/ops/compat/unsqueeze_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..20cd9701e83e5ecf3563eeccb5e1b4b2923bd65a --- /dev/null +++ b/paddle/phi/ops/compat/unsqueeze_sig.cc @@ -0,0 +1,46 @@ + +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature UnsqueezeOpArgumentMapping(const ArgumentMappingContext& ctx) { + if (ctx.InputSize("AxesTensorList") > 0) { + VLOG(2) << "unsqueeze2 in AxesTensorList"; + return KernelSignature( + "unsqueeze", {"X"}, {"AxesTensorList"}, {"XShape", "Out"}); + } else if (ctx.InputSize("AxesTensor") > 0) { + VLOG(2) << "unsqueeze2 in AxesTensor"; + return KernelSignature( + "unsqueeze", {"X"}, {"AxesTensor"}, {"XShape", "Out"}); + } else { + VLOG(2) << "unsqueeze2 in axes"; + return KernelSignature("unsqueeze", {"X"}, {"axes"}, {"XShape", "Out"}); + } +} + +KernelSignature UnsqueezeGradOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature( + "unsqueeze_grad", {"XShape", GradVarName("Out")}, {}, {GradVarName("X")}); +} +} // namespace phi +PD_REGISTER_BASE_KERNEL_NAME(unsqueeze2, unsqueeze); +PD_REGISTER_BASE_KERNEL_NAME(unsqueeze2_grad, unsqueeze_grad); + +PD_REGISTER_ARG_MAPPING_FN(unsqueeze2, phi::UnsqueezeOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(unsqueeze2_grad, + phi::UnsqueezeGradOpArgumentMapping); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 39676b916e50470ac9774f3564b4bdc3a8fcb20f..bc19b50616d139e2a2db83ad51f602dff0f0fa7a 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -774,12 +774,12 @@ set +x get_precision_ut_mac ut_actual_total_startTime_s=`date +%s` if [[ "$on_precision" == "0" ]];then - ctest -E "$disable_ut_quickly" -LE ${nightly_label} --output-on-failure -j $2 | tee $tmpfile + ctest -E "$disable_ut_quickly" -LE ${nightly_label} --timeout 120 --output-on-failure -j $2 | tee $tmpfile else - ctest -R "$UT_list_prec" -E "$disable_ut_quickly" -LE ${nightly_label} --output-on-failure -j $2 | tee $tmpfile + ctest -R "$UT_list_prec" -E "$disable_ut_quickly" -LE ${nightly_label} --timeout 120 --output-on-failure -j $2 | tee $tmpfile tmpfile_rand=`date +%s%N` 
tmpfile=$tmp_dir/$tmpfile_rand - ctest -R "$UT_list_prec_1" -E "$disable_ut_quickly" -LE ${nightly_label} --output-on-failure -j $2 | tee $tmpfile + ctest -R "$UT_list_prec_1" -E "$disable_ut_quickly" -LE ${nightly_label} --timeout 120 --output-on-failure -j $2 | tee $tmpfile fi ut_total_endTime_s=`date +%s` echo "TestCases Total Time: $[ $ut_total_endTime_s - $ut_actual_total_startTime_s ]s" @@ -848,7 +848,7 @@ set +x fi done failed_test_lists='' - ctest -R "$retry_unittests_regular" --output-on-failure -j 2 | tee $tmpfile + ctest -R "$retry_unittests_regular" --timeout 120 --output-on-failure -j 2 | tee $tmpfile collect_failed_tests rm -f $tmp_dir/* exec_times=$[$exec_times+1] diff --git a/python/paddle/fluid/tests/unittests/test_scope.py b/python/paddle/fluid/tests/unittests/test_scope.py index aa093069c49ecffd402eae018f2acbe30117a52e..805aabd393e49bc0fcf92c15d9c9de0c456770de 100644 --- a/python/paddle/fluid/tests/unittests/test_scope.py +++ b/python/paddle/fluid/tests/unittests/test_scope.py @@ -59,6 +59,13 @@ class TestScope(unittest.TestCase): # It is not allowed to delete a nonexistent scope. 
scope._remove_from_pool() + def test_size(self): + paddle_c = paddle.fluid.core + scope = paddle_c.Scope() + var_a = scope.var("var_a") + self.assertEqual(scope.size(), 1) + self.assertIsNotNone(scope.find_var('var_a')) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 07baa9b51de391721bc5c33745c77f7f56c4f974..cf9cb65f6d1f4c4464d084c36cc09e23d531f78c 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -148,7 +148,6 @@ def source_include(header_file_path): #include "paddle/phi/infermeta/nullary.h" #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/infermeta/ternary.h" -#include "paddle/phi/kernels/declarations.h" #include "paddle/fluid/platform/profiler/event_tracing.h" """ diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 9c802a56a7b6e29bc89ad164a15f2f6d4749734e..d2892d13fc401c069065675dbbb8f00bfa372797 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -231,6 +231,12 @@ if [ "${HAS_MODIFIED_ALLOCATION}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then check_approval 1 6888866 39303645 fi +HAS_MODIFIED_DECLARATIONS=`git diff --name-only upstream/$BRANCH | grep "paddle/phi/kernels/declarations.h" || true` +if [ "${HAS_MODIFIED_DECLARATIONS}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then + echo_line="You must be approved by chenwhql for any use of paddle/phi/kernels/declarations.h. Thanks!\n" + check_approval 1 22561442 + fi + ALL_PADDLE_ENFORCE=`git diff -U0 upstream/$BRANCH |grep "^+" |grep -zoE "PADDLE_ENFORCE\(.[^,\);]+.[^;]*\);\s" || true` if [ "${ALL_PADDLE_ENFORCE}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then echo_line="PADDLE_ENFORCE is not recommended. 
Please use PADDLE_ENFORCE_EQ/NE/GT/GE/LT/LE or PADDLE_ENFORCE_NOT_NULL or PADDLE_ENFORCE_GPU_SUCCESS instead, see [ https://github.com/PaddlePaddle/Paddle/wiki/PADDLE_ENFORCE-Rewriting-Specification ] for details.\nYou must have one RD (chenwhql (Recommend) , luotao1 (Recommend) or lanxianghit) approval for the usage (either add or delete) of PADDLE_ENFORCE.\n${ALL_PADDLE_ENFORCE}\n"