diff --git a/.gitignore b/.gitignore index 708126b3bb070f0ce3b4e751b8732b77af8b36c4..e905833cae7a60f46f6d8fddf5403d46808873f3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,8 @@ paddle/fluid/API_DEV.spec paddle/fluid/API_PR.spec paddle/fluid/op_use_default_grad_maker_DEV.spec paddle/fluid/op_use_default_grad_maker_PR.spec -paddle/pten/api/*/api* +paddle/pten/api/*/api.* +paddle/pten/api/*/backward* paddle/pten/include/* paddle/pten/extension.h diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index c7a6f04b5f40a202d849d91d3d07a8bfb3ea7fff..578fb1621603f203ca85aefbcdd27ae51481172a 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -36,7 +36,7 @@ ENDIF() if(NOT DEFINED XPU_BASE_URL) SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") - SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220104") + SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220116") else() SET(XPU_BASE_URL "${XPU_BASE_URL}") endif() diff --git a/paddle/fluid/distributed/service/brpc_utils.h b/paddle/fluid/distributed/service/brpc_utils.h index 556bbb1048e2c4cca4aef337235e3c476deb859b..ebae710acc28b58a503bc9c0b455ef7c5ca10cff 100644 --- a/paddle/fluid/distributed/service/brpc_utils.h +++ b/paddle/fluid/distributed/service/brpc_utils.h @@ -27,7 +27,7 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" namespace butil { class IOBuf; @@ -78,11 +78,11 @@ void DeserializeFromMultiVarMsgAndIOBuf(const MultiVarMsg& multi_msg, const framework::Scope* scope); void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg, - butil::IOBufBytesIterator& iobuf, + butil::IOBufBytesIterator& iobuf, // NOLINT const platform::DeviceContext& ctx); void DeserializeSelectedRows(framework::Variable* var, const VarMsg& msg, - butil::IOBufBytesIterator& iobuf, + butil::IOBufBytesIterator& iobuf, // NOLINT const platform::DeviceContext& ctx); std::string GetIntTypeEndpoint(const std::string& ip, const uint32_t& port); diff --git a/paddle/fluid/distributed/table/depends/large_scale_kv.h b/paddle/fluid/distributed/table/depends/large_scale_kv.h index 3b00f1d6ccc3a1b66ff2b3f146aa33b15fa9c41b..3408ef5f91ad009a33c28fb4093a79075112c0bd 100644 --- a/paddle/fluid/distributed/table/depends/large_scale_kv.h +++ b/paddle/fluid/distributed/table/depends/large_scale_kv.h @@ -40,9 +40,9 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/pten/backends/dynload/port.h" namespace paddle { namespace distributed { @@ -202,7 +202,7 @@ class ValueBlock { // value = _alloc.acquire(value_length_); table[id] = value; } else { - value = (VALUE *)(void *)(res->second); + value = (VALUE *)(void *)(res->second); // NOLINT } return value; } @@ -282,8 +282,8 @@ class ValueBlock { value->unseen_days_++; if (value->unseen_days_ >= threshold) { butil::return_object(iter->second); - //_alloc.release(iter->second); - //_alloc.release(value); + // _alloc.release(iter->second); + // _alloc.release(value); iter = table.erase(iter); } else { ++iter; diff --git a/paddle/fluid/eager/accumulation/gradient_accumulation.cc b/paddle/fluid/eager/accumulation/gradient_accumulation.cc index ffd76c5bda62125bd2e38ed003cf001a9556081d..826b02b3db0720c0d158a778c8f441e312085c5c 100644 --- a/paddle/fluid/eager/accumulation/gradient_accumulation.cc +++ b/paddle/fluid/eager/accumulation/gradient_accumulation.cc @@ -216,8 +216,9 @@ void TensorAdd(const egr::EagerTensor& src, egr::EagerTensor* dst) { #define PADDLE_TENSOR_ADD(cpp_type) \ if (data_type == paddle::framework::DataTypeTrait::DataType()) { \ - TensorAddFunctor func(numel, src_tensor->data(), \ - dst_tensor->mutable_data()); \ + TensorAddFunctor func( \ + numel, src_tensor->data(), \ + dst_tensor->mutable_data(place)); \ paddle::platform::VisitPlace(place, func); \ return; \ } diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index 668e60d857b9ca371243891db686421810fda0bb..c504a126ddecaebfcb55313573d6bc490007feef 100644 --- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -1,4 +1,4 @@ -add_subdirectory(final_state_generator) +#add_subdirectory(final_state_generator) set(EAGER_GENERETOR_DEPS ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} pybind proto_desc executor layer tracer engine imperative_profiler imperative_flag) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 4400e01b8a2d302ae638fcdbcaa016c0b92f8534..b74cdcf78dcb3b55eb7cb27459100fe6eb22ac9b 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -75,6 +75,10 @@ def GetAutoGradMetaName(string): return f"{string}_autograd_meta" +def GetAutoGradMetaVectorName(string): + return f"{string}_autograd_meta_vec" + + ###################### ### File Readers ### ###################### @@ -219,10 +223,6 @@ def ParseYamlBackward(args_str, returns_str): def ForwardsValidationCheck(forward_inputs_list, forward_attrs_list, forward_returns_list, orig_forward_inputs_list, orig_forward_attrs_list, orig_forward_returns_list): - # inputs_list = [ [input_name, input_type, orig_position], ...] - # attrs_list = [ [attr_name, attr_type, default_value, orig_position], ...] - # forward_returns_list = [ [ret_name, ret_type, orig_position] ...] - # orig_returns_list = [ [ret_type, orig_position], ...] for i in range(len(forward_inputs_list)): forward_input_name = forward_inputs_list[i][0] forward_input_type = forward_inputs_list[i][1] @@ -270,9 +270,6 @@ def ForwardsValidationCheck(forward_inputs_list, forward_attrs_list, def BackwardValidationCheck(backward_fwd_input_map, backward_grad_input_map, backward_attrs_list): - # backward_fwd_input_map = { "name" : [type, is_fwd_input, orig_position] ...} - # backward_grad_input_map = { "name" : [type, fwd_position, orig_position] ...} - # backward_attrs_list = [ [attr_name, attr_type, default_value, orig_position], ...] # Check Order: TensorWrappers, GradTensors, Attributes max_fwd_input_position = -1 @@ -291,10 +288,6 @@ def BackwardValidationCheck(backward_fwd_input_map, backward_grad_input_map, def DetermineForwardPositionMap(forward_inputs_list, forward_returns_list): - # inputs_list = [ [input_name, input_type, orig_position], ...] - # forward_returns_list = [ [ret_name, ret_type, orig_position] ...] - - # forward_position_map = { "name" : [type, fwd_position] ...} forward_inputs_position_map = {} forward_outputs_position_map = {} for i in range(len(forward_inputs_list)): @@ -319,15 +312,6 @@ def DetermineForwardPositionMap(forward_inputs_list, forward_returns_list): def SlotNameMatching(backward_inputs_list, backward_returns_list, forward_inputs_position_map, forward_outputs_position_map): - # backward_inputs_list = [ [input_name, input_type, orig_position], ...] - # backward_returns_list = [ [ret_name, ret_type, orig_position], ...] - # forward_inputs_position_map = { "name" : [type, fwd_position] } - # forward_outputs_position_map = { "name" : [type, fwd_position] } - - # backward_fwd_input_map = { "name" : [type, is_fwd_input, orig_position] ...} - # backward_grad_input_map = { "name" : [type, fwd_position, orig_position] ...} - # backward_grad_output_map = { "name" : [type, fwd_position, orig_position] ...} - backward_fwd_input_map = {} backward_grad_input_map = {} backward_grad_output_map = {} @@ -580,7 +564,14 @@ def GenerateNodeCreationCodes(fwd_api_name, bwd_api_name, compute_require_grad_args_list = ["trace_backward"] for name, (ttype, pos) in forward_inputs_position_map.items(): input_autograd_meta_name = GetAutoGradMetaName(name) - input_autograd_meta = f" auto* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({name});" + if IsPlainTensorType(ttype): + input_autograd_meta = f" egr::EagerTensor* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({name});" + else: + assert IsVectorTensorType(ttype) + input_autograd_meta_vec_name = GetAutoGradMetaVectorName(name) + input_autograd_meta = f" std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({name});\n" + input_autograd_meta += f" std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" + inputs_autograd_meta_list.append(input_autograd_meta) compute_require_grad_args_list.append(input_autograd_meta_name) inputs_autograd_meta_str = "\n".join(inputs_autograd_meta_list) @@ -592,11 +583,23 @@ def GenerateNodeCreationCodes(fwd_api_name, bwd_api_name, num_fwd_outputs = len(forward_outputs_position_map.keys()) for name, (rtype, pos) in forward_outputs_position_map.items(): output_autograd_meta_name = GetAutoGradMetaName(name) + output_autograd_meta_vec_name = GetAutoGradMetaVectorName(name) if num_fwd_outputs == 1: - output_autograd_meta = f" auto* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(outputs);" + if IsPlainTensorType(rtype): + output_autograd_meta = f" egr::EagerTensor* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(outputs);" + else: + assert IsVectorTensorType(rtype) + output_autograd_meta = f" std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({outputs});\n" + output_autograd_meta += f" std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" else: # Tuple api_result - outputs_autograd_meta = f" auto* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(outputs[{pos}]);" + if IsPlainTensorType(rtype): + outputs_autograd_meta = f" egr::EagerTensor* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(outputs[{pos}]);" + else: + assert IsVectorTensorType(rtype) + output_autograd_meta = f" std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta(outputs[{pos}]);\n" + output_autograd_meta += f" std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" + outputs_autograd_meta_list.append(output_autograd_meta) pass_stop_gradient_args_list.append(output_autograd_meta_name) @@ -786,7 +789,6 @@ def GenerateForwardDefinition(fwd_api_name, bwd_api_name, auto outputs = {}; - // Node Creation {} // Returns @@ -903,17 +905,10 @@ if __name__ == "__main__": # Collect Forward Inputs/Outputs forward_inputs_list, forward_attrs_list, forward_returns_list = ParseYamlForwardFromBackward( bwd_forward_str) - print("Parsed Forward Inputs List: ", forward_inputs_list) - print("Prased Forward Attrs List: ", forward_attrs_list) - print("Parsed Forward Returns List: ", forward_returns_list) # Collect Original Forward Inputs/Outputs and then perform validation checks orig_forward_inputs_list, orig_forward_attrs_list, orig_forward_returns_list = ParseYamlForward( fwd_args_str, fwd_returns_str) - print("Parsed Original Forward Inputs List: ", orig_forward_inputs_list) - print("Prased Original Forward Attrs List: ", orig_forward_attrs_list) - print("Parsed Original Forward Returns List: ", - orig_forward_returns_list) # Forward Validation Checks ForwardsValidationCheck(forward_inputs_list, forward_attrs_list, @@ -924,25 +919,15 @@ if __name__ == "__main__": # Parse Backward Inputs/Outputs backward_inputs_list, backward_attrs_list, backward_returns_list = ParseYamlBackward( bwd_args_str, bwd_returns_str) - print("Parsed Backward Inputs List: ", backward_inputs_list) - print("Prased Backward Attrs List: ", backward_attrs_list) - print("Parsed Backward Returns List: ", backward_returns_list) # Determine Forward Inputs/Outputs Position forward_inputs_position_map, forward_outputs_position_map = DetermineForwardPositionMap( forward_inputs_list, forward_returns_list) - print("Generated Forward Input Position Map: ", - forward_inputs_position_map) - print("Generated Forward Output Position Map: ", - forward_outputs_position_map) # SlotName Matching backward_fwd_input_map, backward_grad_input_map, backward_grad_output_map = SlotNameMatching( backward_inputs_list, backward_returns_list, forward_inputs_position_map, forward_outputs_position_map) - print("Generated Backward Fwd Input Map: ", backward_fwd_input_map) - print("Generated Backward Grad Input Map: ", backward_grad_input_map) - print("Generated Backward Grad Output Map: ", backward_grad_output_map) # Backward Validation Check BackwardValidationCheck(backward_fwd_input_map, backward_grad_input_map, @@ -951,13 +936,11 @@ if __name__ == "__main__": # Node Declaration Generation node_declaration_str += GenerateNodeDeclaration( fwd_api_name, backward_fwd_input_map, backward_attrs_list) - print("Generated Node Declaration: ", node_declaration_str) node_definition_str += GenerateNodeDefinition( fwd_api_name, bwd_api_name, backward_fwd_input_map, backward_grad_input_map, backward_grad_output_map, backward_attrs_list) - print("Generated Node Definition: ", node_definition_str) # Node Definition Generation definition_declaration_pair = GenerateForwardDefinition( @@ -965,8 +948,6 @@ if __name__ == "__main__": forward_outputs_position_map, forward_attrs_list, backward_fwd_input_map, backward_grad_input_map, backward_grad_output_map, backward_attrs_list) - print("Generated Forward Definition: ", forward_definition_str) - print("Generated Forward Declaration: ", forward_declaration_str) forward_definition_str += definition_declaration_pair[0] forward_declaration_str += definition_declaration_pair[1] diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index cdc970100951362eaa30692142b674a74a19064f..64f980d709ad99c6061ceda0759d12deaaac7dd8 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -36,7 +36,8 @@ TEST(AccumulationNode, EagerTensor) { paddle::platform::CPUPlace()) .get(), meta); - dt0->mutable_data()[0] = 10.0; + dt0->mutable_data( + paddle::platform::CPUPlace())[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( @@ -45,7 +46,8 @@ TEST(AccumulationNode, EagerTensor) { .get(), meta); - dt1->mutable_data()[0] = 20.0; + dt1->mutable_data( + paddle::platform::CPUPlace())[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); std::shared_ptr grad_dt = diff --git a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc index 3d45dc831d41124351cfa74507dd7c1371546186..1c5102f7a21a7f94c3587f315de60ec8c022c89b 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc @@ -46,7 +46,7 @@ TEST(AutogradMeta, MemberFunction) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; grad_t->set_impl(dt); diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index a483ddb6a98f6ddf0262a0c56cc5051872865047..620fa52cac6db9bbb638814729b006a736fef2f3 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -40,7 +40,7 @@ TEST(EagerTensor, Constructor) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; egr::EagerTensor et3 = egr::EagerTensor(dt); @@ -70,7 +70,7 @@ TEST(EagerTensor, MemberFunction) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; VLOG(6) << "Make Dense Tensor"; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index 19850b3210b7630e4071933f5d5149366a200b34..ea4b4a480e2c079660f8fbb00d55ee1eb41bdba6 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -45,7 +45,7 @@ TEST(GradNodeInfo, GradNodeBase) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; egr::EagerTensor et1(dt); grads = {{et1}}; @@ -102,7 +102,7 @@ TEST(GradNodeInfo, GradNodeBase) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 6.0f; auto* et_ptr = std::dynamic_pointer_cast(et.impl())->data(); @@ -121,8 +121,8 @@ TEST(GradNodeInfo, GradNodeBase) { VLOG(6) << "Test Reduce Hook"; auto reduce_hook = [&](void) -> void { - auto* et_ptr = std::dynamic_pointer_cast(et1.impl()) - ->mutable_data(); + auto* et_ptr = + std::dynamic_pointer_cast(et1.impl())->data(); et_ptr[0] = 100.0; VLOG(6) << "Running Reduce Hook"; }; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index 433a00e27be0e90800f1dffedf19b358c8fc9f56..bf9c3a93e1636dd67a173308ffe5b5df7916319c 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -41,7 +41,7 @@ class GradTestNode : public egr::GradNodeBase { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 6.0f; egr::EagerTensor et1(dt); std::vector> res = {{et1}}; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index c88a5f5fdcef5701ca007d8a67682239922ee59a..c2830bf7ef6afa46a21889a8ac9a45a1a4c352ee 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -57,7 +57,7 @@ TEST(GradTensorHolder, Interfaces) { paddle::platform::CPUPlace()) .get(), meta); - dt0->mutable_data()[0] = 10.0; + dt0->mutable_data(paddle::platform::CPUPlace())[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( @@ -65,7 +65,7 @@ TEST(GradTensorHolder, Interfaces) { paddle::platform::CPUPlace()) .get(), meta); - dt1->mutable_data()[0] = 20.0; + dt1->mutable_data(paddle::platform::CPUPlace())[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); // Constructor empty GradTensorHolder diff --git a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc index 8bc739d455a958c43a581dde19c4c3be850a7caa..742a64ecec23975e2081ef16f2476206ff751442 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc @@ -29,7 +29,7 @@ TEST(TensorWrapper, Basic) { paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; et1.set_impl(dt); @@ -56,7 +56,7 @@ TEST(TensorWrapper, Basic) { paddle::platform::CPUPlace()) .get(), meta2); - auto* dt_ptr2 = dt->mutable_data(); + auto* dt_ptr2 = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr2[0] = 6.0f; dt_ptr2[1] = 11.0f; et2.set_impl(dt2); diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index 1b2f1287b069d9ef905f0e46336cddaf5a7d551b..3bd5b98a164d63bf778876f8924c54001348028e 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -35,7 +35,7 @@ TEST(EagerUtils, AutoGradMeta) { paddle::platform::CPUPlace()) .get(), meta); - dt0->mutable_data()[0] = 10.0; + dt0->mutable_data(paddle::platform::CPUPlace())[0] = 10.0; EagerTensor et0 = EagerTensor(dt0); std::shared_ptr dt1 = std::make_shared( @@ -43,7 +43,7 @@ TEST(EagerUtils, AutoGradMeta) { paddle::platform::CPUPlace()) .get(), meta); - dt1->mutable_data()[0] = 20.0; + dt1->mutable_data(paddle::platform::CPUPlace())[0] = 20.0; EagerTensor et1 = EagerTensor(dt1); std::vector ets = {et0, et1}; @@ -112,7 +112,7 @@ egr::EagerTensor CreateTestCPUTensor(T val, paddle::platform::CPUPlace()) .get(), meta); - auto* dt_ptr = dt->mutable_data(); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); for (int64_t i = 0; i < dt->numel(); i++) { dt_ptr[i] = val; } diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc index 1fef0905b4cc5a8f3923c8785507f3ce5da046b3..45b7b80049560befa6510220979f12c1476389de 100644 --- a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -44,8 +44,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { paddle::memory::Alloc(place, bytes_size)), std::move(ret_meta)); - float* t_ptr = t_dense->mutable_data(); - float* ret_ptr = ret_dense->mutable_data(); + float* t_ptr = t_dense->mutable_data(place); + float* ret_ptr = ret_dense->mutable_data(place); for (int i = 0; i < ret_dense->numel(); i++) { ret_ptr[i] = t_ptr[i] + 5.0; } @@ -184,7 +184,7 @@ TEST(FwdBwdJoint, BranchedNodes) { // Examine Forward Output 2 { auto dense_out = std::dynamic_pointer_cast(out2.impl()); - float* ptr = dense_out->mutable_data(); + float* ptr = dense_out->mutable_data(paddle::platform::CPUPlace()); for (int i = 0; i < 20; i++) { PADDLE_ENFORCE(ptr[i] == 150.0, paddle::platform::errors::Fatal( diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index 4f4a33b1a743afdf3f6f5c3652a2d87b3e0499ef..3d61167c52efeaf5a9e6d506a2c0bd6088a71a7f 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -45,8 +45,8 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { paddle::memory::Alloc(place, bytes_size)), std::move(ret_meta)); - float* t_ptr = t_dense->mutable_data(); - float* ret_ptr = ret_dense->mutable_data(); + float* t_ptr = t_dense->mutable_data(place); + float* ret_ptr = ret_dense->mutable_data(place); for (int i = 0; i < ret_dense->numel(); i++) { ret_ptr[i] = t_ptr[i] + 3.0; } diff --git a/paddle/fluid/eager/tests/test_utils.h b/paddle/fluid/eager/tests/test_utils.h index e7f3a89bf06b3f1ed567c8df231576e0179b4777..9c217dff499607343dc346f6cf39cb5a8ba45d2d 100644 --- a/paddle/fluid/eager/tests/test_utils.h +++ b/paddle/fluid/eager/tests/test_utils.h @@ -34,7 +34,7 @@ bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) { egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target); auto grad_dense = std::dynamic_pointer_cast(meta->Grad().impl()); - T* ptr = grad_dense->mutable_data(); + T* ptr = grad_dense->data(); std::vector host_data(grad_dense->numel()); if (paddle::platform::is_gpu_place(grad_dense->place())) { @@ -67,7 +67,7 @@ template bool CompareTensorWithValue(const egr::EagerTensor& target, T value) { // TODO(jiabin): Support Selected Rows later auto dense_t = std::dynamic_pointer_cast(target.impl()); - T* ptr = dense_t->mutable_data(); + T* ptr = dense_t->data(); std::vector host_data(dense_t->numel()); if (paddle::platform::is_gpu_place(dense_t->place())) { diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h index 791e9a83fa09cb946377059ceddc0170380a1dd8..dbde9aa24ff02474a5f231e7f5d556d4af6e8836 100644 --- a/paddle/fluid/framework/data_type.h +++ b/paddle/fluid/framework/data_type.h @@ -20,9 +20,9 @@ limitations under the License. */ #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/eigen_ext.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/eigen/extensions.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index d8b14fc0d4c3096126c0a5a743320024099e3215..7aadc856129a1302a0f349459636bda5e9456c1b 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -38,8 +38,8 @@ limitations under the License. */ #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/operators/reader/blocking_queue.h" #include "paddle/fluid/platform/place.h" -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/timer.h" +#include "paddle/pten/backends/dynload/port.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/expect.h b/paddle/fluid/framework/expect.h index 146f4de9382a687686d5f7fdd6f4fa2300cb043b..686a14fca15c19205b9242e5ad925f25520f133a 100644 --- a/paddle/fluid/framework/expect.h +++ b/paddle/fluid/framework/expect.h @@ -19,14 +19,18 @@ #define _LINUX #endif -#ifdef _LINUX #ifndef likely -#define likely(x) __builtin_expect((x), 1) +#ifdef _LINUX +#define likely(expr) (__builtin_expect(!!(expr), 1)) +#else +#define likely(expr) (expr) #endif #endif -#ifdef _LINUX #ifndef unlikely -#define unlikely(x) __builtin_expect((x), 0) +#ifdef _LINUX +#define unlikely(expr) (__builtin_expect(!!(expr), 0)) +#else +#define unlikely(expr) (expr) #endif #endif diff --git a/paddle/fluid/framework/io/shell.cc b/paddle/fluid/framework/io/shell.cc index 004dc71d07bf3795082d3a75d155e533580b0c83..f01894f2cf448130ee58d7716ddaef556c9ce9cd 100644 --- a/paddle/fluid/framework/io/shell.cc +++ b/paddle/fluid/framework/io/shell.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include "paddle/fluid/framework/io/shell.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/io/shell.h b/paddle/fluid/framework/io/shell.h index 6fd00a516de892bdd749b24687a742e04931d354..e92560980f52ca3d9f17fbf76f280c8b65b35b7d 100644 --- a/paddle/fluid/framework/io/shell.h +++ b/paddle/fluid/framework/io/shell.h @@ -34,8 +34,8 @@ #include #include -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/string/string_helper.h" +#include "paddle/pten/backends/dynload/port.h" #if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \ defined(__ARM_NEON__) diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index 91d618970e30c80507a386ba5ba437931771c637..6c97c7fefb184b033773f835cbf5b48014f4aa6b 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -34,7 +34,7 @@ limitations under the License. */ #include "paddle/fluid/framework/trainer_desc.pb.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/operators/reader/blocking_queue.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 135ef6a970621cea6ee1418f751ffc37406628db..1ef633d0f12ec0e3bf9c7ba0817301170bd9fb16 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -19,7 +19,7 @@ #include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 61c5d8d0e4fd76a516837cc202ee56bc8dfd0739..92989eed7c0cb09c2ce71c1dd0e698b8d106bcac 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -28,7 +28,7 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" #ifdef _WIN32 #include diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h index 57052243d2f189ec6f722d5820cba223dd914e4a..e418d412b556023ee73e2e03e992b6604add2cc2 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h @@ -20,7 +20,7 @@ #include #include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index c6d25137594b76a1ff67d9fb25b2480372c3eefa..6c0707e3475c7270afb6044e93f87a370f8a6e5d 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -31,8 +31,8 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/string/printf.h" +#include "paddle/pten/backends/dynload/port.h" extern std::string paddle::framework::DataTypeToString( const framework::proto::VarType::Type type); diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index cf8a32ba94a1cacc1df9a195de7ff1ae8a790a98..ed0c8e51ac912bee53067c38aba09616d5ac801b 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -22,8 +22,8 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/profiler.h" +#include "paddle/pten/backends/dynload/port.h" DECLARE_bool(use_mkldnn); diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index f8a27da00ba2b84e9e60b26f4171053b91f03095..d18ff6f6bfe2f0b04966af9e80bc40f3bebfc593 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -8,7 +8,6 @@ set(pybind_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h) file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n") add_subdirectory(math) -add_subdirectory(eigen) add_subdirectory(controlflow) add_subdirectory(detection) add_subdirectory(elementwise) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index c5ca1fd0e8cab8b5c27115e9698b475f2fef1b82..e5ba46f312897b78dacd60701b71ca031cb43531 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -23,7 +23,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/common_infer_shape_functions.h" #include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" DECLARE_bool(use_mkldnn); diff --git a/paddle/fluid/operators/controlflow/compare_op.h b/paddle/fluid/operators/controlflow/compare_op.h index d2ef4c9befba99290008508e43df6c84f969b710..957efbff1993792c1cc6162296dbdcf00abb61cf 100644 --- a/paddle/fluid/operators/controlflow/compare_op.h +++ b/paddle/fluid/operators/controlflow/compare_op.h @@ -22,49 +22,40 @@ limitations under the License. */ namespace paddle { namespace operators { -template -struct LessThanFunctor { - using ELEM_TYPE = T; - HOSTDEVICE bool operator()(const T a, const T b) const { return a < b; } -}; - -template -struct LessEqualFunctor { - using ELEM_TYPE = T; - HOSTDEVICE bool operator()(const T a, const T b) const { return a <= b; } -}; - -template -struct GreaterThanFunctor { - using ELEM_TYPE = T; - HOSTDEVICE bool operator()(const T a, const T b) const { return a > b; } -}; - -template -struct GreaterEqualFunctor { - using ELEM_TYPE = T; - HOSTDEVICE bool operator()(const T a, const T b) const { return a >= b; } -}; - -template +#define COMPARE_FUNCTOR(func_name, op) \ + template \ + struct func_name { \ + using ELEM_TYPE = InT; \ + HOSTDEVICE OutT operator()(const InT a, const InT b) const { \ + return static_cast(a op b); \ + } \ + }; + +COMPARE_FUNCTOR(LessThanFunctor, <) +COMPARE_FUNCTOR(LessEqualFunctor, <=) +COMPARE_FUNCTOR(GreaterThanFunctor, >) +COMPARE_FUNCTOR(GreaterEqualFunctor, >=) +#undef COMPARE_FUNCTOR + +template struct EqualFunctor { - using ELEM_TYPE = T; - HOSTDEVICE bool operator()(const T a, const T b) const { - if (std::is_floating_point::value) { + using ELEM_TYPE = InT; + HOSTDEVICE OutT operator()(const InT a, const InT b) const { + if (std::is_floating_point::value) { // This branch will be optimized while compiling if T is integer. It is // safe to cast a and b to double. - return fabs(static_cast(a - b)) < 1e-8; + return static_cast(fabs(static_cast(a - b)) < 1e-8); } else { - return (a == b); + return static_cast(a == b); } } }; -template +template struct NotEqualFunctor { - using ELEM_TYPE = T; - HOSTDEVICE bool operator()(const T a, const T b) const { - return !EqualFunctor()(a, b); + using ELEM_TYPE = InT; + HOSTDEVICE bool operator()(const InT a, const InT b) const { + return !EqualFunctor()(a, b); } }; diff --git a/paddle/fluid/operators/eigen/CMakeLists.txt b/paddle/fluid/operators/eigen/CMakeLists.txt deleted file mode 100644 index 8b64e35b93526eb7edbe7f723832126ef7f0e0a6..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/eigen/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -file(GLOB EIGEN_CC_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -file(GLOB EIGEN_CU_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cu") -if(WITH_GPU) - nv_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3) -elseif(WITH_ROCM) - hip_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3) -else() - cc_library(eigen_function SRCS ${EIGEN_CC_SOURCES} DEPS eigen3) -endif() diff --git a/paddle/fluid/operators/eigen/eigen_function.h b/paddle/fluid/operators/eigen/eigen_function.h index 9a3be7ca439b9aead2e931c7fa3036128400b057..a460e4c04c5f9bedf6fb1d914cf0dcc9096d1332 100644 --- a/paddle/fluid/operators/eigen/eigen_function.h +++ b/paddle/fluid/operators/eigen/eigen_function.h @@ -18,243 +18,71 @@ limitations under the License. */ #ifndef NOMINMAX #define NOMINMAX #endif -#include "unsupported/Eigen/CXX11/Tensor" +#include "paddle/pten/kernels/funcs/eigen/eigen_function.h" namespace paddle { namespace operators { template -struct EigenBroadcast { - using Array = Eigen::DSizes; - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using InType32BitIndex = - Eigen::TensorMap, - Eigen::Aligned>; - using OutType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType32BitIndex = - Eigen::TensorMap, - Eigen::Aligned>; - static void Eval(const EigenDevice& dev, OutType out, InType in, - const Array& bcast); - static void Eval(const EigenDevice& dev, OutType32BitIndex out, - InType32BitIndex in, const Array& bcast); -}; +using EigenBroadcast = pten::funcs::EigenBroadcast; template -struct EigenBroadcastGrad { - using Array = Eigen::DSizes; - using Array2 = Eigen::DSizes; - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, InType in, - const Array& reduce_dims, const Array2& reshape_dims); -}; +using EigenBroadcastGrad = + pten::funcs::EigenBroadcastGrad; template -struct EigenConstant { - using Type = Eigen::TensorMap< - Eigen::Tensor>; - static void Eval(const EigenDevice& dev, Type out, const T value); -}; +using EigenConstant = pten::funcs::EigenConstant; template -struct EigenSign { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in); -}; +using EigenSign = pten::funcs::EigenSign; template -struct EigenReverse { - using Array = Eigen::DSizes; - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = Eigen::TensorMap< - Eigen::Tensor>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in, - const Array& reverse); -}; +using EigenReverse = pten::funcs::EigenReverse; template -struct EigenAdd { - using InType = Eigen::TensorMap, Eigen::RowMajor, Eigen::DenseIndex>>; - using OutType = Eigen::TensorMap, Eigen::RowMajor, Eigen::DenseIndex>>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in, - const T value); -}; +using EigenAdd = pten::funcs::EigenAdd; template -struct EigenSub { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, const InType& left, - const InType& right); -}; +using EigenSub = pten::funcs::EigenSub; template -struct EigenSlice { - using Array = Eigen::DSizes; - using Array32Bit = Eigen::DSizes; - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using InType32BitIndex = - Eigen::TensorMap, - Eigen::Aligned>; - using OutType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType32BitIndex = - Eigen::TensorMap, - Eigen::Aligned>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in, - const Array& offsets, const Array& extents); - static void Eval(const EigenDevice& dev, OutType32BitIndex out, - const InType32BitIndex& in, const Array32Bit& offsets, - const Array32Bit& extents); -}; +using EigenSlice = pten::funcs::EigenSlice; template -struct EigenPad { - using Array = std::array, Rank>; - using Array32Bit = std::array, Rank>; - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using InType32BitIndex = - Eigen::TensorMap, - Eigen::Aligned>; - using OutType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType32BitIndex = - Eigen::TensorMap, - Eigen::Aligned>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in, - const Array& padding, const T value); - static void Eval(const EigenDevice& dev, OutType32BitIndex out, - const InType32BitIndex& in, const Array32Bit& padding, - const T value); -}; +using EigenPad = pten::funcs::EigenPad; template -struct EigenScale { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in, - const T scale, const T bias, const bool bias_after_scale); -}; +using EigenScale = pten::funcs::EigenScale; template -struct EigenErf { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in); -}; +using EigenErf = pten::funcs::EigenErf; template -struct EigenErfGrad { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType din, const InType& in, - const InType& dout); -}; +using EigenErfGrad = pten::funcs::EigenErfGrad; template -struct EigenRankLoss { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, const InType& label, - const InType& left, const InType& right); -}; +using EigenRankLoss = pten::funcs::EigenRankLoss; template -struct EigenRankLossGrad { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void EvalLeft(const EigenDevice& dev, OutType dleft, - const InType& dout, const InType& label, - const InType& left, const InType& right); - static void EvalRight(const EigenDevice& dev, OutType dright, - const InType& dout, const InType& label, - const InType& left, const InType& right); -}; +using EigenRankLossGrad = pten::funcs::EigenRankLossGrad; template -struct EigenLogLoss { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType out, const InType& pred, - const InType& label, const T& epsilon); -}; +using EigenLogLoss = pten::funcs::EigenLogLoss; template -struct EigenLogLossGrad { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType dpred, const InType& dloss, - const InType& pred, const InType& label, const T& epsilon); -}; +using EigenLogLossGrad = pten::funcs::EigenLogLossGrad; template -struct EigenHingeLoss { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType loss, const InType& pred, - const InType& label); -}; +using EigenHingeLoss = pten::funcs::EigenHingeLoss; template -struct EigenHingeLossGrad { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType dpred, const InType& dloss, - const InType& pred, const InType& label); -}; +using EigenHingeLossGrad = pten::funcs::EigenHingeLossGrad; template -struct EigenL1Norm { - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = Eigen::TensorMap, Eigen::RowMajor, Eigen::DenseIndex>>; - static void Eval(const EigenDevice& dev, OutType out, const InType& in); -}; +using EigenL1Norm = pten::funcs::EigenL1Norm; template -struct EigenL1NormGrad { - using Array = Eigen::DSizes; - using InType = Eigen::TensorMap< - Eigen::Tensor>; - using OutType = - Eigen::TensorMap>; - static void Eval(const EigenDevice& dev, OutType din, const InType& dout, - const InType& in, const Array& bcast); -}; +using EigenL1NormGrad = pten::funcs::EigenL1NormGrad; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc index 6ef49e2cf3db7318f2eb8f0f55ffccd0e3bbad15..f1deab3e65299b5188fbbbb8583705a4560a9ad4 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/fused/fusion_gru_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" @@ -41,7 +42,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { ctx.InputName("X") + ctx.InputName("WeightH")) { const bool is_INT8 = std::is_same::value; - if (!this->isCached()) { + if (unlikely(!this->isCached())) { // oneDNN kernel has hardcoded activation functions PADDLE_ENFORCE_EQ( ctx.Attr("gate_activation"), "sigmoid", diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index 385e4ad8808a51a207ef8779c4544da60f0a6a3d..dfd88248ede3452bab7a23ea8f3e349e23430349 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/fused/fusion_lstm_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" @@ -40,7 +41,7 @@ class LSTMMKLDNNHandler ctx, dev_ctx, mkldnn_engine, ctx.GetPlace(), input, weight_h, h0, is_reverse, N, Ti, IC, OC, 4, ctx.InputName("X") + ctx.InputName("WeightH")) { - if (!this->isCached()) { + if (unlikely(!this->isCached())) { const bool is_INT8 = std::is_same::value; const bool use_peepholes = ctx.Attr("use_peepholes"); // oneDNN kernel has hardcoded activation functions diff --git a/paddle/fluid/operators/kernel_primitives/functor_primitives.h b/paddle/fluid/operators/kernel_primitives/functor_primitives.h index 5e3c1fc202d595cf7406841cae716d3ddcb59d02..03610d4589058e074f64940741df34bd8f66e379 100644 --- a/paddle/fluid/operators/kernel_primitives/functor_primitives.h +++ b/paddle/fluid/operators/kernel_primitives/functor_primitives.h @@ -15,9 +15,9 @@ #pragma once #include "paddle/fluid/operators/amp/fp16_type_traits.h" -#include "paddle/fluid/platform/eigen_ext.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/float16.h" +#include "paddle/pten/kernels/funcs/eigen/extensions.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 67176f26b079fcf294a7db2a6adc2d05d0908a24..f6178eb0a1eb6e8a4d1886443ec77b945c3b182f 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -477,6 +477,155 @@ struct MergeAdd { } }; +#ifdef PADDLE_WITH_XPU +template +struct MergeAdd { + framework::SelectedRows operator()(const platform::XPUDeviceContext& context, + const framework::SelectedRows& input, + const bool sorted_result = false) { + framework::SelectedRows out; + (*this)(context, input, &out, sorted_result); + return out; + } + + void operator()(const platform::XPUDeviceContext& context, + const framework::SelectedRows& input, + framework::SelectedRows* output, + const bool sorted_result = false) { + framework::Vector input_rows(input.rows()); + if (input_rows.size() == 0) { + return; + } + + framework::SelectedRows& out = *output; + std::set row_set(input_rows.begin(), input_rows.end()); + std::vector merge_rows(row_set.begin(), row_set.end()); + auto input_width = input.value().dims()[1]; + + out.set_rows(merge_rows); + out.set_height(input.height()); + out.mutable_value()->mutable_data( + framework::make_ddim( + {static_cast(merge_rows.size()), input_width}), + context.GetPlace()); + int r = + xpu::constant(context.x_context(), out.mutable_value()->data(), + merge_rows.size() * input_width, static_cast(0.f)); + PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, + platform::errors::External("XPU constant op return" + " wrong value[%d %s].", + r, XPUAPIErrorMsg[r])); + + std::unordered_map rows_to_id; + for (size_t i = 0; i < merge_rows.size(); ++i) { + rows_to_id[merge_rows[i]] = i; + } + + auto* out_data = out.mutable_value()->data(); + auto* input_data = input.value().data(); + int n = input_width; + for (size_t i = 0; i < input_rows.size(); i++) { + size_t out_i = rows_to_id[input_rows[i]]; + auto r = xpu::add(context.x_context(), &input_data[i * input_width], + &out_data[out_i * input_width], + &out_data[out_i * input_width], n); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API return wrong value[%d %s], ", r, + XPUAPIErrorMsg[r])); + } + } + + void operator()(const platform::XPUDeviceContext& context, + const std::vector& inputs, + framework::SelectedRows* output, + const bool sorted_result = false) { + if (inputs.size() == 0) { + VLOG(3) << "no input! return"; + return; + } + const framework::SelectedRows* has_value_input = nullptr; + for (auto* in : inputs) { + if (in->rows().size() > 0) { + has_value_input = in; + break; + } + } + if (has_value_input == nullptr) { + VLOG(3) << "no input has value! just return" << std::endl; + return; + } + auto input_width = has_value_input->value().dims()[1]; + auto input_height = has_value_input->height(); + framework::SelectedRows& out = *output; + std::set merged_row_set; + size_t row_num = 0; + for (auto* input : inputs) { + if (input->rows().size() == 0) { + continue; + } + PADDLE_ENFORCE_EQ(input_width, input->value().dims()[1], + platform::errors::InvalidArgument( + "All inputs should have same " + "dimension except for the first one.")); + PADDLE_ENFORCE_EQ(input_height, input->height(), + platform::errors::InvalidArgument( + "All inputs should have same height.")); + row_num += input->rows().size(); + merged_row_set.insert(input->rows().begin(), input->rows().end()); + } + + std::vector merge_rows(merged_row_set.begin(), + merged_row_set.end()); + + if (sorted_result) { + std::sort(merge_rows.begin(), merge_rows.end()); + } + + out.set_rows(merge_rows); + out.set_height(input_height); + out.mutable_value()->mutable_data( + framework::make_ddim( + {static_cast(merged_row_set.size()), input_width}), + context.GetPlace()); + + int r = + xpu::constant(context.x_context(), out.mutable_value()->data(), + merge_rows.size() * input_width, static_cast(0.f)); + PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS, + platform::errors::External("XPU constant op return" + " wrong value[%d %s].", + r, XPUAPIErrorMsg[r])); + + float* out_data = reinterpret_cast(out.mutable_value()->data()); + + std::unordered_map rows_to_id; + for (size_t i = 0; i < merge_rows.size(); ++i) { + rows_to_id[merge_rows[i]] = i; + } + + for (auto* input : inputs) { + if (input->rows().size() == 0) { + continue; + } + auto& input_rows = input->rows(); + + int n = input_width; + for (size_t i = 0; i < input_rows.size(); i++) { + size_t out_i = rows_to_id[input_rows[i]]; + auto r = xpu::add( + context.x_context(), input->value().data() + i * input_width, + &out_data[out_i * input_width], &out_data[out_i * input_width], n); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API return wrong value[%d %s], ", r, + XPUAPIErrorMsg[r])); + } + } + } +}; + +#endif template struct MergeAverage { framework::SelectedRows operator()(const platform::CPUDeviceContext& context, @@ -589,6 +738,10 @@ template struct MergeAdd; +#ifdef PADDLE_WITH_XPU +template struct MergeAdd; +#endif + template struct MergeAverage; template struct MergeAverage; template struct MergeAverage; diff --git a/paddle/fluid/operators/matrix_rank_op.cc b/paddle/fluid/operators/matrix_rank_op.cc index 3038a16dc0a5e53a0b8f7aa49942bcb916191f48..ddfb8d50c4e11ba47b7ac13b9da6405955d0ebf6 100644 --- a/paddle/fluid/operators/matrix_rank_op.cc +++ b/paddle/fluid/operators/matrix_rank_op.cc @@ -219,18 +219,20 @@ class MatrixRankCPUKernel : public framework::OpKernel { tol_tensor.Resize(detail::NewAxisDim(tol_tensor.dims(), 1)); Tensor compare_result; - compare_result.mutable_data(detail::NewAxisDim(dim_out, k), - context.GetPlace()); + compare_result.mutable_data(detail::NewAxisDim(dim_out, k), + context.GetPlace()); int axis = -1; if (eigenvalue_tensor.dims().size() >= tol_tensor.dims().size()) { - ElementwiseComputeEx, platform::CPUDeviceContext, T, - int>(context, &eigenvalue_tensor, &tol_tensor, axis, - GreaterThanFunctor(), &compare_result); + ElementwiseComputeEx, + platform::CPUDeviceContext, T, int>( + context, &eigenvalue_tensor, &tol_tensor, axis, + GreaterThanFunctor(), &compare_result); } else { - ElementwiseComputeEx, platform::CPUDeviceContext, T, - int>(context, &eigenvalue_tensor, &tol_tensor, axis, - LessThanFunctor(), &compare_result); + ElementwiseComputeEx, + platform::CPUDeviceContext, T, int>( + context, &eigenvalue_tensor, &tol_tensor, axis, + LessThanFunctor(), &compare_result); } auto dito_int = math::DeviceIndependenceTensorOperations { compare_result.mutable_data(detail::NewAxisDim(dim_out, k), context.GetPlace()); int axis = -1; - ElementwiseComputeEx, platform::CUDADeviceContext, T, - int64_t>(context, &eigenvalue_tensor, &tol_tensor, - axis, GreaterThanFunctor(), - &compare_result); + ElementwiseComputeEx, + platform::CUDADeviceContext, T, int64_t>( + context, &eigenvalue_tensor, &tol_tensor, axis, + GreaterThanFunctor(), &compare_result); auto dito_int = math::DeviceIndependenceTensorOperations(context); diff --git a/paddle/fluid/operators/matrix_rank_op.h b/paddle/fluid/operators/matrix_rank_op.h index c3d99a21b72358df5dedc7741072a7913de174af..550bc445ac4e66a74965fe635a36c95b33dbed29 100644 --- a/paddle/fluid/operators/matrix_rank_op.h +++ b/paddle/fluid/operators/matrix_rank_op.h @@ -16,6 +16,7 @@ #include #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/controlflow/compare_op.h" namespace paddle { namespace operators { @@ -46,16 +47,6 @@ static DDim RemoveLastDim(const DDim& dim) { } } // namespace detail -template -struct GreaterThanFunctor { - HOSTDEVICE int operator()(const T a, const T b) const { return a > b; } -}; - -template -struct LessThanFunctor { - HOSTDEVICE int operator()(const T a, const T b) const { return a < b; } -}; - template struct GreaterElementFunctor { HOSTDEVICE T operator()(const T a, const T b) const { diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 68e2a7c8a91bb232fb479942d307679137b6172a..eef38bf99b1366a46b80e7e0088e838110787c39 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -14,6 +14,7 @@ #include +#include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/mkldnn_helper.h" @@ -79,7 +80,7 @@ class ConvMKLDNNHandlerT dev_ctx, mkldnn_engine, cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(input->dims()), unique_name)) { - if (!this->isCached()) { + if (unlikely(!this->isCached())) { PADDLE_ENFORCE_EQ( input->layout(), framework::DataLayout::kMKLDNN, platform::errors::InvalidArgument( @@ -264,7 +265,7 @@ class ConvMKLDNNHandlerT dev_ctx, dev_ctx.GetEngine(), cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(in->dims()), unique_name)) { - if (!this->isBwdCached()) { + if (unlikely(!this->isBwdCached())) { PADDLE_ENFORCE_EQ( in->layout(), framework::DataLayout::kMKLDNN, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc index 8c7113d963bd5214d74b4289dc569e9c33359e57..7119d68d583f0a224860da51793ccc79ecb5b8c4 100644 --- a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/expect.h" #include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { @@ -39,7 +40,7 @@ class PReluMKLDNNHandler dev_ctx, engine, cpu_place, platform::CreateKey(dev_ctx, framework::vectorize(x->dims()), uniq_name)) { - if (!this->isCached()) { + if (unlikely(!this->isCached())) { auto x_md = memory::desc(framework::vectorize(x->dims()), MKLDNNGetDataType(), x->format()); diff --git a/paddle/fluid/operators/optimizers/adam_op_xpu.cc b/paddle/fluid/operators/optimizers/adam_op_xpu.cc index 0a653c401171948af545709978ea5892b4099a92..e462c20c7f51db8195c3acba019d0aa225005dce 100644 --- a/paddle/fluid/operators/optimizers/adam_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_xpu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/optimizers/adam_op.h" #include "gflags/gflags.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" namespace paddle { namespace operators { @@ -155,6 +156,11 @@ class AdamOpXPUKernel : public framework::OpKernel { mom2_out.template mutable_data(ctx.GetPlace()), param_out.template mutable_data(ctx.GetPlace()), beta1, beta2, epsilon, param.numel()); + + xpu_wait(dev_ctx.x_context()->xpu_stream); + PADDLE_ENFORCE_EQ( + r == xpu::Error_t::SUCCESS, true, + platform::errors::External("XPU API return wrong value[%d],", r)); if (!use_global_beta_pow) { // update in cpu and then copy to xpu if (beta1_pow.place() == platform::CPUPlace() && @@ -165,7 +171,6 @@ class AdamOpXPUKernel : public framework::OpKernel { const float* beta2_pow_p = beta2_pow.template data(); beta2_pow_out->mutable_data(platform::CPUPlace())[0] = beta2 * beta2_pow_p[0]; - xpu_wait(dev_ctx.x_context()->xpu_stream); } else { float* beta1_pow_out_p = beta1_pow_out->mutable_data(ctx.GetPlace()); @@ -177,23 +182,129 @@ class AdamOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( r, xpu::SUCCESS, platform::errors::External( - "XPU kernel scale occur error in adamw error code ", r, + "XPU kernel scale occur error in adam error code ", r, XPUAPIErrorMsg[r])); r = xpu::scale(dev_ctx.x_context(), beta2_pow_ptr, beta2_pow_out_p, beta2_pow.numel(), false, beta2, 0.0f); PADDLE_ENFORCE_EQ( r, xpu::SUCCESS, platform::errors::External( - "XPU kernel scale occur error in adamw error code ", r, + "XPU kernel scale occur error in adam error code ", r, XPUAPIErrorMsg[r])); + + xpu_wait(dev_ctx.x_context()->xpu_stream); + } + } + } else if (grad_var->IsType()) { + auto* grad = ctx.Input("Grad"); + auto& dev_ctx = ctx.template device_context(); + + if (grad->rows().size() == 0) { + VLOG(3) << "grad row size is 0!!"; + return; + } + + std::vector cpu_rows(grad->rows().begin(), grad->rows().end()); + bool is_strict_sorted = true; + for (size_t i = 1; i < cpu_rows.size(); ++i) { + if (cpu_rows[i - 1] >= cpu_rows[i]) { + is_strict_sorted = false; + break; } + } + + framework::SelectedRows tmp_grad_merge; + const framework::SelectedRows* grad_merge_ptr; + if (is_strict_sorted) { + grad_merge_ptr = grad; + } else { + scatter::MergeAdd merge_func; + merge_func(ctx.template device_context(), + *grad, &tmp_grad_merge, true); + + xpu_wait(dev_ctx.x_context()->xpu_stream); + grad_merge_ptr = &tmp_grad_merge; + } + const T* beta1_pow_ptr = beta1_pow.template data(); + const T* beta2_pow_ptr = beta2_pow.template data(); + Tensor xpu_beta1_pow; + Tensor xpu_beta2_pow; + if (beta1_pow.place() == platform::CPUPlace() && + beta2_pow.place() == platform::CPUPlace()) { + paddle::framework::TensorCopy(beta1_pow, ctx.GetPlace(), dev_ctx, + &xpu_beta1_pow); + paddle::framework::TensorCopy(beta2_pow, ctx.GetPlace(), dev_ctx, + &xpu_beta2_pow); + dev_ctx.Wait(); + beta1_pow_ptr = xpu_beta1_pow.template data(); + beta2_pow_ptr = xpu_beta2_pow.template data(); + } + auto& grad_merge = *grad_merge_ptr; + auto& grad_tensor = grad_merge.value(); + const T* grad_data = grad_tensor.template data(); + int row_count = grad_merge.rows().size(); + std::vector rows(row_count); + xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); + int* xpu_rows = RAII_GUARD.alloc_l3_or_gm(row_count); + std::vector merge_rows(grad_merge.rows().begin(), + grad_merge.rows().end()); + for (size_t i = 0; i < grad_merge.rows().size(); ++i) { + rows[i] = static_cast(merge_rows[i]); + } + xpu_wait(dev_ctx.x_context()->xpu_stream); + memory::Copy(ctx.GetPlace(), xpu_rows, platform::CPUPlace(), rows.data(), + row_count * sizeof(int)); + auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); + auto ori_rows = param.numel() / row_numel; - PADDLE_ENFORCE_EQ(r == xpu::Error_t::SUCCESS, true, - platform::errors::External( - "XPU API return wrong value[%d], please check " - "where Baidu Kunlun Card is properly installed.", - r)); + int lazy_mode = static_cast(ctx.Attr("lazy_mode")); + int r = xpu::sparse_adam( + dev_ctx.x_context(), grad_data, mom1.template data(), + mom2.template data(), param.template data(), beta1_pow_ptr, + beta2_pow_ptr, lr.template data(), + mom1_out.template mutable_data(ctx.GetPlace()), + mom2_out.template mutable_data(ctx.GetPlace()), + param_out.template mutable_data(ctx.GetPlace()), beta1, beta2, + epsilon, ori_rows, xpu_rows, row_numel, grad_merge.rows().size(), + lazy_mode); + + PADDLE_ENFORCE_EQ( + r == xpu::Error_t::SUCCESS, true, + platform::errors::External("XPU API return wrong value[%d],", r)); + + if (!use_global_beta_pow) { + // update in cpu and then copy to xpu + if (beta1_pow.place() == platform::CPUPlace() && + beta2_pow.place() == platform::CPUPlace()) { + const float* beta1_pow_p = beta1_pow.template data(); + beta1_pow_out->mutable_data(platform::CPUPlace())[0] = + beta1 * beta1_pow_p[0]; + const float* beta2_pow_p = beta2_pow.template data(); + beta2_pow_out->mutable_data(platform::CPUPlace())[0] = + beta2 * beta2_pow_p[0]; + } else { + float* beta1_pow_out_p = + beta1_pow_out->mutable_data(ctx.GetPlace()); + float* beta2_pow_out_p = + beta2_pow_out->mutable_data(ctx.GetPlace()); + int r = + xpu::scale(dev_ctx.x_context(), beta1_pow_ptr, beta1_pow_out_p, + beta1_pow.numel(), false, beta1, 0.0f); + PADDLE_ENFORCE_EQ( + r, xpu::SUCCESS, + platform::errors::External( + "XPU kernel scale occur error in adam error code ", r, + XPUAPIErrorMsg[r])); + r = xpu::scale(dev_ctx.x_context(), beta2_pow_ptr, beta2_pow_out_p, + beta2_pow.numel(), false, beta2, 0.0f); + PADDLE_ENFORCE_EQ( + r, xpu::SUCCESS, + platform::errors::External( + "XPU kernel scale occur error in adam error code ", r, + XPUAPIErrorMsg[r])); + } } + xpu_wait(dev_ctx.x_context()->xpu_stream); } else { PADDLE_ENFORCE_EQ(1, 2, platform::errors::InvalidArgument( "Variable type not supported by adam_op")); diff --git a/paddle/fluid/operators/optimizers/lamb_op.h b/paddle/fluid/operators/optimizers/lamb_op.h index 6d98522d752196690a110922a3c41c0bf60c7476..9a3eaa66caa8e870f2692c67aea29535dbd7492a 100644 --- a/paddle/fluid/operators/optimizers/lamb_op.h +++ b/paddle/fluid/operators/optimizers/lamb_op.h @@ -22,8 +22,8 @@ limitations under the License. */ #include "paddle/fluid/operators/math/algorithm.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/operators/math/squared_l2_norm.h" -#include "paddle/fluid/platform/eigen_ext.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/pten/kernels/funcs/eigen/extensions.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index 6e6c826a22892dad86aad907ca360bd3b8236d03..0aa39c9af5c1723dd6b99ec8598762e5dd9d7a98 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -27,7 +27,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/pten/backends/dynload/port.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/viterbi_decode_op.cu b/paddle/fluid/operators/viterbi_decode_op.cu index d40d14435a5fd016a9ab5aaeb0436f13654a510b..7ad3335009b06056fb624ef97305ac549b14035f 100644 --- a/paddle/fluid/operators/viterbi_decode_op.cu +++ b/paddle/fluid/operators/viterbi_decode_op.cu @@ -72,7 +72,8 @@ struct BinaryOperation { } }; -template