From 945a3ce9e7ba4d39578aa196861da0290edfeea2 Mon Sep 17 00:00:00 2001
From: Jiabin Yang <360788950@qq.com>
Date: Wed, 9 Feb 2022 14:07:09 +0800
Subject: [PATCH] Replace EagerTensor with Tensor (#39376)

* merge legacy to fluid
* Remove legacy code
* Remove legacy code
* Remove DataType test
* Using Tensor directly instead of using EagerTensor
* support gradient_accumulation
* make test_imperative_lod_tensor_to_selected_rows longer
* make test_imperative_lod_tensor_to_selected_rows longer
---
 .../eager/accumulation/accumulation_node.cc        |  19 +-
 .../eager/accumulation/accumulation_node.h         |  17 +-
 .../eager_generated/backwards/scale_node.cc        |  17 +-
 .../eager_generated/backwards/scale_node.h         |  12 +-
 .../eager_generated/forwards/scale.cc              |   7 +-
 .../eager_generated/forwards/scale.h               |   5 +-
 paddle/fluid/eager/api/utils/hook_utils.cc         |  41 ++-
 paddle/fluid/eager/api/utils/hook_utils.h          |   9 +-
 paddle/fluid/eager/api/utils/tensor_utils.cc       |  15 +-
 paddle/fluid/eager/api/utils/tensor_utils.h        |  11 +-
 .../auto_code_generator/eager_generator.cc         | 105 ++++--
 .../final_state_generator/eager_gen.py             |  61 ++--
 paddle/fluid/eager/autograd_meta.h                 |  13 +-
 paddle/fluid/eager/backward.cc                     |  22 +-
 paddle/fluid/eager/backward.h                      |   4 +-
 paddle/fluid/eager/eager_tensor.h                  | 303 ++++--------------
 paddle/fluid/eager/grad_node_info.cc               |  18 +-
 paddle/fluid/eager/grad_node_info.h                |  19 +-
 paddle/fluid/eager/grad_tensor_holder.cc           |  88 ++---
 paddle/fluid/eager/grad_tensor_holder.h            |  12 +-
 paddle/fluid/eager/tensor_wrapper.h                |  16 +-
 .../accumulation_node_test.cc                      |  28 +-
 .../autograd_meta_test.cc                          |   4 +-
 .../data_structure_tests/eager_tensor_test.cc      |  59 ++--
 .../grad_node_info_test.cc                         |   9 +-
 .../data_structure_tests/grad_node_test.h          |   9 +-
 .../grad_tensor_holder_test.cc                     |   8 +-
 .../tensor_wrapper_test.cc                         |   6 +-
 .../performance_tests/benchmark_eager_cpu.cc       |  16 +-
 .../performance_tests/benchmark_eager_cuda.cc      |  16 +-
 .../performance_tests/benchmark_utils.cc           |  36 ++-
 .../tests/performance_tests/benchmark_utils.h      |  24 +-
 .../eager/tests/task_tests/backward_test.cc        |  56 ++--
 .../cross_batch_accumulation_test.cc               |   8 +-
 .../tests/task_tests/eager_utils_test.cc           |  93 ++----
 .../tests/task_tests/forward_autograd_test.cc      |  30 +-
 .../tests/task_tests/fwd_bwd_joint_test.cc         |  70 ++--
 .../eager/tests/task_tests/generated_test.cc       |  24 +-
 .../fluid/eager/tests/task_tests/hook_test.cc      |  41 +--
 .../tests/task_tests/tensor_utils_test.cc          |   6 +-
 paddle/fluid/eager/tests/test_utils.h              |  73 +----
 paddle/fluid/eager/utils.cc                        | 220 ++++-------
 paddle/fluid/eager/utils.h                         |  88 ++---
 .../fluid/imperative/gradient_accumulator.cc       | 115 ++++---
 .../fluid/imperative/gradient_accumulator.h        |  13 +-
 paddle/fluid/imperative/infer_shape_context.h      |   3 +-
 paddle/fluid/imperative/tracer.cc                  |  12 +-
 .../fluid/operators/optimizers/rmsprop_op.h        |   6 +-
 paddle/fluid/pybind/eager.cc                       |  88 +++--
 paddle/fluid/pybind/eager_functions.cc             |  57 ++--
 paddle/fluid/pybind/eager_method.cc                | 214 ++++-------
 .../pybind/eager_op_function_generator.cc          |   4 +-
 paddle/fluid/pybind/eager_properties.cc            |  72 ++---
 paddle/fluid/pybind/eager_utils.cc                 |  96 +++---
 paddle/fluid/pybind/eager_utils.h                  |  37 ++-
 paddle/fluid/pybind/exception.h                    |   7 +-
 paddle/pten/api/include/tensor.h                   |   8 +
 paddle/pten/api/lib/tensor.cc                      |   7 +
 paddle/pten/core/selected_rows.h                   |  36 +++
 python/paddle/fluid/dygraph/tracer.py              |  13 +-
 .../fluid/tests/unittests/CMakeLists.txt           |   4 +-
 61 files changed, 1131 insertions(+), 1399 deletions(-)

diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc
b/paddle/fluid/eager/accumulation/accumulation_node.cc index f6d66ac81b5..02ce94766aa 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -25,29 +25,28 @@ #include "glog/logging.h" -static void CopyOrAddTensor(egr::EagerTensor* tensor, - const egr::EagerTensor& t) { - if (t.Var().IsInitialized()) { - const_cast(&t)->SyncToTensor(); - } +static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, + const paddle::experimental::Tensor& t) { if (!tensor->defined() || !tensor->initialized()) { // Simply copy tensor->impl *tensor = t; } else { // Accumulation - paddle::imperative::TensorAdd(t, tensor); + paddle::imperative::TensorAdd(t, tensor); } } namespace egr { void GradNodeAccumulation::RetainGrad( - const std::function& hook) { + const std::function& hook) { retain_grad_hook_ = hook; } -std::vector> GradNodeAccumulation::operator()( - const std::vector>& grads) { +std::vector> GradNodeAccumulation:: +operator()( + const std::vector>& grads) { PADDLE_ENFORCE(grads.size() == 1, paddle::platform::errors::Fatal( "GradNodeAccumulation should take exactly 1 grad tensor" @@ -60,7 +59,7 @@ std::vector> GradNodeAccumulation::operator()( grads[0].size(), 0)); // Apply Gradient Hooks if (GradientHooksRegistered()) { - std::vector> hooked_grads = + std::vector> hooked_grads = ApplyGradientHooks(grads); // TODO(jiabin): It's little weird CopyOrAddTensor(&accumulated_grad, hooked_grads[0][0]); diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index 9578924b783..2b11e67ad5e 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -26,18 +26,21 @@ class GradNodeAccumulation : public GradNodeBase { ~GradNodeAccumulation() override = default; // Functor: perform backward computations - virtual std::vector> operator()( - const std::vector>& grads) override; + virtual std::vector> operator()( + const std::vector>& grads) + override; - void RetainGrad( - const std::function& hook); + void RetainGrad(const std::function& hook); - egr::EagerTensor* Grad() { return &accumulated_grad; } + paddle::experimental::Tensor* Grad() { return &accumulated_grad; } private: - egr::EagerTensor accumulated_grad; + paddle::experimental::Tensor accumulated_grad; - std::function retain_grad_hook_; + std::function + retain_grad_hook_; }; } // namespace egr diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index cd91209c9cc..5955f7fa933 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -77,8 +77,8 @@ static void ScaleDeviceDispatch(const pten::DenseTensor& dense_tensor, } } -void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, - bool bias_after_scale, egr::EagerTensor* out) { +void ScaleAPI(const paddle::experimental::Tensor& x, float scale, float bias, + bool bias_after_scale, paddle::experimental::Tensor* out) { // TODO(jiabin): Support multiple tensor here, Create DenseTensor is not a // proper way to Demo it // Run Forward Function @@ -138,14 +138,15 @@ void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, } void GradNodeScale::SetTensorWrappers_X( - const std::vector& tensors) { + const std::vector& tensors) { // Does nothing for scale } void 
GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; } -std::vector> GradNodeScale::operator()( - const std::vector>& grads) { +std::vector> GradNodeScale:: +operator()( + const std::vector>& grads) { // 1. Check Output Size PADDLE_ENFORCE( ((grads.size() == 1) && (grads[0].size() == 1)), @@ -154,14 +155,14 @@ std::vector> GradNodeScale::operator()( "However received: %d", "This indicates an issue with Eager Dygraph Backward logic", grads.size())); - std::vector> outs; + std::vector> outs; // 2. Create needed out parttern - egr::EagerTensor out; + paddle::experimental::Tensor out; // Apply Gradient Hooks if (GradientHooksRegistered()) { // TODO(jiabin): Shall we apply hook slot by slot here or accept // vector> to apply all hooks? - std::vector> hooked_grads = + std::vector> hooked_grads = ApplyGradientHooks(grads); ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */, true /* bias_after_scale */, &out); diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h index 1618662516c..c0150a1730d 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h @@ -27,8 +27,8 @@ */ namespace egr { -void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, - bool bias_after_scale, egr::EagerTensor* out); +void ScaleAPI(const paddle::experimental::Tensor& x, float scale, float bias, + bool bias_after_scale, paddle::experimental::Tensor* out); class GradNodeScale : public GradNodeBase { public: @@ -38,10 +38,12 @@ class GradNodeScale : public GradNodeBase { ~GradNodeScale() override = default; // Functor: perform backward computations - virtual std::vector> operator()( - const std::vector>& grads) override; + virtual std::vector> operator()( + const std::vector>& grads) + override; - void SetTensorWrappers_X(const std::vector& tensors); + void SetTensorWrappers_X( + const std::vector& tensors); void SetAttributes_scale(float scale); diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc index 6390f06640c..8b7ecc35af2 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc @@ -32,11 +32,12 @@ namespace egr { -egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias, - bool bias_after_scale, bool trace_backward) { +paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x, + float scale, float bias, + bool bias_after_scale, bool trace_backward) { // 1. 
Run Forward // 1.1 Create outputs - egr::EagerTensor out; + paddle::experimental::Tensor out; // 1.2 Need by original op, we assemble ins, outs, attrs here // 1.3 Call forward C++ api diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h index 13b03c3b760..250e016f61a 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h @@ -17,7 +17,8 @@ #include "paddle/fluid/eager/eager_tensor.h" namespace egr { -egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias, - bool bias_after_scale, bool trace_backward); +paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x, + float scale, float bias, + bool bias_after_scale, bool trace_backward); } // namespace egr diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc index 7aa1917240f..c24f7f01615 100644 --- a/paddle/fluid/eager/api/utils/hook_utils.cc +++ b/paddle/fluid/eager/api/utils/hook_utils.cc @@ -23,8 +23,9 @@ namespace egr { namespace egr_utils_api { void RegisterGradientHookForTensor( - const egr::EagerTensor& tensor, - std::function& hook) { + const paddle::experimental::Tensor& tensor, + std::function& hook) { // Find grad_node and out_rank from AutogradMeta std::shared_ptr grad_node = EagerUtils::grad_node(tensor); auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo(); @@ -32,7 +33,7 @@ void RegisterGradientHookForTensor( grad_node->RegisterGradientHook(rank_info.first, rank_info.second, hook); } -void RegisterReduceHookForTensor(const egr::EagerTensor& tensor, +void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor, const std::function& hook) { // Find grad_node and out_rank from AutogradMeta std::shared_ptr grad_node = EagerUtils::grad_node(tensor); @@ -40,14 +41,16 @@ void RegisterReduceHookForTensor(const egr::EagerTensor& tensor, grad_node->RegisterReduceHook(hook); } -void RetainGradForTensor(const egr::EagerTensor& tensor) { +void RetainGradForTensor(const paddle::experimental::Tensor& tensor) { // TODO(jiabin): Support More Tensor type here AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor); - std::weak_ptr weak_grad_tensor = meta->WeakGrad(); + std::weak_ptr weak_grad_tensor = + meta->WeakGrad(); // Define Hook - std::function hook = - [weak_grad_tensor](const egr::EagerTensor& t) { + std::function + hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) { if (!weak_grad_tensor.expired()) { auto grad_tensor = weak_grad_tensor.lock(); if (t.defined()) { @@ -56,24 +59,16 @@ void RetainGradForTensor(const egr::EagerTensor& tensor) { grad_tensor->set_impl(t.impl()); return *grad_tensor.get(); } else { - VLOG(7) << "Set Var for RetainGrad Hook for tensor: " << t.name(); - PADDLE_ENFORCE_EQ( - t.Var().IsInitialized(), true, - paddle::platform::errors::Fatal( - "Detected uninitialized variable, causing segmentation " - "fault " - "inside the hook." - "Variable %s has to be initialized while we need to set it." - "please check tensor initialization status.", - t.name())); - grad_tensor->MutableVar() - ->GetMutable() - ->ShareDataWith(t.Var().Get()); - return *grad_tensor.get(); + PADDLE_THROW(paddle::platform::errors::Fatal( + "Detected uninitialized variable, causing segmentation " + "fault " + "inside the hook." + "Tensor has to be initialized while we need to set it." 
+ "please check tensor initialization status.")); } } else { - VLOG(7) << "Retain NULL EagerTensor in Grad Hook"; - return EagerTensor(); + VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook"; + return paddle::experimental::Tensor(); } }; diff --git a/paddle/fluid/eager/api/utils/hook_utils.h b/paddle/fluid/eager/api/utils/hook_utils.h index 7e4faa5a2c7..79e077d31e3 100644 --- a/paddle/fluid/eager/api/utils/hook_utils.h +++ b/paddle/fluid/eager/api/utils/hook_utils.h @@ -21,12 +21,13 @@ namespace egr { namespace egr_utils_api { void RegisterGradientHookForTensor( - const egr::EagerTensor& tensor, - std::function& hook); + const paddle::experimental::Tensor& tensor, + std::function& hook); -void RegisterReduceHookForTensor(const egr::EagerTensor& tensor, +void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor, const std::function& hook); -void RetainGradForTensor(const egr::EagerTensor& tensor); +void RetainGradForTensor(const paddle::experimental::Tensor& tensor); } // namespace egr_utils_api } // namespace egr diff --git a/paddle/fluid/eager/api/utils/tensor_utils.cc b/paddle/fluid/eager/api/utils/tensor_utils.cc index 115c9144df2..bd4e9f0af94 100644 --- a/paddle/fluid/eager/api/utils/tensor_utils.cc +++ b/paddle/fluid/eager/api/utils/tensor_utils.cc @@ -28,7 +28,7 @@ namespace egr { namespace egr_utils_api { -bool IsLeafTensor(const egr::EagerTensor& target) { +bool IsLeafTensor(const paddle::experimental::Tensor& target) { std::shared_ptr grad_node = EagerUtils::grad_node(target); if (std::dynamic_pointer_cast(grad_node)) { return true; @@ -37,17 +37,14 @@ bool IsLeafTensor(const egr::EagerTensor& target) { return false; } -egr::EagerTensor CreateTensorWithValue(const pten::DDim& ddim, - const paddle::platform::Place& place, - const pten::DataType& dtype, - const pten::DataLayout& layout, - float value, bool is_leaf) { - paddle::experimental::Tensor tensor = paddle::experimental::full( +paddle::experimental::Tensor CreateTensorWithValue( + const pten::DDim& ddim, const paddle::platform::Place& place, + const pten::DataType& dtype, const pten::DataLayout& layout, float value, + bool is_leaf) { + paddle::experimental::Tensor out = paddle::experimental::full( paddle::framework::vectorize(ddim), paddle::experimental::Scalar(value), dtype, pten::TransToPtenBackend(place), layout); - egr::EagerTensor out = egr::EagerTensor(); - out.set_tensor(std::make_shared(tensor)); auto meta = EagerUtils::autograd_meta(&out); if (is_leaf) { auto accumulation_node = std::make_shared(); diff --git a/paddle/fluid/eager/api/utils/tensor_utils.h b/paddle/fluid/eager/api/utils/tensor_utils.h index b3c4b596823..63336b53190 100644 --- a/paddle/fluid/eager/api/utils/tensor_utils.h +++ b/paddle/fluid/eager/api/utils/tensor_utils.h @@ -22,13 +22,12 @@ namespace egr_utils_api { // If and only if the tensor holds an AccumulationNode // Then it's treated as a leaf tensor -bool IsLeafTensor(const egr::EagerTensor& target); +bool IsLeafTensor(const paddle::experimental::Tensor& target); -egr::EagerTensor CreateTensorWithValue(const pten::DDim& ddim, - const paddle::platform::Place& place, - const pten::DataType& dtype, - const pten::DataLayout& layout, - float value, bool is_leaf = true); +paddle::experimental::Tensor CreateTensorWithValue( + const pten::DDim& ddim, const paddle::platform::Place& place, + const pten::DataType& dtype, const pten::DataLayout& layout, float value, + bool is_leaf = true); } // namespace egr_utils_api } // namespace egr diff --git 
a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 4f404bbf8ee..ab3b33d411c 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1022,13 +1022,27 @@ static std::string GenerateGradNodeCreationContent( "egr::EagerUtils::autograd_meta(&%s);\n"; get_autograd_meta_str += paddle::string::Sprintf( GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name); - + if (op_passing_outs_map[op_type].count(output_name)) { + const std::string output_var_args_name = output_name + "Var"; + const char* FWD_OUT_SYNC_BACK_TEMPLATE = + " egr::EagerUtils::OverwriteOutputs(%s, %s);\n"; + get_autograd_meta_str += paddle::string::Sprintf( + FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name); + } } else { const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE = " egr::AutogradMeta* %s = " "egr::EagerUtils::autograd_meta(&%s);\n"; get_autograd_meta_str += paddle::string::Sprintf( GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name); + + if (op_passing_outs_map[op_type].count(output_name)) { + const std::string output_var_args_name = output_name + "Var"; + const char* FWD_OUT_SYNC_BACK_TEMPLATE = + " egr::EagerUtils::OverwriteOutputs(%s, %s);\n"; + get_autograd_meta_str += paddle::string::Sprintf( + FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name); + } } } VLOG(6) << "Generated outputs autograd_meta"; @@ -1215,13 +1229,13 @@ static std::pair GenerateForwardFunctionContents( // According to fwd_inputs_name_pos_map std::map>> ins = - { {"X" , SyncToVars(X)}, { "Y" , SyncToVars(Y)} }; + { {"X" , TrySyncToVars(X)}, { "Y" , TrySyncToVars(Y)} }; std::map>> outs = { - {"Out0" , ConstructDuplicableOutput(Out0Num)}, {"Out1" - ,ConstructDuplicableOutput(Out1Num)} }; + {"Out0" , CreateVars(Out0Num)}, {"Out1" + ,CreateVars(Out1Num)} }; // According to op_proto->attrs() @@ -1230,9 +1244,11 @@ static std::pair GenerateForwardFunctionContents( Controller.Instance().GetExpectedPlace(), {}); // According to fwd_outputs_names - std::vector Out0 = GGetOutputetOutputs(outs["Out0"]); - egr::EagerTensor Out1 = GetOutputs(outs["Out1"][0]); - std::vector Out2 = GetOutputs(outs["Out2"]); + std::vector Out0 = + GetOutputs(outs["Out0"]); + paddle::experimental::Tensor Out1 = GetOutputs(outs["Out1"][0]); + std::vector Out2 = + GetOutputs(outs["Out2"]); // Grad Node Generation Codes ... 
@@ -1266,13 +1282,14 @@ static std::pair GenerateForwardFunctionContents( if (input.duplicable()) { const char* FWD_INS_ARG_TEMPLATE = - "const std::vector& %s"; + "const std::vector& %s"; input_args_str_list[input_position] = paddle::string::Sprintf(FWD_INS_ARG_TEMPLATE, input_name); core_ops_args_type_info[op_type][input_position] = "list"; } else { - const char* FWD_INS_ARG_TEMPLATE = "const egr::EagerTensor& %s"; + const char* FWD_INS_ARG_TEMPLATE = + "const paddle::experimental::Tensor& %s"; input_args_str_list[input_position] = paddle::string::Sprintf(FWD_INS_ARG_TEMPLATE, input_name); @@ -1283,7 +1300,7 @@ static std::pair GenerateForwardFunctionContents( if (input.dispensable()) continue; const char* FWD_INS_CONTENT_TEMPLATE = - "{ \"%s\", egr::EagerUtils::SyncToVars(%s) },"; + "{ \"%s\", egr::EagerUtils::TrySyncToVars(%s) },"; ins_contents_str += paddle::string::Sprintf(FWD_INS_CONTENT_TEMPLATE, input_name, input_name); } @@ -1313,13 +1330,13 @@ static std::pair GenerateForwardFunctionContents( if (input.duplicable()) { const char* FWD_INS_CONTENT_TEMPLATE = " if(%s.size() > 0) " - "ins[\"%s\"] = egr::EagerUtils::SyncToVars(%s)\n;"; + "ins[\"%s\"] = egr::EagerUtils::TrySyncToVars(%s)\n;"; generated_function_body += paddle::string::Sprintf( FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name); } else { const char* FWD_INS_CONTENT_TEMPLATE = - " if(%s.safe_initialized()) " - "ins[\"%s\"] = egr::EagerUtils::SyncToVars(%s)\n;"; + " if(%s.initialized()) " + "ins[\"%s\"] = egr::EagerUtils::TrySyncToVars(%s)\n;"; generated_function_body += paddle::string::Sprintf( FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name); } @@ -1340,14 +1357,14 @@ static std::pair GenerateForwardFunctionContents( // in form of shared_ptr/vector> if (output.duplicable()) { const char* FWD_NUM_ARG_TEMPLATE = - ", std::vector& %s"; + ", std::vector& %s"; std::string arg_str = paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name); dygraph_function_args_str += arg_str; core_ops_args_type_info[op_type].push_back("list"); } else { - const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor* %s"; + const char* FWD_NUM_ARG_TEMPLATE = ", paddle::experimental::Tensor* %s"; std::string arg_str = paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name); dygraph_function_args_str += arg_str; @@ -1370,7 +1387,7 @@ static std::pair GenerateForwardFunctionContents( paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, outnum); dygraph_function_args_str += arg_str; const char* FWD_OUTS_CONTENT_TEMPLATE = - "{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput(%s) },"; + "{ \"%s\", egr::EagerUtils::CreateVars(%s) },"; outs_contents_str += paddle::string::Sprintf(FWD_OUTS_CONTENT_TEMPLATE, output_name, outnum); core_ops_args_info[op_type].push_back(outnum); @@ -1424,24 +1441,41 @@ static std::pair GenerateForwardFunctionContents( std::vector return_types(output_size); for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); + const std::string output_var_args_name = output_name + "Var"; std::string out_tensor_str; size_t return_position = fwd_outputs_name_pos_map.at(output_name); std::string output_varname = LegalizeVariableName(output_name); if (output.duplicable()) { const char* FWD_OUT_TENSORS_TEMPLATE = - " std::vector %s = " + " std::vector %s = " "egr::EagerUtils::GetOutputs(outs[\"%s\"]);\n"; out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSORS_TEMPLATE, output_varname, output_name); - return_types[return_position] = "std::vector"; + 
return_types[return_position] = + "std::vector"; + if (op_passing_outs_map[op_type].count(output_name) && + bwd_info.GenerateForwardOnly()) { + const char* FWD_OUT_SYNC_BACK_TEMPLATE = + " egr::EagerUtils::OverwriteOutputs(outs[\"%s\"], %s);\n"; + out_tensor_str += paddle::string::Sprintf( + FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name); + } } else { const char* FWD_OUT_TENSOR_TEMPLATE = - " egr::EagerTensor %s = " + " paddle::experimental::Tensor %s = " "egr::EagerUtils::GetOutput(outs[\"%s\"][0]);\n"; out_tensor_str = paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, output_varname, output_name); - return_types[return_position] = "egr::EagerTensor"; + + if (op_passing_outs_map[op_type].count(output_name) && + bwd_info.GenerateForwardOnly()) { + const char* FWD_OUT_SYNC_BACK_TEMPLATE = + " egr::EagerUtils::OverwriteOutputs(outs[\"%s\"][0], %s);\n"; + out_tensor_str += paddle::string::Sprintf( + FWD_OUT_SYNC_BACK_TEMPLATE, output_name, output_var_args_name); + } + return_types[return_position] = "paddle::experimental::Tensor"; } return_contents[return_position] = output_varname; @@ -1563,7 +1597,8 @@ static std::string GenerateSingleOpBase( grad_ins_fwd_slotname_map.at(grad_input_name) + "_"; const char* GRAD_INS_FWD_CONTENT_TEMPLATE = "{ \"%s\", " - "egr::EagerUtils::SyncToVars(egr::EagerUtils::RecoverTensorWrapper(" + "egr::EagerUtils::TrySyncToVars(egr::EagerUtils::" + "RecoverTensorWrapper(" "&" "this->%s, " "nullptr)) },"; @@ -1576,7 +1611,7 @@ static std::string GenerateSingleOpBase( size_t fwd_output_position = fwd_outputs_name_pos_map.at( grad_ins_grad_slotname_map.at(grad_input_name)); const char* GRAD_INS_GRAD_CONTENT_TEMPLATE = - "{ \"%s\", egr::EagerUtils::SyncToVars(grads[%d]) },"; + "{ \"%s\", egr::EagerUtils::TrySyncToVars(grads[%d]) },"; ins_contents_str += paddle::string::Sprintf( GRAD_INS_GRAD_CONTENT_TEMPLATE, grad_input_name, fwd_output_position); @@ -1653,7 +1688,7 @@ static std::string GenerateSingleOpBase( size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name); const char* GRAD_OUTS_CONTENT_TEMPLATE = - "{ \"%s\", egr::EagerUtils::SyncToVars(grads[%d]) },"; + "{ \"%s\", egr::EagerUtils::TrySyncToVars(grads[%d]) },"; outs_contents_str += paddle::string::Sprintf( GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position); @@ -1662,7 +1697,7 @@ static std::string GenerateSingleOpBase( if (duplicable_input_name_set.count(fwd_name) && !is_op_base_per_duplicable_input) { const char* GRAD_OUTS_CONTENT_TEMPLATE = - "{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput( " + "{ \"%s\", egr::EagerUtils::CreateVars( " "this->OutputMeta()[%d].Size() ) },"; outs_contents_str += paddle::string::Sprintf( GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position); @@ -1813,7 +1848,7 @@ static std::string GenerateGradNodeCCContents( { "X" : this->"X", "Y" : this->"Y", "Out0@Grad": - SyncToVars(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out0@Grad"]]"]), + TrySyncToVars(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out0@Grad"]]"]), "Out1@Grad": TensorsToVarBases(grads["fwd_outputs_name_pos_map[grad_ins_grad_slotname_map["Out1@Grad"]]"]) }; @@ -1822,9 +1857,9 @@ static std::string GenerateGradNodeCCContents( std::map>> outs = { "X@Grad" : - ConstructDuplicableOutput(this->OutputMeta()["fwd_inputs_name_pos_map[grad_outs_slotname_map["X@Grad"]]"].Size()), + CreateVars(this->OutputMeta()["fwd_inputs_name_pos_map[grad_outs_slotname_map["X@Grad"]]"].Size()), "Y@Grad" : - 
ConstructDuplicableOutput(this->OutputMeta()["fwd_inputs_name_pos_map[grad_outs_slotname_map["Y@Grad"]]"].Size()) + CreateVars(this->OutputMeta()["fwd_inputs_name_pos_map[grad_outs_slotname_map["Y@Grad"]]"].Size()) }; // Visit each OpBase @@ -1835,7 +1870,7 @@ static std::string GenerateGradNodeCCContents( egr::Controller::Instance().ExpectedPlace(), false, {}); } - vector> outputs(outs.size()); + vector> outputs(outs.size()); for(auto& kv : outs) { outputs["fwd_inputs_name_pos_map[grad_outs_slotname_map[kv.first]]"] = GetOutputs(outs["kv.first"]); @@ -1899,7 +1934,7 @@ static std::string GenerateGradNodeCCContents( } const char* BWD_RETURN_TEMPLATE = - " std::vector> outputs(%d);\n" + " std::vector> outputs(%d);\n" " %s\n" " return outputs;\n"; generated_grad_function_body = paddle::string::Sprintf( @@ -1907,9 +1942,9 @@ static std::string GenerateGradNodeCCContents( // [Generation] Get Full Grad Function const char* GRAD_FUNCTION_TEMPLATE = - "std::vector> " + "std::vector> " "GradNode%s::operator()(const " - "std::vector>& grads) {\n%s\n}"; + "std::vector>& grads) {\n%s\n}"; std::string grad_function_str = paddle::string::Sprintf( GRAD_FUNCTION_TEMPLATE, fwd_op_type, generated_grad_function_body); @@ -1940,9 +1975,9 @@ static std::string GenerateGradNodeHeaderContents( "egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}\n" " ~GradNode%s() override = default;\n" "\n" - " virtual std::vector> " + " virtual std::vector> " "operator()(const " - "std::vector>& grads) " + "std::vector>& grads) " "override;\n" "\n" " // SetX, SetY, ...\n" @@ -1999,7 +2034,7 @@ static std::string GenerateGradNodeHeaderContents( std::string full_reserved_str = "full_reserved"; if (duplicable_tensors.count(tensor_wrapper_name)) { const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE = - "const std::vector& %s"; + "const std::vector& %s"; tensor_wrapper_arg_str = paddle::string::Sprintf( ATTR_TENSOR_WRAPPER_ARG_TEMPLATE, tensor_wrapper_name); @@ -2019,7 +2054,7 @@ static std::string GenerateGradNodeHeaderContents( } else { const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE = - "const egr::EagerTensor& %s"; + "const paddle::experimental::Tensor& %s"; tensor_wrapper_arg_str = paddle::string::Sprintf( ATTR_TENSOR_WRAPPER_ARG_TEMPLATE, tensor_wrapper_name); diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 63a74fd1008..3c629b2145e 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -409,7 +409,7 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map, tensor_wrapper_name = GetSavedName(tname) if IsPlainTensorType(ttype): SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = """ - void SetTensorWrapper{}(const egr::EagerTensor& {}, bool full_reserved) {{ + void SetTensorWrapper{}(const paddle::experimental::Tensor& {}, bool full_reserved) {{ {} = egr::TensorWrapper({}, full_reserved); }} """ @@ -424,7 +424,7 @@ def GenerateNodeDeclaration(fwd_api_name, backward_fwd_input_map, else: assert IsVectorTensorType(ttype) SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = """ - void SetTensorWrapper{}(const std::vector& {}, bool full_reserved) {{ + void SetTensorWrapper{}(const std::vector& {}, bool full_reserved) {{ for(const auto& eager_tensor : {}) {{ {}.emplace_back( egr::TensorWrapper(eager_tensor, full_reserved) ); }}; @@ -469,8 +469,8 @@ class {} : public egr::GradNodeBase {{ egr::GradNodeBase(bwd_in_slot_num, 
bwd_out_slot_num) {{}} ~{}() override = default; - virtual std::vector> operator()( - const std::vector>& grads) override; + virtual std::vector> operator()( + const std::vector>& grads) override; // SetTensorWrapperX, SetTensorWrapperY, ... {} @@ -510,17 +510,15 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map, grad_api_position), in backward_fwd_input_map.items(): tensor_wrapper_name = GetSavedName(name) grad_api_args[ - grad_api_position] = f"egr::EagerUtils::SyncToPtenTensors( egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name}, nullptr) )" + grad_api_position] = f"egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name}, nullptr)" for _, (ttype, fwd_position, grad_api_position) in backward_grad_input_map.items(): if IsPlainTensorType(ttype): - grad_api_args[ - grad_api_position] = f"egr::EagerUtils::SyncToPtenTensors( grads[{fwd_position}][0] )" + grad_api_args[grad_api_position] = f"grads[{fwd_position}][0]" else: assert IsVectorTensorType(ttype) - grad_api_args[ - grad_api_position] = f"egr::EagerUtils::SyncToPtenTensors( grads[{fwd_position}] )" + grad_api_args[grad_api_position] = f"grads[{fwd_position}]" for name, _, _, grad_api_position in backward_attrs_list: saved_attribute_name = GetSavedName(name) @@ -529,25 +527,25 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map, # Construct grad_api returns num_bwd_outputs = len(backward_grad_output_map.keys()) - returns_str = f"std::vector> returns({num_bwd_outputs});\n" + returns_str = f"std::vector> returns({num_bwd_outputs});\n" for _, (ttype, fwd_position, grad_api_position) in backward_grad_output_map.items(): # Infer Grad API Return Type if num_bwd_outputs == 1: # Single tensor output, return as is if IsPlainTensorType(ttype): - returns_str += "returns[0] = { egr::EagerUtils::CreateEagerTensorFromTensor(grad_api_returns) };\n" + returns_str += "returns[0] = { grad_api_returns };\n" else: assert IsVectorTensorType(ttype) - returns_str += "returns[0] = egr::EagerUtils::CreateEagerTensorFromTensor(grad_api_returns);\n" + returns_str += "returns[0] = grad_api_returns;\n" else: # Rearrange output order accordingly - returns_str += f"returns[{fwd_position}] = egr::EagerUtils::CreateEagerTensorFromTensor( grad_api_returns[{grad_api_position}] );\n" + returns_str += f"returns[{fwd_position}] = grad_api_returns[{grad_api_position}];\n" returns_str += f"return returns;\n" grad_node_name = GetGradNodeName(fwd_api_name) FUNCTION_TEMPLATE = """ -std::vector> {}::operator()(const std::vector>& grads) {{ +std::vector> {}::operator()(const std::vector>& grads) {{ // Call grad_api function auto grad_api_returns = paddle::experimental::{}({}); {} @@ -601,18 +599,18 @@ def GenerateNodeCreationCodes(fwd_api_name, bwd_api_name, output_autograd_meta_vec_name = GetAutoGradMetaVectorName(name) if num_fwd_outputs == 1: if IsPlainTensorType(rtype): - output_autograd_meta = f" egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&outputs);" + output_autograd_meta = f" egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&api_result);" else: assert IsVectorTensorType(rtype) - output_autograd_meta = f" std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&outputs);\n" + output_autograd_meta = f" std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&api_result);\n" output_autograd_meta += f" std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" else: 
# Tuple api_result if IsPlainTensorType(rtype): - outputs_autograd_meta = f" egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&outputs[{pos}]);" + outputs_autograd_meta = f" egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&api_result[{pos}]);" else: assert IsVectorTensorType(rtype) - output_autograd_meta = f" std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&outputs[{pos}]);\n" + output_autograd_meta = f" std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&api_result[{pos}]);\n" output_autograd_meta += f" std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" outputs_autograd_meta_list.append(output_autograd_meta) @@ -674,9 +672,9 @@ def GenerateNodeCreationCodes(fwd_api_name, bwd_api_name, set_grad_in_meta_list.append(set_grad_in_meta) if num_outputs == 1: - set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(outputs);" + set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result);" else: - set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(outputs[{pos}]);" + set_retain_grad = f" egr::EagerUtils::CheckAndRetainGrad(api_result[{pos}]);" set_retain_grad_list.append(set_retain_grad) set_out_rank_str = "\n".join(set_out_rank_list) set_history_str = "\n".join(set_history_list) @@ -746,13 +744,14 @@ def GenerateForwardDefinition(fwd_api_name, bwd_api_name, inputs_args_list = ["" for i in range(num_inputs)] inputs_call_list = ["" for i in range(num_inputs)] for name, (ttype, pos) in forward_inputs_position_map.items(): - inputs_call_list[pos] = f"egr::EagerUtils::SyncToPtenTensors({name})" + inputs_call_list[pos] = f"{name}" if IsPlainTensorType(ttype): - inputs_args_list[pos] = f"const egr::EagerTensor& {name}" + inputs_args_list[ + pos] = f"const paddle::experimental::Tensor& {name}" else: assert IsVectorTensorType(ttype) inputs_args_list[ - pos] = f"const std::vector& {name}" + pos] = f"const std::vector& {name}" for name, atype, default_val, pos in forward_attrs_list: inputs_call_list[pos] = name @@ -773,18 +772,16 @@ def GenerateForwardDefinition(fwd_api_name, bwd_api_name, returns_list = ["" for i in range(num_outputs)] for name, (rtype, pos) in forward_outputs_position_map.items(): if num_outputs == 1: - returns_list[ - 0] = f"egr::EagerUtils::CreateEagerTensorFromTensor(api_result)" + returns_list[0] = f"api_result" else: # Tuple api_result - returns_list[ - pos] = f"egr::EagerUtils::CreateEagerTensorFromTensor(api_result[{pos}])" + returns_list[pos] = f"api_result[{pos}]" if IsPlainTensorType(rtype): - returns_type_list[pos] = "egr::EagerTensor" + returns_type_list[pos] = "paddle::experimental::Tensor" else: assert IsVectorTensorType(rtype) - returns_type_list[pos] = "std::vector" + returns_type_list[pos] = "std::vector" if num_outputs == 1: returns_str = returns_list[0] @@ -806,19 +803,17 @@ def GenerateForwardDefinition(fwd_api_name, bwd_api_name, // Forward API Call {} - auto outputs = {}; - {} // Returns - return outputs; + return {}; }} """ forward_function_name = GetForwardFunctionName(fwd_api_name) forward_function_str = FORWARD_FUNCTION_TEMPLATE.format( returns_type_str, forward_function_name, inputs_args_str, - forward_call_str, returns_str, node_creation_str) + forward_call_str, node_creation_str, returns_str) forward_function_declaration_str = f"{returns_type_str} {forward_function_name}({inputs_args_str});" return forward_function_str, forward_function_declaration_str diff --git a/paddle/fluid/eager/autograd_meta.h 
b/paddle/fluid/eager/autograd_meta.h index 18156f913de..f4b2b8e08d4 100644 --- a/paddle/fluid/eager/autograd_meta.h +++ b/paddle/fluid/eager/autograd_meta.h @@ -75,20 +75,20 @@ class AutogradMeta : public AbstractAutogradMeta { ~AutogradMeta() override = default; - const egr::EagerTensor& Grad() const { + const paddle::experimental::Tensor& Grad() const { PADDLE_ENFORCE_NOT_NULL( grad_.get(), paddle::platform::errors::InvalidArgument( "Should Not get NULL from Grad pointer, since " - "we should have default EagerTensor once we init AutoGradMeta. " + "we should have default Tensor once we init AutoGradMeta. " "if you got this error may indicates framework error in " "PaddlePaddle")); return *(grad_.get()); } - egr::EagerTensor* MutableGrad() { return grad_.get(); } + paddle::experimental::Tensor* MutableGrad() { return grad_.get(); } - std::weak_ptr WeakGrad() { return grad_; } + std::weak_ptr WeakGrad() { return grad_; } void SetGradNode(const std::shared_ptr& grad_node) { PADDLE_ENFORCE_NOT_NULL( @@ -137,8 +137,9 @@ class AutogradMeta : public AbstractAutogradMeta { private: // TODO(jiabin) :Should we use pointer instead of object? - std::shared_ptr grad_{std::make_shared( - egr::Controller::Instance().GenerateUniqueName("@grad"))}; + std::shared_ptr grad_{ + std::make_shared( + egr::Controller::Instance().GenerateUniqueName("@grad"))}; // GradNodeBase is base class of all grad op which is a // wrapper for grad op. This class will make grad op easy diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index b0e3d81df3a..87840b53b00 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -72,15 +72,15 @@ std::unordered_map getInDegreeMap( } void RunBackwardHooks( - const std::vector>& grad_tensors, + const std::vector>& grad_tensors, egr::GradNodeBase* grad_node) { grad_node->ApplyGradientHooks(grad_tensors); VLOG(6) << "Apply Reduce Hooks for node"; grad_node->ApplyReduceHooks(); } -void RunBackward(const std::vector& tensors, - const std::vector& grad_tensors, +void RunBackward(const std::vector& tensors, + const std::vector& grad_tensors, bool retain_graph) { VLOG(6) << "Start Backward"; // *Gradient Hook should happen at node-level @@ -94,7 +94,7 @@ void RunBackward(const std::vector& tensors, std::unordered_map> node_input_buffers_dict; for (size_t i = 0; i < tensors.size(); i++) { - const egr::EagerTensor& tensor = tensors[i]; + const paddle::experimental::Tensor& tensor = tensors[i]; AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(tensor); // Get grad input info from target tensors @@ -180,7 +180,7 @@ void RunBackward(const std::vector& tensors, // TODO(jiabin): Support post hook here and make hook run in seperate // operator // Run Pre Backward Node and get outputs - std::vector> grad_output_tensors = + std::vector> grad_output_tensors = (*node)(node_input_buffer->Buffers()); // TODO(jiabin): Should we erase it or find a more efficient way. node_input_buffers_dict.erase(node); @@ -218,16 +218,14 @@ void RunBackward(const std::vector& tensors, "grad_output_tensors[i].size(), which is: %d. This error may " "indicate autoprune or autograd api error. 
", grad_output_tensors.size())); - egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j]; + paddle::experimental::Tensor& grad_output_tensor = + grad_output_tensors[i][j]; if ((!grad_output_tensor.defined() || !grad_output_tensor.initialized())) { - if (!grad_output_tensor.Var().IsInitialized()) { - VLOG(6) - << "We get grad_output_tensor with slot: " << i - << ", rank: " << j - << " as uninitialized or undefined in both tensor and variable"; - } + VLOG(6) + << "We get grad_output_tensor with slot: " << i << ", rank: " << j + << " as uninitialized or undefined in both tensor and variable"; } VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i << ", rank: " << j diff --git a/paddle/fluid/eager/backward.h b/paddle/fluid/eager/backward.h index b077fb37e87..73272461b79 100644 --- a/paddle/fluid/eager/backward.h +++ b/paddle/fluid/eager/backward.h @@ -22,8 +22,8 @@ namespace egr { // run_backward(): // tensors corresponds to those lived in the backward graph // each grad_tensors[i] keeps the value for its corresponding tensors[i] -void RunBackward(const std::vector &tensors, - const std::vector &grad_tensors, +void RunBackward(const std::vector &tensors, + const std::vector &grad_tensors, bool retain_graph = false); // Reserved for gradient() diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index e11a471946a..2326ab012e3 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -42,238 +42,52 @@ namespace egr { class EagerTensor final { public: - /* Part 1: Constructors */ - EagerTensor() - : tensor_(std::make_shared()), - var_(paddle::framework::Variable()) {} - explicit EagerTensor(const std::string& name) - : tensor_(std::make_shared(name)), - var_(paddle::framework::Variable()) {} - /** - * @description: Use a TensorImpl pointer to construct a Tensor - * @param {shared_ptr} tensor_impl - * @return {Tensor} - */ - explicit EagerTensor(const std::shared_ptr& tensor_impl) - : tensor_(std::make_shared(tensor_impl)), - var_(paddle::framework::Variable()) {} + /* Default constructor and name constructor should only be used for contruct + * output and in fluid*/ + EagerTensor() = default; - EagerTensor(const EagerTensor&) = default; - EagerTensor(EagerTensor&&) = default; + explicit EagerTensor(const std::string& name) : name_(name) {} - /* Part 2: Name access methods */ - /** - * @description: Return the name of current Tensor. - * @param None - * @return {const std::string&} - */ - const std::string& name() const { return tensor_->name(); } - /** - * @description: Set the name of current Tensor. - * @param {const std::string& name} - * @return None - */ - void set_name(const std::string& name) { tensor_->set_name(name); } - - /* Part 3: Dimension, DataType and DataLayout methods */ - /** - * @description: Return the number of elements of current Tensor. - * @param None - * @return {int64_t} - */ - int64_t numel() const { return tensor_->numel(); } - /** - * @description: Return the shape (dimensions) of current Tensor. - * @param None - * @return {DDim} - */ - paddle::framework::DDim shape() const { return tensor_->dims(); } - - /** - * @description: Return the data type of current Tensor. - * @param None - * @return {DataType} - */ - paddle::experimental::DataType type() const { return tensor_->type(); } - - /** - * @description: Return the layout of current Tensor. 
- * @param None - * @return {DataLayout} - */ - paddle::experimental::DataLayout layout() const { return tensor_->layout(); } - - /* Part 3: Device and Backend methods */ - /** - * @description: Return the place (device) of current Tensor. - * @param None - * @return {Place} - */ - paddle::platform::Place place() const { return tensor_->inner_place(); } - - /** - * Backend judgment APIs, shield the concept of Backend. - */ - bool is_cpu() const { return paddle::platform::is_cpu_place(place()); } - bool is_cuda() const { return paddle::platform::is_gpu_place(place()); } - - /* Part 4: Data Access methods */ - /** - * @description: Return the implemention of current Tensor. - * @param None - * @return {std::shared_ptr} - */ - std::shared_ptr impl() const { return tensor_->impl(); } - - /** - * @description: Set the implemention of current Tensor. - * @param {std::shared_ptr} - * @return None - */ - void set_impl(const std::shared_ptr& impl) { - tensor_->set_impl(impl); - } - - // TODO(chenweihang): Whether API Tensor need `data` and `mutable_data`? - - // TODO(chenweihang): slice and split methods use kernels? - - /* Part 5: Status utils methods */ - /** - * @description: Determine whether it is a meaningful Tensor - * @param None - * @return {bool} - */ - bool defined() const { return tensor_->defined(); } - - /** - * @description: Determine whether Tensor is initialized - * @param None - * @return {bool} - */ - bool initialized() const { return tensor_->initialized(); } - - bool safe_initialized() const { - return initialized() || var_.IsInitialized(); - } - - /** - * @description: Reset the Tensor implementation - * @param None - * @return {void} - */ - void reset() { tensor_->reset(); } - - /** - * @brief Determine whether tensor is DenseTensor - * - * @return true - * @return false - */ - bool is_dense_tensor() const { return tensor_->is_dense_tensor(); } - - /** - * @brief Transfer the current Tensor to the specified device and return. - * - * @param place, the target place of which the tensor will copy to. - * @return Tensor - */ - // TODO(chenweihang): replace Backend by new Place - EagerTensor copy_to(pten::Backend backend, bool blocking) const { - if (Var().IsInitialized()) { - const_cast(this)->SyncToTensor(); - } - return EagerTensor(tensor_->copy_to(backend, blocking)); - } - - /** - * @brief Transfer the source Tensor to current Tensor. - * - * @param src, the source Tensor to be copied. - * @param blocking, Should we copy this in sync way. 
- * @return void - */ - void copy_(const EagerTensor& src, const bool blocking) { - if (src.Var().IsInitialized()) { - const_cast(&src)->SyncToTensor(); - } - if (Var().IsInitialized()) { - SyncToTensor(); - } - tensor_->copy_(*(src.tensor_.get()), blocking); - } - /* Part 6: Operator overloading */ - EagerTensor& operator=(const EagerTensor& x) & { - tensor_ = x.tensor_; - var_ = x.var_; - return *this; - } - EagerTensor& operator=(EagerTensor&& x) & { - tensor_ = std::move(x.tensor_); - var_ = std::move(x.var_); - return *this; - } - - /* Part 7: Autograd methods */ - paddle::experimental::AbstractAutogradMeta* get_autograd_meta() const { - return tensor_->get_autograd_meta(); - } - void set_autograd_meta( - std::shared_ptr - autograd_meta) { - tensor_->set_autograd_meta(autograd_meta); - } - - /** Part 9: Get framework::Variable from EagerTensor **/ - const paddle::framework::Variable& Var() const { return var_; } - - paddle::framework::Variable* MutableVar() { return &var_; } - - /** Part 10: Sync paddle::framework::Variable with pten::Tensor **/ - void SyncToVar(paddle::framework::proto::VarType_Type type = - paddle::framework::proto::VarType::LOD_TENSOR) { - // Synchronize allocation only once. - if (!var_.IsInitialized()) { - // TODO(jiabin): Support selected rows later. - if (this->initialized()) { - if (type == paddle::framework::proto::VarType::LOD_TENSOR) { - auto* framework_tensor = - var_.GetMutable(); - framework_tensor->Resize(tensor_->dims()); - framework_tensor->set_layout(tensor_->layout()); - // Contruct framework::Tensor from egr::EagerTensor - auto tensor_dense = - std::dynamic_pointer_cast(tensor_->impl()); - if (tensor_dense && tensor_dense.get()) { - *framework_tensor = *tensor_dense; - } else { - PADDLE_THROW(paddle::platform::errors::Fatal( - "Unrecognized egr::EagerTensor type, only " - "DenseTensor is supported for now.")); - } - } + explicit EagerTensor(const paddle::experimental::Tensor& tensor) + : name_(tensor.name()) { + if (tensor.defined()) { + if (tensor.is_dense_tensor()) { + auto* framework_tensor = + var_.GetMutable(); + // Contruct framework::Tensor from egr::EagerTensor + auto tensor_dense = + std::dynamic_pointer_cast(tensor.impl()); + PADDLE_ENFORCE_EQ((tensor_dense.get() && tensor_dense), true, + paddle::platform::errors::Fatal( + "Failed to Trans Tensor to EagerVariable since " + "we got Tensor with type DenseTensor, and we got " + "EagerVariable with another type.")); + *framework_tensor = *tensor_dense; } else { PADDLE_THROW(paddle::platform::errors::Fatal( - "Can not Sync EagerTensor %s whose " - "pten::DenseTensor is not initialized!", - name())); + "Unrecognized egr::EagerVariable type, only " + "DenseTensor and SelectedRows is supported for now.")); } + } else { + VLOG(6) << "Build Empty EagerTensor with name " << name_; } } - /** Part 11: Sync paddle::framework::Variable with pten::Tensor **/ - void SyncToTensor() { - // Synchronize allocation only once. + + /** Part 11: Construct paddle::framework::Variable with pten::Tensor **/ + std::shared_ptr GetTensorBase() { + // Construct allocation only once. 
if (var_.IsInitialized()) { if (var_.IsType()) { - SetImplWithLegacyTensor(); + return SetImplWithLegacyTensor(); } else if (var_.IsType()) { - SetImplWithLegacyTensor(); + return SetImplWithLegacyTensor(); + } else if (var_.IsType()) { + return SetImplWithSelectedRows(); } else { - PADDLE_THROW( - paddle::platform::errors::Fatal("Unable to fetch underlying tensor " - "from VarBase, only LoDTensor and " - "Tensor are supported for now")); + PADDLE_THROW(paddle::platform::errors::Fatal( + "Unable to fetch underlying tensor " + "from EagerTensor, only LoDTensor and " + "Tensor are supported for now")); } } else { PADDLE_THROW(paddle::platform::errors::Fatal( @@ -282,43 +96,36 @@ class EagerTensor final { name())); } } + const paddle::framework::Variable& Var() const { return var_; } + + paddle::framework::Variable* MutableVar() { return &var_; } void ResetVar(const paddle::framework::Variable& src) { var_ = src; } - const std::shared_ptr& Tensor() const { - return tensor_; - } + const std::string& name() const { return name_; } - void set_tensor(const std::shared_ptr& tensor) { - tensor_ = tensor; - } + void set_name(const std::string& name) { name_ = name; } private: - template - void SetImplWithLegacyTensor() { + template + std::shared_ptr SetImplWithLegacyTensor() { const auto& framework_tensor = var_.Get(); - if (defined()) { - VLOG(8) << "Sync Var to initialized tensor for: " << name(); - static_cast(*impl()) = framework_tensor; - } else { - VLOG(8) << "Sync Var to uninitialized tensor for: " << name(); - this->set_impl(std::make_shared(framework_tensor)); - } - var_.Clear(); + VLOG(8) << "Sync Var to tensor for: " << name(); + return std::make_shared(std::move(framework_tensor)); } - private: - /** - * @description: Use a pten::Tensor pointer to construct a EagerTensor, never - * public this!!!!. 
- * @param {pten::Tensor} tensor - * @return {EagerTensor} - */ - explicit EagerTensor(const paddle::experimental::Tensor& tensor) - : tensor_(std::make_shared(tensor)), - var_(paddle::framework::Variable()) {} + std::shared_ptr SetImplWithSelectedRows() { + auto* selected_rows = var_.GetMutable(); + auto res = std::make_shared(selected_rows->rows_, + selected_rows->height_); + res->value_.reset(selected_rows->value_.release()); + res->id_to_index_ = std::move(selected_rows->id_to_index_); + res->rwlock_.reset(selected_rows->rwlock_.release()); + return res; + } - std::shared_ptr tensor_ = nullptr; + private: + std::string name_{""}; paddle::framework::Variable var_; }; } // namespace egr diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 54990140183..ac0e56f1776 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -209,7 +209,8 @@ const std::vector>& GradNodeBase::GetEdges() const { void GradNodeBase::RegisterGradientHook( size_t slot_id, size_t rank, - const std::function& hook) { + const std::function& hook) { gradient_hooks_.emplace_back(std::make_tuple(slot_id, rank, hook)); } @@ -217,14 +218,15 @@ void GradNodeBase::RegisterReduceHook(const std::function& hook) { reduce_hooks_.emplace_back(hook); } -std::vector> GradNodeBase::ApplyGradientHooks( - const std::vector>& tensors) { - std::vector> outs(tensors.size()); +std::vector> +GradNodeBase::ApplyGradientHooks( + const std::vector>& tensors) { + std::vector> outs(tensors.size()); for (auto& tuple : gradient_hooks_) { size_t slot_id = std::get<0>(tuple); size_t rank = std::get<1>(tuple); - std::function& hook = - std::get<2>(tuple); + std::function& hook = std::get<2>(tuple); PADDLE_ENFORCE(slot_id < tensors.size(), paddle::platform::errors::Fatal( @@ -237,9 +239,9 @@ std::vector> GradNodeBase::ApplyGradientHooks( "than rank size of grad_tensors", slot_id)); - std::vector& slot_out = outs[slot_id]; + std::vector& slot_out = outs[slot_id]; slot_out.resize(tensors[slot_id].size()); - egr::EagerTensor& out = slot_out[rank]; + paddle::experimental::Tensor& out = slot_out[rank]; if (!out.defined() || !out.initialized()) { VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name(); out = hook(tensors[slot_id][rank]); diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 3257c144970..dbfb3547a75 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -88,13 +88,13 @@ class GradNodeBase { * Tensor which contains grads input of current operator * * Note: why we need backward inputs and outputs construct as vector of vector - * of egr::EagerTensor? + * of paddle::experimental::Tensor? * Since all of paddle op composite in form of {"Slot name ", vector}, * so, vector of vector * is better choice to fit this format. 
* **/ - virtual std::vector> operator()( - const std::vector>& grads) = 0; + virtual std::vector> operator()( + const std::vector>& grads) = 0; /** * AddEdges is designed to set input tensors' backward Node as current @@ -135,9 +135,9 @@ class GradNodeBase { /** * Register GradientHook or ReduceHook * **/ - void RegisterGradientHook( - size_t slot_id, size_t rank, - const std::function& hook); + void RegisterGradientHook(size_t slot_id, size_t rank, + const std::function& hook); void RegisterReduceHook(const std::function& hook); /** @@ -146,8 +146,8 @@ class GradNodeBase { inline bool GradientHooksRegistered() { return gradient_hooks_.size() != 0; } inline bool ReduceHooksRegistered() { return reduce_hooks_.size() != 0; } - std::vector> ApplyGradientHooks( - const std::vector>& tensors); + std::vector> ApplyGradientHooks( + const std::vector>& tensors); void ApplyReduceHooks(); private: @@ -170,7 +170,8 @@ class GradNodeBase { // Each entry consists one pair of std::vector>> + /* hook */ std::function>> gradient_hooks_; std::vector> reduce_hooks_; }; diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 0183f88772f..90ae91db5f5 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -20,25 +20,9 @@ namespace egr { -static void FillUnderlyingVariableWithValue( - double value, const paddle::framework::DDim& ddim, - const paddle::platform::Place& place, - const paddle::framework::proto::VarType::Type& dtype, - egr::EagerTensor* target) { - auto* dst_tensor = - target->MutableVar()->GetMutable(); - auto* dev_ctx = paddle::platform::DeviceContextPool::Instance().Get(place); - dst_tensor->Resize(ddim); - // TOOD(jiabin): Ugly fix here we have fwd_data_type_ and data_type, since in - // grad mission - // we can't get data_type_ directly. We need to check if we can only use - // default data_type for now. - dst_tensor->mutable_data(place, dtype); - paddle::operators::math::set_constant(*dev_ctx, dst_tensor, value); -} - void GradTensorHolder::add(size_t slot_id, size_t rank, - const egr::EagerTensor& t, bool fill_one) { + const paddle::experimental::Tensor& t, + bool fill_one) { // TODO(jiabin): We need to deal with empty input_buffer with slot size not // empty; PADDLE_ENFORCE(slot_id < buffer_.size(), @@ -58,62 +42,52 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, "Invalid rank for GradTensorHolder::add() which exceeds size " "of buffer slot %d, got slot size is: %d rank is: %d", slot_id, buffer_[slot_id].size(), rank)); - egr::EagerTensor& buffer_tensor = buffer_[slot_id][rank]; if (!fill_one) { + paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank]; // TODO(jiabin): Code bellow is ugly to divide which inner var we used, // remove framework::Variable // related code later. // This if statement is trying to test neither pten::Tensor nor // framework::Variable is initialized. 
- if ((!buffer_tensor.defined() || !buffer_tensor.initialized()) && - (!buffer_tensor.Var().IsInitialized())) { + if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) { // Simply copy tensor->impl buffer_tensor = t; } else { // Accumulation - if (t.initialized() && buffer_tensor.initialized()) { - paddle::imperative::TensorAdd(t, &buffer_tensor); - } else if (t.Var().IsInitialized() && - buffer_tensor.Var().IsInitialized()) { - paddle::imperative::VariableAdd(t, &buffer_tensor); - } else if (t.Var().IsInitialized() && buffer_tensor.initialized()) { - // TODO(jiabin): This can be merge to upper if case. - buffer_tensor.SyncToVar(); - paddle::imperative::VariableAdd(t, &buffer_tensor); - } else if (t.initialized() && buffer_tensor.Var().IsInitialized()) { - buffer_tensor.SyncToTensor(); - paddle::imperative::TensorAdd(t, &buffer_tensor); + PADDLE_ENFORCE_EQ(t.initialized(), true, + paddle::platform::errors::Fatal( + "We can only accumulate initialized tensor, but we " + "got tensor: %s is empty please check you network " + "and make sure it creates grads.", + t.name())); + if (t.is_dense_tensor()) { + if (buffer_tensor.is_dense_tensor()) { + paddle::imperative::TensorAdd( + t, &buffer_tensor); + } else { + // TODO(jiabin): Support Other TensorBase later + paddle::experimental::Tensor new_buffer( + std::make_shared(), "tmp_accumulator"); + paddle::imperative::SelectedRowsAddTensor(buffer_tensor, t, + &new_buffer); + buffer_tensor.set_impl(new_buffer.impl()); + } } else { - // Should not happend case - // 1. both not init + // TODO(jiabin): Support Other TensorBase later + if (buffer_tensor.is_dense_tensor()) { + paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor); + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "We don't support Selected Rows merge for now, support it later " + "and make all kinds of grads can be merged.")); + } } } } else { // Create new tensor->impl and fill it with 1.0 if (t.defined()) { // Fill 1.0 - paddle::experimental::Tensor tensor = - paddle::experimental::ones_like(*t.Tensor().get()); - buffer_tensor.set_tensor( - std::make_shared(tensor)); - - } else { - // TODO(jiabin): Only Support LodTensorForNow - auto type = paddle::framework::ToVarType(t.Var().Type()); - switch (type) { - case paddle::framework::proto::VarType::LOD_TENSOR: { - auto t_ftensor = t.Var().Get(); - FillUnderlyingVariableWithValue(1.0, t_ftensor.dims(), - t_ftensor.place(), t_ftensor.type(), - &buffer_tensor); - break; - } - default: { - PADDLE_THROW(paddle::platform::errors::NotFound( - "Cannot found var type: %s in Fill Constant API", - paddle::framework::ToTypeName(type))); - } - } + buffer_[slot_id][rank] = paddle::experimental::ones_like(t); } } } diff --git a/paddle/fluid/eager/grad_tensor_holder.h b/paddle/fluid/eager/grad_tensor_holder.h index 5072447fa93..d66a81fe828 100644 --- a/paddle/fluid/eager/grad_tensor_holder.h +++ b/paddle/fluid/eager/grad_tensor_holder.h @@ -37,25 +37,27 @@ class GradTensorHolder { GradTensorHolder(const GradTensorHolder& other) = default; - explicit GradTensorHolder(std::vector>&& inputs) + explicit GradTensorHolder( + std::vector>&& inputs) : buffer_(std::move(inputs)) {} GradTensorHolder& operator=(const GradTensorHolder& other) = default; // Create new tensor and copy tensor->impl - void add(size_t slot_id, size_t rank, const egr::EagerTensor& t, + void add(size_t slot_id, size_t rank, const paddle::experimental::Tensor& t, bool fill_one = false); - const std::vector& operator[](const size_t& pos) { + const std::vector& 
operator[]( + const size_t& pos) { return buffer_[pos]; } - const std::vector>& Buffers() { + const std::vector>& Buffers() { return buffer_; } private: - std::vector> buffer_; + std::vector> buffer_; }; } // namespace egr diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index d760a76ec68..6cc17b0a9c5 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -33,7 +33,7 @@ namespace egr { class TensorWrapper { public: TensorWrapper() = default; - explicit TensorWrapper(const egr::EagerTensor& tensor, + explicit TensorWrapper(const paddle::experimental::Tensor& tensor, bool full_reserved = false) { /** * Normally, we should fully reserved all non-output or non-leaf fwd tensor @@ -49,7 +49,6 @@ class TensorWrapper { // shallow copy tensor_impl here intermidiate_tensor_.set_impl(tensor.impl()); - intermidiate_tensor_.ResetVar(tensor.Var()); intermidiate_tensor_.set_name(tensor.name() + "@Saved"); PADDLE_ENFORCE_NOT_NULL( EagerUtils::unsafe_autograd_meta(tensor), @@ -61,12 +60,13 @@ class TensorWrapper { out_rank_info_ = EagerUtils::OutRankInfo(tensor); } - egr::EagerTensor recover(const std::shared_ptr& grad_node) { - VLOG(6) << "Recover tensor for wrapper"; - if ((!intermidiate_tensor_.defined()) && - (!intermidiate_tensor_.Var().IsInitialized())) { + paddle::experimental::Tensor recover( + const std::shared_ptr& grad_node) { + VLOG(6) << "Recover tensor: " << intermidiate_tensor_.name() + << " for wrapper"; + if (!intermidiate_tensor_.defined()) { VLOG(6) << "Return NULL tensor Here. "; - return egr::EagerTensor(); + return paddle::experimental::Tensor(); } // if it's full_reserved just return the full copy of tensor @@ -86,6 +86,6 @@ class TensorWrapper { private: bool full_reserved_ = false; std::pair out_rank_info_; - egr::EagerTensor intermidiate_tensor_; + paddle::experimental::Tensor intermidiate_tensor_; }; } // namespace egr diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index 64f980d709a..01af22d5afb 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -27,7 +27,7 @@ // TODO(jiabin): remove nolint here!!! 
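The TensorWrapper change above preserves its save/recover contract while switching the stored type to paddle::experimental::Tensor: construction keeps a shallow copy of the tensor's impl under a "@Saved" name, and recover() hands back an empty Tensor when nothing was saved instead of consulting the old framework::Variable path. A small self-contained illustration of that contract, with a hypothetical Saved<T> type that is not the real wrapper:

#include <cassert>
#include <memory>
#include <string>
#include <utility>

// Hypothetical minimal analogue of the save/recover contract.
template <typename Impl>
class Saved {
 public:
  Saved() = default;
  Saved(std::shared_ptr<Impl> impl, std::string name)
      : impl_(std::move(impl)), name_(std::move(name) + "@Saved") {}

  // Returns a null handle when nothing was saved, mirroring how recover()
  // returns a default-constructed Tensor for an undefined wrapper.
  std::shared_ptr<Impl> recover() const { return impl_; }
  const std::string& name() const { return name_; }

 private:
  std::shared_ptr<Impl> impl_;  // shallow copy: shares the underlying buffer
  std::string name_;
};

int main() {
  Saved<int> empty;
  assert(empty.recover() == nullptr);  // nothing saved -> empty result

  auto impl = std::make_shared<int>(42);
  Saved<int> saved(impl, "x");
  assert(saved.recover() == impl);     // same impl, no deep copy
  assert(saved.name() == "x@Saved");
  return 0;
}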
using namespace egr; // NOLINT -TEST(AccumulationNode, EagerTensor) { +TEST(AccumulationNode, Tensor) { // Construct Eager Tensor pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT16, paddle::framework::make_ddim({1, 1})); @@ -38,7 +38,7 @@ TEST(AccumulationNode, EagerTensor) { meta); dt0->mutable_data( paddle::platform::CPUPlace())[0] = 10.0; - EagerTensor et0 = EagerTensor(dt0); + paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0); std::shared_ptr dt1 = std::make_shared( std::make_unique( @@ -48,7 +48,7 @@ TEST(AccumulationNode, EagerTensor) { dt1->mutable_data( paddle::platform::CPUPlace())[0] = 20.0; - EagerTensor et1 = EagerTensor(dt1); + paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); std::shared_ptr grad_dt = std::make_shared( @@ -56,34 +56,28 @@ TEST(AccumulationNode, EagerTensor) { paddle::platform::CPUPlace()) .get(), meta); - EagerTensor grad_et = EagerTensor(grad_dt); + paddle::experimental::Tensor grad_et = paddle::experimental::Tensor(grad_dt); // AccumulationNode GradNodeAccumulation node = GradNodeAccumulation(); // Hook - std::function hook = - [&grad_et](const egr::EagerTensor& t) { - if (t.defined()) { - grad_et.set_impl(t.impl()); - return grad_et; - } else { - grad_et.MutableVar() - ->GetMutable() - ->ShareDataWith(t.Var().Get()); - return grad_et; - } + std::function + hook = [&grad_et](const paddle::experimental::Tensor& t) { + grad_et.set_impl(t.impl()); + return grad_et; }; node.RetainGrad(hook); // operator() - EagerTensor ret_et0 = node({{et0}})[0][0]; + paddle::experimental::Tensor ret_et0 = node({{et0}})[0][0]; auto* ret_et0_ptr = std::dynamic_pointer_cast(ret_et0.impl()) ->data(); CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f)); - EagerTensor ret_et1 = node({{et1}})[0][0]; + paddle::experimental::Tensor ret_et1 = node({{et1}})[0][0]; auto* ret_et1_ptr = std::dynamic_pointer_cast(ret_et1.impl()) ->data(); diff --git a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc index 1c5102f7a21..ab4d8fadeee 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc @@ -22,7 +22,7 @@ #include "paddle/pten/api/lib/utils/allocator.h" TEST(AutogradMeta, Constructor) { - egr::EagerTensor et1; + paddle::experimental::Tensor et1; auto auto_grad = std::make_shared(); et1.set_autograd_meta(auto_grad); auto* tmp_auto = static_cast(et1.get_autograd_meta()); @@ -32,7 +32,7 @@ TEST(AutogradMeta, Constructor) { } TEST(AutogradMeta, MemberFunction) { - egr::EagerTensor et1; + paddle::experimental::Tensor et1; auto auto_grad = std::make_shared(); et1.set_autograd_meta(auto_grad); auto* tmp_auto = static_cast(et1.get_autograd_meta()); diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index 620fa52cac6..c27d1871e39 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -26,9 +26,9 @@ class AutogradMetaTest : public AbstractAutogradMeta { int val_ = 0; }; } -TEST(EagerTensor, Constructor) { - egr::EagerTensor et1 = egr::EagerTensor(); - egr::EagerTensor et2 = egr::EagerTensor("et2"); +TEST(Tensor, Constructor) { + paddle::experimental::Tensor et1 = paddle::experimental::Tensor(); + paddle::experimental::Tensor et2 = 
paddle::experimental::Tensor("et2"); CHECK_EQ(et1.defined(), false); CHECK_EQ(et2.name(), "et2"); @@ -43,26 +43,26 @@ TEST(EagerTensor, Constructor) { auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; dt_ptr[1] = 10.0f; - egr::EagerTensor et3 = egr::EagerTensor(dt); + paddle::experimental::Tensor et3 = paddle::experimental::Tensor(dt); auto* et3_ptr = std::dynamic_pointer_cast(et3.impl())->data(); CHECK_EQ(et3_ptr[0], 5.0f); CHECK_EQ(et3_ptr[1], 10.0f); // copy constructor - egr::EagerTensor et4(et3); + paddle::experimental::Tensor et4(et3); auto* et4_ptr = std::dynamic_pointer_cast(et4.impl())->data(); CHECK_EQ(et4_ptr[0], 5.0f); CHECK_EQ(et4_ptr[1], 10.0f); - egr::EagerTensor et5(std::move(et4)); + paddle::experimental::Tensor et5(std::move(et4)); auto* et5_ptr = std::dynamic_pointer_cast(et5.impl())->data(); CHECK_EQ(et5_ptr[0], 5.0f); CHECK_EQ(et5_ptr[1], 10.0f); } -TEST(EagerTensor, MemberFunction) { - egr::EagerTensor et3; +TEST(Tensor, MemberFunction) { + paddle::experimental::Tensor et3; pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( @@ -85,16 +85,16 @@ TEST(EagerTensor, MemberFunction) { CHECK_EQ(et3.is_cuda(), false); CHECK_EQ(et3.numel(), 2); auto expected_dim = paddle::framework::make_ddim({1, 2}); - CHECK_EQ(et3.shape(), expected_dim); + CHECK_EQ(et3.dims(), expected_dim); CHECK_EQ(et3.type(), paddle::experimental::DataType::FLOAT32); CHECK_EQ(et3.layout(), paddle::experimental::DataLayout::NCHW); - CHECK(paddle::platform::is_cpu_place(et3.place())); + CHECK(paddle::platform::is_cpu_place(et3.inner_place())); VLOG(6) << "Get impl"; auto* dt3_ptr = std::dynamic_pointer_cast(et3.impl())->data(); CHECK_EQ(dt3_ptr[0], 5.0f); CHECK_EQ(dt3_ptr[1], 10.0f); - egr::EagerTensor et4 = et3; + paddle::experimental::Tensor et4 = et3; VLOG(6) << "copy ="; CHECK(et4.initialized() == true); auto* dt4_ptr = @@ -102,7 +102,7 @@ TEST(EagerTensor, MemberFunction) { CHECK_EQ(dt4_ptr[0], 5.0f); CHECK_EQ(dt4_ptr[1], 10.0f); VLOG(6) << "move ="; - egr::EagerTensor et5 = std::move(et4); + paddle::experimental::Tensor et5 = std::move(et4); auto* dt5_ptr = std::dynamic_pointer_cast(et5.impl())->data(); CHECK_EQ(dt5_ptr[0], 5.0f); @@ -113,22 +113,43 @@ TEST(EagerTensor, MemberFunction) { auto* tmp_autograd_meta_test = static_cast(et3.get_autograd_meta()); CHECK_EQ(tmp_autograd_meta_test->val_, 2); +} + +TEST(EagerTensor, Constructor) { + paddle::experimental::Tensor t3; + pten::DenseTensorMeta meta = pten::DenseTensorMeta( + pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); + std::shared_ptr dt = std::make_shared( + std::make_unique( + paddle::platform::CPUPlace()) + .get(), + meta); + auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); + dt_ptr[0] = 5.0f; + dt_ptr[1] = 10.0f; + VLOG(6) << "Make Dense Tensor"; + t3.set_name("t3"); + VLOG(6) << "Set Name"; + CHECK_EQ(t3.name(), "t3"); + CHECK_EQ(t3.defined(), false); + t3.set_impl(dt); + + egr::EagerTensor et3 = egr::EagerTensor(t3); VLOG(6) << "SyncToVar"; - et3.SyncToVar(); CHECK_EQ(et3.Var().Get().data()[0], 5.0f); CHECK_EQ(et3.Var().Get().data()[1], 10.0f); VLOG(6) << "SyncToTensor"; - CHECK(et3.initialized() == true); - et3.SyncToTensor(); - CHECK(et3.initialized() == true); + paddle::experimental::Tensor t4; + t4.set_impl(et3.GetTensorBase()); + CHECK(t4.initialized() == true); VLOG(6) << "Check Tensor"; auto* dt3_tmp_ptr = - std::dynamic_pointer_cast(et3.impl())->data(); + 
std::dynamic_pointer_cast(t4.impl())->data(); CHECK_EQ(dt3_tmp_ptr[0], 5.0f); CHECK_EQ(dt3_tmp_ptr[1], 10.0f); - et3.reset(); - CHECK(et3.defined() == false); + t4.reset(); + CHECK(t4.defined() == false); VLOG(6) << "Finish"; } diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index fbec5cc568f..780d99bc5c2 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -37,7 +37,7 @@ TEST(GradNodeInfo, GradNodeBase) { auto grad_test_node0 = std::make_shared( /* val */ 5.0, /* in_num */ 2, /* out_num */ 2); auto grad_test_node1 = std::make_shared(); - std::vector> grads; + std::vector> grads; pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( @@ -47,7 +47,7 @@ TEST(GradNodeInfo, GradNodeBase) { meta); auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 5.0f; - egr::EagerTensor et1(dt); + paddle::experimental::Tensor et1(dt); grads = {{et1}}; VLOG(6) << "Test Grad Node Call"; auto res = (*grad_test_node0)(grads); @@ -93,8 +93,9 @@ TEST(GradNodeInfo, GradNodeBase) { CHECK_EQ(grad_test_node2->OutputMeta()[0].Size(), 1); VLOG(6) << "Test Gradient Hook"; - auto gradient_hook = [](const egr::EagerTensor& et) -> egr::EagerTensor { - egr::EagerTensor res; + auto gradient_hook = []( + const paddle::experimental::Tensor& et) -> paddle::experimental::Tensor { + paddle::experimental::Tensor res; pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index bf9c3a93e16..3c19726d1f5 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -30,8 +30,9 @@ class GradTestNode : public egr::GradNodeBase { GradTestNode(float val, int in_num, int out_num) : GradNodeBase(in_num, out_num), val_(val) {} GradTestNode() : GradNodeBase() { val_ = 1.0; } - std::vector> operator()( - const std::vector>& grads) override { + std::vector> operator()( + const std::vector>& grads) + override { val_ = std::dynamic_pointer_cast(grads[0][0].impl()) ->data()[0]; pten::DenseTensorMeta meta = pten::DenseTensorMeta( @@ -43,8 +44,8 @@ class GradTestNode : public egr::GradNodeBase { meta); auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 6.0f; - egr::EagerTensor et1(dt); - std::vector> res = {{et1}}; + paddle::experimental::Tensor et1(dt); + std::vector> res = {{et1}}; return res; } float val_; diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index c2830bf7ef6..b771ff28d8e 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -40,9 +40,9 @@ TEST(GradTensorHolder, Constructor) { paddle::platform::CPUPlace()) .get(), meta); - EagerTensor et = EagerTensor(dt); + paddle::experimental::Tensor et = paddle::experimental::Tensor(dt); - std::vector> inputs; + std::vector> inputs; inputs.push_back({et}); GradTensorHolder grad_tensor_holder4 = 
GradTensorHolder(std::move(inputs)); @@ -58,7 +58,7 @@ TEST(GradTensorHolder, Interfaces) { .get(), meta); dt0->mutable_data(paddle::platform::CPUPlace())[0] = 10.0; - EagerTensor et0 = EagerTensor(dt0); + paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0); std::shared_ptr dt1 = std::make_shared( std::make_unique( @@ -66,7 +66,7 @@ TEST(GradTensorHolder, Interfaces) { .get(), meta); dt1->mutable_data(paddle::platform::CPUPlace())[0] = 20.0; - EagerTensor et1 = EagerTensor(dt1); + paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); // Constructor empty GradTensorHolder GradSlotMeta slot_meta; diff --git a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc index 742a64ecec2..09ef8509510 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc @@ -21,7 +21,7 @@ TEST(TensorWrapper, Basic) { VLOG(6) << "Test Full reserved"; - egr::EagerTensor et1; + paddle::experimental::Tensor et1; pten::DenseTensorMeta meta = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( @@ -48,7 +48,7 @@ TEST(TensorWrapper, Basic) { CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et1).second, egr::EagerUtils::OutRankInfo(et1).second); VLOG(6) << "Test reconstruct"; - egr::EagerTensor et2; + paddle::experimental::Tensor et2; pten::DenseTensorMeta meta2 = pten::DenseTensorMeta( pten::DataType::FLOAT32, paddle::framework::make_ddim({1, 2})); std::shared_ptr dt2 = std::make_shared( @@ -74,7 +74,7 @@ TEST(TensorWrapper, Basic) { CHECK_EQ(egr::EagerUtils::OutRankInfo(recover_et2).second, egr::EagerUtils::OutRankInfo(et2).second); // Test Raw recover - egr::EagerTensor et3; + paddle::experimental::Tensor et3; auto tw2 = egr::TensorWrapper(et3, true); CHECK( tw2.recover(std::make_shared()).initialized() == diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc index d71d78b5d9d..176a02d8963 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc @@ -42,7 +42,7 @@ TEST(Benchmark, EagerScaleCPU) { for (const std::string& mode : {"Accuracy", "Performance"}) { paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4}); - egr::EagerTensor tensor = CreateTensorWithValue( + paddle::experimental::Tensor tensor = CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0, true); RetainGradForTensor(tensor); @@ -81,13 +81,13 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) { for (const std::string& mode : {"Accuracy", "Performance"}) { paddle::framework::DDim ddimX = paddle::framework::make_ddim({2, 2}); - egr::EagerTensor X = CreateTensorWithValue( + paddle::experimental::Tensor X = CreateTensorWithValue( ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0, true); RetainGradForTensor(X); paddle::framework::DDim ddimY = paddle::framework::make_ddim({2, 2}); - egr::EagerTensor Y = CreateTensorWithValue( + paddle::experimental::Tensor Y = CreateTensorWithValue( ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 2.0, true); RetainGradForTensor(Y); @@ -126,23 +126,23 @@ TEST(Benchmark, EagerIntermediateMLPCPU) { for (const 
std::string& mode : {"Accuracy", "Performance"}) { paddle::framework::DDim ddimX = paddle::framework::make_ddim({MLP_M, MLP_N}); - egr::EagerTensor X = CreateTensorWithValue( + paddle::experimental::Tensor X = CreateTensorWithValue( ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, MLP_X_VAL, true); RetainGradForTensor(X); - std::vector Ws; - std::vector Bs; + std::vector Ws; + std::vector Bs; for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { paddle::framework::DDim ddimW = paddle::framework::make_ddim({MLP_N, MLP_K}); - egr::EagerTensor W = CreateTensorWithValue( + paddle::experimental::Tensor W = CreateTensorWithValue( ddimW, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, MLP_W_VAL, true); RetainGradForTensor(W); paddle::framework::DDim ddimB = paddle::framework::make_ddim({MLP_K}); - egr::EagerTensor B = CreateTensorWithValue( + paddle::experimental::Tensor B = CreateTensorWithValue( ddimB, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, MLP_B_VAL, true); RetainGradForTensor(B); diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc index 640ee0152ef..d2bc05f41b5 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc @@ -42,7 +42,7 @@ TEST(Benchmark, EagerScaleCUDA) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4}); - egr::EagerTensor tensor = CreateTensorWithValue( + paddle::experimental::Tensor tensor = CreateTensorWithValue( ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); RetainGradForTensor(tensor); @@ -84,13 +84,13 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { paddle::framework::DDim ddimX = paddle::framework::make_ddim({2, 2}); - egr::EagerTensor X = CreateTensorWithValue( + paddle::experimental::Tensor X = CreateTensorWithValue( ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0, true); RetainGradForTensor(X); paddle::framework::DDim ddimY = paddle::framework::make_ddim({2, 2}); - egr::EagerTensor Y = CreateTensorWithValue( + paddle::experimental::Tensor Y = CreateTensorWithValue( ddimY, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 2.0, true); RetainGradForTensor(Y); @@ -133,23 +133,23 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) { for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { paddle::framework::DDim ddimX = paddle::framework::make_ddim({MLP_M, MLP_N}); - egr::EagerTensor X = CreateTensorWithValue( + paddle::experimental::Tensor X = CreateTensorWithValue( ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, MLP_X_VAL, true); RetainGradForTensor(X); - std::vector Ws; - std::vector Bs; + std::vector Ws; + std::vector Bs; for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { paddle::framework::DDim ddimW = paddle::framework::make_ddim({MLP_N, MLP_K}); - egr::EagerTensor W = CreateTensorWithValue( + paddle::experimental::Tensor W = CreateTensorWithValue( ddimW, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, MLP_W_VAL, true); RetainGradForTensor(W); paddle::framework::DDim ddimB = 
paddle::framework::make_ddim({MLP_K}); - egr::EagerTensor B = CreateTensorWithValue( + paddle::experimental::Tensor B = CreateTensorWithValue( ddimB, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, MLP_B_VAL, true); RetainGradForTensor(B); diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc index b50d7713d30..96126fa5466 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc @@ -43,8 +43,9 @@ namespace egr { /* --------------------- */ /* ---- Eager Scale ---- */ /* --------------------- */ -void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) { - EagerTensor input_tensor = tensor; +void benchmark_eager_scale(const paddle::experimental::Tensor& tensor, + bool accuracy_check) { + paddle::experimental::Tensor input_tensor = tensor; float scale = 2.0; float bias = 3.0; @@ -55,7 +56,7 @@ void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) { true /*trace_backward*/); } - std::vector target_tensors = {input_tensor}; + std::vector target_tensors = {input_tensor}; RunBackward(target_tensors, {}); if (accuracy_check) { @@ -69,10 +70,10 @@ void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) { /* ----------------------------------- */ /* ---- Eager Intermediate Matmul ---- */ /* ----------------------------------- */ -void benchmark_eager_intermediate_matmul(const EagerTensor& X, - const EagerTensor& Y, +void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X, + const paddle::experimental::Tensor& Y, bool accuracy_check) { - EagerTensor input_tensor0 = X; + paddle::experimental::Tensor input_tensor0 = X; size_t max_num_runs = accuracy_check ? 
2 : max_num_benchmark_runs; for (size_t i = 0; i < max_num_runs; i++) { @@ -80,12 +81,12 @@ void benchmark_eager_intermediate_matmul(const EagerTensor& X, input_tensor0, Y, {{"trans_x", false}, {"trans_y", false}}); } - std::vector target_tensors = {input_tensor0}; + std::vector target_tensors = {input_tensor0}; RunBackward(target_tensors, {}); if (accuracy_check) { // Examine Forward Grad (w.r.t max_num_runs = 2) - eager_test::CompareVariableWithValue(input_tensor0, 16); + eager_test::CompareTensorWithValue(input_tensor0, 16); // Examine Backward Grad (w.r.t max_num_runs = 2) eager_test::CompareGradTensorWithValue(X, 16); eager_test::CompareGradTensorWithValue(Y, 16); @@ -95,22 +96,23 @@ void benchmark_eager_intermediate_matmul(const EagerTensor& X, /* -------------------------------- */ /* ---- Eager Intermediate MLP ---- */ /* -------------------------------- */ -void benchmark_eager_intermediate_mlp(const EagerTensor& X, - const std::vector& Ws, - const std::vector& Bs, - bool accuracy_check) { - EagerTensor input0 = X; +void benchmark_eager_intermediate_mlp( + const paddle::experimental::Tensor& X, + const std::vector& Ws, + const std::vector& Bs, bool accuracy_check) { + paddle::experimental::Tensor input0 = X; for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { - EagerTensor Out = matmul_v2_dygraph_function( + paddle::experimental::Tensor Out = matmul_v2_dygraph_function( input0, Ws[i], {{"trans_x", false}, {"trans_y", false}}); input0 = elementwise_add_dygraph_function(Out, Bs[i], {}); } - EagerTensor Out = reduce_sum_dygraph_function(input0, {{"reduce_all", true}}); + paddle::experimental::Tensor Out = + reduce_sum_dygraph_function(input0, {{"reduce_all", true}}); - std::vector target_tensors = {Out}; + std::vector target_tensors = {Out}; RunBackward(target_tensors, {}); if (accuracy_check) { @@ -118,7 +120,7 @@ void benchmark_eager_intermediate_mlp(const EagerTensor& X, compute_mlp_expected_results(); // Examine Forward Grad (w.r.t max_num_runs = 2) - eager_test::CompareVariableWithValue(Out, result["Out"]); + eager_test::CompareTensorWithValue(Out, result["Out"]); // Examine Backward Grad (w.r.t max_num_runs = 2) eager_test::CompareGradTensorWithValue(X, result["GradX"]); diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h index 70ecf2af8e4..bce71761c1a 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h @@ -47,26 +47,28 @@ inline std::unordered_map compute_mlp_expected_results() { } /* ---- Eager Scale ---- */ -void benchmark_eager_scale(const EagerTensor& tensor, +void benchmark_eager_scale(const paddle::experimental::Tensor& tensor, bool accuracy_check = false); /* ---- Eager MatMul ---- */ /* -void benchmark_eager_matmul(const EagerTensor& X, const EagerTensor& Y, +void benchmark_eager_matmul(const paddle::experimental::Tensor& X, const +paddle::experimental::Tensor& Y, bool accuracy_check = false); -void benchmark_eager_mlp(const EagerTensor& X, - const std::vector& Ws, - const std::vector& Bs, +void benchmark_eager_mlp(const paddle::experimental::Tensor& X, + const std::vector& Ws, + const std::vector& Bs, bool accuracy_check = false); */ -void benchmark_eager_intermediate_matmul(const EagerTensor& X, - const EagerTensor& Y, +void benchmark_eager_intermediate_matmul(const paddle::experimental::Tensor& X, + const paddle::experimental::Tensor& Y, bool accuracy_check = false); -void 
benchmark_eager_intermediate_mlp(const EagerTensor& X, - const std::vector& Ws, - const std::vector& Bs, - bool accuracy_check = false); +void benchmark_eager_intermediate_mlp( + const paddle::experimental::Tensor& X, + const std::vector& Ws, + const std::vector& Bs, + bool accuracy_check = false); } // namespace egr diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc index 8f0e6cc5e41..74dba986579 100644 --- a/paddle/fluid/eager/tests/task_tests/backward_test.cc +++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc @@ -40,11 +40,12 @@ TEST(Backward, SingleNodeEmptyGrad) { paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor target_tensor = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, - pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor target_tensor = + egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); - egr::EagerTensor leaf_tensor; + paddle::experimental::Tensor leaf_tensor; { // Create Scale Node auto node0_ptr = std::make_shared(1, 1); @@ -76,7 +77,7 @@ TEST(Backward, SingleNodeEmptyGrad) { std::vector res = {&meta}; node0_ptr->AddEdges(&res, 0); } - std::vector outs = {target_tensor}; + std::vector outs = {target_tensor}; // Run Backward RunBackward(outs, {}); @@ -89,23 +90,24 @@ TEST(Backward, SingleNodeCustomGrad) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); - std::vector grad_tensors; + std::vector grad_tensors; // Create Grad Tensor - egr::EagerTensor grad_tensor = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, - pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor grad_tensor = + egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); grad_tensors.emplace_back(std::move(grad_tensor)); - egr::EagerTensor leaf_tensor; + paddle::experimental::Tensor leaf_tensor; { // Create Scale Node auto node0_ptr = std::make_shared(1, 1); @@ -159,16 +161,16 @@ TEST(Backward, LinearNodes) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); - egr::EagerTensor leaf_tensor; + paddle::experimental::Tensor leaf_tensor; { // Create Node0 auto node0_ptr = std::make_shared(1, 1); @@ -240,28 
+242,30 @@ TEST(Backward, WithAccumulation) { paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - std::vector target_tensors; - egr::EagerTensor tensor0 = egr_utils_api::CreateTensorWithValue( + std::vector target_tensors; + paddle::experimental::Tensor tensor0 = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); - egr::EagerTensor tensor1 = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor1 = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor0)); target_tensors.emplace_back(std::move(tensor1)); // Create Grad Tensor - std::vector grad_tensors; - egr::EagerTensor grad_tensor0 = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, - pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); - egr::EagerTensor grad_tensor1 = egr_utils_api::CreateTensorWithValue( - ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, - pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); + std::vector grad_tensors; + paddle::experimental::Tensor grad_tensor0 = + egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + paddle::experimental::Tensor grad_tensor1 = + egr_utils_api::CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/); grad_tensors.emplace_back(std::move(grad_tensor0)); grad_tensors.emplace_back(std::move(grad_tensor1)); - egr::EagerTensor leaf_tensor; + paddle::experimental::Tensor leaf_tensor; { // Create Node0 auto node0_ptr = std::make_shared(1, 1); diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc index 523f7102af0..dce4566bfea 100644 --- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc +++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc @@ -36,16 +36,16 @@ namespace egr { TEST(CrossBatchAccumulation, SingleScaleNode) { eager_test::InitEnv(paddle::platform::CPUPlace()); - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); - egr::EagerTensor& target_tensor = target_tensors[0]; + paddle::experimental::Tensor& target_tensor = target_tensors[0]; - egr::EagerTensor leaf_tensor = egr::EagerTensor(); + paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor(); { auto scale_node_ptr = std::make_shared(1, 1); scale_node_ptr->SetAttributes_scale(5.0 /*scale*/); diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index 1f649f23906..c11bd94ee93 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -36,7 +36,7 @@ 
TEST(EagerUtils, AutoGradMeta) { .get(), meta); dt0->mutable_data(paddle::platform::CPUPlace())[0] = 10.0; - EagerTensor et0 = EagerTensor(dt0); + paddle::experimental::Tensor et0 = paddle::experimental::Tensor(dt0); std::shared_ptr dt1 = std::make_shared( std::make_unique( @@ -44,10 +44,7 @@ TEST(EagerUtils, AutoGradMeta) { .get(), meta); dt1->mutable_data(paddle::platform::CPUPlace())[0] = 20.0; - EagerTensor et1 = EagerTensor(dt1); - - std::vector ets = {et0, et1}; - auto test_node = std::make_shared(); + paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1); // unsafe_autograd_meta() // autograd_meta() @@ -58,6 +55,10 @@ TEST(EagerUtils, AutoGradMeta) { EagerUtils::unsafe_autograd_meta(et0); CHECK_NOTNULL(unsafe_autograd_meta_after); + // NOTE: Since autograd_meta will be copied make sure it's not null + std::vector ets = {et0, et1}; + auto test_node = std::make_shared(); + std::vector autograd_metas = EagerUtils::autograd_meta(&ets); std::vector unsafe_autograd_metas = EagerUtils::unsafe_autograd_meta(ets); @@ -100,11 +101,11 @@ TEST(EagerUtils, AutoGradMeta) { } template -egr::EagerTensor CreateTestCPUTensor(T val, - const paddle::framework::DDim& ddim) { +paddle::experimental::Tensor CreateTestCPUTensor( + T val, const paddle::framework::DDim& ddim) { pten::DenseTensorMeta meta = pten::DenseTensorMeta(pten::DataType::FLOAT32, ddim); - egr::EagerTensor tensor; + paddle::experimental::Tensor tensor; std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -163,11 +164,11 @@ TEST(EagerUtils, PassStopGradient) { CHECK(auto_grad3->StopGradient() == true); } -TEST(EagerUtils, SyncToVarsSingle) { +TEST(EagerUtils, TrySyncToVar) { paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4}); auto tensor = CreateTestCPUTensor(5.0f, ddim); - std::vector> var_bases = - egr::EagerUtils::SyncToVars(tensor); + std::vector> var_bases = { + egr::EagerUtils::TrySyncToVar(tensor)}; paddle::framework::Variable* var = var_bases[0]->MutableVar(); const auto& framework_tensor = var->Get(); @@ -181,13 +182,13 @@ TEST(EagerUtils, SyncToVarsSingle) { } } -TEST(EagerUtils, SyncToVarsMultiple) { +TEST(EagerUtils, TrySyncToVars) { paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4}); - std::vector tensors = {CreateTestCPUTensor(1.0f, ddim), - CreateTestCPUTensor(2.0f, ddim)}; + std::vector tensors = { + CreateTestCPUTensor(1.0f, ddim), CreateTestCPUTensor(2.0f, ddim)}; std::vector> var_bases = - egr::EagerUtils::SyncToVars(tensors); + egr::EagerUtils::TrySyncToVars(tensors); { paddle::framework::Variable* var = var_bases[0]->MutableVar(); @@ -215,66 +216,12 @@ TEST(EagerUtils, SyncToVarsMultiple) { } } -TEST(EagerUtils, SyncToTensorSingle) { - std::shared_ptr X(new egr::EagerTensor()); - std::vector src_data(128, 5.0); - std::vector dims = {2, 4, 4, 4}; - paddle::platform::CPUPlace place; - - auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(paddle::framework::make_ddim(dims)); - auto* mutable_x = x_tensor->mutable_data(place); - paddle::memory::Copy(place, mutable_x, place, src_data.data(), - sizeof(float) * src_data.size()); - auto X_ = egr::EagerUtils::SyncToTensors(*(X.get())); - egr::EagerTensor tensor = egr::EagerUtils::GetOutput(X_[0]); - VLOG(6) << "Check Value for SyncToTensorSingle"; - CHECK(eager_test::CompareTensorWithValue(tensor, 5.0)); -} - -TEST(EagerUtils, SyncToTensorMultiple) { - eager_test::InitEnv(paddle::platform::CPUPlace()); - std::vector dims = {2, 4, 4, 4}; - paddle::platform::CPUPlace 
place; - - std::vector egr_tensors; - { - auto egr_tensor = egr::EagerTensor(); - std::vector src_data(128, 1.0); - auto* x_tensor = - egr_tensor.MutableVar()->GetMutable(); - x_tensor->Resize(paddle::framework::make_ddim(dims)); - auto* mutable_x = x_tensor->mutable_data(place); - paddle::memory::Copy(place, mutable_x, place, src_data.data(), - sizeof(float) * src_data.size()); - egr_tensors.emplace_back(egr_tensor); - } - { - auto egr_tensor = egr::EagerTensor(); - std::vector src_data(128, 2.0); - auto* x_tensor = - egr_tensor.MutableVar()->GetMutable(); - x_tensor->Resize(paddle::framework::make_ddim(dims)); - auto* mutable_x = x_tensor->mutable_data(place); - paddle::memory::Copy(place, mutable_x, place, src_data.data(), - sizeof(float) * src_data.size()); - egr_tensors.emplace_back(std::move(egr_tensor)); - } - std::vector tensors = - egr::EagerUtils::GetOutputs(egr::EagerUtils::SyncToTensors(egr_tensors)); - - VLOG(6) << "Check Value for SyncToTensorMultiple"; - CHECK(eager_test::CompareTensorWithValue(tensors[0], 1.0) == true); - CHECK(eager_test::CompareTensorWithValue(tensors[1], 2.0) == true); -} - -TEST(EagerUtils, ConstructDuplicableOutput) { - VLOG(6) << "Check ConstructDuplicableOutput"; +TEST(EagerUtils, CreateVars) { + VLOG(6) << "Check CreateVars"; std::vector> outs = - egr::EagerUtils::ConstructDuplicableOutput(2); + egr::EagerUtils::CreateVars(2); CHECK_EQ(outs.size(), size_t(2)); - CHECK(outs[0]->defined() == false); - CHECK(outs[0]->initialized() == false); + CHECK(outs[0]->Var().IsInitialized() == false); } } // namespace egr diff --git a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc index 205f231ecee..ea5ebc934fa 100644 --- a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc +++ b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc @@ -34,21 +34,21 @@ TEST(Forward, SingleNode) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor t = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor t = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(t)); - egr::EagerTensor& tensor = target_tensors[0]; + paddle::experimental::Tensor& tensor = target_tensors[0]; EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); // Run Forward float scale = 2.0; float bias = 3.0; - egr::EagerTensor out = egr::scale( + paddle::experimental::Tensor out = egr::scale( tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output @@ -80,28 +80,28 @@ TEST(Forward, LinearNodes) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor t = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor t = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(t)); - egr::EagerTensor& tensor = target_tensors[0]; + paddle::experimental::Tensor& tensor = target_tensors[0]; 
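As a sanity check for the scale-based tests in this patch: with bias_after_scale = true the op computes out = scale * x + bias, so a value-5 input through scale 2 and bias 3 yields 13, and feeding that result through scale 5 and bias 10 yields 75, which are the constants the forward and joint forward/backward tests below compare against. A tiny sketch of that arithmetic (plain C++, not the eager API):

#include <cassert>

// bias_after_scale = true means out = scale * x + bias.
float ScaleOp(float x, float scale, float bias) { return scale * x + bias; }

int main() {
  float out0 = ScaleOp(5.0f, 2.0f, 3.0f);   // 13, as in CompareTensorWithValue(out0, 13.0)
  float out1 = ScaleOp(out0, 5.0f, 10.0f);  // 75, as in CompareTensorWithValue(out1, 75.0)
  assert(out0 == 13.0f && out1 == 75.0f);
  return 0;
}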
EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output 0 @@ -156,34 +156,34 @@ TEST(Forward, BranchedNodes) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor t = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor t = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(t)); - egr::EagerTensor& tensor = target_tensors[0]; + paddle::experimental::Tensor& tensor = target_tensors[0]; EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 2 float scale2 = 10.0; float bias2 = 20.0; - egr::EagerTensor out2 = egr::scale( + paddle::experimental::Tensor out2 = egr::scale( out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output 0 diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc index 45b7b800495..c77910766e0 100644 --- a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -31,7 +31,8 @@ namespace egr { -egr::EagerTensor hook_function(const egr::EagerTensor& t) { +paddle::experimental::Tensor hook_function( + const paddle::experimental::Tensor& t) { auto t_dense = std::dynamic_pointer_cast(t.impl()); auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(), @@ -51,7 +52,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { } auto ret_impl = std::dynamic_pointer_cast(ret_dense); - egr::EagerTensor ret = egr::EagerTensor(); + paddle::experimental::Tensor ret = paddle::experimental::Tensor(); ret.set_impl(ret_impl); return ret; @@ -62,7 +63,7 @@ TEST(FwdBwdJoint, SingleNode) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); @@ -70,13 +71,13 @@ TEST(FwdBwdJoint, SingleNode) { // 3. 
Run Forward float scale = 2.0; float bias = 3.0; - egr::EagerTensor out = egr::scale( + paddle::experimental::Tensor out = egr::scale( tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output eager_test::CompareTensorWithValue(out, 13.0); - std::vector outs = {out}; + std::vector outs = {out}; // 4. Run Backward RunBackward(outs, {}); @@ -102,7 +103,7 @@ TEST(FwdBwdJoint, LinearNodes) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); @@ -111,14 +112,14 @@ TEST(FwdBwdJoint, LinearNodes) { // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output 0 @@ -127,7 +128,7 @@ TEST(FwdBwdJoint, LinearNodes) { // Examine Forward Output 1 eager_test::CompareTensorWithValue(out1, 75.0); - std::vector outs = {out1}; + std::vector outs = {out1}; // 4. Run Backward RunBackward(outs, {}); @@ -150,7 +151,7 @@ TEST(FwdBwdJoint, BranchedNodes) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); @@ -159,20 +160,20 @@ TEST(FwdBwdJoint, BranchedNodes) { // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 2 float scale2 = 10.0; float bias2 = 20.0; - egr::EagerTensor out2 = egr::scale( + paddle::experimental::Tensor out2 = egr::scale( out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output 0 @@ -194,7 +195,7 @@ TEST(FwdBwdJoint, BranchedNodes) { } // 4. Run Backward - std::vector outs = {out1, out2}; + std::vector outs = {out1, out2}; RunBackward(outs, {}); // Examine Backward Grad @@ -216,19 +217,20 @@ TEST(FwdBwdJoint, GradientHook) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); - std::function hook = - &hook_function; + std::function + hook = &hook_function; // 3. 
Run Forward // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); egr_utils_api::RetainGradForTensor(out0); // hook: +5 @@ -237,7 +239,7 @@ TEST(FwdBwdJoint, GradientHook) { // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); egr_utils_api::RetainGradForTensor(out1); // hook: +5 egr_utils_api::RegisterGradientHookForTensor(out1, hook); // hook: +5 @@ -245,13 +247,13 @@ TEST(FwdBwdJoint, GradientHook) { // Run Forward Node 2 float scale2 = 10.0; float bias2 = 20.0; - egr::EagerTensor out2 = egr::scale( + paddle::experimental::Tensor out2 = egr::scale( out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); egr_utils_api::RetainGradForTensor(out2); // hook: +5 egr_utils_api::RegisterGradientHookForTensor(out2, hook); // hook: +5 // 4. Run Backward - std::vector outs = {out1, out2}; + std::vector outs = {out1, out2}; RunBackward(outs, {}); // Examine Backward Grad @@ -283,7 +285,7 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); @@ -292,24 +294,24 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) { // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 2 float scale2 = 10.0; float bias2 = 20.0; - egr::EagerTensor out2 = egr::scale( + paddle::experimental::Tensor out2 = egr::scale( out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); // 4. Run Backward - std::vector outs = {out1, out2}; + std::vector outs = {out1, out2}; RunBackward(outs, {}); // Examine Backward Grad @@ -332,7 +334,7 @@ TEST(FwdBwdJoint, SingleNodeCUDA) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); @@ -340,13 +342,13 @@ TEST(FwdBwdJoint, SingleNodeCUDA) { // 3. Run Forward float scale = 2.0; float bias = 3.0; - egr::EagerTensor out = egr::scale( + paddle::experimental::Tensor out = egr::scale( tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output eager_test::CompareTensorWithValue(out, 13.0); - std::vector outs = {out}; + std::vector outs = {out}; // 4. Run Backward RunBackward(outs, {}); @@ -369,7 +371,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) { // 1. 
Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); egr_utils_api::RetainGradForTensor(tensor); @@ -378,20 +380,20 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) { // Run Forward Node 0 float scale0 = 2.0; float bias0 = 3.0; - egr::EagerTensor out0 = + paddle::experimental::Tensor out0 = egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 1 float scale1 = 5.0; float bias1 = 10.0; - egr::EagerTensor out1 = egr::scale( + paddle::experimental::Tensor out1 = egr::scale( out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); // Run Forward Node 2 float scale2 = 10.0; float bias2 = 20.0; - egr::EagerTensor out2 = egr::scale( + paddle::experimental::Tensor out2 = egr::scale( out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); // Examine Forward Output 0 @@ -403,7 +405,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) { // TODO(jiabin): fix this with add functor // 4. Run Backward - std::vector outs = {out1, out2}; + std::vector outs = {out1, out2}; RunBackward(outs, {}); // Examine Backward Grad diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc index b5ce9223f6c..5b95b43edea 100644 --- a/paddle/fluid/eager/tests/task_tests/generated_test.cc +++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc @@ -39,17 +39,17 @@ TEST(Generated, Sigmoid) { // 1. Prepare Input paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 4, 4, 4}); VLOG(6) << "Make Dim"; - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 0.0, true); - VLOG(6) << "Make EagerTensor"; + VLOG(6) << "Make paddle::experimental::Tensor"; egr_utils_api::RetainGradForTensor(tensor); VLOG(6) << "Retain Grad for Tensor"; auto output_tensor = sigmoid_dygraph_function(tensor, {}); VLOG(6) << "Run Backward"; - eager_test::CompareVariableWithValue(output_tensor, 0.5); + eager_test::CompareTensorWithValue(output_tensor, 0.5); - std::vector target_tensors = {output_tensor}; + std::vector target_tensors = {output_tensor}; VLOG(6) << "Runing Backward"; RunBackward(target_tensors, {}); @@ -66,13 +66,13 @@ TEST(Generated, Matmul_v2) { // 1. 
Prepare Input paddle::framework::DDim ddimX = paddle::framework::make_ddim({4, 16}); - egr::EagerTensor X = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue( ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 3.0, true); egr_utils_api::RetainGradForTensor(X); paddle::framework::DDim ddimY = paddle::framework::make_ddim({16, 20}); - egr::EagerTensor Y = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue( ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 2.0, true); egr_utils_api::RetainGradForTensor(Y); @@ -80,9 +80,9 @@ TEST(Generated, Matmul_v2) { auto output_tensor = matmul_v2_dygraph_function( X, Y, {{"trans_x", false}, {"trans_y", false}}); - eager_test::CompareVariableWithValue(output_tensor, 96); + eager_test::CompareTensorWithValue(output_tensor, 96); - std::vector target_tensors = {output_tensor}; + std::vector target_tensors = {output_tensor}; RunBackward(target_tensors, {}); eager_test::CompareGradTensorWithValue(X, 2.0 * 20); @@ -98,22 +98,22 @@ TEST(Generated, ElementwiseAdd) { // 1. Prepare Input paddle::framework::DDim ddimX = paddle::framework::make_ddim({4, 16}); - egr::EagerTensor X = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor X = egr_utils_api::CreateTensorWithValue( ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 3.0, true); egr_utils_api::RetainGradForTensor(X); paddle::framework::DDim ddimY = paddle::framework::make_ddim({4, 16}); - egr::EagerTensor Y = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor Y = egr_utils_api::CreateTensorWithValue( ddimY, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 2.0, true); egr_utils_api::RetainGradForTensor(Y); auto output_tensor = elementwise_add_dygraph_function(X, Y, {}); - eager_test::CompareVariableWithValue(output_tensor, 5); + eager_test::CompareTensorWithValue(output_tensor, 5); - std::vector target_tensors = {output_tensor}; + std::vector target_tensors = {output_tensor}; RunBackward(target_tensors, {}); eager_test::CompareGradTensorWithValue(X, 1.0); diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index 3d61167c52e..8039bc32124 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -32,7 +32,8 @@ namespace egr { -egr::EagerTensor hook_function(const egr::EagerTensor& t) { +paddle::experimental::Tensor hook_function( + const paddle::experimental::Tensor& t) { auto t_dense = std::dynamic_pointer_cast(t.impl()); auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(), @@ -52,7 +53,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { } auto ret_impl = std::dynamic_pointer_cast(ret_dense); - egr::EagerTensor ret = egr::EagerTensor(); + paddle::experimental::Tensor ret = paddle::experimental::Tensor(); ret.set_impl(ret_impl); return ret; @@ -62,15 +63,15 @@ TEST(RetainGrad, HookBeforeRetainGrad) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, 
paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); - egr::EagerTensor& target_tensor = target_tensors[0]; + paddle::experimental::Tensor& target_tensor = target_tensors[0]; // Create ScaleNode auto scale_node_ptr = std::make_shared(1, 1); @@ -86,8 +87,9 @@ TEST(RetainGrad, HookBeforeRetainGrad) { // Apply RetainGrad { // ScaleNode Hook: +3 - std::function hook = - &hook_function; + std::function + hook = &hook_function; auto auto_grad_meta = std::make_shared(); auto_grad_meta->SetGradNode( @@ -114,11 +116,12 @@ TEST(RetainGrad, HookBeforeRetainGrad) { } // Retain Grad for leaf tensor1 - egr::EagerTensor leaf_tensor = egr::EagerTensor(); + paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor(); { // AccumulationNode Hook: +3 - std::function hook = - &hook_function; + std::function + hook = &hook_function; auto auto_grad_meta = std::make_shared(); auto_grad_meta->SetGradNode( @@ -143,15 +146,15 @@ TEST(RetainGrad, HookAfterRetainGrad) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor tensor = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor tensor = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/); target_tensors.emplace_back(std::move(tensor)); - egr::EagerTensor& target_tensor = target_tensors[0]; + paddle::experimental::Tensor& target_tensor = target_tensors[0]; // Create ScaleNode auto scale_node_ptr = std::make_shared(1, 1); @@ -165,8 +168,9 @@ TEST(RetainGrad, HookAfterRetainGrad) { // Apply RetainGrad { // ScaleNode Hook: +3 - std::function hook = - &hook_function; + std::function + hook = &hook_function; auto auto_grad_meta = std::make_shared(); auto_grad_meta->SetGradNode( @@ -192,11 +196,12 @@ TEST(RetainGrad, HookAfterRetainGrad) { } // Retain Grad for leaf tensor1 - egr::EagerTensor leaf_tensor = egr::EagerTensor(); + paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor(); { // AccumulationNode Hook: +3 - std::function hook = - &hook_function; + std::function + hook = &hook_function; auto auto_grad_meta = std::make_shared(); auto_grad_meta->SetGradNode( diff --git a/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc b/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc index 5e86cac83a2..1bd7d80d634 100644 --- a/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc @@ -30,15 +30,15 @@ TEST(TensorUtils, Test) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - std::vector target_tensors; + std::vector target_tensors; paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); // Create Target Tensor - egr::EagerTensor t = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor t = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 5.0 /*value*/, true /*is_leaf*/); - egr::EagerTensor t_grad = egr_utils_api::CreateTensorWithValue( + paddle::experimental::Tensor t_grad = egr_utils_api::CreateTensorWithValue( ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, pten::DataLayout::NCHW, 1.0 /*value*/, false 
/*is_leaf*/); diff --git a/paddle/fluid/eager/tests/test_utils.h b/paddle/fluid/eager/tests/test_utils.h index 9c217dff499..38bccc0dd5e 100644 --- a/paddle/fluid/eager/tests/test_utils.h +++ b/paddle/fluid/eager/tests/test_utils.h @@ -30,7 +30,8 @@ namespace eager_test { template -bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) { +bool CompareGradTensorWithValue(const paddle::experimental::Tensor& target, + T value) { egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target); auto grad_dense = std::dynamic_pointer_cast(meta->Grad().impl()); @@ -64,7 +65,8 @@ bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) { } template -bool CompareTensorWithValue(const egr::EagerTensor& target, T value) { +bool CompareTensorWithValue(const paddle::experimental::Tensor& target, + T value) { // TODO(jiabin): Support Selected Rows later auto dense_t = std::dynamic_pointer_cast(target.impl()); T* ptr = dense_t->data(); @@ -97,73 +99,6 @@ bool CompareTensorWithValue(const egr::EagerTensor& target, T value) { return true; } -template -bool CompareVariableWithValue(const egr::EagerTensor& target, T value) { - // TODO(jiabin): Support Selected Rows later - auto lod_tensor = target.Var().Get(); - T* ptr = lod_tensor.data(); - - std::vector host_data(lod_tensor.numel()); - if (paddle::platform::is_gpu_place(lod_tensor.place())) { -#ifdef PADDLE_WITH_CUDA - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - auto* dev_ctx = dynamic_cast( - pool.Get(paddle::platform::CUDAPlace())); - auto stream = dev_ctx->stream(); - - paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(), - paddle::platform::CUDAPlace(), ptr, - sizeof(T) * lod_tensor.numel(), stream); - ptr = host_data.data(); -#endif - } - VLOG(6) << "CompareVariableWithValue"; - for (int i = 0; i < lod_tensor.numel(); i++) { - PADDLE_ENFORCE(value == ptr[i], - paddle::platform::errors::PreconditionNotMet( - "Numerical Error in Compare Grad Variable With Value of " - "%d, we expected got value: %f, but got: %f instead. " - "Please check it later.", - i, value, ptr[i])); - } - return true; -} - -template -bool CompareGradVariableWithValue(const egr::EagerTensor& target, T value) { - // TODO(jiabin): Support Selected Rows later - egr::AutogradMeta* meta = egr::EagerUtils::unsafe_autograd_meta(target); - auto lod_tensor = meta->Grad().Var().Get(); - T* ptr = lod_tensor.data(); - - std::vector host_data(lod_tensor.numel()); - if (paddle::platform::is_gpu_place(lod_tensor.place())) { -#ifdef PADDLE_WITH_CUDA - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - auto* dev_ctx = dynamic_cast( - pool.Get(paddle::platform::CUDAPlace())); - auto stream = dev_ctx->stream(); - - paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(), - paddle::platform::CUDAPlace(), ptr, - sizeof(T) * lod_tensor.numel(), stream); - ptr = host_data.data(); -#endif - } - VLOG(6) << "CompareGradVariableWithValue"; - for (int i = 0; i < lod_tensor.numel(); i++) { - PADDLE_ENFORCE(value == ptr[i], - paddle::platform::errors::PreconditionNotMet( - "Numerical Error in Compare Grad Variable With Value of " - "%d, we expected got value: %f, but got: %f instead. 
" - "Please check it later.", - i, value, ptr[i])); - } - return true; -} - inline void InitEnv(paddle::platform::Place place) { // Prepare Device Contexts // Init DeviceContextPool diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 88030d91bf9..7be70ff9575 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -33,7 +33,7 @@ namespace egr { * Implementation of Eager Utils. **/ -AutogradMeta* EagerUtils::autograd_meta(egr::EagerTensor* target) { +AutogradMeta* EagerUtils::autograd_meta(paddle::experimental::Tensor* target) { auto* p_autograd_meta = target->get_autograd_meta(); if (!p_autograd_meta) { auto p_autograd_meta_ptr = std::make_shared(); @@ -43,7 +43,8 @@ AutogradMeta* EagerUtils::autograd_meta(egr::EagerTensor* target) { return static_cast(p_autograd_meta); } -AutogradMeta* EagerUtils::unsafe_autograd_meta(const egr::EagerTensor& target) { +AutogradMeta* EagerUtils::unsafe_autograd_meta( + const paddle::experimental::Tensor& target) { auto* p_autograd_meta = target.get_autograd_meta(); PADDLE_ENFORCE(p_autograd_meta, paddle::platform::errors::Fatal( @@ -52,17 +53,17 @@ AutogradMeta* EagerUtils::unsafe_autograd_meta(const egr::EagerTensor& target) { } std::vector EagerUtils::unsafe_autograd_meta( - const std::vector& targets) { + const std::vector& targets) { std::vector metas; metas.reserve(targets.size()); - for (const egr::EagerTensor& t : targets) { + for (const paddle::experimental::Tensor& t : targets) { metas.emplace_back(unsafe_autograd_meta(t)); } return metas; } AutogradMeta* EagerUtils::nullable_autograd_meta( - const egr::EagerTensor& target) { + const paddle::experimental::Tensor& target) { auto* p_autograd_meta = target.get_autograd_meta(); if (!p_autograd_meta) return nullptr; @@ -70,35 +71,35 @@ AutogradMeta* EagerUtils::nullable_autograd_meta( } std::vector EagerUtils::nullable_autograd_meta( - const std::vector& targets) { + const std::vector& targets) { std::vector metas; metas.reserve(targets.size()); - for (const egr::EagerTensor& t : targets) { + for (const paddle::experimental::Tensor& t : targets) { metas.emplace_back(nullable_autograd_meta(t)); } return metas; } std::vector EagerUtils::autograd_meta( - std::vector* targets) { + std::vector* targets) { std::vector ret; ret.reserve(targets->size()); // for autograd_meta we can tolerent it has nullptr. - for (auto& t : (*targets)) { - auto* p_autograd_meta = autograd_meta(&t); - ret.push_back(static_cast(p_autograd_meta)); + for (size_t i = 0; i < targets->size(); i++) { + auto* p_autograd_meta = autograd_meta(&((*targets)[i])); + ret.emplace_back(p_autograd_meta); } return ret; } std::pair EagerUtils::OutRankInfo( - const egr::EagerTensor& target) { + const paddle::experimental::Tensor& target) { return unsafe_autograd_meta(target)->OutRankInfo(); } std::shared_ptr EagerUtils::grad_node( - const egr::EagerTensor& target) { + const paddle::experimental::Tensor& target) { auto* meta = nullable_autograd_meta(target); if (meta) { return meta->GetMutableGradNode(); @@ -130,91 +131,56 @@ void EagerUtils::SetOutRankWithSlot(AutogradMeta* target, size_t slot_id) { target->SetSingleOutRankWithSlot(slot_id, 0); } -/* ---- Tensor -> Var ---- */ -std::vector> EagerUtils::SyncToVars( - const egr::EagerTensor& tensor) { - // TODO(jiabin): No const cast here. 
We should call SyncToVar in Python_C - // wrapper - const_cast(&tensor)->SyncToVar( - paddle::framework::proto::VarType_Type_LOD_TENSOR); - return {std::make_shared(tensor)}; -} - -std::vector> EagerUtils::SyncToVars( - const std::vector& tensors) { - // TODO(jiabin): No const cast here. We should call SyncToVar in Python_C - // wrapper - std::vector> res; - size_t num = tensors.size(); - res.reserve(num); - for (size_t i = 0; i < num; i++) { - const_cast(&(tensors[i])) - ->SyncToVar(paddle::framework::proto::VarType_Type_LOD_TENSOR); - res.emplace_back(new EagerTensor(tensors[i])); - } - return res; -} - -static std::shared_ptr TrySyncToVar( - egr::EagerTensor* tensor) { - if (tensor->initialized() || tensor->Var().IsInitialized()) { - tensor->SyncToVar(paddle::framework::proto::VarType_Type_LOD_TENSOR); - } - return std::shared_ptr(tensor, - [&](egr::EagerTensor* ptr) {}); +std::shared_ptr EagerUtils::TrySyncToVar( + const paddle::experimental::Tensor& tensor) { + return std::make_shared(tensor); } std::vector> EagerUtils::TrySyncToVars( - egr::EagerTensor* tensor) { + const paddle::experimental::Tensor& tensor) { return {TrySyncToVar(tensor)}; } std::vector> EagerUtils::TrySyncToVars( - std::vector* tensors) { - std::vector> res; - size_t num = tensors->size(); - res.reserve(num); - for (size_t i = 0; i < num; i++) { - res.emplace_back(TrySyncToVar(&(*tensors)[i])); - } - return res; + paddle::experimental::Tensor* tensor) { + PADDLE_ENFORCE_NOT_NULL( + tensor, + paddle::platform::errors::Fatal( + "Should Not Pass Empty tensor pointer in, since only output can " + "reach this, please check output value and make sure it's not null")); + return {TrySyncToVar(*tensor)}; } std::vector> EagerUtils::TrySyncToVars( - const std::vector& tensors) { + const std::vector& tensors) { std::vector> res; size_t num = tensors.size(); res.reserve(num); for (size_t i = 0; i < num; i++) { - res.emplace_back(TrySyncToVar(tensors[i])); + auto* tensor = tensors[i]; + PADDLE_ENFORCE_NOT_NULL( + tensor, paddle::platform::errors::Fatal( + "Tensor is null and cannot be copied. " + "We are tring to TrySyncToVars tensor from its " + "shared_ptr, this error may indicate some outputs " + "are nullptr")); + res.emplace_back(TrySyncToVar(*tensor)); } return res; } -/* ---- VarBase -> Tensor ---- */ -std::vector> EagerUtils::SyncToTensors( - const egr::EagerTensor& tensor) { - // TODO(jiabin): No const cast here. We should call SyncToTensor in Python_C - // wrapper - const_cast(&tensor)->SyncToTensor(); - return {std::make_shared(tensor)}; -} - -std::vector> EagerUtils::SyncToTensors( - const std::vector& tensors) { - // TODO(jiabin): No const cast here. 
We should call SyncToTensor in Python_C - // wrapper +std::vector> EagerUtils::TrySyncToVars( + const std::vector& tensors) { std::vector> res; size_t num = tensors.size(); res.reserve(num); for (size_t i = 0; i < num; i++) { - const_cast(&(tensors[i]))->SyncToTensor(); - res.emplace_back(new EagerTensor(tensors[i])); + res.emplace_back(TrySyncToVar(tensors[i])); } return res; } -std::vector> EagerUtils::ConstructDuplicableOutput( +std::vector> EagerUtils::CreateVars( const size_t num) { std::vector> res; res.reserve(num); @@ -225,9 +191,9 @@ std::vector> EagerUtils::ConstructDuplicableOutput( return res; } -std::vector EagerUtils::GetOutputs( +std::vector EagerUtils::GetOutputs( const std::vector>& outs) { - std::vector res; + std::vector res; res.reserve(outs.size()); for (const auto& out : outs) { PADDLE_ENFORCE_NOT_NULL( @@ -237,12 +203,12 @@ std::vector EagerUtils::GetOutputs( "shared_ptr, this error may indicate some outputs " "are nullptr", out->name())); - res.emplace_back((*(out.get()))); + res.emplace_back(out->GetTensorBase(), out->name()); } return res; } -egr::EagerTensor EagerUtils::GetOutput( +paddle::experimental::Tensor EagerUtils::GetOutput( const std::shared_ptr& out) { PADDLE_ENFORCE_NOT_NULL( out.get(), paddle::platform::errors::Fatal( @@ -250,25 +216,76 @@ egr::EagerTensor EagerUtils::GetOutput( "are tring to Get Output tensor from its shared_ptr, " "this error may indicate output is nullptr", out->name())); - return EagerTensor((*(out.get()))); + return paddle::experimental::Tensor(out->GetTensorBase(), out->name()); +} + +void EagerUtils::OverwriteOutputs(const std::shared_ptr& out, + paddle::experimental::Tensor* tensor) { + PADDLE_ENFORCE_NOT_NULL( + tensor, paddle::platform::errors::Fatal( + "Tensor is null and cannot be copied. " + "We are tring to OverwriteOutput from its " + "shared_ptr, this error may indicate some outputs " + "are nullptr")); + tensor->set_impl(out->GetTensorBase()); +} + +void EagerUtils::OverwriteOutputs( + const std::vector>& outs, + const std::vector& tensors) { + PADDLE_ENFORCE_EQ( + outs.size(), tensors.size(), + paddle::platform::errors::Fatal( + "We are tring to OverwriteOutputs which passed in and it expected " + "elements num of outs and origin outputs are equal, but we got outs " + "size of: %d, and tensors passed in size is: %d", + outs.size(), tensors.size())); + for (size_t i = 0; i < outs.size(); i++) { + OverwriteOutputs(outs[i], tensors[i]); + } } -EagerTensor EagerUtils::RecoverTensorWrapper( +void EagerUtils::OverwriteOutputs(const paddle::experimental::Tensor& out, + paddle::experimental::Tensor* tensor) { + PADDLE_ENFORCE_NOT_NULL( + tensor, paddle::platform::errors::Fatal( + "Tensor is null and cannot be copied. " + "We are tring to OverwriteOutput from its " + "shared_ptr, this error may indicate some outputs " + "are nullptr")); + *tensor = out; +} +void EagerUtils::OverwriteOutputs( + const std::vector& outs, + const std::vector& tensors) { + for (size_t i = 0; i < outs.size(); i++) { + PADDLE_ENFORCE_NOT_NULL( + tensors[i], paddle::platform::errors::Fatal( + "Tensor is null and cannot be copied. 
" + "We are tring to OverwriteOutput from its " + "shared_ptr, this error may indicate some outputs " + "are nullptr")); + *tensors[i] = outs[i]; + } +} + +paddle::experimental::Tensor EagerUtils::RecoverTensorWrapper( TensorWrapper* tw, const std::shared_ptr& grad_node) { return tw->recover(grad_node); } -std::vector EagerUtils::RecoverTensorWrapper( +std::vector EagerUtils::RecoverTensorWrapper( std::vector* tw, const std::shared_ptr& grad_node) { - std::vector ret; + std::vector ret; for (auto& t : *tw) { ret.emplace_back(t.recover(grad_node)); } return ret; } -void EagerUtils::CheckAndRetainGrad(const egr::EagerTensor& tensor) { +void EagerUtils::CheckAndRetainGrad( + const paddle::experimental::Tensor& tensor) { VLOG(6) << "Check RetainGradForTensor: " << tensor.name(); if (FLAGS_retain_grad_for_all_tensor) { VLOG(6) << "RetainGradForTensor: " << tensor.name(); @@ -277,7 +294,7 @@ void EagerUtils::CheckAndRetainGrad(const egr::EagerTensor& tensor) { } void EagerUtils::CheckAndRetainGrad( - const std::vector& tensors) { + const std::vector& tensors) { if (FLAGS_retain_grad_for_all_tensor) { for (auto& tensor : tensors) { VLOG(6) << "RetainGradForTensor: " << tensor.name(); @@ -286,43 +303,4 @@ void EagerUtils::CheckAndRetainGrad( } } -paddle::experimental::Tensor EagerUtils::SyncToPtenTensors( - const egr::EagerTensor& tensor) { - const_cast(&tensor)->SyncToTensor(); - return *tensor.Tensor().get(); -} - -std::vector EagerUtils::SyncToPtenTensors( - const std::vector& tensors) { - std::vector res; - size_t num = tensors.size(); - res.reserve(num); - for (size_t i = 0; i < num; i++) { - const_cast(&(tensors[i]))->SyncToTensor(); - res.push_back(*tensors[i].Tensor().get()); - } - return res; -} - -egr::EagerTensor EagerUtils::CreateEagerTensorFromTensor( - const paddle::experimental::Tensor& tensor) { - egr::EagerTensor ret; - ret.set_tensor(std::make_shared(tensor)); - return ret; -} - -std::vector EagerUtils::CreateEagerTensorFromTensor( - const std::vector& tensors) { - std::vector res; - size_t num = tensors.size(); - res.reserve(num); - for (size_t i = 0; i < num; i++) { - egr::EagerTensor tmp; - tmp.set_tensor(std::make_shared(tensors[i])); - res.emplace_back(std::move(tmp)); - } - - return res; -} - } // namespace egr diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index 73839d34ec2..b0549488efc 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -92,15 +92,16 @@ class EagerUtils { * constructor (it's abstract class there) * * **/ - static AutogradMeta* autograd_meta(egr::EagerTensor* target); + static AutogradMeta* autograd_meta(paddle::experimental::Tensor* target); static std::vector autograd_meta( - std::vector* targets); + std::vector* targets); - static std::pair OutRankInfo(const egr::EagerTensor& target); + static std::pair OutRankInfo( + const paddle::experimental::Tensor& target); static std::shared_ptr grad_node( - const egr::EagerTensor& target); + const paddle::experimental::Tensor& target); // Set history is used to set backward info during forward process, it will // set forward var's autograd meta's grad node as current backward node. @@ -115,12 +116,14 @@ class EagerUtils { static void SetOutRankWithSlot(AutogradMeta* target, size_t slot_id); // This method will return an AutogradMeta pointer unsafely. 
- static AutogradMeta* nullable_autograd_meta(const egr::EagerTensor& target); + static AutogradMeta* nullable_autograd_meta( + const paddle::experimental::Tensor& target); static std::vector nullable_autograd_meta( - const std::vector& targets); - static AutogradMeta* unsafe_autograd_meta(const egr::EagerTensor& target); + const std::vector& targets); + static AutogradMeta* unsafe_autograd_meta( + const paddle::experimental::Tensor& target); static std::vector unsafe_autograd_meta( - const std::vector& targets); + const std::vector& targets); template static bool ComputeRequireGrad(T trace_backward, Args&&... args) { @@ -140,45 +143,50 @@ class EagerUtils { } // TensorWrapper Utils - static egr::EagerTensor RecoverTensorWrapper( - egr::TensorWrapper* tw, const std::shared_ptr& grad_node); - static std::vector RecoverTensorWrapper( - std::vector* tw, + static paddle::experimental::Tensor RecoverTensorWrapper( + TensorWrapper* tw, const std::shared_ptr& grad_node); + static std::vector RecoverTensorWrapper( + std::vector* tw, const std::shared_ptr& grad_node); // Intermidate needed remove this once we don't need legacy + // Inner Method + static std::shared_ptr TrySyncToVar( + const paddle::experimental::Tensor& tensor); + // Basic Input + static std::vector> TrySyncToVars( + const paddle::experimental::Tensor& tensor); + // Basic Output static std::vector> TrySyncToVars( - egr::EagerTensor* tensor); + paddle::experimental::Tensor* tensor); + // Multi Output static std::vector> TrySyncToVars( - std::vector* tensors); + const std::vector& tensors); + // Multi Input static std::vector> TrySyncToVars( - const std::vector& tensors); - - static std::vector> SyncToVars( - const egr::EagerTensor& tensor); - static std::vector> SyncToVars( - const std::vector& tensors); - static std::vector> SyncToTensors( - const egr::EagerTensor& tensor); - static std::vector> SyncToTensors( - const std::vector& tensors); - static std::vector> ConstructDuplicableOutput( - const size_t num); - static std::vector GetOutputs( + const std::vector& tensors); + // Construct empty output + static std::vector> CreateVars(const size_t num); + // Construct Tensor From var + static std::vector GetOutputs( const std::vector>& outs); - static egr::EagerTensor GetOutput(const std::shared_ptr& outs); - - static void CheckAndRetainGrad(const egr::EagerTensor& tensor); - static void CheckAndRetainGrad(const std::vector& tensors); - - static paddle::experimental::Tensor SyncToPtenTensors( - const egr::EagerTensor& tensor); - static std::vector SyncToPtenTensors( - const std::vector& tensors); - - static egr::EagerTensor CreateEagerTensorFromTensor( - const paddle::experimental::Tensor& tensor); - static std::vector CreateEagerTensorFromTensor( + static paddle::experimental::Tensor GetOutput( + const std::shared_ptr& out); + // Sync Back to origin output Tensor + static void OverwriteOutputs(const std::shared_ptr& out, + paddle::experimental::Tensor* tensor); + static void OverwriteOutputs(const paddle::experimental::Tensor& out, + paddle::experimental::Tensor* tensor); + static void OverwriteOutputs( + const std::vector>& outs, + const std::vector& tensors); + static void OverwriteOutputs( + const std::vector& outs, + const std::vector& tensors); + // end Intermidate needed + + static void CheckAndRetainGrad(const paddle::experimental::Tensor& tensor); + static void CheckAndRetainGrad( const std::vector& tensors); }; diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc 
index 257953252bc..75d4d8246e3 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -214,37 +214,38 @@ void TensorAddImpl(const framework::Tensor& src, framework::Tensor* dst, func(dev_ctx, src, dst); } -std::shared_ptr GetInnerDstTensor(egr::EagerTensor* dst) { - std::shared_ptr dst_tensor = - std::dynamic_pointer_cast(dst->impl()); +template +TType* GetInnerMutableTensor(framework::Variable* dst) { + auto* dst_tensor = dst->GetMutable(); return dst_tensor; } -std::shared_ptr GetInnerSrcTensor( - const egr::EagerTensor& src) { - std::shared_ptr dst_tensor = - std::dynamic_pointer_cast(src.impl()); +template +TType* GetInnerMutableTensor(paddle::experimental::Tensor* dst) { + auto* dst_tensor = static_cast(dst->impl().get()); return dst_tensor; } -std::shared_ptr GetInnerDstTensor(framework::Variable* dst) { - auto* dst_tensor = dst->GetMutable(); - return std::make_shared(*dst_tensor); +template +const TType& GetInnerTensor(const framework::Variable& src) { + return src.Get(); } -std::shared_ptr GetInnerSrcTensor( - const framework::Variable& src) { - auto& src_tensor = src.Get(); - return std::make_shared(src_tensor); +template +TType& GetInnerTensor(const paddle::experimental::Tensor& src) { + PADDLE_ENFORCE_EQ( + src.initialized(), true, + platform::errors::Fatal("We only add tensor with value if a tensor is " + "NOT INITILIZED, it should just move instead of " + "calling this method.")); + auto* src_tensor = static_cast(src.impl().get()); + return *src_tensor; } template void TensorAdd(const VarType& src, VarType* dst) { - std::shared_ptr d_tensor = GetInnerDstTensor(dst); - std::shared_ptr s_tensor = GetInnerSrcTensor(src); - - auto* dst_tensor = d_tensor.get(); - auto& src_tensor = *s_tensor.get(); + pten::DenseTensor* dst_tensor = GetInnerMutableTensor(dst); + const pten::DenseTensor& src_tensor = GetInnerTensor(src); auto numel = src_tensor.numel(); @@ -366,13 +367,14 @@ void TensorAdd(const VarType& src, VarType* dst) { template void TensorAdd(const framework::Variable& src, framework::Variable* dst); -template void TensorAdd(const egr::EagerTensor& src, - egr::EagerTensor* dst); +template void TensorAdd( + const paddle::experimental::Tensor& src, paddle::experimental::Tensor* dst); -void SelectedRowsAddToTensor(const framework::Variable& src, - framework::Variable* dst) { - auto* dst_tensor = dst->GetMutable(); - auto& src_selected_rows = src.Get(); +template +void SelectedRowsAddToTensor(const VarType& src, VarType* dst) { + pten::DenseTensor* dst_tensor = GetInnerMutableTensor(dst); + const pten::SelectedRows& src_selected_rows = + GetInnerTensor(src); auto place = dst_tensor->place(); auto data_type = src_selected_rows.value().type(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); @@ -406,20 +408,27 @@ void SelectedRowsAddToTensor(const framework::Variable& src, framework::DataTypeToString(data_type))); } -void SelectedRowsAddTensor(const framework::Variable& src_selected_rows_var, - const framework::Variable& src_tensor_var, - framework::Variable* dst_tensor_var) { - const auto& src_selected_rows = - src_selected_rows_var.Get(); - const auto& src_tensor = src_tensor_var.Get(); +template void SelectedRowsAddToTensor(const framework::Variable& src, + framework::Variable* dst); +template void SelectedRowsAddToTensor(const paddle::experimental::Tensor& src, + paddle::experimental::Tensor* dst); + +template +void SelectedRowsAddTensor(const VarType& src_selected_rows_var, + const 
VarType& src_tensor_var, + VarType* dst_tensor_var) { + const pten::SelectedRows& src_selected_rows = + GetInnerTensor(src_selected_rows_var); + const pten::DenseTensor& src_tensor = + GetInnerTensor(src_tensor_var); const auto& place = src_tensor.place(); auto data_type = src_tensor.type(); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); - auto* dst_tensor = dst_tensor_var->GetMutable(); + pten::DenseTensor* dst_tensor = + GetInnerMutableTensor(dst_tensor_var); dst_tensor->Resize(src_tensor.dims()); dst_tensor->mutable_data(place, data_type); - #define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \ if (data_type == framework::DataTypeTrait::DataType()) { \ paddle::operators::math::SelectedRowsAddTensor \ @@ -448,6 +457,18 @@ void SelectedRowsAddTensor(const framework::Variable& src_selected_rows_var, #undef PADDLE_SELECTED_ROWS_ADD_TENSOR } +template void SelectedRowsAddTensor( + const framework::Variable& src_selected_rows_var, + const framework::Variable& src_tensor_var, + framework::Variable* dst_tensor_var); +template void SelectedRowsAddTensor( + const paddle::experimental::Tensor& src_selected_rows_var, + const paddle::experimental::Tensor& src_tensor_var, + paddle::experimental::Tensor* dst_tensor_var); + +// Note(chenweihang): when two selected rows need to be added, +// adding one to another is not equal to merging two selected rows +// to one then add it to a empty selected rows, the after is correct // Note(chenweihang): when two selected rows need to be added, // adding one to another is not equal to merging two selected rows // to one then add it to a empty selected rows, the after is correct @@ -495,34 +516,6 @@ std::shared_ptr SelectedRowsMerge( framework::DataTypeToString(data_type))); } -void VariableAdd(const egr::EagerTensor& src_tensor, - egr::EagerTensor* dst_tensor) { - auto& src = src_tensor.Var(); - auto* dst = dst_tensor->MutableVar(); - - if (dst->IsType()) { - if (src.IsType()) { - paddle::imperative::TensorAdd(src, dst); - } else if (src.IsType()) { - paddle::imperative::SelectedRowsAddToTensor(src, dst); - } else { - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "Unexpected branch, output variable type is %s", - paddle::framework::ToTypeName(dst->Type()))); - } - } else { - if (src.IsType()) { - paddle::framework::Variable new_dst; - paddle::imperative::SelectedRowsAddTensor(*dst, src, &new_dst); - *dst = std::move(new_dst); - } else { - PADDLE_THROW(paddle::platform::errors::InvalidArgument( - "Unexpected branch, output variable type is %s", - paddle::framework::ToTypeName(dst->Type()))); - } - } -} - void VariableWrapperAdd(std::shared_ptr var, VariableWrapper* dst_var, bool unchange_input) { auto& src = var->Var(); diff --git a/paddle/fluid/imperative/gradient_accumulator.h b/paddle/fluid/imperative/gradient_accumulator.h index a57335d08a2..6371f64fe61 100644 --- a/paddle/fluid/imperative/gradient_accumulator.h +++ b/paddle/fluid/imperative/gradient_accumulator.h @@ -164,17 +164,16 @@ class SortedGradientAccumulator : public GradientAccumulator { std::vector tmp_grad_vars_; }; -void SelectedRowsAddToTensor(const framework::Variable& src, - framework::Variable* dst); +template +void SelectedRowsAddToTensor(const VarType& src, VarType* dst); -void SelectedRowsAddTensor(const framework::Variable& src_selected_rows_var, - const framework::Variable& src_tensor_var, - framework::Variable* dst_tensor_var); +template +void SelectedRowsAddTensor(const VarType& src_selected_rows_var, + const VarType& src_tensor_var, + 
VarType* dst_tensor_var); template void TensorAdd(const VarType& src, VarType* dst); -void VariableAdd(const egr::EagerTensor& src, egr::EagerTensor* dst); - } // namespace imperative } // namespace paddle diff --git a/paddle/fluid/imperative/infer_shape_context.h b/paddle/fluid/imperative/infer_shape_context.h index eb7d419c298..7033b9c1171 100644 --- a/paddle/fluid/imperative/infer_shape_context.h +++ b/paddle/fluid/imperative/infer_shape_context.h @@ -371,7 +371,8 @@ class DygraphInferShapeContext : public framework::InferShapeContext { } else { PADDLE_THROW(platform::errors::PermissionDenied( "Only LoDTensor/SelectedRows support 'GetDim', but Variables " - "type_id is xx.")); + "type_id is: %s.", + framework::ToTypeName(var->Type()))); } } diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index ca8adc97615..81cd39c225b 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -260,8 +260,16 @@ void Tracer::TraceOp(const std::string& type, const NameVarMap& ins, } if (ComputeRequiredGrad(new_ins, outs, trace_backward)) { - CreateGradOpNode(*op, new_ins, outs, attrs, default_attrs, place, - inplace_map); + if (!override_default_attr_map) { + PADDLE_ENFORCE_NOT_NULL(passed_default_attrs_, + paddle::platform::errors::PermissionDenied( + "Detected default_attrs = nullptr.")); + CreateGradOpNode(*op, new_ins, outs, attrs, *passed_default_attrs_, place, + inplace_map); + } else { + CreateGradOpNode(*op, new_ins, outs, attrs, default_attrs, place, + inplace_map); + } } else { VLOG(3) << "No Grad to track for Op: " << type; } diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.h b/paddle/fluid/operators/optimizers/rmsprop_op.h index a01f84b37c4..27cdbf3df05 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.h +++ b/paddle/fluid/operators/optimizers/rmsprop_op.h @@ -143,14 +143,14 @@ class RmspropOpKernel : public framework::OpKernel { auto &lr_tensor = *ctx.Input("LearningRate"); auto &mom_tensor = *ctx.Input("Moment"); - PADDLE_ENFORCE_EQ(&p_tensor, param_out, + PADDLE_ENFORCE_EQ(p_tensor.IsSharedBufferWith(*param_out), true, platform::errors::InvalidArgument( "Param and ParamOut must be the same Tensor")); - PADDLE_ENFORCE_EQ(&mom_tensor, moment_out, + PADDLE_ENFORCE_EQ(mom_tensor.IsSharedBufferWith(*moment_out), true, platform::errors::InvalidArgument( "Moment and MomentOut must be the same Tensor")); PADDLE_ENFORCE_EQ( - &ms_tensor, mean_square_out, + ms_tensor.IsSharedBufferWith(*mean_square_out), true, platform::errors::InvalidArgument( "MeanSquare and MeanSquareOut must be the same Tensor")); diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 6ace8159426..1db7339664a 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -40,15 +40,15 @@ namespace pybind { namespace py = ::pybind11; -PyTypeObject* p_eager_tensor_type; +PyTypeObject* p_tensor_type; extern PyTypeObject* g_vartype_pytype; extern PyTypeObject* g_framework_tensor_pytype; PyObject* EagerTensorNew(PyTypeObject* type, PyObject* args, PyObject* kwargs) { PyObject* obj = type->tp_alloc(type, 0); if (obj) { - auto v = reinterpret_cast(obj); - new (&(v->eager_tensor)) egr::EagerTensor(); + auto v = reinterpret_cast(obj); + new (&(v->tensor)) paddle::experimental::Tensor(); Py_INCREF(obj); } return obj; @@ -56,7 +56,7 @@ PyObject* EagerTensorNew(PyTypeObject* type, PyObject* args, PyObject* kwargs) { // TODO(jiabin): Overload this once we need more constructor in Python void EmptyEagerTensorInitializer( - 
EagerTensorObject* self, const std::string& name, + TensorObject* self, const std::string& name, const paddle::platform::Place& place, bool persistable = false, bool stop_gradient = true, framework::proto::VarType::Type dtype = paddle::framework::proto::VarType::FP32, @@ -70,8 +70,8 @@ void EmptyEagerTensorInitializer( "Create Eager Tensor with dims contain minus num is ilegal" "Please check your code and make sure you new a " "eager tensor with fixed shape instead of using -1.")); - self->eager_tensor.set_name(name); - auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->eager_tensor)); + self->tensor.set_name(name); + auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor)); autograd_meta->SetPersistable(persistable); autograd_meta->SetStopGradient(stop_gradient); if (var_type == paddle::framework::proto::VarType::LOD_TENSOR) { @@ -81,7 +81,7 @@ void EmptyEagerTensorInitializer( pten::make_intrusive(place), pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims)); dense_tensor->mutable_data(place); - self->eager_tensor.set_impl(dense_tensor); + self->tensor.set_impl(dense_tensor); } else { PADDLE_THROW(platform::errors::InvalidArgument( "We only support LoDTensor to be constructed by this initializer, " @@ -96,18 +96,17 @@ void EmptyEagerTensorInitializer( } } -void InitEagerTensorWithNumpyValue(EagerTensorObject* self, - const py::object& array, +void InitEagerTensorWithNumpyValue(TensorObject* self, const py::object& array, bool zero_copy = false) { PADDLE_ENFORCE_EQ( - self->eager_tensor.defined(), true, + self->tensor.defined(), true, paddle::platform::errors::Fatal( "Calling InitEagerTensorWithNumpyValue of Eager Tensor without " "EmptyEagerTensorInitializer is " "forbidden. Please check your code and make sure you new a " "eager tensor before init it with NumPy.")); pten::DenseTensor* impl_ptr = - static_cast(self->eager_tensor.impl().get()); + static_cast(self->tensor.impl().get()); paddle::platform::Place place = impl_ptr->place(); paddle::framework::LoDTensor temp_tensor = paddle::framework::LoDTensor(); if (platform::is_cpu_place(place)) { @@ -133,48 +132,47 @@ void InitEagerTensorWithNumpyValue(EagerTensorObject* self, *impl_ptr = temp_tensor; } -void InitEagerTensorWithEagerTensor(EagerTensorObject* self, - const egr::EagerTensor& src, +void InitEagerTensorWithEagerTensor(TensorObject* self, + const paddle::experimental::Tensor& src, const paddle::platform::Place& place, const std::string& name) { - self->eager_tensor.set_name(name); - if (place == src.place()) { + self->tensor.set_name(name); + if (place == src.inner_place()) { auto impl = std::static_pointer_cast(src.impl()); - self->eager_tensor.set_impl(impl); + self->tensor.set_impl(impl); VLOG(4) << "Same place, do ShareDataWith"; } else { - self->eager_tensor.set_impl( + self->tensor.set_impl( src.copy_to(pten::TransToPtenBackend(place), true).impl()); VLOG(4) << "Different place, do TensorCopy"; } - egr::EagerUtils::autograd_meta(&(self->eager_tensor))->SetStopGradient(true); + egr::EagerUtils::autograd_meta(&(self->tensor))->SetStopGradient(true); if (src.get_autograd_meta()) { - egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor) + egr::EagerUtils::unsafe_autograd_meta(self->tensor) ->SetPersistable( egr::EagerUtils::unsafe_autograd_meta(src)->Persistable()); } else { - egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor) - ->SetPersistable(false); + egr::EagerUtils::unsafe_autograd_meta(self->tensor)->SetPersistable(false); } } -void 
InitEagerTensorWithFrameworkTensor(EagerTensorObject* self, +void InitEagerTensorWithFrameworkTensor(TensorObject* self, const framework::Tensor& src, const paddle::platform::Place& place, const std::string& name) { - self->eager_tensor.set_name(name); + self->tensor.set_name(name); if (place == src.place()) { - self->eager_tensor.set_impl(std::make_shared(src)); + self->tensor.set_impl(std::make_shared(src)); VLOG(4) << "Same place, do ShareDataWith"; } else { - auto temp = egr::EagerTensor(std::make_shared(src)); - self->eager_tensor.set_impl( + auto temp = + paddle::experimental::Tensor(std::make_shared(src)); + self->tensor.set_impl( temp.copy_to(pten::TransToPtenBackend(place), true).impl()); VLOG(4) << "Different place, do TensorCopy"; } - egr::EagerUtils::autograd_meta(&(self->eager_tensor))->SetStopGradient(true); - egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor) - ->SetPersistable(false); + egr::EagerUtils::autograd_meta(&(self->tensor))->SetStopGradient(true); + egr::EagerUtils::unsafe_autograd_meta(self->tensor)->SetPersistable(false); } py::object ParsePyArray( @@ -274,7 +272,7 @@ std::string ParseName(std::unordered_map kws_map, // initialize EagerTensor by PyArray(first argument is PyArray, // mix args and kwargs) automatically. void AutoInitEagerTensorByPyArray( - EagerTensorObject* py_tensor_ptr, + TensorObject* py_tensor_ptr, std::unordered_map kws_map, PyObject* args, bool flag_kwargs, Py_ssize_t args_num) { // The first argument of the EagerTensor constructor is PyArray, @@ -314,7 +312,7 @@ void AutoInitEagerTensorByPyArray( // initialize EagerTensor by EagerTensor or framework::Tensor (mix args and // kwargs) automatically. void AutoInitEagerTensorByTensor( - EagerTensorObject* py_tensor_ptr, + TensorObject* py_tensor_ptr, std::unordered_map kws_map, PyObject* args, bool flag_kwargs, Py_ssize_t args_num, bool init_by_egr_tensor = true) { // The first argument of the EagerTensor constructor is EagerTensor or @@ -335,14 +333,14 @@ void AutoInitEagerTensorByTensor( act_name = ParseName(kws_map, kw_order_map, args, flag_kwargs, args_num); if (init_by_egr_tensor) { - egr::EagerTensor src_tensor; + paddle::experimental::Tensor src_tensor; if (kw_order_map["value"] <= args_num) { - src_tensor = CastPyArg2EagerTensor( - PyTuple_GET_ITEM(args, kw_order_map["value"] - 1), - kw_order_map["value"] - 1); + src_tensor = + CastPyArg2Tensor(PyTuple_GET_ITEM(args, kw_order_map["value"] - 1), + kw_order_map["value"] - 1); } else { if (flag_kwargs && kws_map["value"] != NULL) { - src_tensor = CastPyArg2EagerTensor(kws_map["value"], 0); + src_tensor = CastPyArg2Tensor(kws_map["value"], 0); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The first expected kwargs is {value: EagerTensor}, " @@ -480,7 +478,7 @@ int EagerTensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { "forbidden. 
Please check your code and make sure you new a " "eager tensor before init it.")); - auto py_tensor_ptr = reinterpret_cast(self); + auto py_tensor_ptr = reinterpret_cast(self); Py_ssize_t args_num = PyTuple_Size(args); VLOG(6) << " args_num: " << args_num; @@ -502,8 +500,8 @@ int EagerTensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { AutoInitEagerTensorByPyArray(py_tensor_ptr, kws_map, args, flag_kwargs, args_num); return 0; - } else if (PyObject_IsInstance(kw_value, reinterpret_cast( - p_eager_tensor_type))) { + } else if (PyObject_IsInstance( + kw_value, reinterpret_cast(p_tensor_type))) { VLOG(6) << "Calling case5's or case6's initializer"; AutoInitEagerTensorByTensor(py_tensor_ptr, kws_map, args, flag_kwargs, args_num); @@ -598,8 +596,8 @@ int EagerTensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { AutoInitEagerTensorByPyArray(py_tensor_ptr, kws_map, args, flag_kwargs, args_num); return 0; - } else if (PyObject_IsInstance(arg0_ptr, reinterpret_cast( - p_eager_tensor_type))) { + } else if (PyObject_IsInstance( + arg0_ptr, reinterpret_cast(p_tensor_type))) { VLOG(6) << "Calling case5's or case6's initializer."; AutoInitEagerTensorByTensor(py_tensor_ptr, kws_map, args, flag_kwargs, args_num); @@ -716,8 +714,8 @@ int EagerTensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { return 1; } -static void EagerTensorDealloc(EagerTensorObject* self) { - self->eager_tensor.~EagerTensor(); +static void EagerTensorDealloc(TensorObject* self) { + self->tensor.~Tensor(); Py_TYPE(self)->tp_free(reinterpret_cast(self)); } @@ -739,7 +737,7 @@ void BindEager(pybind11::module* module) { heap_type->ht_qualname = ToPyObject("EagerTensor"); auto type = &heap_type->ht_type; type->tp_name = "EagerTensor"; - type->tp_basicsize = sizeof(EagerTensorObject); + type->tp_basicsize = sizeof(TensorObject); type->tp_dealloc = (destructor)EagerTensorDealloc; type->tp_as_number = &number_methods; type->tp_as_sequence = &sequence_methods; @@ -755,7 +753,7 @@ void BindEager(pybind11::module* module) { #if PY_VERSION_HEX >= 0x03050000 type->tp_as_async = &heap_type->as_async; #endif - p_eager_tensor_type = type; + p_tensor_type = type; if (PyType_Ready(type) < 0) { PADDLE_THROW(platform::errors::Fatal( diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index a5167e9ebae..7aa810acc28 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -40,7 +40,7 @@ namespace pybind { namespace py = ::pybind11; -extern PyTypeObject* p_eager_tensor_type; +extern PyTypeObject* p_tensor_type; extern PyTypeObject* g_multidevicefeedreader_pytype; extern PyTypeObject* g_orderedmultidevicefeedreader_pytype; @@ -102,13 +102,12 @@ static PyObject* eager_api_scale(PyObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY // TODO(jiabin): Sync Tensor and Variable here when we support - egr::EagerTensor ret = - egr::scale(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) - ->eager_tensor, - CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 1), 1), - CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 2), 2), - CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3), - CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4)); + paddle::experimental::Tensor ret = egr::scale( + reinterpret_cast(PyTuple_GET_ITEM(args, 0))->tensor, + CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 1), 1), + CastPyArg2AttrFloat(PyTuple_GET_ITEM(args, 2), 2), + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3), + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 4), 4)); return 
ToPyObject(ret); EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -116,11 +115,10 @@ static PyObject* eager_api_scale(PyObject* self, PyObject* args, static PyObject* eager_api_run_backward(PyObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY - auto tensors = CastPyArg2VectorOfEagerTensor(PyTuple_GET_ITEM(args, 0), 0); - auto grad_tensors = - CastPyArg2VectorOfEagerTensor(PyTuple_GET_ITEM(args, 1), 1); - RunBackward(tensors, grad_tensors, - CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2)); + auto tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0); + auto grad_tensors = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 1), 1); + egr::RunBackward(tensors, grad_tensors, + CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 2), 2)); Py_INCREF(Py_None); return Py_None; EAGER_CATCH_AND_THROW_RETURN_NULL @@ -129,12 +127,10 @@ static PyObject* eager_api_run_backward(PyObject* self, PyObject* args, static PyObject* eager_api_tensor_copy(PyObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY - egr::EagerTensor& src = - reinterpret_cast(PyTuple_GET_ITEM(args, 0)) - ->eager_tensor; - egr::EagerTensor& dst = - reinterpret_cast(PyTuple_GET_ITEM(args, 1)) - ->eager_tensor; + paddle::experimental::Tensor& src = + reinterpret_cast(PyTuple_GET_ITEM(args, 0))->tensor; + paddle::experimental::Tensor& dst = + reinterpret_cast(PyTuple_GET_ITEM(args, 1))->tensor; auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 2), 2); bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 3), 3); @@ -152,22 +148,23 @@ static PyObject* eager_api_read_next_eager_tensor_list(PyObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY - auto tensor_list = CastPyArg2VectorOfTensor(PyTuple_GET_ITEM(args, 0), 0); - std::vector eager_tensor_list; - eager_tensor_list.reserve(tensor_list.size()); - auto func = [](framework::Tensor& tensor) { - egr::EagerTensor eager_tensor( + auto tensor_base_list = + CastPyArg2VectorOfTensorBase(PyTuple_GET_ITEM(args, 0), 0); + std::vector tensor_list; + tensor_list.reserve(tensor_base_list.size()); + auto func = [](framework::Tensor& tensor_base) { + paddle::experimental::Tensor tensor( egr::Controller::Instance().GenerateUniqueName()); - auto autograd_meta = egr::EagerUtils::autograd_meta(&eager_tensor); + auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor); autograd_meta->SetPersistable(false); autograd_meta->SetStopGradient(true); - eager_tensor.set_impl(std::make_shared(tensor)); - return eager_tensor; + tensor.set_impl(std::make_shared(tensor_base)); + return tensor; }; - for (auto& tensor : tensor_list) { - eager_tensor_list.emplace_back(func(tensor)); + for (auto& tensor_base : tensor_base_list) { + tensor_list.emplace_back(func(tensor_base)); } - return ToPyObject(eager_tensor_list); + return ToPyObject(tensor_list); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 414c60adf03..511cc10d645 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -34,24 +34,24 @@ limitations under the License. 
*/ namespace paddle { namespace pybind { -extern void InitEagerTensorWithNumpyValue(EagerTensorObject* self, +extern void InitEagerTensorWithNumpyValue(TensorObject* self, const pybind11::object& array, bool zero_copy); -extern PyTypeObject* p_eager_tensor_type; +extern PyTypeObject* p_tensor_type; -static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, - PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor_method_numpy(TensorObject* self, PyObject* args, + PyObject* kwargs) { EAGER_SYNC_TRY PADDLE_ENFORCE_EQ( - self->eager_tensor.initialized(), true, + self->tensor.initialized(), true, platform::errors::InvalidArgument( "Tensor data of %s is Empty that indicates we have null tensor for " "now, please check if it has no data and initialize it first.", - self->eager_tensor.name())); - auto tensor_dims = self->eager_tensor.shape(); - auto numpy_dtype = TensorDtype2NumpyDtype(self->eager_tensor.type()); - auto sizeof_dtype = pten::DataTypeSize(self->eager_tensor.type()); + self->tensor.name())); + auto tensor_dims = self->tensor.shape(); + auto numpy_dtype = TensorDtype2NumpyDtype(self->tensor.type()); + auto sizeof_dtype = pten::DataTypeSize(self->tensor.type()); Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank]; Py_intptr_t py_strides[paddle::framework::DDim::kMaxRank]; size_t numel = 1; @@ -68,18 +68,18 @@ static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, pybind11::detail::npy_api::NPY_ARRAY_WRITEABLE_, nullptr); - if (self->eager_tensor.is_cpu()) { + if (self->tensor.is_cpu()) { auto dense_tensor = - std::dynamic_pointer_cast(self->eager_tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); platform::CPUPlace place; // deep copy paddle::memory::Copy(place, reinterpret_cast( pybind11::detail::array_proxy(array)->data), place, dense_tensor->data(), sizeof_dtype * numel); #if defined(PADDLE_WITH_CUDA) - } else if (self->eager_tensor.is_cuda()) { + } else if (self->tensor.is_cuda()) { auto dense_tensor = - std::dynamic_pointer_cast(self->eager_tensor.impl()); + std::dynamic_pointer_cast(self->tensor.impl()); paddle::platform::GpuMemcpySync( pybind11::detail::array_proxy(array)->data, dense_tensor->data(), @@ -97,108 +97,105 @@ static PyObject* eager_tensor_method_numpy(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor_method__is_initialized(EagerTensorObject* self, +static PyObject* eager_tensor_method__is_initialized(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY - return ToPyObject(self->eager_tensor.initialized()); + return ToPyObject(self->tensor.initialized()); EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor_method__copy_to(EagerTensorObject* self, +static PyObject* eager_tensor_method__copy_to(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 0), 0); auto place = CastPyArg2Place(PyTuple_GET_ITEM(args, 1), 1); auto cp_tensor = - self->eager_tensor.copy_to(pten::TransToPtenBackend(place), blocking); + self->tensor.copy_to(pten::TransToPtenBackend(place), blocking); egr::EagerUtils::autograd_meta(&cp_tensor)->SetStopGradient(true); egr::EagerUtils::autograd_meta(&cp_tensor) ->SetPersistable( - egr::EagerUtils::autograd_meta(&(self->eager_tensor))->Persistable()); + egr::EagerUtils::autograd_meta(&(self->tensor))->Persistable()); return ToPyObject(cp_tensor); EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* 
eager_tensor_method_reconstruct_from_(EagerTensorObject* self, +static PyObject* eager_tensor_method_reconstruct_from_(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY - egr::EagerTensor src_tensor = - CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0); - bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1); - std::string orig_name = self->eager_tensor.name(); + paddle::experimental::Tensor src_tensor = + CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0); + std::string orig_name = self->tensor.name(); VLOG(6) << "Start Reconstructing Tensor from" << src_tensor.name() << " to " << orig_name; - self->eager_tensor.copy_(src_tensor, blocking); - // Steal Tensor from src tensor - self->eager_tensor.set_tensor(src_tensor.Tensor()); + self->tensor = src_tensor; // Recover source name - self->eager_tensor.set_name(orig_name); + self->tensor.set_name(orig_name); VLOG(6) << "Finished Reconstructing Tensor from" << src_tensor.name() - << " to " << self->eager_tensor.name(); + << " to " << self->tensor.name(); Py_INCREF(Py_None); return Py_None; EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor_method_copy_(EagerTensorObject* self, - PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor_method_copy_(TensorObject* self, PyObject* args, + PyObject* kwargs) { EAGER_SYNC_TRY - egr::EagerTensor src_tensor = - CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0); + paddle::experimental::Tensor src_tensor = + CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0); bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1); VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to " - << self->eager_tensor.name(); - if (!self->eager_tensor.defined()) { - egr::EagerUtils::autograd_meta(&(self->eager_tensor)) + << self->tensor.name(); + if (!self->tensor.defined()) { + egr::EagerUtils::autograd_meta(&(self->tensor)) ->SetStopGradient( egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient()); - egr::EagerUtils::autograd_meta(&(self->eager_tensor)) + egr::EagerUtils::autograd_meta(&(self->tensor)) ->SetPersistable( egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable()); } - self->eager_tensor.copy_(src_tensor, blocking); + self->tensor.copy_(src_tensor, blocking); VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to " - << self->eager_tensor.name(); + << self->tensor.name(); Py_INCREF(Py_None); return Py_None; EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor_retain_grads(EagerTensorObject* self, - PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor_retain_grads(TensorObject* self, PyObject* args, + PyObject* kwargs) { EAGER_TRY if (egr::Controller::Instance().HasGrad()) { - auto meta = egr::EagerUtils::autograd_meta(&(self->eager_tensor)); + auto meta = egr::EagerUtils::autograd_meta(&(self->tensor)); if (!meta->GetMutableGradNode()) { - VLOG(6) << "Make grad node of tensor: " << self->eager_tensor.name() + VLOG(6) << "Make grad node of tensor: " << self->tensor.name() << "become accumulation node"; meta->SetGradNode(std::make_shared()); } - egr::egr_utils_api::RetainGradForTensor(self->eager_tensor); + egr::egr_utils_api::RetainGradForTensor(self->tensor); } Py_INCREF(Py_None); return Py_None; EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self, +static PyObject* eager_tensor__clear_gradient(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY - VLOG(4) << "ClearGradient " << self->eager_tensor.name(); + VLOG(4) << 
"ClearGradient " << self->tensor.name(); - egr::EagerTensor* grad; - if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) { + paddle::experimental::Tensor* grad; + if (egr::egr_utils_api::IsLeafTensor(self->tensor)) { // Add RetainGrad as PostHook to AccumulationNode std::shared_ptr grad_node = - egr::EagerUtils::grad_node(self->eager_tensor); + egr::EagerUtils::grad_node(self->tensor); PADDLE_ENFORCE( grad_node.get() != nullptr, paddle::platform::errors::Fatal("Detected NULL grad_node" @@ -208,12 +205,12 @@ static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self, std::dynamic_pointer_cast(grad_node); grad = accumulation_grad_node->Grad(); } else { - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor); + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor); grad = meta->MutableGrad(); } if (grad->initialized()) { - VLOG(4) << "Gradient of " << self->eager_tensor.name() + VLOG(4) << "Gradient of " << self->tensor.name() << " is initialized, will be released."; auto dense_tensor = std::dynamic_pointer_cast(grad->impl()); @@ -224,15 +221,15 @@ static PyObject* eager_tensor__clear_gradient(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor__zero_grads(EagerTensorObject* self, - PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor__zero_grads(TensorObject* self, PyObject* args, + PyObject* kwargs) { EAGER_TRY - VLOG(4) << "ZeroGrads " << self->eager_tensor.name(); + VLOG(4) << "ZeroGrads " << self->tensor.name(); - if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) { + if (egr::egr_utils_api::IsLeafTensor(self->tensor)) { // Add RetainGrad as PostHook to AccumulationNode std::shared_ptr grad_node = - egr::EagerUtils::grad_node(self->eager_tensor); + egr::EagerUtils::grad_node(self->tensor); PADDLE_ENFORCE( grad_node.get() != nullptr, paddle::platform::errors::Fatal("Detected NULL grad_node" @@ -241,18 +238,15 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self, auto accumulation_grad_node = std::dynamic_pointer_cast(grad_node); if (accumulation_grad_node->Grad()->initialized()) { - accumulation_grad_node->Grad()->set_tensor( - std::make_shared( - paddle::experimental::zeros_like( - *(accumulation_grad_node->Grad()->Tensor().get())))); + accumulation_grad_node->Grad()->set_impl( + paddle::experimental::zeros_like(*(accumulation_grad_node->Grad())) + .impl()); } } else { - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor); + auto meta = egr::EagerUtils::unsafe_autograd_meta(self->tensor); if (meta->MutableGrad()->initialized()) { - meta->MutableGrad()->set_tensor( - std::make_shared( - paddle::experimental::zeros_like( - *(meta->MutableGrad()->Tensor().get())))); + meta->MutableGrad()->set_impl( + paddle::experimental::zeros_like(*(meta->MutableGrad())).impl()); } } @@ -261,20 +255,19 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor__share_buffer_to(EagerTensorObject* self, +static PyObject* eager_tensor__share_buffer_to(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY - egr::EagerTensor* dst_ptr = - &(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) - ->eager_tensor); - PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true, + paddle::experimental::Tensor* dst_ptr = + &(reinterpret_cast(PyTuple_GET_ITEM(args, 0))->tensor); + PADDLE_ENFORCE_EQ(self->tensor.initialized(), true, platform::errors::InvalidArgument( "Tensor %s has not been initialized! 
please initialize " "src tensor before share_buffer_with to other.", - self->eager_tensor.name())); + self->tensor.name())); auto* src_tensor = - static_cast(self->eager_tensor.impl().get()); + static_cast(self->tensor.impl().get()); auto dst_tensor = static_cast(dst_ptr->impl().get()); dst_tensor->ShareDataWith(*src_tensor); @@ -284,24 +277,23 @@ static PyObject* eager_tensor__share_buffer_to(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor__is_shared_buffer_with(EagerTensorObject* self, +static PyObject* eager_tensor__is_shared_buffer_with(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY - egr::EagerTensor* dst_ptr = - &(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) - ->eager_tensor); - PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true, + paddle::experimental::Tensor* dst_ptr = + &(reinterpret_cast(PyTuple_GET_ITEM(args, 0))->tensor); + PADDLE_ENFORCE_EQ(self->tensor.initialized(), true, platform::errors::InvalidArgument( "Tensor %s has not been initialized! please initialize " "src tensor before share_buffer_with to other.", - self->eager_tensor.name())); + self->tensor.name())); bool res = false; - if (!self->eager_tensor.defined() || !dst_ptr->defined()) { + if (!self->tensor.defined() || !dst_ptr->defined()) { return ToPyObject(res); } auto* self_ptr = - static_cast(self->eager_tensor.impl().get()); + static_cast(self->tensor.impl().get()); auto dst_tensor = static_cast(dst_ptr->impl().get()); res = dst_tensor->IsSharedBufferWith(*self_ptr); @@ -309,59 +301,58 @@ static PyObject* eager_tensor__is_shared_buffer_with(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor__share_underline_tensor_to( - EagerTensorObject* self, PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor__share_underline_tensor_to(TensorObject* self, + PyObject* args, + PyObject* kwargs) { EAGER_SYNC_TRY - egr::EagerTensor* src_ptr = - &(reinterpret_cast(PyTuple_GET_ITEM(args, 0)) - ->eager_tensor); - PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true, + paddle::experimental::Tensor* src_ptr = + &(reinterpret_cast(PyTuple_GET_ITEM(args, 0))->tensor); + PADDLE_ENFORCE_EQ(self->tensor.initialized(), true, platform::errors::InvalidArgument( "Tensor %s has not been initialized! please initialize " "src tensor before share_buffer_with to other.", - self->eager_tensor.name())); - src_ptr->set_impl(self->eager_tensor.impl()); + self->tensor.name())); + src_ptr->set_impl(self->tensor.impl()); Py_INCREF(Py_None); return Py_None; EAGER_CATCH_AND_THROW_RETURN_NULL } static PyObject* eager_tensor__is_shared_underline_tensor_with( - EagerTensorObject* self, PyObject* args, PyObject* kwargs) { + TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY - egr::EagerTensor src_tensor = - CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0); + paddle::experimental::Tensor src_tensor = + CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0); PADDLE_ENFORCE_EQ(src_tensor.initialized(), true, platform::errors::InvalidArgument( "Tensor %s has not been initialized! 
please initialize " "src tensor before share_buffer_with to other.", src_tensor.name())); bool res = false; - if (!self->eager_tensor.defined() || !src_tensor.defined()) { + if (!self->tensor.defined() || !src_tensor.defined()) { return ToPyObject(res); } - res = (self->eager_tensor.impl().get() == src_tensor.impl().get()); + res = (self->tensor.impl().get() == src_tensor.impl().get()); return ToPyObject(res); EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor_method_detach(EagerTensorObject* self, - PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor_method_detach(TensorObject* self, PyObject* args, + PyObject* kwargs) { EAGER_SYNC_TRY PADDLE_ENFORCE_EQ( - self->eager_tensor.initialized(), true, + self->tensor.initialized(), true, platform::errors::InvalidArgument("Tensor %s has not been initialized!", - self->eager_tensor.name())); + self->tensor.name())); - PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0); + PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0); if (obj) { - auto v = reinterpret_cast(obj); - new (&(v->eager_tensor)) egr::EagerTensor(); - v->eager_tensor.set_impl(self->eager_tensor.impl()); - v->eager_tensor.set_name(egr::Controller::Instance().GenerateUniqueName()); - auto autograd_meta_src = - egr::EagerUtils::autograd_meta(&(self->eager_tensor)); - auto autograd_meta = egr::EagerUtils::autograd_meta(&(v->eager_tensor)); + auto v = reinterpret_cast(obj); + new (&(v->tensor)) paddle::experimental::Tensor(); + v->tensor.set_impl(self->tensor.impl()); + v->tensor.set_name(egr::Controller::Instance().GenerateUniqueName()); + auto autograd_meta_src = egr::EagerUtils::autograd_meta(&(self->tensor)); + auto autograd_meta = egr::EagerUtils::autograd_meta(&(v->tensor)); autograd_meta->SetPersistable(autograd_meta_src->Persistable()); } else { PADDLE_THROW(platform::errors::Fatal( @@ -372,12 +363,13 @@ static PyObject* eager_tensor_method_detach(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -static PyObject* eager_tensor_method_get_underline_tensor( - EagerTensorObject* self, PyObject* args, PyObject* kwargs) { +static PyObject* eager_tensor_method_get_underline_tensor(TensorObject* self, + PyObject* args, + PyObject* kwargs) { EAGER_SYNC_TRY - if (self->eager_tensor.is_dense_tensor()) { - auto* tensor = static_cast( - self->eager_tensor.impl().get()); + if (self->tensor.is_dense_tensor()) { + auto* tensor = + static_cast(self->tensor.impl().get()); VLOG(6) << "tensor: " << tensor->IsInitialized(); return ToPyObject(tensor); } else { @@ -388,11 +380,11 @@ static PyObject* eager_tensor_method_get_underline_tensor( } // NOTE(wuweilong): Set value and not change self's original place -static PyObject* eager_tensor_method_set_value(EagerTensorObject* self, +static PyObject* eager_tensor_method_set_value(TensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY - VLOG(4) << "Value " << self->eager_tensor.name(); + VLOG(4) << "Value " << self->tensor.name(); pybind11::object numpy_value = pybind11::object(pybind11::handle(PyTuple_GET_ITEM(args, 0)), true); InitEagerTensorWithNumpyValue(self, numpy_value, false); diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index b38c0eeaf96..ddf2cffbd64 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -73,10 +73,10 @@ const char* OUT_VAR_TYPE = R"(std::shared_ptr)"; const char* OUT_VAR_LIST_TYPE = R"(std::vector>)"; const char* 
CAST_VAR_TEMPLATE = R"( - auto& %s = GetEagerTensorFromArgs("%s", "%s", args, %d, %s);)"; + auto& %s = GetTensorFromArgs("%s", "%s", args, %d, %s);)"; const char* CAST_VAR_LIST_TEMPLATE = R"( - auto %s = GetEagerTensorListFromArgs("%s", "%s", args, %d, %s);)"; + auto %s = GetTensorListFromArgs("%s", "%s", args, %d, %s);)"; const char* CAST_VAR_PTR_TEMPLATE = R"( auto %s = GetEagerTensorPtrFromArgs("%s", "%s", args, %d, %s);)"; diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index d8dac3c6287..9fb0941fa36 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -33,19 +33,17 @@ limitations under the License. */ namespace paddle { namespace pybind { -extern PyTypeObject* p_eager_tensor_type; +extern PyTypeObject* p_tensor_type; -PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, - void* closure) { +PyObject* eager_tensor_properties_get_name(TensorObject* self, void* closure) { EAGER_SYNC_TRY - return ToPyObject(self->eager_tensor.name()); + return ToPyObject(self->tensor.name()); EAGER_CATCH_AND_THROW_RETURN_NULL } -PyObject* eager_tensor_properties_get_type(EagerTensorObject* self, - void* closure) { +PyObject* eager_tensor_properties_get_type(TensorObject* self, void* closure) { EAGER_SYNC_TRY - if (self->eager_tensor.is_dense_tensor()) { + if (self->tensor.is_dense_tensor()) { return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR); } else { Py_INCREF(Py_None); @@ -54,28 +52,27 @@ PyObject* eager_tensor_properties_get_type(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, +int eager_tensor_properties_set_name(TensorObject* self, PyObject* value, void* closure) { EAGER_SYNC_TRY - self->eager_tensor.set_name(CastPyArg2AttrString(value, 0)); + self->tensor.set_name(CastPyArg2AttrString(value, 0)); return 0; EAGER_CATCH_AND_THROW_RETURN_ZERO } -PyObject* eager_tensor_properties_get_stop_gradient(EagerTensorObject* self, +PyObject* eager_tensor_properties_get_stop_gradient(TensorObject* self, void* closure) { EAGER_SYNC_TRY - auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + auto meta = egr::EagerUtils::autograd_meta(&self->tensor); return ToPyObject(meta->StopGradient()); EAGER_CATCH_AND_THROW_RETURN_NULL } -PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, - void* closure) { +PyObject* eager_tensor_properties_get_grad(TensorObject* self, void* closure) { EAGER_SYNC_TRY - if (egr::egr_utils_api::IsLeafTensor(self->eager_tensor)) { + if (egr::egr_utils_api::IsLeafTensor(self->tensor)) { std::shared_ptr grad_node = - egr::EagerUtils::grad_node(self->eager_tensor); + egr::EagerUtils::grad_node(self->tensor); PADDLE_ENFORCE( grad_node.get() != nullptr, paddle::platform::errors::Fatal("Detected NULL grad_node" @@ -85,8 +82,8 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, std::dynamic_pointer_cast(grad_node); return ToPyObject(*accumulation_grad_node->Grad()); } else { - VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name(); - auto meta = egr::EagerUtils::nullable_autograd_meta(self->eager_tensor); + VLOG(6) << "Get grad for tensor: " << self->tensor.name(); + auto meta = egr::EagerUtils::nullable_autograd_meta(self->tensor); if (meta) { return ToPyObject(meta->Grad()); } else { @@ -97,15 +94,15 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -int 
eager_tensor_properties_set_grad(EagerTensorObject* self, PyObject* value, +int eager_tensor_properties_set_grad(TensorObject* self, PyObject* value, void* closure) { EAGER_SYNC_TRY - auto src = CastPyArg2EagerTensor(value, 0); + auto src = CastPyArg2Tensor(value, 0); PADDLE_ENFORCE( - egr::egr_utils_api::IsLeafTensor(self->eager_tensor), + egr::egr_utils_api::IsLeafTensor(self->tensor), paddle::platform::errors::Fatal("Only leaf Tensor can be set grad.")); std::shared_ptr grad_node = - egr::EagerUtils::grad_node(self->eager_tensor); + egr::EagerUtils::grad_node(self->tensor); PADDLE_ENFORCE( grad_node.get() != nullptr, paddle::platform::errors::Fatal("Detected NULL grad_node" @@ -118,36 +115,35 @@ int eager_tensor_properties_set_grad(EagerTensorObject* self, PyObject* value, EAGER_CATCH_AND_THROW_RETURN_ZERO } -int eager_tensor_properties_set_stop_gradient(EagerTensorObject* self, +int eager_tensor_properties_set_stop_gradient(TensorObject* self, PyObject* value, void* closure) { EAGER_SYNC_TRY - auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + auto meta = egr::EagerUtils::autograd_meta(&self->tensor); meta->SetStopGradient(CastPyArg2AttrBoolean(value, 0)); return 0; EAGER_CATCH_AND_THROW_RETURN_ZERO } -PyObject* eager_tensor_properties_get_persistable(EagerTensorObject* self, +PyObject* eager_tensor_properties_get_persistable(TensorObject* self, void* closure) { EAGER_SYNC_TRY - auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + auto meta = egr::EagerUtils::autograd_meta(&self->tensor); return ToPyObject(meta->Persistable()); EAGER_CATCH_AND_THROW_RETURN_NULL } -int eager_tensor_properties_set_persistable(EagerTensorObject* self, - PyObject* value, void* closure) { +int eager_tensor_properties_set_persistable(TensorObject* self, PyObject* value, + void* closure) { EAGER_SYNC_TRY - auto meta = egr::EagerUtils::autograd_meta(&self->eager_tensor); + auto meta = egr::EagerUtils::autograd_meta(&self->tensor); meta->SetPersistable(CastPyArg2AttrBoolean(value, 0)); return 0; EAGER_CATCH_AND_THROW_RETURN_ZERO } -PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, - void* closure) { +PyObject* eager_tensor_properties_get_shape(TensorObject* self, void* closure) { EAGER_SYNC_TRY - auto ddim = self->eager_tensor.shape(); + auto ddim = self->tensor.shape(); std::vector value; size_t rank = static_cast(ddim.size()); value.resize(rank); @@ -159,26 +155,24 @@ PyObject* eager_tensor_properties_get_shape(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } -PyObject* eager_tensor_properties_get_place(EagerTensorObject* self, - void* closure) { +PyObject* eager_tensor_properties_get_place(TensorObject* self, void* closure) { EAGER_SYNC_TRY - return ToPyObject(self->eager_tensor.place()); + return ToPyObject(self->tensor.inner_place()); EAGER_CATCH_AND_THROW_RETURN_NULL } -PyObject* eager_tensor_properties_get_place_str(EagerTensorObject* self, +PyObject* eager_tensor_properties_get_place_str(TensorObject* self, void* closure) { EAGER_SYNC_TRY std::stringstream ostr; - ostr << self->eager_tensor.place(); + ostr << self->tensor.inner_place(); return ToPyObject(ostr.str()); EAGER_CATCH_AND_THROW_RETURN_NULL } -PyObject* eager_tensor_properties_get_dtype(EagerTensorObject* self, - void* closure) { +PyObject* eager_tensor_properties_get_dtype(TensorObject* self, void* closure) { EAGER_SYNC_TRY - return ToPyObject(pten::TransToProtoVarType(self->eager_tensor.type())); + return ToPyObject(pten::TransToProtoVarType(self->tensor.type())); 
EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 0f21bd4ae37..1a6bd9f35aa 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -30,7 +30,7 @@ limitations under the License. */ namespace paddle { namespace pybind { -extern PyTypeObject* p_eager_tensor_type; +extern PyTypeObject* p_tensor_type; extern PyTypeObject* g_vartype_pytype; extern PyTypeObject* g_place_pytype; @@ -173,10 +173,9 @@ std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos) { } } -egr::EagerTensor CastPyArg2EagerTensor(PyObject* obj, ssize_t arg_pos) { - if (PyObject_IsInstance(obj, - reinterpret_cast(p_eager_tensor_type))) { - return reinterpret_cast(obj)->eager_tensor; +paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos) { + if (PyObject_IsInstance(obj, reinterpret_cast(p_tensor_type))) { + return reinterpret_cast(obj)->tensor; } else { PADDLE_THROW(platform::errors::InvalidArgument( "argument (position %d) must be " @@ -185,18 +184,17 @@ egr::EagerTensor CastPyArg2EagerTensor(PyObject* obj, ssize_t arg_pos) { } } -std::vector CastPyArg2VectorOfEagerTensor(PyObject* obj, - ssize_t arg_pos) { - std::vector result; +std::vector CastPyArg2VectorOfTensor( + PyObject* obj, ssize_t arg_pos) { + std::vector result; if (PyList_Check(obj)) { Py_ssize_t len = PyList_Size(obj); PyObject* item = nullptr; for (Py_ssize_t i = 0; i < len; i++) { item = PyList_GetItem(obj, i); - if (PyObject_IsInstance( - item, reinterpret_cast(p_eager_tensor_type))) { - result.emplace_back( - reinterpret_cast(item)->eager_tensor); + if (PyObject_IsInstance(item, + reinterpret_cast(p_tensor_type))) { + result.emplace_back(reinterpret_cast(item)->tensor); } else { PADDLE_THROW(platform::errors::InvalidArgument( "argument (position %d) must be " @@ -210,10 +208,9 @@ std::vector CastPyArg2VectorOfEagerTensor(PyObject* obj, PyObject* item = nullptr; for (Py_ssize_t i = 0; i < len; i++) { item = PyTuple_GetItem(obj, i); - if (PyObject_IsInstance( - item, reinterpret_cast(p_eager_tensor_type))) { - result.emplace_back( - reinterpret_cast(item)->eager_tensor); + if (PyObject_IsInstance(item, + reinterpret_cast(p_tensor_type))) { + result.emplace_back(reinterpret_cast(item)->tensor); } else { PADDLE_THROW(platform::errors::InvalidArgument( "argument (position %d) must be " @@ -317,8 +314,8 @@ framework::Tensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { } } -std::vector CastPyArg2VectorOfTensor(PyObject* obj, - ssize_t arg_pos) { +std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, + ssize_t arg_pos) { std::vector result; if (PyList_Check(obj)) { Py_ssize_t len = PyList_Size(obj); @@ -408,12 +405,12 @@ PyObject* ToPyObject(const std::string& value) { return PyUnicode_FromString(value.c_str()); } -PyObject* ToPyObject(const egr::EagerTensor& value) { - PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0); +PyObject* ToPyObject(const paddle::experimental::Tensor& value) { + PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0); if (obj) { - auto v = reinterpret_cast(obj); - new (&(v->eager_tensor)) egr::EagerTensor(); - v->eager_tensor = value; + auto v = reinterpret_cast(obj); + new (&(v->tensor)) paddle::experimental::Tensor(); + v->tensor = value; } else { PADDLE_THROW(platform::errors::Fatal( "tp_alloc return null, can not new a PyObject.")); @@ -471,15 +468,15 @@ PyObject* ToPyObject(const std::vector& value) { return result; } -PyObject* ToPyObject(const 
std::vector& value) { +PyObject* ToPyObject(const std::vector& value) { PyObject* result = PyList_New((Py_ssize_t)value.size()); for (size_t i = 0; i < value.size(); i++) { - PyObject* obj = p_eager_tensor_type->tp_alloc(p_eager_tensor_type, 0); + PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0); if (obj) { - auto v = reinterpret_cast(obj); - new (&(v->eager_tensor)) egr::EagerTensor(); - v->eager_tensor = value[i]; + auto v = reinterpret_cast(obj); + new (&(v->tensor)) paddle::experimental::Tensor(); + v->tensor = value[i]; } else { PADDLE_THROW(platform::errors::Fatal( "tp_alloc return null, can not new a PyObject.")); @@ -558,10 +555,10 @@ PyObject* ToPyObject( return dict; } -egr::EagerTensor& GetEagerTensorFromArgs(const std::string& op_type, - const std::string& arg_name, - PyObject* args, ssize_t arg_idx, - bool dispensable) { +paddle::experimental::Tensor& GetTensorFromArgs(const std::string& op_type, + const std::string& arg_name, + PyObject* args, ssize_t arg_idx, + bool dispensable) { PyObject* obj = PyTuple_GET_ITEM(args, arg_idx); if (PyTuple_Check(obj)) { @@ -574,14 +571,14 @@ egr::EagerTensor& GetEagerTensorFromArgs(const std::string& op_type, "%s(): argument '%s' (position %d) must be Tensor, but got None", op_type, arg_name, arg_idx)); } - static egr::EagerTensor emptytensor; + static paddle::experimental::Tensor emptytensor; return emptytensor; } - return reinterpret_cast(obj)->eager_tensor; + return reinterpret_cast(obj)->tensor; } -std::vector GetEagerTensorListFromArgs( +std::vector GetTensorListFromArgs( const std::string& op_type, const std::string& arg_name, PyObject* args, ssize_t arg_idx, bool dispensable) { PyObject* list = PyTuple_GET_ITEM(args, arg_idx); @@ -596,7 +593,7 @@ std::vector GetEagerTensorListFromArgs( return {}; } - std::vector result; + std::vector result; if (PyList_Check(list)) { Py_ssize_t len = PyList_Size(list); @@ -608,8 +605,7 @@ std::vector GetEagerTensorListFromArgs( } for (Py_ssize_t i = 0; i < len; i++) { result.emplace_back( - reinterpret_cast(PyList_GetItem(list, i)) - ->eager_tensor); + reinterpret_cast(PyList_GetItem(list, i))->tensor); } } else if (PyTuple_Check(list)) { Py_ssize_t len = PyTuple_Size(list); @@ -621,8 +617,7 @@ std::vector GetEagerTensorListFromArgs( } for (Py_ssize_t i = 0; i < len; i++) { result.emplace_back( - reinterpret_cast(PyTuple_GetItem(list, i)) - ->eager_tensor); + reinterpret_cast(PyTuple_GetItem(list, i))->tensor); } } else if (list == Py_None) { return {}; @@ -637,10 +632,9 @@ std::vector GetEagerTensorListFromArgs( return result; } -egr::EagerTensor* GetEagerTensorPtrFromArgs(const std::string& op_type, - const std::string& arg_name, - PyObject* args, ssize_t arg_idx, - bool dispensable) { +paddle::experimental::Tensor* GetEagerTensorPtrFromArgs( + const std::string& op_type, const std::string& arg_name, PyObject* args, + ssize_t arg_idx, bool dispensable) { PyObject* obj = PyTuple_GET_ITEM(args, arg_idx); if (PyTuple_Check(obj)) { @@ -653,14 +647,14 @@ egr::EagerTensor* GetEagerTensorPtrFromArgs(const std::string& op_type, "%s(): argument '%s' (position %d) must be Tensor, but got None", op_type, arg_name, arg_idx)); } - static egr::EagerTensor emptytensor; + static paddle::experimental::Tensor emptytensor; return &emptytensor; } - return &(reinterpret_cast(obj)->eager_tensor); + return &(reinterpret_cast(obj)->tensor); } -std::vector GetEagerTensorPtrListFromArgs( +std::vector GetEagerTensorPtrListFromArgs( const std::string& op_type, const std::string& arg_name, PyObject* args, ssize_t 
arg_idx, bool dispensable) { PyObject* list = PyTuple_GET_ITEM(args, arg_idx); @@ -675,7 +669,7 @@ std::vector GetEagerTensorPtrListFromArgs( return {}; } - std::vector result; + std::vector result; if (PyList_Check(list)) { Py_ssize_t len = PyList_Size(list); @@ -687,8 +681,7 @@ std::vector GetEagerTensorPtrListFromArgs( } for (Py_ssize_t i = 0; i < len; i++) { result.emplace_back( - &(reinterpret_cast(PyList_GetItem(list, i)) - ->eager_tensor)); + &(reinterpret_cast(PyList_GetItem(list, i))->tensor)); } } else if (PyTuple_Check(list)) { Py_ssize_t len = PyTuple_Size(list); @@ -700,8 +693,7 @@ std::vector GetEagerTensorPtrListFromArgs( } for (Py_ssize_t i = 0; i < len; i++) { result.emplace_back( - &(reinterpret_cast(PyTuple_GetItem(list, i)) - ->eager_tensor)); + &(reinterpret_cast(PyTuple_GetItem(list, i))->tensor)); } } else if (list == Py_None) { return {}; diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 8bec57bfefb..ead9f474f67 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -19,8 +19,8 @@ namespace paddle { namespace pybind { typedef struct { - PyObject_HEAD egr::EagerTensor eager_tensor; -} EagerTensorObject; + PyObject_HEAD paddle::experimental::Tensor tensor; +} TensorObject; int TensorDtype2NumpyDtype(pten::DataType dtype); @@ -32,13 +32,13 @@ int CastPyArg2AttrInt(PyObject* obj, ssize_t arg_pos); int64_t CastPyArg2AttrLong(PyObject* obj, ssize_t arg_pos); float CastPyArg2AttrFloat(PyObject* obj, ssize_t arg_pos); std::string CastPyArg2AttrString(PyObject* obj, ssize_t arg_pos); -egr::EagerTensor CastPyArg2EagerTensor(PyObject* obj, ssize_t arg_pos); -std::vector CastPyArg2VectorOfEagerTensor(PyObject* obj, - ssize_t arg_pos); +paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos); +std::vector CastPyArg2VectorOfTensor( + PyObject* obj, ssize_t arg_pos); platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos); framework::Tensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos); -std::vector CastPyArg2VectorOfTensor(PyObject* obj, - ssize_t arg_pos); +std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, + ssize_t arg_pos); std::vector CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos); framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj, ssize_t arg_pos); @@ -49,13 +49,13 @@ PyObject* ToPyObject(float value); PyObject* ToPyObject(double value); PyObject* ToPyObject(const char* value); PyObject* ToPyObject(const std::string& value); -PyObject* ToPyObject(const egr::EagerTensor& value); +PyObject* ToPyObject(const paddle::experimental::Tensor& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); -PyObject* ToPyObject(const std::vector& value); +PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const platform::Place& value); PyObject* ToPyObject(const framework::LoDTensor* value); PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype); @@ -89,19 +89,18 @@ PyObject* ToPyObject(const std::tuple& out) { return result; } -egr::EagerTensor& GetEagerTensorFromArgs(const std::string& op_type, - const std::string& arg_name, - PyObject* args, ssize_t arg_idx, - bool dispensable = false); -std::vector GetEagerTensorListFromArgs( +paddle::experimental::Tensor& GetTensorFromArgs(const std::string& op_type, + const 
std::string& arg_name, + PyObject* args, ssize_t arg_idx, + bool dispensable = false); +std::vector GetTensorListFromArgs( const std::string& op_type, const std::string& arg_name, PyObject* args, ssize_t arg_idx, bool dispensable = false); -egr::EagerTensor* GetEagerTensorPtrFromArgs(const std::string& op_type, - const std::string& arg_name, - PyObject* args, ssize_t arg_idx, - bool dispensable = false); -std::vector GetEagerTensorPtrListFromArgs( +paddle::experimental::Tensor* GetEagerTensorPtrFromArgs( + const std::string& op_type, const std::string& arg_name, PyObject* args, + ssize_t arg_idx, bool dispensable = false); +std::vector GetEagerTensorPtrListFromArgs( const std::string& op_type, const std::string& arg_name, PyObject* args, ssize_t arg_idx, bool dispensable = false); diff --git a/paddle/fluid/pybind/exception.h b/paddle/fluid/pybind/exception.h index 390fd56f8ee..7e44841e670 100644 --- a/paddle/fluid/pybind/exception.h +++ b/paddle/fluid/pybind/exception.h @@ -19,12 +19,7 @@ limitations under the License. */ #include "pybind11/pybind11.h" #define EAGER_TRY try { -#define EAGER_SYNC_TRY \ - try { \ - if (self->eager_tensor.Var().IsInitialized()) { \ - self->eager_tensor.SyncToTensor(); \ - } - +#define EAGER_SYNC_TRY try { #define EAGER_CATCH_AND_THROW_RETURN_NULL \ } \ catch (...) { \ diff --git a/paddle/pten/api/include/tensor.h b/paddle/pten/api/include/tensor.h index e93b9be7046..900de42bbac 100644 --- a/paddle/pten/api/include/tensor.h +++ b/paddle/pten/api/include/tensor.h @@ -130,6 +130,14 @@ class PADDLE_API Tensor final { */ Tensor(const PlaceType& place, const std::vector& shape); + /** + * @brief Construct a new Tensor object by a TensorBase pointer and name + * + * @param tensor_impl + */ + Tensor(std::shared_ptr tensor_impl, + const std::string& name); + /** * @brief Construct a new Tensor object with name * diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index ef84e2a8c81..f1a54ee960d 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -90,6 +90,9 @@ Tensor::Tensor(const PlaceType &place, const std::vector &shape) pten::DataLayout::NCHW))))), place_{place} {} +Tensor::Tensor(std::shared_ptr tensor_impl, + const std::string &name) + : impl_(std::move(tensor_impl)), name_(std::move(name)) {} /* Part 2: Dimension, DataType and DataLayout methods */ int64_t Tensor::numel() const { return impl_->numel(); } @@ -377,12 +380,16 @@ void Tensor::reset() { impl_.reset(); } Tensor &Tensor::operator=(const Tensor &x) & { impl_ = x.impl_; autograd_meta_ = x.autograd_meta_; + name_ = x.name_; + place_ = x.place_; return *this; } Tensor &Tensor::operator=(Tensor &&x) & { impl_ = std::move(x.impl_); autograd_meta_ = std::move(x.autograd_meta_); + name_ = std::move(x.name_); + place_ = std::move(x.place_); return *this; } diff --git a/paddle/pten/core/selected_rows.h b/paddle/pten/core/selected_rows.h index 2f224e42ea0..58256f2ee61 100644 --- a/paddle/pten/core/selected_rows.h +++ b/paddle/pten/core/selected_rows.h @@ -31,6 +31,9 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/memory/memcpy.h" +namespace egr { +class EagerTensor; +} // namespace egr namespace pten { class SelectedRows : public TensorBase, public TypeInfoTraits { @@ -197,6 +200,39 @@ class SelectedRows : public TensorBase, std::unique_ptr value_{nullptr}; int64_t height_; // height indicates the underline tensor's height std::unique_ptr rwlock_{nullptr}; + // TODO(jiabin): Remove this when we don't need EagerTensor support + // SelectedRows which is expected in next version. + /** Why we need this weird friend class? + * In eager mode, since some of ops doesn't support C++ API for now we need to + *use 'imperative::TraceOp' to run it. + * So, we need to support get a SelectedRows from egr::EagerTensor's + *framework::Variable obj and used it to reconstruct + * a new paddle::experimental::Tensor to support framework usage. However, we + *got 2 problems here. + * First, we got 2 unique_ptr in SelectedRows so that we can't support + *std::make_shared in EagerTensor's SetImplWithSelectedRows method, + * since we have to construct a shared_ptr for paddle::experimental::Tensor's + *impl. + * Second, when we are trying to support move constructor for SelectedRows we + *found that we can't get its rvalue from + * framework::Variable because it holds an obj of target type. + * + * + * The only three way to solve this problem is: + * 1. Just like what we have done, using friend class and just copy/move each + *member. In this way, we can avoid additional API + * and symbols. + * 2. Make pten::SelectedRows's member from unique_ptr to shared_ptr. However, + *this may cause some cost of performance. + * 3. Add some api to return or move member of framework::SelectedRows. + *However, it's not as safe as first solution. + * 4. Support all framework::SelectedRows related ops and make sure + *EagerTensor never holds framework::SelectedRows. + * + * If anyone got better ideas, welcome to contact JiabinYang, we are open for + *your help. 
+ **/ + friend class egr::EagerTensor; }; } // namespace pten diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py index f31edf142b2..a612a401371 100644 --- a/python/paddle/fluid/dygraph/tracer.py +++ b/python/paddle/fluid/dygraph/tracer.py @@ -107,23 +107,22 @@ class Tracer(core.Tracer): # Replaced outputs by function returns if isinstance(returns[i], list): for j in range(len(returns[i])): - outputs[retname][j].reconstruct_from_( - returns[i][j], False) + outputs[retname][j].reconstruct_from_(returns[i] + [j]) else: - outputs[retname][0].reconstruct_from_(returns[i], - False) + outputs[retname][0].reconstruct_from_(returns[i]) elif isinstance(returns, list): assert len(outputs.keys()) == 1 key = list(outputs.keys())[0] for j in range(len(returns)): - outputs[key][j].reconstruct_from_(returns[j], False) + outputs[key][j].reconstruct_from_(returns[j]) else: assert len(outputs.keys()) == 1 key = list(outputs.keys())[0] if isinstance(outputs[key], list): - outputs[key][0].reconstruct_from_(returns, False) + outputs[key][0].reconstruct_from_(returns) else: - outputs[key].reconstruct_from_(returns, False) + outputs[key].reconstruct_from_(returns) else: self.trace(type, inputs, outputs, attrs, framework._current_expected_place(), self._has_grad and diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 511aa7e06c4..f11f894970d 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -897,7 +897,7 @@ endif() # setting timeout value as 15S set_tests_properties(test_sync_batch_norm_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cross_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_lod_tensor_to_selected_rows PROPERTIES TIMEOUT 120) +set_tests_properties(test_imperative_lod_tensor_to_selected_rows PROPERTIES TIMEOUT 200) set_tests_properties(test_lstm_op PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_star_gan_with_gradient_penalty PROPERTIES TIMEOUT 120) set_tests_properties(test_warpctc_op PROPERTIES TIMEOUT 120) @@ -1000,7 +1000,7 @@ set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 120) set_tests_properties(test_masked_select_op PROPERTIES TIMEOUT 120) set_tests_properties(test_sigmoid_cross_entropy_with_logits_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 120) +set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 150) set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cond PROPERTIES TIMEOUT 120) set_tests_properties(test_space_to_depth_op PROPERTIES TIMEOUT 200) -- GitLab
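Editor's note on the pybind changes above: they converge on one pattern, a TensorObject that embeds a paddle::experimental::Tensor plus ToPyObject / CastPyArg2Tensor helpers that move tensors across the C++/Python boundary. The condensed sketch below restates that pattern outside the diff for readability; it mirrors the code in paddle/fluid/pybind/eager_utils.h and eager_utils.cc but drops error handling and the dispensable-argument paths, so treat it as illustrative rather than the actual implementation.

#include <Python.h>
#include "paddle/pten/api/include/tensor.h"

namespace paddle {
namespace pybind {

// The Python object now wraps paddle::experimental::Tensor directly,
// replacing the old egr::EagerTensor member.
typedef struct {
  PyObject_HEAD paddle::experimental::Tensor tensor;
} TensorObject;

extern PyTypeObject* p_tensor_type;  // the registered eager Tensor type

// C++ -> Python: allocate a TensorObject and placement-new a Tensor into it.
PyObject* ToPyObject(const paddle::experimental::Tensor& value) {
  PyObject* obj = p_tensor_type->tp_alloc(p_tensor_type, 0);
  if (obj != nullptr) {
    auto* v = reinterpret_cast<TensorObject*>(obj);
    new (&(v->tensor)) paddle::experimental::Tensor();
    v->tensor = value;
  }
  return obj;  // the real helper throws a Fatal error if tp_alloc returns null
}

// Python -> C++: check the type, then read the embedded Tensor back out.
paddle::experimental::Tensor CastPyArg2Tensor(PyObject* obj, ssize_t arg_pos) {
  if (PyObject_IsInstance(obj, reinterpret_cast<PyObject*>(p_tensor_type))) {
    return reinterpret_cast<TensorObject*>(obj)->tensor;
  }
  (void)arg_pos;  // the real helper raises InvalidArgument mentioning arg_pos
  return paddle::experimental::Tensor();
}

}  // namespace pybind
}  // namespace paddle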
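The patch also adds a Tensor constructor to paddle/pten/api/include/tensor.h and tensor.cc that takes an impl pointer plus a name in one step. The parameter's template argument is truncated in this copy of the diff; the sketch below assumes it is std::shared_ptr<pten::TensorBase>, and MakeNamedDense is a hypothetical helper written only to show the call, not part of the patch.

#include <memory>
#include <string>
#include <utility>
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper: wrap an existing pten impl into a named Tensor.
// Assumes the new constructor is
//   Tensor(std::shared_ptr<pten::TensorBase> tensor_impl, const std::string& name);
paddle::experimental::Tensor MakeNamedDense(
    std::shared_ptr<pten::DenseTensor> dense, const std::string& name) {
  // Before this constructor existed one would call set_impl() and set_name()
  // separately; the two-argument form does both at construction time.
  return paddle::experimental::Tensor(std::move(dense), name);
}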
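The long comment added to paddle/pten/core/selected_rows.h argues that, because SelectedRows owns unique_ptr members and cannot be moved out of a framework::Variable, the pragmatic fix is a friend declaration so egr::EagerTensor can rebuild a shared-ownership copy member by member. The toy example below shows the shape of that trade-off; Holder, Inner, and Rebuilder are made-up names standing in for the Paddle types, not code from the patch.

#include <memory>

struct Inner {
  int data = 0;
};

class Holder {
 public:
  Holder() = default;
  int value() const { return inner_ ? inner_->data : 0; }

 private:
  // A unique_ptr member implicitly deletes the copy constructor, just as
  // SelectedRows' value_ and rwlock_ members do.
  std::unique_ptr<Inner> inner_ = std::make_unique<Inner>();

  // Analogous to `friend class egr::EagerTensor;` in selected_rows.h.
  friend class Rebuilder;
};

class Rebuilder {
 public:
  // With friendship, a shared-ownership copy can be rebuilt member by member
  // from a const reference, even though Holder itself is non-copyable and
  // cannot be moved out of the container that owns it.
  static std::shared_ptr<Holder> Clone(const Holder& src) {
    auto dst = std::make_shared<Holder>();
    if (src.inner_) {
      dst->inner_ = std::make_unique<Inner>(*src.inner_);
    }
    return dst;
  }
};

int main() {
  Holder h;
  auto copy = Rebuilder::Clone(h);
  return copy->value();
}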