未验证 提交 afadb8c5 编写于 作者: Z Zhanlue Yang 提交者: GitHub

[DoubleGrad PR #5] Enabled gradient computations for grad_tensors passed to paddle.grad() (#41198)

* [Refactor] refactored eager_gen.py PR #2

* [DoubleGrad PR #1] Decoupled code generation logics for Dygraph ForwardFunctions and GradNodes

* Fixed minor issue

* Adjusted logics of GenerateNodeCreationCodes and GenerateForwardDefinition

* Fixed issues

* Supported higher-order grad node generation

* [DoubleGrad PR #4] Supported higher-order GradNode generation

* [DoubleGrad #4] Bug Fixes to Double Grad Node Generation

* Fixed yaml typo

* Fixed yaml typo

* fixed minor issues

* [DoubleGrad PR #5] Enabled gradient computations for grad_tensors passed to paddle.grad()

* Fixed minor issue

* Fixed CI-Inference issue

* Fixed CI-inference issues
上级 56f108ff
......@@ -13,12 +13,16 @@ add_subdirectory(accumulation)
add_subdirectory(custom_operator)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_subdirectory(pylayer)
cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator)
add_dependencies(grad_tensor_holder eager_final_state_codegen)
cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
endif()
cc_library(grad_node_info SRCS grad_node_info.cc DEPS phi_api phi_tensor)
cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator)
cc_library(autograd_meta SRCS autograd_meta.cc DEPS phi_api phi_tensor)
cc_library(utils SRCS utils.cc DEPS phi_api phi_tensor global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta hook_utils)
cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
add_subdirectory(tests)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_subdirectory(tests)
endif()
......@@ -76,6 +76,7 @@ void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
// Simply Copy impl() to grad_tensor
grad_tensor->set_impl(t.impl());
grad_tensor->set_autograd_meta(t.mutable_autograd_meta());
return *grad_tensor.get();
} else {
VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
......
......@@ -466,6 +466,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
continue;
}
// TODO(zhanlve): Copy and Modify GradNode if is_general_grad
GradNodeBase* grad_node = shared_grad_node.get();
// Prepare GradTensorHolder
......@@ -486,16 +487,9 @@ std::vector<paddle::experimental::Tensor> RunBackward(
// Feed given tensor if it's provided
VLOG(6) << "Fill grad input tensor " << i << "with give grad tensor";
if (grad_tensors[i].is_initialized()) {
// Deep copy
paddle::experimental::Tensor tmp_tensor;
tmp_tensor.copy_(grad_tensors[i], grad_tensors[i].inner_place(), false);
node_input_buffers_dict[grad_node]->add(input_info.first,
input_info.second, tmp_tensor);
} else {
node_input_buffers_dict[grad_node]->add(
node_input_buffers_dict[grad_node]->CopyValueFromTensor(
input_info.first, input_info.second, grad_tensors[i]);
}
} else {
VLOG(6) << "Fill grad input tensor " << i << " with 1.0";
......@@ -504,7 +498,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
// dims
// GradTensorHolder will initialize another tensor with same tensortype,
// datatype and dims but filled with 1.0
node_input_buffers_dict[grad_node]->add(
node_input_buffers_dict[grad_node]->CopyValueFromTensor(
input_info.first, input_info.second, tensor, true /*fill_one=true*/);
}
......@@ -686,6 +680,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
}
}
}
if (!is_general_grad) return {};
return GeneralGrad::Instance().GetResults(inputs, allow_unused, create_graph);
}
......
......@@ -15,6 +15,7 @@
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......@@ -26,8 +27,8 @@ void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
paddle::experimental::zeros_like(buffer_[slot_id][rank]);
}
void GradTensorHolder::add(size_t slot_id, size_t rank,
const paddle::experimental::Tensor& t,
void GradTensorHolder::CopyValueFromTensor(
size_t slot_id, size_t rank, const paddle::experimental::Tensor& t,
bool fill_one) {
// TODO(jiabin): We need to deal with empty input_buffer with slot size not
// empty;
......@@ -49,6 +50,48 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
"of buffer slot %d, got slot size is: %d rank is: %d",
slot_id, buffer_[slot_id].size(), rank));
if (!fill_one) {
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
// Perform deep copy here
buffer_tensor.copy_(t, t.inner_place(), false);
buffer_tensor.set_autograd_meta(t.mutable_autograd_meta());
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Cannot copy grad_tensors' value to grad tensor holders,"
"input buffer has already been initialized."));
}
} else {
// Create new tensor->impl and fill it with 1.0
if (t.defined()) {
// Fill 1.0
buffer_[slot_id][rank] = paddle::experimental::ones_like(t, t.dtype());
}
}
}
void GradTensorHolder::add(size_t slot_id, size_t rank,
const paddle::experimental::Tensor& t) {
// TODO(jiabin): We need to deal with empty input_buffer with slot size not
// empty;
PADDLE_ENFORCE(slot_id < buffer_.size(),
paddle::platform::errors::Fatal(
"Invalid slot_id for GradTensorHolder::add() "
"which exceeds size of buffer"));
VLOG(6) << "Add Tensor for buffer_ slot: " << slot_id
<< ", size: " << buffer_[slot_id].size();
if (buffer_[slot_id].empty()) {
VLOG(6) << "Pass add Tensor for buffer_ slot: " << slot_id
<< " since its buffer_ is empty ";
return;
}
PADDLE_ENFORCE(
rank < buffer_[slot_id].size(),
paddle::platform::errors::Fatal(
"Invalid rank for GradTensorHolder::add() which exceeds size "
"of buffer slot %d, got slot size is: %d rank is: %d",
slot_id, buffer_[slot_id].size(), rank));
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
// TODO(jiabin): Code bellow is ugly to divide which inner var we used,
// remove framework::Variable
......@@ -68,10 +111,12 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
t.name()));
if (t.is_dense_tensor()) {
if (buffer_tensor.is_dense_tensor()) {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
t, &buffer_tensor);
buffer_tensor = add_final_state_dygraph_function(t, buffer_tensor);
} else {
// TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with
// add_dygraph_function once it's supported
paddle::experimental::Tensor new_buffer(
std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
paddle::imperative::SelectedRowsAddTensor(buffer_tensor, t,
......@@ -80,6 +125,8 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
}
} else {
// TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function
// once it's supported
if (buffer_tensor.is_dense_tensor()) {
paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
} else {
......@@ -89,13 +136,6 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
}
}
}
} else {
// Create new tensor->impl and fill it with 1.0
if (t.defined()) {
// Fill 1.0
buffer_[slot_id][rank] = paddle::experimental::ones_like(t, t.dtype());
}
}
}
} // namespace egr
......@@ -45,7 +45,9 @@ class GradTensorHolder {
GradTensorHolder& operator=(const GradTensorHolder& other) = default;
// Create new tensor and copy tensor->impl
void add(size_t slot_id, size_t rank, const paddle::experimental::Tensor& t,
void add(size_t slot_id, size_t rank, const paddle::experimental::Tensor& t);
void CopyValueFromTensor(size_t slot_id, size_t rank,
const paddle::experimental::Tensor& t,
bool fill_one = false);
const std::vector<paddle::experimental::Tensor>& operator[](
......
add_subdirectory(data_structure_tests)
add_subdirectory(task_tests)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_subdirectory(performance_tests)
endif()
add_subdirectory(performance_tests)
cc_test(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS ${eager_deps})
cc_test(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS ${eager_deps})
cc_test(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS ${eager_deps})
cc_test(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc DEPS ${eager_deps})
cc_test(test_egr_ds_accumulation_node SRCS accumulation_node_test.cc DEPS ${eager_deps})
cc_test(test_egr_ds_tensor_wrapper SRCS tensor_wrapper_test.cc DEPS ${eager_deps})
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_test(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc DEPS ${eager_deps} ${generated_deps})
endif()
......@@ -25,6 +25,7 @@
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
// TODO(jiabin): remove nolint here!!!
using namespace egr; // NOLINT
......@@ -77,11 +78,11 @@ TEST(GradTensorHolder, Interfaces) {
// add():
// fill one
grad_tensor_holder.add(0, 0, et0, true);
grad_tensor_holder.CopyValueFromTensor(0, 0, et0, true);
// accumulation
grad_tensor_holder.add(1, 0, et0, false);
grad_tensor_holder.add(1, 0, et1, false);
grad_tensor_holder.add(1, 0, et0);
grad_tensor_holder.add(1, 0, et1);
// Buffers()
const auto& buffers = grad_tensor_holder.Buffers();
......@@ -141,8 +142,8 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) {
GradTensorHolder({slot_meta, slot_meta});
// accumulation
grad_tensor_holder.add(0, 0, t1, false);
grad_tensor_holder.add(0, 0, t2, false);
grad_tensor_holder.add(0, 0, t1);
grad_tensor_holder.add(0, 0, t2);
// Buffers()
const auto& buffers = grad_tensor_holder.Buffers();
......
cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps})
cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps})
cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test(test_egr_task_grad SRCS grad_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
cc_test(test_egr_task_grad SRCS grad_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
cc_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node)
cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
endif()
......@@ -34,6 +34,7 @@
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
namespace egr {
......
......@@ -33,8 +33,10 @@
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
#endif
namespace egr {
......
......@@ -33,6 +33,8 @@
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
namespace egr {
TEST(Grad, SingleNodeEmptyGrad) {
......
......@@ -494,6 +494,7 @@ class PADDLE_API Tensor final {
* @return AbstractAutogradMeta*
*/
AbstractAutogradMeta* get_autograd_meta() const;
const std::shared_ptr<AbstractAutogradMeta>& mutable_autograd_meta() const;
/**
* @brief Set the autograd meta object
......
......@@ -354,6 +354,11 @@ AbstractAutogradMeta *Tensor::get_autograd_meta() const {
return autograd_meta_.get();
}
const std::shared_ptr<AbstractAutogradMeta> &Tensor::mutable_autograd_meta()
const {
return autograd_meta_;
}
void Tensor::set_autograd_meta(
std::shared_ptr<AbstractAutogradMeta> autograd_meta) {
autograd_meta_ = std::move(autograd_meta);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册