Unverified commit 1cddcd70, authored by J Jiabin Yang, committed by GitHub

[Eager] Support Gradient Accumulation for sr (#42371)

* Support Gradient Accumulation for sr

* add ut

* change ut to fit small vector
Parent 4e66010b
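The core change in this commit is in CopyOrAddTensor on the eager GradNodeAccumulation path: instead of unconditionally calling TensorAdd, it now dispatches on whether the incoming gradient and the accumulator are backed by a DenseTensor or a SelectedRows. The sketch below exercises the SelectedRows + SelectedRows path; it is a minimal illustration and not part of the commit. The helper and function names are hypothetical, the include list is an assumption (and not exhaustive), and the individual calls are taken from the diff hunk and the unit tests added here.

// Minimal sketch (not part of the commit): accumulate two SelectedRows
// gradients the way the final `else` branch of CopyOrAddTensor does.
// Include paths are assumed and illustrative, not verified.
#include <memory>
#include <utility>
#include <vector>

#include "paddle/fluid/imperative/gradient_accumulator.h"  // SelectedRowsMerge (assumed header)
#include "paddle/phi/core/selected_rows.h"

// Hypothetical helper: a 1x1 SelectedRows-backed Tensor holding `value`
// at row 0 on CPU, built exactly like the inputs in the unit tests below.
paddle::experimental::Tensor MakeSelectedRowsGrad(float value) {
  std::vector<int64_t> rows = {0};
  auto sr = std::make_shared<phi::SelectedRows>(rows, /*height=*/1);
  sr->mutable_value()->Resize(phi::make_ddim({1, 1}));
  sr->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
      value;
  return paddle::experimental::Tensor(sr);
}

void AccumulateTwoSparseGrads() {
  paddle::experimental::Tensor grad = MakeSelectedRowsGrad(10.0f);
  paddle::experimental::Tensor accumulator = MakeSelectedRowsGrad(20.0f);
  // Same call as the SelectedRows + SelectedRows branch of CopyOrAddTensor.
  accumulator = std::move(
      *paddle::imperative::SelectedRowsMerge<paddle::experimental::Tensor>(
          grad, accumulator));
  // accumulator now holds 10.0f + 20.0f = 30.0f at row 0.
}

Which routine runs depends on both sides: TensorAdd for dense + dense, SelectedRowsAddToTensor when a SelectedRows grad flows into a dense accumulator, SelectedRowsAddTensor (through a temporary DenseTensor) when a dense grad flows into a SelectedRows accumulator, and SelectedRowsMerge when both are SelectedRows.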
......@@ -34,7 +34,42 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
*tensor = t;
} else {
// Accumulation
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t, tensor);
PADDLE_ENFORCE_EQ(t.initialized(), true,
paddle::platform::errors::Fatal(
"We can only accumulate initialized tensor, but we "
"got tensor: %s is empty please check you network "
"and make sure it creates grads.",
t.name()));
PADDLE_ENFORCE_NOT_NULL(
tensor, paddle::platform::errors::Fatal(
"We can only accumulate initialized tensor to non-nullptr "
"tensor but we got nullptr please check you network "
"and make sure it creates grads."));
if (t.is_dense_tensor()) {
if (tensor->is_dense_tensor()) {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t, tensor);
} else {
// TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with
// add_dygraph_function once it's supported
paddle::experimental::Tensor new_buffer(
std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
paddle::imperative::SelectedRowsAddTensor(*tensor, t, &new_buffer);
tensor->set_impl(new_buffer.impl());
}
} else {
// TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function
// once it's supported
if (tensor->is_dense_tensor()) {
paddle::imperative::SelectedRowsAddToTensor(t, tensor);
} else {
*tensor = std::move(*paddle::imperative::SelectedRowsMerge<
paddle::experimental::Tensor>(t, *tensor));
}
}
}
}
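For the mixed case above where the accumulator is dense and the incoming grad is SelectedRows, the branch calls SelectedRowsAddToTensor in place. Below is a minimal hedged sketch of that path: the function name is hypothetical, the include paths are assumptions, and the constructor calls are copied from the unit tests added in this commit.

#include <memory>
#include <vector>

#include "paddle/fluid/imperative/gradient_accumulator.h"  // SelectedRowsAddToTensor (assumed header)
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/selected_rows.h"

void AccumulateSparseIntoDense() {
  // Dense accumulator holding 20.0f, built as in the unit tests below.
  phi::DenseTensorMeta meta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
  auto dense = std::make_shared<phi::DenseTensor>(
      std::make_unique<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace())
          .get(),
      meta);
  dense->mutable_data<float>(paddle::platform::CPUPlace())[0] = 20.0f;
  paddle::experimental::Tensor accumulator(dense);

  // Incoming SelectedRows gradient holding 10.0f at row 0.
  std::vector<int64_t> rows = {0};
  auto sr = std::make_shared<phi::SelectedRows>(rows, /*height=*/1);
  sr->mutable_value()->Resize(phi::make_ddim({1, 1}));
  sr->mutable_value()->mutable_data<float>(
      paddle::platform::CPUPlace())[0] = 10.0f;
  paddle::experimental::Tensor grad(sr);

  // Same call as the dense-accumulator branch of CopyOrAddTensor above;
  // afterwards the accumulator holds 30.0f.
  paddle::imperative::SelectedRowsAddToTensor(grad, &accumulator);
}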
......
......@@ -416,10 +416,6 @@ class FunctionGeneratorBase:
self.forward_outputs_position_map[
return_name] = [return_type, return_pos]
print("Generated Forward Input Position Map: ",
self.forward_inputs_position_map)
print("Generated Forward Output Position Map: ",
self.forward_outputs_position_map)
class YamlGeneratorBase:
......
......@@ -551,12 +551,6 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
self.backward_inputs_list, self.backward_attrs_list, self.backward_returns_list = ParseYamlBackward(
backward_args_str, backward_returns_str)
logging.info(
f"Parsed Backward Inputs List: {self.backward_inputs_list}")
logging.info(f"Prased Backward Attrs List: {self.backward_attrs_list}")
logging.info(
f"Parsed Backward Returns List: {self.backward_returns_list}")
def CollectForwardInfoFromBackwardContents(self):
backward_forward_str = self.backward_forward_str
......@@ -628,15 +622,6 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
backward_output_type, matched_forward_input_pos,
backward_output_pos
]
logging.info(
f"Generated Backward Fwd Input Map: {self.backward_forward_inputs_map}"
)
logging.info(
f"Generated Backward Grad Input Map: {self.backward_grad_inputs_map}"
)
logging.info(
f"Generated Backward Grad Output Map: {self.backward_grad_outputs_map}"
)
def GenerateNodeCreationCodes(self):
forward_api_name = self.forward_api_name
......@@ -1044,11 +1029,6 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
returns_str)
self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n"
logging.info(
f"Generated Forward Definition: {self.forward_definition_str}")
logging.info(
f"Generated Forward Declaration: {self.forward_declaration_str}")
def GenerateInplacedForwardDygraphFunctions(self):
# Inplaced Version Dygraph Function Generation
forward_api_name = self.forward_api_name
......@@ -1234,8 +1214,6 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
set_attribute_methods_str, tensor_wrapper_members_str,
attribute_members_str)
logging.info(f"Generated Node Declaration: {self.node_declaration_str}")
def GenerateNodeDefinition(self, grad_node_creation_str):
namespace = self.namespace
forward_api_name = self.forward_api_name
......@@ -1439,8 +1417,6 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
outputs_autograd_meta_str, compute_require_grad_str,
grad_node_creation_str, returns_str)
logging.info(f"Generated Node Definition: {self.node_definition_str}")
def run(self):
super().run()
......
......@@ -399,35 +399,15 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase):
# Initialized orig_forward_inputs_list, orig_forward_returns_list, orig_forward_attrs_list
self.CollectOriginalForwardInfo()
logging.info(
f"Parsed Original Forward Inputs List: \n{self.orig_forward_inputs_list}"
)
logging.info(
f"Prased Original Forward Attrs List: \n{self.orig_forward_attrs_list}"
)
logging.info(
f"Parsed Original Forward Returns List: \n{self.orig_forward_returns_list}"
)
if SkipAPIGeneration(self.forward_api_name): return False
# Initialized forward_inputs_position_map, forward_outputs_position_map
self.DetermineForwardPositionMap(self.orig_forward_inputs_list,
self.orig_forward_returns_list)
logging.info(
f"Generated Forward Input Position Map: {self.forward_inputs_position_map}"
)
logging.info(
f"Generated Forward Output Position Map: {self.forward_outputs_position_map}"
)
# Code Generation
self.GeneratePythonCFunction()
logging.info(
f"Generated Python-C Function: {self.python_c_function_str}")
logging.info(
f"Generated Python-C Function Declaration: {self.python_c_function_reg_str}"
)
return True
......@@ -536,8 +516,6 @@ if __name__ == "__main__":
python_c_str = GeneratePythonCWrappers(generated_python_c_functions,
generated_python_c_registration)
logging.info(f"Generated Python-C Codes: \n{python_c_str}")
output_path = args.output_path
for path in [output_path]:
if os.path.exists(path):
......
......@@ -21,14 +21,219 @@
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/grad_tensor_holder.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/selected_rows.h"
// TODO(jiabin): remove nolint here!!!
using namespace egr; // NOLINT
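The three new tests that follow all drive GradNodeAccumulation with two gradients, 10.0f and then 20.0f, packed into paddle::small_vector inputs (the "fit small vector" adjustment from the commit message), and expect the leaf tensor's retained grad to end up at 30.0f:

* SelectedRowsAddToTensor: a SelectedRows grad, then a dense grad, accumulated into a SelectedRows-initialized grad; the result is read back as a DenseTensor.
* SelectedRowsMerge: two SelectedRows grads accumulated into a SelectedRows-initialized grad; the result is read back as a SelectedRows.
* SelectedRowsAddTensor: two SelectedRows grads accumulated into a DenseTensor-initialized grad; the result is read back as a DenseTensor.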
TEST(AccumulationNode, SelectedRowsAddToTensor) {
// Construct Eager Tensor
phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::vector<int64_t> rows = {0};
std::shared_ptr<phi::SelectedRows> sr0 =
std::make_shared<phi::SelectedRows>(rows, 1);
sr0->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(sr0);
std::shared_ptr<phi::DenseTensor> dt1 = std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
dt1->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(dt1);
std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
paddle::experimental::Tensor input_et =
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor
std::shared_ptr<phi::SelectedRows> grad_dt =
std::make_shared<phi::SelectedRows>(rows, 1);
grad_dt->mutable_value()->Resize(phi::make_ddim({1, 1}));
grad_dt->mutable_value()->mutable_data<float>(
paddle::platform::CPUPlace())[0] = static_cast<float>(0.0f);
grad_meta->MutableGrad()->set_impl(grad_dt);
// AccumulationNode
auto node = std::make_shared<GradNodeAccumulation>(grad_meta);
grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false);
// operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl())
->value()
.data<float>();
CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl())
->data<float>();
CHECK_EQ(ret_et1_ptr[0], static_cast<float>(20.0f));
// Check Retain Grad
CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(et0.impl())
->value()
.data<float>()[0],
static_cast<float>(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())->data<float>();
CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f));
}
TEST(AccumulationNode, SelectedRowsMerge) {
// Construct Eager Tensor
phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::vector<int64_t> rows = {0};
std::shared_ptr<phi::SelectedRows> sr0 =
std::make_shared<phi::SelectedRows>(rows, 1);
sr0->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(sr0);
std::shared_ptr<phi::SelectedRows> sr1 =
std::make_shared<phi::SelectedRows>(rows, 1);
sr1->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(sr1);
std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
paddle::experimental::Tensor input_et =
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor
std::shared_ptr<phi::SelectedRows> grad_dt =
std::make_shared<phi::SelectedRows>(rows, 1);
grad_dt->mutable_value()->Resize(phi::make_ddim({1, 1}));
grad_dt->mutable_value()->mutable_data<float>(
paddle::platform::CPUPlace())[0] = static_cast<float>(0.0f);
grad_meta->MutableGrad()->set_impl(grad_dt);
// AccumulationNode
auto node = std::make_shared<GradNodeAccumulation>(grad_meta);
grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false);
// operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl())
->value()
.data<float>();
CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et1.impl())
->value()
.data<float>();
CHECK_EQ(ret_et1_ptr[0], static_cast<float>(20.0f));
// Check Retain Grad
CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(et0.impl())
->value()
.data<float>()[0],
static_cast<float>(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr = std::dynamic_pointer_cast<phi::SelectedRows>(grad->impl())
->value()
.data<float>();
CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f));
}
TEST(AccumulationNode, SelectedRowsAddTensor) {
// Construct Eager Tensor
phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::vector<int64_t> rows = {0};
std::shared_ptr<phi::SelectedRows> sr0 =
std::make_shared<phi::SelectedRows>(rows, 1);
sr0->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(10.0f);
paddle::experimental::Tensor et0 = paddle::experimental::Tensor(sr0);
std::shared_ptr<phi::SelectedRows> sr1 =
std::make_shared<phi::SelectedRows>(rows, 1);
sr1->mutable_value()->Resize(phi::make_ddim({1, 1}));
sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(20.0f);
paddle::experimental::Tensor et1 = paddle::experimental::Tensor(sr1);
std::shared_ptr<phi::DenseTensor> input_dt =
std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
paddle::experimental::Tensor input_et =
paddle::experimental::Tensor(input_dt);
auto grad_meta = EagerUtils::autograd_meta(&input_et);
// Initialize Grad Tensor
std::shared_ptr<phi::DenseTensor> grad_dt =
std::make_shared<phi::DenseTensor>(
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace())
.get(),
meta);
grad_dt->mutable_data<float>(paddle::platform::CPUPlace())[0] =
static_cast<float>(0.0f);
grad_meta->MutableGrad()->set_impl(grad_dt);
// AccumulationNode
auto node = std::make_shared<GradNodeAccumulation>(grad_meta);
grad_meta->SetGradNode(node);
grad_meta->SetStopGradient(false);
// operator()
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et0.impl())
->value()
.data<float>();
CHECK_EQ(ret_et0_ptr[0], static_cast<float>(10.0f));
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr =
std::dynamic_pointer_cast<phi::SelectedRows>(ret_et1.impl())
->value()
.data<float>();
CHECK_EQ(ret_et1_ptr[0], static_cast<float>(20.0f));
// Check Retain Grad
CHECK_EQ(std::dynamic_pointer_cast<phi::SelectedRows>(et0.impl())
->value()
.data<float>()[0],
static_cast<float>(10.0f));
paddle::experimental::Tensor* grad = EagerUtils::mutable_grad(input_et);
auto* grad_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(grad->impl())->data<float>();
CHECK_EQ(grad_ptr[0], static_cast<float>(30.0f));
}
TEST(AccumulationNode, Tensor) {
// Construct Eager Tensor
......