Unverified commit 6549a041, authored by Weilong Wu, committed by GitHub

[Eager] Support SelectedRows MergeAdd case (#39449)


* Refactor SelectedRows MergeAdd func by using template

* Add GetEmptyInnerTensor func instead of modifying GetInnerMutableTensor

* Update PADDLE_ENFORCE statement

* Remove useless PADDLE_ENFORCE statement

* Polish Code
Parent f73f5b06
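For orientation before the diff: the core change is turning the Variable-only SelectedRowsMerge into a template parameterized on the returned wrapper type, so eager mode's GradTensorHolder can merge two SelectedRows gradients instead of throwing. Below is a minimal sketch of the new call pattern, assuming Paddle's internal headers and build targets; the helper name MergeSelectedRowsGrads is illustrative only and not part of this commit.

// Sketch only: compiles against Paddle's internal targets, not standalone.
#include "paddle/fluid/imperative/gradient_accumulator.h"
#include "paddle/pten/api/include/tensor.h"

std::shared_ptr<paddle::experimental::Tensor> MergeSelectedRowsGrads(
    const paddle::experimental::Tensor& t1,
    const paddle::experimental::Tensor& t2) {
  // ReturnVarType is given explicitly; VarType is deduced from the arguments.
  // This instantiation (and the VariableWrapper one used on the legacy
  // imperative path) is explicitly instantiated in gradient_accumulator.cc.
  return paddle::imperative::SelectedRowsMerge<paddle::experimental::Tensor>(
      t1, t2);
}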
@@ -78,9 +78,9 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
     if (buffer_tensor.is_dense_tensor()) {
       paddle::imperative::SelectedRowsAddToTensor(t, &buffer_tensor);
     } else {
-      PADDLE_THROW(paddle::platform::errors::Fatal(
-          "We don't support Selected Rows merge for now, support it later "
-          "and make all kinds of grads can be merged."));
+      buffer_tensor =
+          std::move(*paddle::imperative::SelectedRowsMerge<
+                    paddle::experimental::Tensor>(t, buffer_tensor));
     }
   }
 }
@@ -20,6 +20,7 @@
 #include "paddle/fluid/eager/grad_node_info.h"
 #include "paddle/fluid/eager/grad_tensor_holder.h"
 #include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/core/selected_rows.h"
 #include "paddle/pten/core/kernel_registry.h"
@@ -102,3 +103,69 @@ TEST(GradTensorHolder, Interfaces) {
   CHECK_EQ(holder_et0_ptr[0], 1.0f);
   CHECK_EQ(holder_et1_ptr[0], 30.0f);
 }
+
+TEST(GradTensorHolder, SelectedRowsMergeAdd) {
+  pten::CPUPlace cpu;
+
+  std::vector<int64_t> rows{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  int64_t table_size = 10;
+  int64_t embedding_width = 10;
+
+  auto sr1 = std::make_shared<pten::SelectedRows>(rows, table_size);
+  auto sr2 = std::make_shared<pten::SelectedRows>(rows, table_size);
+
+  // initialize sparse table 1
+  sr1->mutable_value()->Resize(
+      pten::framework::make_ddim({table_size, embedding_width}));
+  auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu);
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      data_sr1[i * embedding_width + j] = static_cast<float>(i);
+    }
+  }
+
+  // initialize sparse table 2
+  sr2->mutable_value()->Resize(
+      pten::framework::make_ddim({table_size, embedding_width}));
+  auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu);
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      data_sr2[i * embedding_width + j] = static_cast<float>(i);
+    }
+  }
+
+  // wrap the two SelectedRows in paddle::experimental::Tensor
+  paddle::experimental::Tensor t1(sr1);
+  paddle::experimental::Tensor t2(sr2);
+
+  // construct an empty GradTensorHolder
+  GradSlotMeta slot_meta;
+  slot_meta.Init(1);
+  GradTensorHolder grad_tensor_holder =
+      GradTensorHolder({slot_meta, slot_meta});
+
+  // accumulate both tensors into slot 0, rank 0
+  grad_tensor_holder.add(0, 0, t1, false);
+  grad_tensor_holder.add(0, 0, t2, false);
+
+  // Buffers()
+  const auto& buffers = grad_tensor_holder.Buffers();
+  CHECK_EQ(static_cast<int>(buffers.size()), 2);
+  CHECK_EQ(static_cast<int>(buffers[0].size()), 1);
+  CHECK_EQ(static_cast<int>(buffers[1].size()), 1);
+
+  // operator[]
+  const auto& holder_et0 = grad_tensor_holder[0][0];
+
+  auto* tmp_buffer_tensor =
+      static_cast<pten::SelectedRows*>(holder_et0.impl().get());
+  auto* tmp_buffer_data_sr =
+      tmp_buffer_tensor->mutable_value()->mutable_data<float>(cpu);
+
+  // verify the MergeAdd result (accumulation result)
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      EXPECT_EQ(tmp_buffer_data_sr[i * embedding_width + j],
+                (static_cast<float>(i) + static_cast<float>(i)));
+    }
+  }
+}
@@ -44,9 +44,9 @@ if(WITH_GLOO)
 endif()

 if(NOT WITH_ASCEND_CL)
-  cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function)
+  cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function pten_tensor)
 else()
-  cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner)
+  cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner pten_tensor)
 endif()

 add_subdirectory(tests)
@@ -243,6 +243,23 @@ TType& GetInnerTensor(const paddle::experimental::Tensor& src) {
   return *src_tensor;
 }

+template <typename TType>
+TType* GetEmptyInnerTensor(paddle::experimental::Tensor* dst) {
+  PADDLE_ENFORCE_EQ(
+      dst->defined(), false,
+      platform::errors::Fatal(
+          "The underlying Tensor implementation should be nullptr"));
+  dst->set_impl(std::make_shared<TType>());
+  auto* dst_tensor = static_cast<TType*>(dst->impl().get());
+  return dst_tensor;
+}
+
+template <typename TType>
+TType* GetEmptyInnerTensor(paddle::imperative::VariableWrapper* dst) {
+  auto* dst_tensor = dst->MutableVar()->GetMutable<TType>();
+  return dst_tensor;
+}
+
 template <typename VarType>
 void TensorAdd(const VarType& src, VarType* dst) {
   pten::DenseTensor* dst_tensor = GetInnerMutableTensor<pten::DenseTensor>(dst);
@@ -473,13 +490,14 @@ template void SelectedRowsAddTensor(
 // Note(chenweihang): when two selected rows need to be added,
 // adding one to another is not equal to merging the two selected rows
 // into one and then adding it to an empty selected rows; the latter is correct
-std::shared_ptr<VariableWrapper> SelectedRowsMerge(
-    const framework::Variable& src1, const framework::Variable& src2) {
-  auto& src_selected_rows1 = src1.Get<pten::SelectedRows>();
-  auto& src_selected_rows2 = src2.Get<pten::SelectedRows>();
+template <typename ReturnVarType, typename VarType>
+std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
+                                                 const VarType& src2) {
+  const pten::SelectedRows& src_selected_rows1 =
+      GetInnerTensor<pten::SelectedRows>(src1);
+  const pten::SelectedRows& src_selected_rows2 =
+      GetInnerTensor<pten::SelectedRows>(src2);
   auto place = src_selected_rows1.value().place();
   auto data_type =
       framework::TransToProtoVarType(src_selected_rows1.value().dtype());
@@ -488,9 +506,10 @@ std::shared_ptr<VariableWrapper> SelectedRowsMerge(
   std::vector<const pten::SelectedRows*> src_selected_rows;
   src_selected_rows.emplace_back(&src_selected_rows1);
   src_selected_rows.emplace_back(&src_selected_rows2);
-  auto dst_var = std::make_shared<VariableWrapper>("Temp");
-  auto* dst_selected_rows =
-      dst_var->MutableVar()->GetMutable<pten::SelectedRows>();
+  auto dst_var = std::make_shared<ReturnVarType>("Temp");
+  pten::SelectedRows* dst_selected_rows =
+      GetEmptyInnerTensor<pten::SelectedRows>(dst_var.get());

 #define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
   if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
@@ -515,12 +534,17 @@
 #endif

 #undef PADDLE_SELECTED_ROWS_ADD
   PADDLE_THROW(platform::errors::InvalidArgument(
       "Not supported data type %s for SelectedRowsMerge",
       framework::DataTypeToString(data_type)));
 }

+template std::shared_ptr<paddle::experimental::Tensor> SelectedRowsMerge(
+    const paddle::experimental::Tensor& src1,
+    const paddle::experimental::Tensor& src2);
+template std::shared_ptr<paddle::imperative::VariableWrapper> SelectedRowsMerge(
+    const framework::Variable& src1, const framework::Variable& src2);
+
 void VariableWrapperAdd(std::shared_ptr<VariableWrapper> var,
                         VariableWrapper* dst_var, bool unchange_input) {
   auto& src = var->Var();
@@ -547,7 +571,7 @@ void VariableWrapperAdd(std::shared_ptr<VariableWrapper> var,
       *dst = std::move(*(var->MutableVar()));
     }
   } else if (src.IsType<pten::SelectedRows>()) {
-    auto temp = SelectedRowsMerge(src, *dst);
+    auto temp = SelectedRowsMerge<VariableWrapper>(src, *dst);
     *dst = std::move(*(temp->MutableVar()));
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
@@ -603,7 +627,7 @@ void GradientAccumulator::AccumulateGrad() {
         SelectedRowsAddToTensor(*dst, src);
         *dst = std::move(*src);
       } else if (src->IsType<pten::SelectedRows>()) {
-        auto temp = SelectedRowsMerge(*src, *dst);
+        auto temp = SelectedRowsMerge<VariableWrapper>(*src, *dst);
         *dst = std::move(*(temp->MutableVar()));
       }
     } else {
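To unpack the Note(chenweihang) comment in the hunk above: a SelectedRows stores a list of row indices (duplicates allowed) plus a block of values, so accumulating two of them correctly means building a fresh merged result in which duplicate rows are re-summed, rather than adding one operand into the other in place. Below is a toy model of that merge-add semantics in plain C++, not Paddle code; ToySelectedRows and MergeAdd are illustrative names.

// Toy model of SelectedRows merge-add semantics; not Paddle code.
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

struct ToySelectedRows {
  std::vector<int64_t> rows;  // row indices; duplicates are allowed
  std::vector<float> values;  // one value per row entry (width 1 here)
};

// Build a brand-new result; values of duplicate row indices are summed.
ToySelectedRows MergeAdd(const ToySelectedRows& a, const ToySelectedRows& b) {
  std::map<int64_t, float> acc;
  for (size_t i = 0; i < a.rows.size(); ++i) acc[a.rows[i]] += a.values[i];
  for (size_t i = 0; i < b.rows.size(); ++i) acc[b.rows[i]] += b.values[i];
  ToySelectedRows out;
  for (const auto& kv : acc) {
    out.rows.push_back(kv.first);
    out.values.push_back(kv.second);
  }
  return out;
}

int main() {
  ToySelectedRows g1{{0, 2, 0}, {1.f, 2.f, 3.f}};  // row 0 appears twice
  ToySelectedRows g2{{2, 1}, {4.f, 5.f}};
  ToySelectedRows sum = MergeAdd(g1, g2);
  for (size_t i = 0; i < sum.rows.size(); ++i) {
    std::cout << "row " << sum.rows[i] << " -> " << sum.values[i] << "\n";
  }
  // prints: row 0 -> 4, row 1 -> 5, row 2 -> 6
  return 0;
}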
@@ -17,10 +17,10 @@
 #include <memory>
 #include <utility>
 #include <vector>

 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/fluid/imperative/hooks.h"
 #include "paddle/fluid/imperative/layer.h"
 #include "paddle/pten/api/include/tensor.h"

 namespace paddle {
 namespace imperative {
@@ -164,6 +164,10 @@ class SortedGradientAccumulator : public GradientAccumulator {
   std::vector<SavedVarInfo> tmp_grad_vars_;
 };

+template <typename ReturnVarType, typename VarType>
+std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
+                                                 const VarType& src2);
+
 template <typename VarType>
 void SelectedRowsAddToTensor(const VarType& src, VarType* dst);
@@ -12,7 +12,7 @@ else()
 endif(WIN32)

-cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function)
+cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function pten_tensor pten_api pten_api_utils)
 cc_test(test_layer SRCS test_layer.cc DEPS layer proto_desc operator op_registry variable_helper mul_op memcpy)
 cc_test(test_prepare_op SRCS test_prepare_op.cc DEPS prepared_operator op_info split_op layer concat_and_split activation_op place)
 cc_test(test_tracer SRCS test_tracer.cc DEPS tracer layer proto_desc operator op_registry variable_helper mul_op reduce_sum_op elementwise_add_op memcpy)
@@ -29,6 +29,57 @@ namespace framework = paddle::framework;
 namespace paddle {
 namespace imperative {

+TEST(Test__SelectedRowsMerge_Test, SelectedRowsMerge) {
+  pten::CPUPlace cpu;
+
+  std::vector<int64_t> rows{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  int64_t table_size = 10;
+  int64_t embedding_width = 10;
+
+  auto sr1 = std::make_shared<pten::SelectedRows>(rows, table_size);
+  auto sr2 = std::make_shared<pten::SelectedRows>(rows, table_size);
+
+  // initialize sparse table 1
+  sr1->mutable_value()->Resize(
+      pten::framework::make_ddim({table_size, embedding_width}));
+  auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu);
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      data_sr1[i * embedding_width + j] = static_cast<float>(i);
+    }
+  }
+
+  // initialize sparse table 2
+  sr2->mutable_value()->Resize(
+      pten::framework::make_ddim({table_size, embedding_width}));
+  auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu);
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      data_sr2[i * embedding_width + j] = static_cast<float>(i);
+    }
+  }
+
+  // wrap the two SelectedRows in paddle::experimental::Tensor
+  paddle::experimental::Tensor t1(sr1);
+  paddle::experimental::Tensor t2(sr2);
+
+  // call SelectedRowsMerge
+  auto new_buffer =
+      paddle::imperative::SelectedRowsMerge<paddle::experimental::Tensor>(t1,
+                                                                          t2);
+  auto* new_buffer_tensor =
+      static_cast<pten::SelectedRows*>(new_buffer->impl().get());
+  auto* new_buffer_data_sr1 =
+      new_buffer_tensor->mutable_value()->mutable_data<float>(cpu);
+
+  // verify the MergeAdd result
+  for (int64_t i = 0; i < table_size; ++i) {
+    for (int64_t j = 0; j < embedding_width; ++j) {
+      EXPECT_EQ(new_buffer_data_sr1[i * embedding_width + j],
+                (static_cast<float>(i) + static_cast<float>(i)));
+    }
+  }
+}
+
 template <typename Place1, typename Place2, typename T>
 int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) {
   framework::Variable var1;